Browse code

add queryGeneTable function to help translate genes

LiNk-NY authored on 19/04/2022 22:39:39
Showing 4 changed files

... ...
@@ -19,6 +19,7 @@ export(loadStudy)
19 19
 export(molecularData)
20 20
 export(molecularProfiles)
21 21
 export(mutationData)
22
+export(queryGeneTable)
22 23
 export(removeDataCache)
23 24
 export(removePackCache)
24 25
 export(sampleLists)
... ...
@@ -116,6 +116,8 @@ utils::globalVariables(c("clinicalAttributeId", "value", "sampleId"))
116 116
 #'
117 117
 #' getGenePanel(api = cbio, genePanelId = "IMPACT341")
118 118
 #'
119
+#' queryGeneTable(api = cbio, by = "entrezGeneId", genes = 7157)
120
+#'
119 121
 #' @export
120 122
 cBioPortal <- function(
121 123
     hostname = "www.cbioportal.org",
... ...
@@ -405,27 +407,6 @@ searchOps <- function(api, keyword) {
405 407
         value = TRUE, ignore.case = TRUE)
406 408
 }
407 409
 
408
-#' @name cBioPortal
409
-#'
410
-#' @section API Metadata:
411
-#'     * geneTable - Get a table of all genes by 'entrezGeneId' or
412
-#'     'hugoGeneSymbol'
413
-#'
414
-#' @param pageSize numeric(1) The number of rows in the table to return
415
-#'
416
-#' @param pageNumber numeric(1) The pagination page number
417
-#'
418
-#' @param ... Additional arguments to lower level API functions
419
-#'
420
-#' @export
421
-geneTable <- function(api, pageSize = 1000, pageNumber = 0, ...) {
422
-    if (missing(api))
423
-        stop("Provide a valid 'api' from 'cBioPortal()'")
424
-
425
-    .invoke_bind(api, "getAllGenesUsingGET", FALSE, pageSize = pageSize,
426
-        pageNumber = pageNumber, ...)
427
-}
428
-
429 410
 #' @name cBioPortal
430 411
 #'
431 412
 #' @section Sample Data:
... ...
@@ -485,6 +466,33 @@ allSamples <- function(api, studyId = NA_character_) {
485 466
         studyId = list(studyId = studyId))
486 467
 }
487 468
 
469
+#' @name cBioPortal
470
+#'
471
+#' @section Sample Data:
472
+#'     * getSampleInfo - Obtain sample metadata for a particular `studyId` or
473
+#'     `sampleListId`
474
+#' @export
475
+getSampleInfo <-
476
+    function(api, studyId = NA_character_, sampleListIds = NULL,
477
+        projection = c("SUMMARY", "ID", "DETAILED", "META"))
478
+{
479
+    if (missing(api))
480
+        stop("Provide a valid 'api' from 'cBioPortal()'")
481
+    projection <- match.arg(projection)
482
+    if (!is.null(sampleListIds))
483
+        queryobj <- list(sampleListIds = sampleListIds)
484
+    else
485
+        queryobj <- list(sampleIdentifiers =
486
+            as.data.frame(
487
+                allSamples(api, studyId)[, c("sampleId", "studyId")]
488
+            )
489
+        )
490
+
491
+    .invoke_bind(api = api, name = "fetchSamplesUsingPOST", use_cache = FALSE,
492
+        projection = projection, sampleIdentifiers = queryobj
493
+    )
494
+}
495
+
488 496
 #' @name cBioPortal
489 497
 #'
490 498
 #' @section API Metadata:
... ...
@@ -517,8 +525,8 @@ getGenePanel <- function(api, genePanelId = NA_character_) {
517 525
 #' @name cBioPortal
518 526
 #'
519 527
 #' @section Gene Panels:
520
-#'     * genePanelMolecular - get gene panel data for a paricular
521
-#'     `molecularProfileId` and `sampleListId` combination
528
+#'     * genePanelMolecular - get gene panel data for a particular
529
+#'     `molecularProfileId` and either a vector of `sampleListId` or `sampleId`
522 530
 #'
523 531
 #' @param sampleListId character(1) A sample list identifier as obtained from
524 532
 #'     `sampleLists()`
... ...
@@ -548,8 +556,8 @@ genePanelMolecular <-
548 556
 #' @name cBioPortal
549 557
 #'
550 558
 #' @section Gene Panels:
551
-#'     * getGenePanelMolecular - get gene panel data for a combination of
552
-#'     `molecularProfileId` and `sampleListId` vectors
559
+#'     * getGenePanelMolecular - get gene panel data for multiple
560
+#'     `molecularProfileId`s and a vector of `sampleIds`
553 561
 #'
554 562
 #' @export
555 563
 getGenePanelMolecular <-
... ...
@@ -578,52 +586,53 @@ getGenePanelMolecular <-
578 586
 
579 587
 #' @name cBioPortal
580 588
 #'
581
-#' @section Sample Data:
582
-#'     * getSampleInfo - Obtain sample metadata for a particular `studyId` or
583
-#'     `sampleListId`
589
+#' @section API Metadata:
590
+#'     * geneTable - Get a table of all genes by 'entrezGeneId' and
591
+#'     'hugoGeneSymbol'
592
+#'
593
+#' @param pageSize numeric(1) The number of rows in the table to return
594
+#'
595
+#' @param pageNumber numeric(1) The pagination page number
596
+#'
597
+#' @param ... Additional arguments to lower level API functions
598
+#'
584 599
 #' @export
585
-getSampleInfo <-
586
-    function(api, studyId = NA_character_, sampleListIds = NULL,
587
-        projection = c("SUMMARY", "ID", "DETAILED", "META"))
588
-{
600
+geneTable <- function(api, pageSize = 1000, pageNumber = 0, ...) {
589 601
     if (missing(api))
590 602
         stop("Provide a valid 'api' from 'cBioPortal()'")
591
-    projection <- match.arg(projection)
592
-    if (!is.null(sampleListIds))
593
-        queryobj <- list(sampleListIds = sampleListIds)
594
-    else
595
-        queryobj <- list(sampleIdentifiers =
596
-            as.data.frame(
597
-                allSamples(api, studyId)[, c("sampleId", "studyId")]
598
-            )
599
-        )
600 603
 
601
-    .invoke_bind(api = api, name = "fetchSamplesUsingPOST", use_cache = FALSE,
602
-        projection = projection, sampleIdentifiers = queryobj
603
-    )
604
+    .invoke_bind(api, "getAllGenesUsingGET", FALSE, pageSize = pageSize,
605
+        pageNumber = pageNumber, ...)
604 606
 }
605 607
 
606
-.resolveFeatures <- function(api, by, genes, genePanelId) {
607
-    isSingleNA <- function(x) { length(x) == 1L && is.na(x) }
608
+#' @name cBioPortal
609
+#'
610
+#' @section API Metadata:
611
+#'     * queryGeneTable - Get a table for only the `genes` or `genePanelId` of
612
+#'     interest. Gene inputs are identified with the `by` argument
613
+#'
614
+#' @export
615
+queryGeneTable <- function(
616
+        api,
617
+        by = c("entrezGeneId", "hugoGeneSymbol"),
618
+        genes = NA_character_,
619
+        genePanelId = NA_character_
620
+) {
621
+    all.na <- function(x) all(is.na(x))
608 622
 
609
-    if (isSingleNA(genes) && isSingleNA(genePanelId))
623
+    if (all.na(genes) && all.na(genePanelId))
610 624
         stop("Provide either 'genes' or 'genePanelId'")
611 625
 
626
+    by <- match.arg(by)
612 627
     geneIdType <- switch(
613
-        by, entrezGeneId = "ENTREZ_GENE_ID", 'HUGO_GENE_SYMBOL'
628
+        by, entrezGeneId = "ENTREZ_GENE_ID", hugoGeneSymbol = 'HUGO_GENE_SYMBOL'
614 629
     )
615 630
 
616
-    feats <- genes
617
-    if (identical(by, "hugoGeneSymbol") && !all(is.na(genes)))
618
-        feats <- .invoke_bind(api, "fetchGenesUsingPOST", TRUE,
631
+    if (!all.na(genes))
632
+        .invoke_bind(api, "fetchGenesUsingPOST", TRUE,
619 633
             geneIdType = geneIdType, geneIds = as.character(genes))
620 634
     else
621
-        feats <- tibble::tibble(entrezGeneId = genes)
622
-
623
-    if (isSingleNA(genes))
624
-        feats <- getGenePanel(api, genePanelId = genePanelId)
625
-
626
-    feats
635
+        getGenePanel(api, genePanelId = genePanelId)
627 636
 }
628 637
 
629 638
 #' @name cBioPortal
... ...
@@ -657,7 +666,7 @@ getDataByGenes <-
657 666
 
658 667
     by <- match.arg(by)
659 668
 
660
-    feats <- .resolveFeatures(api, by, genes, genePanelId)
669
+    feats <- queryGeneTable(api, by, genes, genePanelId)
661 670
 
662 671
     digi <- digest::digest(
663 672
         list("getDataByGenes", api, studyId, feats, sampleIds,
... ...
@@ -145,7 +145,7 @@ removePackCache <- function(cancer_study_id, dry.run = TRUE) {
145 145
     else if (is.null(sampleIds))
146 146
         sampleIds <- allSamples(api, studyId)[["sampleId"]]
147 147
 
148
-    feats <- .resolveFeatures(
148
+    feats <- queryGeneTable(
149 149
         api = api, by = by, genes = genes, genePanelId = genePanelId
150 150
     )
151 151
     digi <- digest::digest(
... ...
@@ -8,15 +8,16 @@
8 8
 \alias{mutationData}
9 9
 \alias{molecularData}
10 10
 \alias{searchOps}
11
-\alias{geneTable}
12 11
 \alias{samplesInSampleLists}
13 12
 \alias{sampleLists}
14 13
 \alias{allSamples}
14
+\alias{getSampleInfo}
15 15
 \alias{genePanels}
16 16
 \alias{getGenePanel}
17 17
 \alias{genePanelMolecular}
18 18
 \alias{getGenePanelMolecular}
19
-\alias{getSampleInfo}
19
+\alias{geneTable}
20
+\alias{queryGeneTable}
20 21
 \alias{getDataByGenes}
21 22
 \title{The R interface to the cBioPortal API Data Service}
22 23
 \usage{
... ...
@@ -53,14 +54,19 @@ molecularData(
53 54
 
54 55
 searchOps(api, keyword)
55 56
 
56
-geneTable(api, pageSize = 1000, pageNumber = 0, ...)
57
-
58 57
 samplesInSampleLists(api, sampleListIds = NA_character_)
59 58
 
60 59
 sampleLists(api, studyId = NA_character_)
61 60
 
62 61
 allSamples(api, studyId = NA_character_)
63 62
 
63
+getSampleInfo(
64
+  api,
65
+  studyId = NA_character_,
66
+  sampleListIds = NULL,
67
+  projection = c("SUMMARY", "ID", "DETAILED", "META")
68
+)
69
+
64 70
 genePanels(api)
65 71
 
66 72
 getGenePanel(api, genePanelId = NA_character_)
... ...
@@ -74,11 +80,13 @@ genePanelMolecular(
74 80
 
75 81
 getGenePanelMolecular(api, molecularProfileIds = NA_character_, sampleIds)
76 82
 
77
-getSampleInfo(
83
+geneTable(api, pageSize = 1000, pageNumber = 0, ...)
84
+
85
+queryGeneTable(
78 86
   api,
79
-  studyId = NA_character_,
80
-  sampleListIds = NULL,
81
-  projection = c("SUMMARY", "ID", "DETAILED", "META")
87
+  by = c("entrezGeneId", "hugoGeneSymbol"),
88
+  genes = NA_character_,
89
+  genePanelId = NA_character_
82 90
 )
83 91
 
84 92
 getDataByGenes(
... ...
@@ -127,12 +135,6 @@ type for data retrieval for details see API documentation}
127 135
 \item{keyword}{character(1) Keyword or pattern for searching through
128 136
 available operations}
129 137
 
130
-\item{pageSize}{numeric(1) The number of rows in the table to return}
131
-
132
-\item{pageNumber}{numeric(1) The pagination page number}
133
-
134
-\item{...}{Additional arguments to lower level API functions}
135
-
136 138
 \item{sampleListIds}{character() A vector of 'sampleListId' as obtained from
137 139
 `sampleLists`}
138 140
 
... ...
@@ -144,13 +146,19 @@ from the `genePanels` function}
144 146
 \item{sampleListId}{character(1) A sample list identifier as obtained from
145 147
 `sampleLists()`}
146 148
 
149
+\item{pageSize}{numeric(1) The number of rows in the table to return}
150
+
151
+\item{pageNumber}{numeric(1) The pagination page number}
152
+
153
+\item{...}{Additional arguments to lower level API functions}
154
+
155
+\item{by}{character(1) Either 'entrezGeneId' or 'hugoGeneSymbol' for row
156
+metadata (default: 'entrezGeneId')}
157
+
147 158
 \item{genes}{character() Either Entrez gene identifiers or Hugo gene
148 159
 symbols. When included, the 'by' argument indicates the type of
149 160
 identifier provided and 'genePanelId' is ignored. Preference is
150 161
 given to Entrez IDs due to faster query responses.}
151
-
152
-\item{by}{character(1) Either 'entrezGeneId' or 'hugoGeneSymbol' for row
153
-metadata (default: 'entrezGeneId')}
154 162
 }
155 163
 \value{
156 164
 cBioPortal: An API object of class 'cBioPortal'
... ...
@@ -182,10 +190,6 @@ This section of the documentation lists the functions that
182 190
     * searchOps - Search through API operations with a keyword
183 191
 
184 192
 
185
-    * geneTable - Get a table of all genes by 'entrezGeneId' or
186
-    'hugoGeneSymbol'
187
-
188
-
189 193
     * sampleLists - obtain all `sampleListIds` for a particular `studyId`
190 194
 
191 195
 
... ...
@@ -193,6 +197,14 @@ This section of the documentation lists the functions that
193 197
 
194 198
 
195 199
     * genePanels - Show all available gene panels
200
+
201
+
202
+    * geneTable - Get a table of all genes by 'entrezGeneId' and
203
+    'hugoGeneSymbol'
204
+
205
+
206
+    * queryGeneTable - Get a table for only the `genes` or `genePanelId` of
207
+    interest. Gene inputs are identified with the `by` argument
196 208
 }
197 209
 
198 210
 \section{Patient Data}{
... ...
@@ -231,12 +243,12 @@ This section of the documentation lists the functions that
231 243
     * getGenePanels - Obtain the gene panel for a particular 'genePanelId'
232 244
 
233 245
 
234
-    * genePanelMolecular - get gene panel data for a paricular
235
-    `molecularProfileId` and `sampleListId` combination
246
+    * genePanelMolecular - get gene panel data for a particular
247
+    `molecularProfileId` and either a vector of `sampleListId` or `sampleId`
236 248
 
237 249
 
238
-    * getGenePanelMolecular - get gene panel data for a combination of
239
-    `molecularProfileId` and `sampleListId` vectors
250
+    * getGenePanelMolecular - get gene panel data for multiple
251
+    `molecularProfileId`s and a vector of `sampleIds`
240 252
 }
241 253
 
242 254
 \section{Genes}{
... ...
@@ -287,6 +299,8 @@ genePanels(api = cbio)
287 299
 
288 300
 getGenePanel(api = cbio, genePanelId = "IMPACT341")
289 301
 
302
+queryGeneTable(api = cbio, by = "entrezGeneId", genes = 7157)
303
+
290 304
 
291 305
 getDataByGenes(
292 306
     cbio, studyId = "acc_tcga", genes = 1:3,