Bioconductor Code: TCGAbiolinks

Browse code

Update

Merge branch 'devel' into RELEASE_3_17

# Conflicts:
# DESCRIPTION

Tiago Silva authored on 05/10/2023 16:52:55
Showing 5 changed files

DESCRIPTION index bc6febda..81356cba 100644
R/classifiers.R index fc6ae177..ac0129af 100644
R/clinical.R index 9ddba41d..94401e18 100644
R/prepare.R index 0dc40765..c7bbb34a 100644
vignettes/classifiers.Rmd index fcd40cd8..8ed46b1c 100755

History View file @ 40ea78f4

@@ -1,7 +1,7 @@
                      Package: TCGAbiolinks
                      Type: Package
                      Title: TCGAbiolinks: An R/Bioconductor package for integrative analysis with GDC data
                     -Version: 2.28.3
                     +Version: 2.28.4
                      Date: 2023-06-06
                      Author: Antonio Colaprico,
                          Tiago Chedraoui Silva,

R/classifiers.R

History View file @ 40ea78f4

@@ -51,20 +51,21 @@ gliomaClassifier <- function(data){
                          data(list = models, package = "TCGAbiolinksGUI.data",envir = env)
                          for(i in models){
                     -        model <- get(i,envir = env)
                     +        message("------------------------------------------------------")
                     +        message("Model: ",i)
                     +        model <- get(i, envir = env)
                              # If it is a Summarized Experiment object
                              # keep only probes used in the model
                              aux <- met[,colnames(met) %in% colnames(model$trainingData),drop = FALSE]
+                    -
                              # This should not happen!
                              if(any(apply(aux,2,function(x) all(is.na(x))))) {
                     -            print("NA columns")
                     +            message("o Probes has NA value for all samples. Setting to 0.5 since model does not accept NA")
                                  aux[,apply(aux,2,function(x) all(is.na(x)))] <- 0.5
+                             }
                              if(any(apply(aux,2,function(x) any(is.na(x))))) {
                     -            print("NA values")
                     +            message("o Probes has NA values for some samples. Setting values a the median of the sample since model does not accept NA ")
                                  colMedians <- colMedians(aux,na.rm = TRUE)
                                  x <- which(is.na(aux),arr.ind = TRUE)
                                  for(l in 1:nrow(x)){
@@ -72,6 +73,15 @@ gliomaClassifier <- function(data){
+                                 }
+                             }
                     +        # For missing probes add values to 0.5
                     +        missing_probes <- setdiff(colnames(model$trainingData), colnames(met))
                     +        if(length(missing_probes) > 0) {
                     +            message("o Probes are missing. Setting dummy probes to matrix with 0.5 value to all samples.")
                     +            missing_probes_matrix <- matrix(rep(0.5, nrow(met) * length(missing_probes)),nrow = nrow(met))
                     +            colnames(missing_probes_matrix) <- missing_probes
                     +            aux <- bind_cols(aux,missing_probes_matrix)
                     +        }
+                    +
                              pred <- predict(model, aux)
                              pred.prob <- predict(model, aux, type = "prob")
                              colnames(pred.prob) <- paste0(i,"_", colnames(pred.prob))

R/clinical.R

History View file @ 40ea78f4

@@ -296,7 +296,7 @@ GDCquery_clinic <- function(
                                  if("treatments" %in% colnames(df)){
                                      treatments <- rbindlist(df$treatments,fill = TRUE)
                                      df$treatments <- NULL
                     -                treatments$submitter_id <- gsub("_treatment(_[0-9])?","", treatments$submitter_id)
                     +                treatments$submitter_id <- gsub("_treatment(_[0-9])?|_treatment([0-9])?","", treatments$submitter_id)
                                      treatments <- treatments[,-c("updated_datetime", "state", "created_datetime")]
                                      # we have now two types of treatment
@@ -402,7 +402,8 @@ GDCquery_clinic <- function(
                                  df$project <- project
                                  df <- df %>% dplyr::relocate(project)
+                             }
                     -        if(nrow(results) != nrow(df)){
+                    +
                     +        if (nrow(results) != nrow(df)) {
                                  stop("Error: API returned more information")
+                             }

R/prepare.R

History View file @ 40ea78f4

@@ -139,7 +139,7 @@ GDCprepare <- function(
+                         }
                          cases <- ifelse(
                     -        grepl("TCGA|TARGET|CGCI-HTMCP-CC",query$results[[1]]$project %>% unlist()),
                     +        grepl("TCGA|TARGET|CGCI-HTMCP-CC|CPTAC-2",query$results[[1]]$project %>% unlist()),
                              query$results[[1]]$cases,
                              query$results[[1]]$sample.submitter_id
+                         )
@@ -478,35 +478,21 @@ readmiRNAIsoformQuantification <- function (files, cases){
                          setDF(df)
+                     }
+                    +
                      readSimpleNucleotideVariationMaf <- function(files){
                     -    ret <- plyr::adply(.data = files,.margins = 1,.fun = function(f){
                     -        readr::read_tsv(
                     -            f,
                     -            comment = "#",
                     -            col_types = readr::cols(
                     -                Entrez_Gene_Id = col_integer(),
                     -                Start_Position = col_integer(),
                     -                End_Position = col_integer(),
                     -                t_depth = col_integer(),
                     -                t_ref_count = col_integer(),
                     -                t_alt_count = col_integer(),
                     -                n_depth = col_integer(),
                     -                TRANSCRIPT_STRAND = col_integer(),
                     -                PICK = col_integer(),
                     -                miRNA = col_character(),
                     -                TSL = col_integer(),
                     -                HGVS_OFFSET = col_integer()
                     -            ),
                     -            progress = TRUE
                     -        )
                     -    })
                     -    if(ncol(ret) == 1) {
                     -        ret <- plyr::adply(.data = files,.margins = 1,.fun = function(f){
                     -            read_tsv(
                     -                f,
                     +    ret <- files |>
                     +        purrr::map_dfr(.f = function(x) {
                     +            tab <- readr::read_tsv(
                     +                x,
                     +                show_col_types = FALSE,
                                      comment = "#",
                     -                col_types = cols(
                     +                col_types = readr::cols(
                     +                    SOMATIC =  col_character(),
                     +                    PUBMED = col_character(),
                     +                    miRNA = col_character(),
                     +                    HGVS_OFFSET = col_integer(),
                     +                    PHENO = col_character(),
                                          Entrez_Gene_Id = col_integer(),
                                          Start_Position = col_integer(),
                                          End_Position = col_integer(),
@@ -514,16 +500,18 @@ readSimpleNucleotideVariationMaf <- function(files){
                                          t_ref_count = col_integer(),
                                          t_alt_count = col_integer(),
                                          n_depth = col_integer(),
                     -                    ALLELE_NUM = col_integer(),
                                          TRANSCRIPT_STRAND = col_integer(),
                                          PICK = col_integer(),
                                          TSL = col_integer(),
                     -                    HGVS_OFFSET = col_integer(),
                     -                    MINIMISED = col_integer()),
                     -                progress = TRUE
                     -            )
                     +                    DISTANCE = col_integer()
                     +                ))
+                    +
                     +            # empty MAF file
                     +            # https://portal.gdc.cancer.gov/files/7917fcbe-cb66-447d-8ea8-3a324feee3fa
                     +            if(nrow(tab) == 0) {return (NULL)}
                     +            tab
                              })
                     -    }
+                    +
                          return(ret)
+                     }

vignettes/classifiers.Rmd

History View file @ 40ea78f4

@@ -55,25 +55,25 @@ the same result as the paper.
                      ```{r, eval = FALSE, message = FALSE, results = "hide"}
                      query <- GDCquery(
                     -  project = "TCGA-GBM",
                     -  data.category = "DNA methylation",
                     -  barcode = c("TCGA-06-0122","TCGA-14-1456"),
                     -  platform = "Illumina Human Methylation 27",
                     -  legacy = TRUE
                     +    project = "TCGA-GBM",
                     +    data.category = "DNA Methylation",
                     +    barcode = c("TCGA-06-0122","TCGA-14-1456"),
                     +    platform = "Illumina Human Methylation 27",
                     +    data.type = "Methylation Beta Value"
+                     )
                      GDCdownload(query)
                     -data.hg19 <- GDCprepare(query)
                     +dnam <- GDCprepare(query)
                      ```
                      ```{r, eval = FALSE}
                     -assay(data.hg19)[1:5,1:2]
                     +assay(dnam)[1:5,1:2]
                      ```
                      ## Function
                      <hr>
                      ```{r, eval = FALSE}
                     -classification <- gliomaClassifier(data.hg19)
                     +classification <- gliomaClassifier(dnam)
                      ```
                      ## Results