Bioconductor Code: immunogenViewer

Browse code

Consistent column names, add new function addImmunogenList()

Katharina Waury authored on 08/02/2025 20:00:07
Showing 8 changed files

NAMESPACE index 4ba797b..2ba7d23 100644
R/addImmunogen.R index 775a223..3d4faf5 100644
R/addImmunogenList.R index 0000000..c23b53c
R/evaluateImmunogen.R index c35edc0..8c2f74e 100644
man/addImmunogenList.Rd index 0000000..2d7c8b3
man/evaluateImmunogen.Rd index 3195ea1..e34a05a 100644
tests/testthat/test-addImmunogenList.R index 0000000..594c1a3
vignettes/immunogenViewer_vignette.Rmd index e14072d..44d3dfc 100644

History View file @ d0ce529

@@ -1,6 +1,7 @@
                      # Generated by roxygen2: do not edit by hand
                      export(addImmunogen)
                     +export(addImmunogenList)
                      export(evaluateImmunogen)
                      export(getProteinFeatures)
                      export(plotImmunogen)

R/addImmunogen.R

History View file @ d0ce529

@@ -43,6 +43,9 @@ addImmunogen <- function(proteinDF, start=NULL, end=NULL, seq=NULL, name) {
                          # calculate immunogen end position
                          end <- start + nchar(seq) - 1
                     +    # check if immunogen length is within range
                     +    checkImmunogenRange(start, end, proteinLength)
+                    +
                        # if start and end position are provided, check validity of range
                        } else if (!is.null(start) & !is.null(end)) {
                          checkImmunogenRange(start, end, proteinLength)

R/addImmunogenList.R

History View file @ d0ce529

                     new file mode 100644
@@ -0,0 +1,50 @@
                     +#' Add multiple immunogens to the Protein DataFrame
                     +#'
                     +#' @param proteinDF Protein DataFrame created by call to getProteinFeatures()
                     +#' @param immunogenDF DataFrame where each row represents an immunogen.
                     +#'        Must contain columns: `start` (integer) and `end` (integer) or `seq` (string), and `name` (string).
                     +#'
                     +#' @description
                     +#' Calls `addImmunogen()` for each row in `immunogenDF` to add multiple immunogens
                     +#' to the given `proteinDF`.
                     +#'
                     +#' @return Updated Protein DataFrame with all immunogens added as new columns
                     +#' @export
                     +#'
                     +#' @examples
                     +#' proteinDF <- getProteinFeatures("P55087")
                     +#' immunogenDF <- data.frame(
                     +#'   start = c(10, 40, NA),
                     +#'   end = c(30, 60, NA),
                     +#'   seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"),
                     +#'   name = c("A12", "B34", "HPA"),
                     +#'   stringsAsFactors = FALSE
                     +#' )
                     +#' proteinDF <- addImmunogenList(proteinDF, immunogenDF)
                     +addImmunogenList <- function(proteinDF, immunogenDF) {
+                    +
                     +  # Adds every immunogen described in `immunogenDF` to `proteinDF` by calling
                     +  # addImmunogen() once per row. A row is added by position when both `start`
                     +  # and `end` are non-NA, otherwise by sequence when `seq` is non-NA; a row
                     +  # providing neither stops with an error.
+                    +
                     +  # Check if required columns exist
                     +  columns <- colnames(immunogenDF)
                     +  if (!"name" %in% columns) {
                     +    stop("The immunogen dataframe must contain a 'name' column.")
                     +  }
                     +  # The position columns and the sequence column are each optional, so record
                     +  # which of them are present instead of assuming all three exist. Reading a
                     +  # missing column yields NULL and is.na(NULL) has length zero, which makes
                     +  # an `if` condition fail with an unrelated error.
                     +  hasRange <- all(c("start", "end") %in% columns)
                     +  hasSeq <- "seq" %in% columns
                     +  if (!hasSeq && !hasRange) {
                     +    stop("The immunogen dataframe must contain either a 'seq' column or both 'start' and 'end' columns.")
                     +  }
+                    +
                     +  # Iterate through each row and add immunogen
                     +  for (i in seq_len(nrow(immunogenDF))) {
+                    +
                     +    name <- immunogenDF[["name"]][i]
+                    +
                     +    # `[[` matches column names exactly; `&&` short-circuits so absent
                     +    # columns are never inspected
                     +    useRange <- hasRange &&
                     +      !is.na(immunogenDF[["start"]][i]) && !is.na(immunogenDF[["end"]][i])
                     +    useSeq <- hasSeq && !is.na(immunogenDF[["seq"]][i])
+                    +
                     +    # Call to addImmunogen based on available data
                     +    if (useRange) {
                     +      proteinDF <- addImmunogen(proteinDF, start = immunogenDF[["start"]][i],
                     +                                end = immunogenDF[["end"]][i], name = name)
                     +    } else if (useSeq) {
                     +      proteinDF <- addImmunogen(proteinDF, seq = as.character(immunogenDF[["seq"]][i]), name = name)
                     +    } else {
                     +      stop("Invalid values. Please check your immunogen dataframe.")
                     +    }
                     +  }
+                    +
                     +  return(proteinDF)
                     +}

R/evaluateImmunogen.R

History View file @ d0ce529

@@ -8,8 +8,8 @@
                      #'
                      #' @description
                      #' By calling `evaluateImmunogen()`, the immunogens associated with a Protein DataFrame can be evaluated regarding
                     -#' their suitability fir antibody binding in natively folded proteins. By calling the function without specifying
                     -#' an immunogen, all immunogens of the current protein will be evaluated. The summary DataFrame contains one row per
                     +#' their suitability for antibody binding in natively folded proteins. By calling the function without specifying
                     +#' an immunogen, all immunogens of the current protein dataframe will be evaluated. The summary DataFrame contains one row per
                      #' evaluated immunogen.
                      #'
                      #' @examples
@@ -21,8 +21,8 @@
                      evaluateImmunogen <- function(proteinDF, immunogen=NULL) {
                        # feature column names of protein dataframe
                     -  features <- c("Uniprot", "Position", "Residue", "PTM", "disulfideBridge", "Membrane",
                     -                "Binding", "Disorder", "secondaryStructure", "solventAccessibility")
                     +  features <- c("Uniprot", "Position", "Residue", "PTM", "DisulfideBridge", "Membrane",
                     +                "ProteinBinding", "Disorder", "SecondaryStructure", "SolventAccessibility")
                        fullDF <- data.frame()
@@ -71,8 +71,8 @@ evaluateImmunogen <- function(proteinDF, immunogen=NULL) {
                      checkIfImmunogenExists <- function(colnamesDF, name) {
                        # feature column names of protein dataframe
                     -  features <- c("Uniprot", "Position", "Residue", "PTM", "disulfideBridge", "Membrane",
                     -                "Binding", "Disorder", "secondaryStructure", "solventAccessibility")
                     +  features <- c("Uniprot", "Position", "Residue", "PTM", "DisulfideBridge", "Membrane",
                     +                "ProteinBinding", "Disorder", "SecondaryStructure", "SolventAccessibility")
                        # raise error if immunogen names not present in dataframe or same as feature column name
                        if (!(name %in% colnamesDF)) {
@@ -136,28 +136,28 @@ createSummaryDataFrame <- function(immunogenDF, immunogen) {
                        proportionMembrane <- calculateRegionProportions(immunogenDF, "Membrane")
                        proportionDisorder <- calculateRegionProportions(immunogenDF, "Disorder")
                     -  proportionBinding <- calculateRegionProportions(immunogenDF, "Binding")
                     +  proportionBinding <- calculateRegionProportions(immunogenDF, "ProteinBinding")
                        # add secondary structure proportions
                     -  proportionsSecondaryStr <- prop.table(table(immunogenDF[["secondaryStructure"]]))
                     +  proportionsSecondaryStr <- prop.table(table(immunogenDF[["SecondaryStructure"]]))
                        proportionsSecondaryStr <- addMissingClasses(proportionsSecondaryStr, c("Helix", "Sheet", "Other"))
                        # add buried/exposed proportions
                     -  proportionsSolventAcc <- prop.table(table(immunogenDF[["solventAccessibility"]]))
                     +  proportionsSolventAcc <- prop.table(table(immunogenDF[["SolventAccessibility"]]))
                        proportionsSolventAcc <- addMissingClasses(proportionsSolventAcc, c("Buried", "Exposed"))
                        # create a summary dataframe
                        summaryDF <- data.frame(
                     -    Sum_PTM = sumPTM,
                     -    Sum_DisulfideBridges = sumBridge,
                     -    Proportion_Membrane = proportionMembrane,
                     -    Proportion_Disorder = proportionDisorder,
                     -    Proportion_Binding = proportionBinding,
                     -    Proportions_Helix = proportionsSecondaryStr["Helix"][[1]],
                     -    Proportions_Sheet = proportionsSecondaryStr["Sheet"][[1]],
                     -    Proportions_Coil = proportionsSecondaryStr["Other"][[1]],
                     -    Proportions_SolventAccessibility_Buried = proportionsSolventAcc["Buried"][[1]],
                     -    Proportions_SolventAccessibility_Exposed = proportionsSolventAcc["Exposed"][[1]]
                     +    SumPTM = sumPTM,
                     +    SumDisulfideBridges = sumBridge,
                     +    ProportionMembrane = proportionMembrane,
                     +    ProportionDisorder = proportionDisorder,
                     +    ProportionBinding = proportionBinding,
                     +    ProportionsHelix = proportionsSecondaryStr["Helix"][[1]],
                     +    ProportionsSheet = proportionsSecondaryStr["Sheet"][[1]],
                     +    ProportionsCoil = proportionsSecondaryStr["Other"][[1]],
                     +    ProportionsBuried = proportionsSolventAcc["Buried"][[1]],
                     +    ProportionsExposed = proportionsSolventAcc["Exposed"][[1]]
+                         )
                        # set all rownames to immunogen name

man/addImmunogenList.Rd

History View file @ d0ce529

                     new file mode 100644
@@ -0,0 +1,32 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/addImmunogenList.R
                     +\name{addImmunogenList}
                     +\alias{addImmunogenList}
                     +\title{Add multiple immunogens to the Protein DataFrame}
                     +\usage{
                     +addImmunogenList(proteinDF, immunogenDF)
                     +}
                     +\arguments{
                     +\item{proteinDF}{Protein DataFrame created by call to getProteinFeatures()}
+                    +
                     +\item{immunogenDF}{DataFrame where each row represents an immunogen.
                     +Must contain columns: `start` (integer) and `end` (integer) or `seq` (string), and `name` (string).}
                     +}
                     +\value{
                     +Updated Protein DataFrame with all immunogens added as new columns
                     +}
                     +\description{
                     +Calls `addImmunogen()` for each row in `immunogenDF` to add multiple immunogens
                     +to the given `proteinDF`.
                     +}
                     +\examples{
                     +proteinDF <- getProteinFeatures("P55087")
                     +immunogenDF <- data.frame(
                     +  start = c(10, 40, NA),
                     +  end = c(30, 60, NA),
                     +  seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"),
                     +  name = c("A12", "B34", "HPA"),
                     +  stringsAsFactors = FALSE
                     +)
                     +proteinDF <- addImmunogenList(proteinDF, immunogenDF)
                     +}

man/evaluateImmunogen.Rd

History View file @ d0ce529

@@ -16,8 +16,8 @@ Summary DataFrame providing statistics on immunogen
+                     }
                      \description{
                      By calling `evaluateImmunogen()`, the immunogens associated with a Protein DataFrame can be evaluated regarding
                     -their suitability fir antibody binding in natively folded proteins. By calling the function without specifying
                     -an immunogen, all immunogens of the current protein will be evaluated. The summary DataFrame contains one row per
                     +their suitability for antibody binding in natively folded proteins. By calling the function without specifying
                     +an immunogen, all immunogens of the current protein dataframe will be evaluated. The summary DataFrame contains one row per
                      evaluated immunogen.
+                     }
                      \examples{

tests/testthat/test-addImmunogenList.R

History View file @ d0ce529

                     new file mode 100644
@@ -0,0 +1,29 @@
                     +library(immunogenViewer)
+                    +
                     +# Shared fixtures: a protein dataframe for P55087 and an immunogen dataframe
                     +# with two immunogens given by start/end and one given by sequence.
                     +exampleDF <- immunogenViewer::getProteinFeatures("P55087")
                     +immunogenDF <- data.frame(
                     +  start = c(10, 40, NA),
                     +  end = c(30, 60, NA),
                     +  seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"),
                     +  name = c("A12", "B34", "HPA"),
                     +  stringsAsFactors = FALSE
                     +  )
+                    +
+                    +
                     +test_that("Dataframe is returned if successful", {
                     +  expect_s3_class(addImmunogenList(exampleDF, immunogenDF), "data.frame")
                     +})
+                    +
                     +# Expected messages are matched literally (fixed = TRUE) because they contain
                     +# '.' which would otherwise be interpreted as a regular expression wildcard.
                     +test_that("Missing name column raises error", {
                     +  expect_error(addImmunogenList(exampleDF, data.frame(seq=c("RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"))),
                     +    "The immunogen dataframe must contain a 'name' column.", fixed = TRUE)
                     +})
+                    +
                     +test_that("Missing seq and start/end columns raise error", {
                     +  expect_error(addImmunogenList(exampleDF, data.frame(name=c("A12"))),
                     +     "The immunogen dataframe must contain either a 'seq' column or both 'start' and 'end' columns.",
                     +     fixed = TRUE)
                     +})
+                    +
                     +test_that("Wrong immunogen dataframe raises errors", {
                     +  expect_error(addImmunogenList(exampleDF, data.frame(start = c(NA), end = c(NA), seq = c(NA), name = c("A12"))),
                     +      "Invalid values. Please check your immunogen dataframe.", fixed = TRUE)
                     +})

vignettes/immunogenViewer_vignette.Rmd

History View file @ d0ce529

@@ -50,6 +50,29 @@ library(immunogenViewer)
                      To retrieve the features for the protein of interest, the correct UniProt ID (also known as accession number) is required. If the UniProt ID is not known yet, one can search the [UniProtKB](https://www.uniprot.org/) using the gene or protein name. Be sure to select the UniProt ID of the correct organism and preferably search within reviewed SwissProt entries instead of unreviewed TrEMBL entries. Our example protein is the human protein TREM2 (UniProt ID: [Q9NZC2](https://www.uniprot.org/uniprotkb/Q9NZC2/entry)). Using `getProteinFeatures()`, relevant features from UniProt and PredictProtein are retrieved. Interaction with UniProt is done using the Bioconductor package [UniProt.ws](https://bioconductor.org/packages/release/bioc/html/UniProt.ws.html). To see how the dataframe is structured, we will look at the returned dataframe.
                     +The following protein features are included:
+                    +
                     +-   Secondary Structure: Each residue is assigned as helix, sheet or other conformation based on predictions by RePROF. Source:
                     +    [PredictProtein](https://www.predictprotein.org/)
+                    +
                     +-   Solvent Accessibility: Each residue is assigned as exposed or buried based on predictions by RePROF. Source:
                     +    [PredictProtein](https://www.predictprotein.org/)
+                    +
                     +-   Membrane: Residues that are annotated as [transmembrane](https://www.uniprot.org/help/transmem) or
                     +    [intramembrane](https://www.uniprot.org/help/intramem). Source: [UniProtKB](https://www.uniprot.org/)
+                    +
                     +-   Protein Binding: Each residue that is predicted to bind proteins based on predictions by ProNA. Source:
                     +    [PredictProtein](https://www.predictprotein.org/)
+                    +
                     +-   Disorder: Each residue that is predicted to be disordered based on predictions by Meta-Disorder. Source: [PredictProtein](https://www.predictprotein.org/)
+                    +
                     +-   PTM: Residues that are annotated as [modified residues](https://www.uniprot.org/help/mod_res),
                     +    [lipidation](https://www.uniprot.org/help/lipid) or [glycosylation](https://www.uniprot.org/help/carbohyd). Source:
                     +    [UniProtKB](https://www.uniprot.org/)
+                    +
                     +-   Disulfide Bridges: Residues that are annotated as [disulfide bonds](https://www.uniprot.org/help/disulfid). Source: [UniProtKB](https://www.uniprot.org/)
+                    +
                     +To see how the dataframe is structured, we will look at the returned dataframe.
                      ```{r get-features}
                      protein <- getProteinFeatures("Q9NZC2")
@@ -71,7 +94,6 @@ protein <- addImmunogen(protein, seq = "HGQKPGTHPPSELD", name = "EB07921")
                      colnames(protein)
                      ```
+                    -
                      ### Renaming an immunogen
                      Already added immunogens can be renamed using `renameImmunogen()` if the provided start and end position are correct but the name should be updated. This way a typo can be corrected or a more informative name added instead of re-adding the immunogen. The column name in the protein dataframe is then updated.
@@ -82,7 +104,6 @@ protein <- renameImmunogen(protein, oldName = "ABIN2783734_", newName = "ABIN278
                      colnames(protein)
                      ```
+                    -
                      ### Removing an immunogen
                      A previously added immunogen can be removed from the protein dataframe using `removeImmunogen()`. The corresponding column is dropped from the protein dataframe.
@@ -103,7 +124,10 @@ plotProtein(protein)
                      ## Visualizing a specific immunogen
                     -If interested in one specific immunogen, one can visualize the relevant part of the protein sequence. In this plot the amino acid sequence of the immunogen is shown along the x axis while the same features as in the protein plot are included.
                     +If interested in one specific immunogen, one can visualize the relevant
                     +part of the protein sequence. In this plot the amino acid sequence of
                     +the immunogen is shown along the x axis while the same features as in
                     +the protein plot are included.
                      ```{r visualize_immunogen, fig.fullwidth=TRUE, fig.width=10, fig.height=10, out.width = "100%"}
                      plotImmunogen(protein, "ABIN2783734")
@@ -111,7 +135,30 @@ plotImmunogen(protein, "ABIN2783734")
                      ## Evaluating the immunogens
                     -Apart from visualizing specific immunogens, it is also possible to summarize the protein features within a specific immunogen. This can either be done for an immunogen of interest or for all immunogens added to a protein dataframe at once. The output is a summary dataframe that can be sorted by the feature columns. By sorting the most suitable immunogen, e.g., with the highest fraction of exposed residues, can be selected.
                     +Apart from visualizing specific immunogens, it is also possible to summarize the protein features within a specific immunogen. This can either be done for one immunogen of interest or for all immunogens added to a protein dataframe at once. The output is a summary dataframe that can be sorted by the feature columns. By sorting, the most suitable immunogen, e.g., the one with the highest fraction of exposed residues, can be selected.
+                    +
                     +The following summary statistics are included:
+                    +
                     +-   SumPTM: Number of PTM residues within the immunogen.
+                    +
                     +-   SumDisulfideBridges: Number of disulfide bond residues within the immunogen.
+                    +
                     +-   ProportionMembrane: Proportion of immunogen residues that are annotated as membrane residues.
+                    +
                     +-   ProportionDisorder: Proportion of immunogen residues that are predicted as disordered.
+                    +
                     +-   ProportionBinding: Proportion of immunogen residues that are predicted to bind proteins.
+                    +
                     +-   ProportionsHelix: Proportion of immunogen residues that are predicted to form alpha helices.
+                    +
                     +-   ProportionsSheet: Proportion of immunogen residues that are predicted to form beta sheets.
+                    +
                     +-   ProportionsCoil: Proportion of immunogen residues that are predicted to form coils.
+                    +
                     +-   ProportionsBuried: Proportion of immunogen residues that are predicted to be buried.
+                    +
                     +-   ProportionsExposed: Proportion of immunogen residues that are predicted to be solvent exposed.
+                    +
                      ```{r evaluate}
                      immunogens <- evaluateImmunogen(protein)
@@ -121,17 +168,17 @@ DT::datatable(immunogens, width = "80%", options = list(scrollX = TRUE))
                      # Important Notes
                     -* The length of an immunogen has to be between 10 and 50 amino acids.
                     -* The secondary structure "Other" usually stand for coil structures.
                     -* For the call to `getProteinFeatures()` the taxonomy ID for the protein's species has to be set. The default is human (ID: 9606). If the protein of interest is from a different species, the correct taxonomy ID must be set as a parameter.
+                    -
                     -# References {.unnumbered}
                     +-   The length of an immunogen has to be between 10 and 50 amino acids.
                     +-   The secondary structure "Other" usually stands for coil structures.
                     +-   For the call to `getProteinFeatures()` the taxonomy ID for the
                     +    protein's species has to be set. The default is human (ID: 9606). If
                     +    the protein of interest is from a different species, the correct
                     +    taxonomy ID must be set as a parameter.
                     +# References {.unnumbered}
                      # Session info
                      ```{r sessioninfo}
                      sessionInfo()
                      ```
+                    -
+                    -