... | ... |
@@ -43,6 +43,9 @@ addImmunogen <- function(proteinDF, start=NULL, end=NULL, seq=NULL, name) { |
43 | 43 |
# calculate immunogen end position |
44 | 44 |
end <- start + nchar(seq) - 1 |
45 | 45 |
|
46 |
+ # check if immunogen length is within range |
|
47 |
+ checkImmunogenRange(start, end, proteinLength) |
|
48 |
+ |
|
46 | 49 |
# if start and end position are provided, check validity of range |
47 | 50 |
} else if (!is.null(start) & !is.null(end)) { |
48 | 51 |
checkImmunogenRange(start, end, proteinLength) |
49 | 52 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,50 @@ |
1 |
#' Add multiple immunogens to the Protein DataFrame
#'
#' @param proteinDF Protein DataFrame created by call to getProteinFeatures()
#' @param immunogenDF DataFrame where each row represents an immunogen.
#' Must contain a `name` column (string) and either both `start` and `end`
#' (integer) or `seq` (string). All of these columns may be present at once;
#' for each row `start`/`end` are used when both are non-missing, otherwise
#' `seq` is used.
#'
#' @description
#' Calls `addImmunogen()` for each row in `immunogenDF` to add multiple immunogens
#' to the given `proteinDF`. An error is raised if a required column is missing
#' or if a row provides neither a complete `start`/`end` pair nor a `seq`.
#'
#' @return Updated Protein DataFrame with all immunogens added as new columns
#' @export
#'
#' @examples
#' proteinDF <- getProteinFeatures("P55087")
#' immunogenDF <- data.frame(
#'   start = c(10, 40, NA),
#'   end = c(30, 60, NA),
#'   seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"),
#'   name = c("A12", "B34", "HPA"),
#'   stringsAsFactors = FALSE
#' )
#' proteinDF <- addImmunogenList(proteinDF, immunogenDF)
addImmunogenList <- function(proteinDF, immunogenDF) {

  columns <- colnames(immunogenDF)

  # Check if required columns exist
  if (!"name" %in% columns) {
    stop("The immunogen dataframe must contain a 'name' column.")
  }
  hasRange <- all(c("start", "end") %in% columns)
  hasSeq <- "seq" %in% columns
  if (!hasSeq && !hasRange) {
    stop("The immunogen dataframe must contain either a 'seq' column or both 'start' and 'end' columns.")
  }

  # Iterate through each row and add immunogen
  for (i in seq_len(nrow(immunogenDF))) {

    # Columns are read with [[ ]] (exact name matching) and only when they
    # exist: `$` on a missing column yields NULL, and is.na(NULL) has length
    # zero, which cannot be used as an `if` condition.
    rangeGiven <- hasRange &&
      !is.na(immunogenDF[["start"]][i]) &&
      !is.na(immunogenDF[["end"]][i])
    seqGiven <- hasSeq && !is.na(immunogenDF[["seq"]][i])

    # Call to addImmunogen based on available data; a complete start/end pair
    # takes precedence over a sequence
    if (rangeGiven) {
      proteinDF <- addImmunogen(proteinDF,
                                start = immunogenDF[["start"]][i],
                                end = immunogenDF[["end"]][i],
                                name = immunogenDF[["name"]][i])
    } else if (seqGiven) {
      proteinDF <- addImmunogen(proteinDF,
                                seq = as.character(immunogenDF[["seq"]][i]),
                                name = immunogenDF[["name"]][i])
    } else {
      stop("Invalid values. Please check your immunogen dataframe.")
    }
  }

  return(proteinDF)
}
... | ... |
@@ -8,8 +8,8 @@ |
8 | 8 |
#' |
9 | 9 |
#' @description |
10 | 10 |
#' By calling `evaluateImmunogen()`, the immunogens associated with a Protein DataFrame can be evaluated regarding |
11 |
-#' their suitability fir antibody binding in natively folded proteins. By calling the function without specifying |
|
12 |
-#' an immunogen, all immunogens of the current protein will be evaluated. The summary DataFrame contains one row per |
|
11 |
+#' their suitability for antibody binding in natively folded proteins. By calling the function without specifying |
|
12 |
+#' an immunogen, all immunogens of the current protein dataframe will be evaluated. The summary DataFrame contains one row per |
|
13 | 13 |
#' evaluated immunogen. |
14 | 14 |
#' |
15 | 15 |
#' @examples |
... | ... |
@@ -21,8 +21,8 @@ |
21 | 21 |
evaluateImmunogen <- function(proteinDF, immunogen=NULL) { |
22 | 22 |
|
23 | 23 |
# feature column names of protein dataframe |
24 |
- features <- c("Uniprot", "Position", "Residue", "PTM", "disulfideBridge", "Membrane", |
|
25 |
- "Binding", "Disorder", "secondaryStructure", "solventAccessibility") |
|
24 |
+ features <- c("Uniprot", "Position", "Residue", "PTM", "DisulfideBridge", "Membrane", |
|
25 |
+ "ProteinBinding", "Disorder", "SecondaryStructure", "SolventAccessibility") |
|
26 | 26 |
|
27 | 27 |
fullDF <- data.frame() |
28 | 28 |
|
... | ... |
@@ -71,8 +71,8 @@ evaluateImmunogen <- function(proteinDF, immunogen=NULL) { |
71 | 71 |
checkIfImmunogenExists <- function(colnamesDF, name) { |
72 | 72 |
|
73 | 73 |
# feature column names of protein dataframe |
74 |
- features <- c("Uniprot", "Position", "Residue", "PTM", "disulfideBridge", "Membrane", |
|
75 |
- "Binding", "Disorder", "secondaryStructure", "solventAccessibility") |
|
74 |
+ features <- c("Uniprot", "Position", "Residue", "PTM", "DisulfideBridge", "Membrane", |
|
75 |
+ "ProteinBinding", "Disorder", "SecondaryStructure", "SolventAccessibility") |
|
76 | 76 |
|
77 | 77 |
# raise error if immunogen names not present in dataframe or same as feature column name |
78 | 78 |
if (!(name %in% colnamesDF)) { |
... | ... |
@@ -136,28 +136,28 @@ createSummaryDataFrame <- function(immunogenDF, immunogen) { |
136 | 136 |
|
137 | 137 |
proportionMembrane <- calculateRegionProportions(immunogenDF, "Membrane") |
138 | 138 |
proportionDisorder <- calculateRegionProportions(immunogenDF, "Disorder") |
139 |
- proportionBinding <- calculateRegionProportions(immunogenDF, "Binding") |
|
139 |
+ proportionBinding <- calculateRegionProportions(immunogenDF, "ProteinBinding") |
|
140 | 140 |
|
141 | 141 |
# add secondary structure proportions |
142 |
- proportionsSecondaryStr <- prop.table(table(immunogenDF[["secondaryStructure"]])) |
|
142 |
+ proportionsSecondaryStr <- prop.table(table(immunogenDF[["SecondaryStructure"]])) |
|
143 | 143 |
proportionsSecondaryStr <- addMissingClasses(proportionsSecondaryStr, c("Helix", "Sheet", "Other")) |
144 | 144 |
|
145 | 145 |
# add buried/exposed proportions |
146 |
- proportionsSolventAcc <- prop.table(table(immunogenDF[["solventAccessibility"]])) |
|
146 |
+ proportionsSolventAcc <- prop.table(table(immunogenDF[["SolventAccessibility"]])) |
|
147 | 147 |
proportionsSolventAcc <- addMissingClasses(proportionsSolventAcc, c("Buried", "Exposed")) |
148 | 148 |
|
149 | 149 |
# create a summary dataframe |
150 | 150 |
summaryDF <- data.frame( |
151 |
- Sum_PTM = sumPTM, |
|
152 |
- Sum_DisulfideBridges = sumBridge, |
|
153 |
- Proportion_Membrane = proportionMembrane, |
|
154 |
- Proportion_Disorder = proportionDisorder, |
|
155 |
- Proportion_Binding = proportionBinding, |
|
156 |
- Proportions_Helix = proportionsSecondaryStr["Helix"][[1]], |
|
157 |
- Proportions_Sheet = proportionsSecondaryStr["Sheet"][[1]], |
|
158 |
- Proportions_Coil = proportionsSecondaryStr["Other"][[1]], |
|
159 |
- Proportions_SolventAccessibility_Buried = proportionsSolventAcc["Buried"][[1]], |
|
160 |
- Proportions_SolventAccessibility_Exposed = proportionsSolventAcc["Exposed"][[1]] |
|
151 |
+ SumPTM = sumPTM, |
|
152 |
+ SumDisulfideBridges = sumBridge, |
|
153 |
+ ProportionMembrane = proportionMembrane, |
|
154 |
+ ProportionDisorder = proportionDisorder, |
|
155 |
+ ProportionBinding = proportionBinding, |
|
156 |
+ ProportionsHelix = proportionsSecondaryStr["Helix"][[1]], |
|
157 |
+ ProportionsSheet = proportionsSecondaryStr["Sheet"][[1]], |
|
158 |
+ ProportionsCoil = proportionsSecondaryStr["Other"][[1]], |
|
159 |
+ ProportionsBuried = proportionsSolventAcc["Buried"][[1]], |
|
160 |
+ ProportionsExposed = proportionsSolventAcc["Exposed"][[1]] |
|
161 | 161 |
) |
162 | 162 |
|
163 | 163 |
# set all rownames to immunogen name |
164 | 164 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,32 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/addImmunogenList.R |
|
3 |
+\name{addImmunogenList} |
|
4 |
+\alias{addImmunogenList} |
|
5 |
+\title{Add multiple immunogens to the Protein DataFrame} |
|
6 |
+\usage{ |
|
7 |
+addImmunogenList(proteinDF, immunogenDF) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{proteinDF}{Protein DataFrame created by call to getProteinFeatures()} |
|
11 |
+ |
|
12 |
+\item{immunogenDF}{DataFrame where each row represents an immunogen. |
|
13 |
+Must contain columns: `start` (integer) and `end` (integer) or `seq` (string), and `name` (string).} |
|
14 |
+} |
|
15 |
+\value{ |
|
16 |
+Updated Protein DataFrame with all immunogens added as new columns |
|
17 |
+} |
|
18 |
+\description{ |
|
19 |
+Calls `addImmunogen()` for each row in `immunogenDF` to add multiple immunogens |
|
20 |
+to the given `proteinDF`. |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+proteinDF <- getProteinFeatures("P55087") |
|
24 |
+immunogenDF <- data.frame( |
|
25 |
+ start = c(10, 40, NA), |
|
26 |
+ end = c(30, 60, NA), |
|
27 |
+ seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"), |
|
28 |
+ name = c("A12", "B34", "HPA"), |
|
29 |
+ stringsAsFactors = FALSE |
|
30 |
+) |
|
31 |
+proteinDF <- addImmunogenList(proteinDF, immunogenDF) |
|
32 |
+} |
... | ... |
@@ -16,8 +16,8 @@ Summary DataFrame providing statistics on immunogen |
16 | 16 |
} |
17 | 17 |
\description{ |
18 | 18 |
By calling `evaluateImmunogen()`, the immunogens associated with a Protein DataFrame can be evaluated regarding |
19 |
-their suitability fir antibody binding in natively folded proteins. By calling the function without specifying |
|
20 |
-an immunogen, all immunogens of the current protein will be evaluated. The summary DataFrame contains one row per |
|
19 |
+their suitability for antibody binding in natively folded proteins. By calling the function without specifying |
|
20 |
+an immunogen, all immunogens of the current protein dataframe will be evaluated. The summary DataFrame contains one row per |
|
21 | 21 |
evaluated immunogen. |
22 | 22 |
} |
23 | 23 |
\examples{ |
24 | 24 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,29 @@ |
1 |
library(immunogenViewer)

# Shared fixtures. The protein dataframe is retrieved once and reused by all
# tests below; the immunogen dataframe mixes start/end rows with a seq row.
exampleDF <- immunogenViewer::getProteinFeatures("P55087")
immunogenDF <- data.frame(
  start = c(10, 40, NA),
  end = c(30, 60, NA),
  seq = c(NA, NA, "RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"),
  name = c("A12", "B34", "HPA"),
  stringsAsFactors = FALSE
)

test_that("Dataframe is returned if successful", {
  expect_s3_class(addImmunogenList(exampleDF, immunogenDF), "data.frame")
})

# Expected messages are literal text (they contain '.'), so match them with
# fixed = TRUE rather than as regular expressions.
test_that("Missing name column raises error", {
  expect_error(
    addImmunogenList(exampleDF, data.frame(seq = c("RFKEAFSKAAQQTKGSYMEVEDNRSQVETDD"))),
    "The immunogen dataframe must contain a 'name' column.",
    fixed = TRUE
  )
})

test_that("Missing seq and start/end columns raise error", {
  expect_error(
    addImmunogenList(exampleDF, data.frame(name = c("A12"))),
    "The immunogen dataframe must contain either a 'seq' column or both 'start' and 'end' columns.",
    fixed = TRUE
  )
})

test_that("Wrong immunogen dataframe raises errors", {
  expect_error(
    addImmunogenList(exampleDF, data.frame(start = c(NA), end = c(NA), seq = c(NA), name = c("A12"))),
    "Invalid values. Please check your immunogen dataframe.",
    fixed = TRUE
  )
})
... | ... |
@@ -50,6 +50,29 @@ library(immunogenViewer) |
50 | 50 |
|
51 | 51 |
To retrieve the features for the protein of interest, the correct UniProt ID (also known as accession number) is required. If the UniProt ID is not known yet, one can search the [UniProtKB](https://www.uniprot.org/) using the gene or protein name. Be sure to select the UniProt ID of the correct organism and preferably search within reviewed SwissProt entries instead of unreviewed TrEMBL entries. Our example protein is the human protein TREM2 (UniProt ID: [Q9NZC2](https://www.uniprot.org/uniprotkb/Q9NZC2/entry)). Using `getProteinFeatures()`, relevant features from UniProt and PredictProtein are retrieved. Interaction with UniProt is done using the Bioconductor package [UniProt.ws](https://bioconductor.org/packages/release/bioc/html/UniProt.ws.html). To see how the dataframe is structured, we will look at the returned dataframe. |
52 | 52 |
|
53 |
+The following protein features are included: |
|
54 |
+ |
|
55 |
+- Secondary Structure: Each residue is assigned as helix, sheet or other conformation based on predictions by RePROF. Source: |
|
56 |
+ [PredictProtein](https://www.predictprotein.org/) |
|
57 |
+ |
|
58 |
+- Solvent Accessibility: Each residue is assigned as exposed or buried based on predictions by RePROF. Source: |
|
59 |
+ [PredictProtein](https://www.predictprotein.org/) |
|
60 |
+ |
|
61 |
+- Membrane: Residues that are annotated as [transmembrane](https://www.uniprot.org/help/transmem) or |
|
62 |
+ [intramembrane](https://www.uniprot.org/help/intramem). Source: [UniProtKB](https://www.uniprot.org/) |
|
63 |
+ |
|
64 |
+- Protein Binding: Each residue that is predicted to bind proteins based on predictions by ProNA. Source: |
|
65 |
+ [PredictProtein](https://www.predictprotein.org/) |
|
66 |
+ |
|
67 |
+- Disorder: Each residue that is predicted to be disordered based on predictions by Meta-Disorder. Source: [PredictProtein](https://www.predictprotein.org/) |
|
68 |
+ |
|
69 |
+- PTM: Residues that are annotated as [modified residues](https://www.uniprot.org/help/mod_res), |
|
70 |
+ [lipidation](https://www.uniprot.org/help/lipid) or [glycosylation](https://www.uniprot.org/help/carbohyd). Source: |
|
71 |
+ [UniProtKB](https://www.uniprot.org/) |
|
72 |
+ |
|
73 |
+- Disulfide Bridges: Residues that are annotated as [disulfide bonds](https://www.uniprot.org/help/disulfid). Source: [UniProtKB](https://www.uniprot.org/) |
|
74 |
+ |
|
75 |
+To see how the dataframe is structured, we will look at the returned dataframe. |
|
53 | 76 |
|
54 | 77 |
```{r get-features} |
55 | 78 |
protein <- getProteinFeatures("Q9NZC2") |
... | ... |
@@ -71,7 +94,6 @@ protein <- addImmunogen(protein, seq = "HGQKPGTHPPSELD", name = "EB07921") |
71 | 94 |
colnames(protein) |
72 | 95 |
``` |
73 | 96 |
|
74 |
- |
|
75 | 97 |
### Renaming an immunogen |
76 | 98 |
|
77 | 99 |
Already added immunogens can be renamed using `renameImmunogen()` if the provided start and end position are correct but the name should be updated. This way a typo can be corrected or a more informative name added instead of re-adding the immunogen. The column name in the protein dataframe is then updated. |
... | ... |
@@ -82,7 +104,6 @@ protein <- renameImmunogen(protein, oldName = "ABIN2783734_", newName = "ABIN278 |
82 | 104 |
colnames(protein) |
83 | 105 |
``` |
84 | 106 |
|
85 |
- |
|
86 | 107 |
### Removing an immunogen |
87 | 108 |
|
88 | 109 |
A previously added immunogen can be removed from the protein dataframe using `removeImmunogen()`. The corresponding column is dropped from the protein dataframe. |
... | ... |
@@ -103,7 +124,10 @@ plotProtein(protein) |
103 | 124 |
|
104 | 125 |
## Visualizing a specific immunogen |
105 | 126 |
|
106 |
-If interested in one specific immunogen, one can visualize the relevant part of the protein sequence. In this plot the amino acid sequence of the immunogen is shown along the x axis while the same features as in the protein plot are included. |
|
127 |
+If interested in one specific immunogen, one can visualize the relevant |
|
128 |
+part of the protein sequence. In this plot the amino acid sequence of |
|
129 |
+the immunogen is shown along the x axis while the same features as in |
|
130 |
+the protein plot are included. |
|
107 | 131 |
|
108 | 132 |
```{r visualize_immunogen, fig.fullwidth=TRUE, fig.width=10, fig.height=10, out.width = "100%"} |
109 | 133 |
plotImmunogen(protein, "ABIN2783734") |
... | ... |
@@ -111,7 +135,30 @@ plotImmunogen(protein, "ABIN2783734") |
111 | 135 |
|
112 | 136 |
## Evaluating the immunogens |
113 | 137 |
|
114 |
-Apart from visualizing specific immunogens, it is also possible to summarize the protein features within a specific immunogen. This can either be done for an immunogen of interest or for all immunogens added to a protein dataframe at once. The output is a summary dataframe that can be sorted by the feature columns. By sorting the most suitable immunogen, e.g., with the highest fraction of exposed residues, can be selected. |
|
138 |
+Apart from visualizing specific immunogens, it is also possible to summarize the protein features within a specific immunogen. This can either be done for one immunogen of interest or for all immunogens added to a protein dataframe at once. The output is a summary dataframe that can be sorted by the feature columns. By sorting, the most suitable immunogen, e.g., the one with the highest fraction of exposed residues, can be selected. |
|
139 |
+ |
|
140 |
+The following summary statistics are included: |
|
141 |
+ |
|
142 |
+- SumPTM: Number of PTM residues within the immunogen. |
|
143 |
+ |
|
144 |
+- SumDisulfideBridges: Number of disulfide bond residues within the immunogen. |
|
145 |
+ |
|
146 |
+- ProportionMembrane: Proportion of immunogen residues that are annotated as membrane residues. |
|
147 |
+ |
|
148 |
+- ProportionDisorder: Proportion of immunogen residues that are predicted as disordered. |
|
149 |
+ |
|
150 |
+- ProportionBinding: Proportion of immunogen residues that are predicted to bind proteins. |
|
151 |
+ |
|
152 |
+- ProportionsHelix: Proportion of immunogen residues that are predicted to form alpha helices. |
|
153 |
+ |
|
154 |
+- ProportionsSheet: Proportion of immunogen residues that are predicted to form beta sheets. |
|
155 |
+ |
|
156 |
+- ProportionsCoil: Proportion of immunogen residues that are predicted to form coils. |
|
157 |
+ |
|
158 |
+- ProportionsBuried: Proportion of immunogen residues that are predicted to be buried. |
|
159 |
+ |
|
160 |
+- ProportionsExposed: Proportion of immunogen residues that are predicted to be solvent exposed. |
|
161 |
+ |
|
115 | 162 |
|
116 | 163 |
```{r evaluate} |
117 | 164 |
immunogens <- evaluateImmunogen(protein) |
... | ... |
@@ -121,17 +168,17 @@ DT::datatable(immunogens, width = "80%", options = list(scrollX = TRUE)) |
121 | 168 |
|
122 | 169 |
# Important Notes |
123 | 170 |
|
124 |
-* The length of an immunogen has to be between 10 and 50 amino acids. |
|
125 |
-* The secondary structure "Other" usually stand for coil structures. |
|
126 |
-* For the call to `getProteinFeatures()` the taxonomy ID for the protein's species has to be set. The default is human (ID: 9606). If the protein of interest is from a different species, the correct taxonomy ID must be set as a parameter. |
|
127 |
- |
|
128 |
-# References {.unnumbered} |
|
171 |
+- The length of an immunogen has to be between 10 and 50 amino acids. |
|
172 |
+- The secondary structure "Other" usually stands for coil structures. |
|
173 |
+- For the call to `getProteinFeatures()` the taxonomy ID for the |
|
174 |
+ protein's species has to be set. The default is human (ID: 9606). If |
|
175 |
+ the protein of interest is from a different species, the correct |
|
176 |
+ taxonomy ID must be set as a parameter. |
|
129 | 177 |
|
178 |
+# References {.unnumbered} |
|
130 | 179 |
|
131 | 180 |
# Session info |
132 | 181 |
|
133 | 182 |
```{r sessioninfo} |
134 | 183 |
sessionInfo() |
135 | 184 |
``` |
136 |
- |
|
137 |
- |