1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,44 @@ |
1 |
+ |
|
2 |
+ |
|
3 |
+# Must be 20nt long |
|
4 |
+#spacers <- c("GGTGCTGATGCTGTGTGATG", |
|
5 |
+# "GGTGCTGATAAAGTGTGATG") |
|
6 |
# Score spacer sequences with the linear model defined in this file.
#
# The spacers are converted to a feature matrix by .extractFeatures()
# (one row per spacer, one column per feature). Each row is then combined
# with `model_weights` and shifted by `model_offset`.
#
# spacers: spacer sequences, passed unchanged to .extractFeatures().
#
# Returns the result of the matrix product plus the offset, i.e. a matrix
# with a single row and one column per spacer.
.getModelScore <- function(spacers) {
    feature_matrix <- .extractFeatures(spacers)
    weighted_sum <- model_weights %*% t(feature_matrix)
    model_offset + weighted_sum
}
|
11 |
+ |
|
12 |
# Intercept added to the weighted feature sum in .getModelScore().
model_offset <- 0.6505037

# Linear-model coefficients. Their order must match the column order of the
# matrix returned by .extractFeatures(), because .getModelScore() multiplies
# them positionally.
model_weights <- c(
    0.14177385,   # feature 1:  GC fraction
    0.06966514,   # feature 2:  position 20 indicator
    0.04216254,   # feature 3:  position 3 indicator
    0.03303432,   # feature 4:  position 12 indicator
    0.02355430,   # feature 5:  position 6 indicator
    -0.04746424,  # feature 6:  position 4 indicator
    -0.04878001,  # feature 7:  position 18 indicator
    -0.06981921,  # feature 8:  position 5 indicator
    -0.07087756,  # feature 9:  position 14 indicator
    -0.08160700   # feature 10: position 15 indicator
)
|
23 |
+ |
|
24 |
+ |
|
25 |
# Build the feature matrix consumed by .getModelScore().
#
# spacers: character vector of spacer sequences. Column 20 of the per-base
#          matrix is indexed, so every sequence needs at least 20 letters.
#
# Returns a matrix with one row per spacer and 10 columns, in the order
# expected by `model_weights`:
#    1  G/C count of substr(spacers, 4, 14), divided by 10
#    2  position 20 is G
#    3  position 3  is T or A
#    4  position 12 is G or A
#    5  position 6  is G
#    6  position 4  is T or A
#    7  position 18 is G or A
#    8  position 5  is C or A
#    9  position 14 is G
#   10  position 15 is A
# The indicator columns are logical and become 0/1 when bound to the numeric
# GC column.
#' @importFrom Biostrings DNAStringSet letterFrequency
.extractFeatures <- function(spacers){
    # NOTE(review): substr(., 4, 14) spans 11 nucleotides while the divisor
    # is 10 -- kept as in the original; confirm against the model definition.
    gc <- rowSums(letterFrequency(DNAStringSet(substr(spacers, 4, 14)),
                                  c("G", "C"))) / 10

    # One row per spacer, one column per nucleotide position.
    mat <- as.matrix(DNAStringSet(spacers))

    # TRUE where the nucleotide at `pos` is one of `bases`. The whole column
    # is compared for every alternative: the previous `mat[k] == "A"` form
    # used linear indexing, read a single matrix cell and recycled it across
    # all spacers. drop = FALSE keeps an n x 1 matrix even for one spacer.
    isAny <- function(pos, bases) {
        column <- mat[, pos, drop = FALSE]
        Reduce(`|`, lapply(bases, function(base) column == base))
    }

    # Unnamed list + do.call(cbind, .) so no column names are introduced.
    features <- list(
        gc,
        isAny(20, "G"),
        isAny(3, c("T", "A")),
        isAny(12, c("G", "A")),
        isAny(6, "G"),
        isAny(4, c("T", "A")),
        isAny(18, c("G", "A")),
        isAny(5, c("C", "A")),
        isAny(14, "G"),
        isAny(15, "A")
    )
    do.call(cbind, features)
}