Browse code

Started integrating crisprRater

fortinj2 authored on 22/06/2022 21:34:48
Showing 1 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,44 @@
1
+
2
+
3
+# Must be 20nt long
4
+#spacers <- c("GGTGCTGATGCTGTGTGATG",
5
+#             "GGTGCTGATAAAGTGTGATG")
6
# Score spacer sequences with the linear model defined by `model_weights`
# and `model_offset`.
#
# spacers: spacer sequences; passed unchanged to .extractFeatures().
#
# Returns the model offset plus the weighted sum of each spacer's features.
# The value keeps the shape produced by the matrix product: a single-row
# matrix with one column per row of the feature matrix.
.getModelScore <- function(spacers){
    featureMatrix <- .extractFeatures(spacers)
    weightedSum <- model_weights %*% t(featureMatrix)
    model_offset + weightedSum
}
11
+
12
# Intercept of the linear scoring model used by .getModelScore().
model_offset <- 0.6505037

# Coefficients of the linear scoring model used by .getModelScore().
# There is one coefficient per feature column, and the order must match
# the column order of the matrix returned by .extractFeatures().
model_weights <- c(
    0.14177385,    # column 1:  GC-content feature
    0.06966514,    # column 2:  position 20 (G)
    0.04216254,    # column 3:  position 3 (T/A)
    0.03303432,    # column 4:  position 12 (G/A)
    0.02355430,    # column 5:  position 6 (G)
    -0.04746424,   # column 6:  position 4 (T/A)
    -0.04878001,   # column 7:  position 18 (G/A)
    -0.06981921,   # column 8:  position 5 (C/A)
    -0.07087756,   # column 9:  position 14 (G)
    -0.08160700    # column 10: position 15 (A)
)
23
+
24
+
25
#' Build the feature matrix consumed by the linear scoring model
#'
#' Converts each spacer into the 10 features that are multiplied by
#' `model_weights` in `.getModelScore()`: one GC-content fraction followed
#' by nine position-specific nucleotide indicators.
#'
#' @param spacers Character vector of spacer sequences. Every sequence
#'     must be exactly 20 nt long.
#' @return A numeric matrix with one row per spacer and 10 columns, in the
#'     order expected by `model_weights`. Indicator columns are 1 when the
#'     nucleotide condition holds and 0 otherwise. Zero spacers give a
#'     0 x 10 matrix.
#' @noRd
#' @importFrom Biostrings DNAStringSet letterFrequency
.extractFeatures <- function(spacers){
    # Only coerce non-character input: as.character() on a character
    # vector would strip its names.
    if (!is.character(spacers)){
        spacers <- as.character(spacers)
    }
    if (length(spacers) == 0L){
        return(matrix(numeric(0), nrow=0L, ncol=10L))
    }
    # Position-based features below index columns up to 20, so enforce the
    # documented length instead of failing later with a subscript error
    # (or silently scoring sequences of another length).
    if (anyNA(spacers) || any(nchar(spacers) != 20L)){
        stop("All spacers must be exactly 20 nt long.", call.=FALSE)
    }

    # GC fraction over the 10-nt window at positions 4-13. The denominator
    # is derived from the window so the count and the divisor cannot drift
    # apart (previously 11 nt were counted but divided by 10).
    gcWindow <- c(4L, 13L)
    gcWidth <- gcWindow[2] - gcWindow[1] + 1L
    core <- DNAStringSet(substr(spacers, gcWindow[1], gcWindow[2]))
    gc <- rowSums(letterFrequency(core, c("G", "C")))/gcWidth

    # One row per spacer, one column per nucleotide position.
    mat <- as.matrix(DNAStringSet(spacers))

    # Always select a whole column (one value per spacer). A bare `mat[k]`
    # is a single cell and would be recycled across all spacers.
    nt <- function(pos) mat[, pos, drop=FALSE]

    features <- list(
        gc,
        nt(20) == "G",
        nt(3) == "T" | nt(3) == "A",
        nt(12) == "G" | nt(12) == "A",
        nt(6) == "G",
        nt(4) == "T" | nt(4) == "A",
        nt(18) == "G" | nt(18) == "A",
        nt(5) == "C" | nt(5) == "A",
        nt(14) == "G",
        nt(15) == "A"
    )
    # cbind() with the numeric `gc` column coerces the logical indicator
    # columns to 0/1.
    do.call(cbind, features)
}