... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: crisprScore |
2 |
-Version: 1.1.1 |
|
3 |
-Date: 2022-04-06 |
|
2 |
+Version: 1.1.2 |
|
3 |
+Date: 2022-06-22 |
|
4 | 4 |
Title: On-Target and Off-Target Scoring Algorithms for CRISPR gRNAs |
5 | 5 |
Authors@R: c( |
6 | 6 |
person("Jean-Philippe", "Fortin", email = "[email protected]", role = c("aut", "cre", "cph")), |
... | ... |
@@ -37,7 +37,6 @@ importFrom(reticulate,import_from_path) |
37 | 37 |
importFrom(reticulate,np_array) |
38 | 38 |
importFrom(reticulate,py_suppress_warnings) |
39 | 39 |
importFrom(reticulate,r_to_py) |
40 |
-importFrom(reticulate,source_python) |
|
41 | 40 |
importFrom(stats,complete.cases) |
42 | 41 |
importFrom(stats,predict) |
43 | 42 |
importFrom(stats,quantile) |
... | ... |
@@ -51,41 +51,45 @@ getAzimuthScores <- function(sequences, fork=FALSE){ |
51 | 51 |
stop("Positions 26 and 27 of the sequences must be G", |
52 | 52 |
" nucleotides (canonical PAM sequences required).") |
53 | 53 |
} |
54 |
- results <- basiliskRun(env=env_azimuth, |
|
55 |
- shared=FALSE, |
|
56 |
- fork=fork, |
|
57 |
- fun=.azimuth_python, |
|
58 |
- sequences=sequences) |
|
59 |
- return(results) |
|
60 |
-} |
|
61 |
- |
|
62 |
- |
|
63 |
-#' @importFrom reticulate source_python |
|
64 |
-#' @importFrom reticulate np_array |
|
65 |
-#' @importFrom reticulate import_from_path |
|
66 |
-.azimuth_python <- function(sequences){ |
|
67 |
- |
|
68 |
- dir <- system.file("python", |
|
69 |
- "azimuth", |
|
70 |
- package="crisprScore", |
|
71 |
- mustWork=TRUE) |
|
72 |
- azimuth <- import_from_path("getAzimuth", dir) |
|
73 | 54 |
|
74 | 55 |
df <- data.frame(sequence=sequences, |
75 | 56 |
score=NA_real_, |
76 | 57 |
stringsAsFactors=FALSE) |
77 | 58 |
good <- !grepl("N", sequences) |
78 | 59 |
sequences.valid <- sequences[good] |
79 |
- ns <- length(sequences.valid) |
|
80 |
- if (ns>0){ |
|
81 |
- if (ns==1){ |
|
82 |
- sequences.valid <- rep(sequences.valid,2) |
|
83 |
- scores <- azimuth$getAzimuth(np_array(sequences.valid)) |
|
60 |
+ |
|
61 |
+ #Saving to disk: |
|
62 |
+ dir <- tempdir() |
|
63 |
+ inputfile <- file.path(dir, "input.txt") |
|
64 |
+ outputfile <- file.path(dir, "output.txt") |
|
65 |
+ |
|
66 |
+ # Ready to get the scores |
|
67 |
+ env <- basilisk:::.obtainEnvironmentPath(env_azimuth) |
|
68 |
+ basilisk.utils::activateEnvironment(env) |
|
69 |
+ programFile <- system.file("python", |
|
70 |
+ "azimuth/getAzimuth.py", |
|
71 |
+ package="crisprScore", |
|
72 |
+ mustWork=TRUE) |
|
73 |
+ cmd <- paste0("python ", |
|
74 |
+ programFile, " ", |
|
75 |
+ inputfile, " ", |
|
76 |
+ outputfile) |
|
77 |
+ |
|
78 |
+ if (sum(good)>0){ |
|
79 |
+ if (sum(good)==1){ |
|
80 |
+ sequences.valid <- rep(sequences.valid, 2) |
|
81 |
+ } |
|
82 |
+ .dumpToFile(sequences.valid, inputfile) |
|
83 |
+ system(cmd) |
|
84 |
+ scores <- read.table(outputfile)[,1] |
|
85 |
+ if (sum(good)==1){ |
|
84 | 86 |
scores <- scores[1] |
85 |
- } else { |
|
86 |
- scores <- azimuth$getAzimuth(np_array(sequences.valid)) |
|
87 | 87 |
} |
88 | 88 |
df$score[good] <- scores |
89 | 89 |
} |
90 |
+ |
|
90 | 91 |
return(df) |
91 | 92 |
} |
93 |
+ |
|
94 |
+ |
|
95 |
+ |
... | ... |
@@ -59,35 +59,49 @@ getDeepCpf1Scores <- function(sequences, |
59 | 59 |
}, FUN.VALUE="character") |
60 | 60 |
} |
61 | 61 |
} |
62 |
- results <- basiliskRun(env=env_deepcpf1, |
|
63 |
- shared=FALSE, |
|
64 |
- fork=fork, |
|
65 |
- fun=.deepcpf1_python, |
|
66 |
- sequences=sequences) |
|
67 |
- return(results) |
|
68 |
-} |
|
69 |
- |
|
70 |
-#' @importFrom reticulate import_from_path |
|
71 |
-#' @importFrom reticulate np_array |
|
72 |
-#' @importFrom reticulate py_suppress_warnings |
|
73 |
-.deepcpf1_python <- function(sequences){ |
|
74 |
- |
|
75 |
- dir <- system.file("python", |
|
76 |
- "deepcpf1", |
|
77 |
- package="crisprScore", |
|
78 |
- mustWork=TRUE) |
|
79 |
- deepcpf1 <- import_from_path("getDeepCpf1", path=dir) |
|
80 |
- |
|
62 |
+ |
|
63 |
+ #Output data.frame |
|
81 | 64 |
df <- data.frame(sequence=sequences, |
82 | 65 |
score=NA_real_, |
83 | 66 |
stringsAsFactors=FALSE) |
84 | 67 |
good <- !grepl("N", sequences) |
85 | 68 |
sequences.valid <- sequences[good] |
86 |
- if (length(sequences.valid)>0){ |
|
87 |
- sequences_array <- np_array(sequences.valid) |
|
88 |
- scores <- py_suppress_warnings(deepcpf1$getDeepCpf1(sequences_array)) |
|
69 |
+ |
|
70 |
+ #Saving to disk: |
|
71 |
+ dir <- tempdir() |
|
72 |
+ inputfile <- file.path(dir, "input.txt") |
|
73 |
+ outputfile <- file.path(dir, "output.txt") |
|
74 |
+ |
|
75 |
+ # Ready to get the scores |
|
76 |
+ env <- basilisk:::.obtainEnvironmentPath(env_deepcpf1) |
|
77 |
+ basilisk.utils::activateEnvironment(env) |
|
78 |
+ programFile <- system.file("python", |
|
79 |
+ "deepcpf1/getDeepCpf1.py", |
|
80 |
+ package="crisprScore", |
|
81 |
+ mustWork=TRUE) |
|
82 |
+ cmd <- paste0("python ", |
|
83 |
+ programFile, " ", |
|
84 |
+ inputfile, " ", |
|
85 |
+ outputfile) |
|
86 |
+ if (sum(good)>0){ |
|
87 |
+ .dumpToFile(sequences.valid, inputfile) |
|
88 |
+ system(cmd) |
|
89 |
+ scores <- read.table(outputfile)[,1] |
|
89 | 90 |
scores <- scores/100 |
90 | 91 |
df$score[good] <- scores |
91 |
- } |
|
92 |
+ } |
|
93 |
+ |
|
92 | 94 |
return(df) |
93 | 95 |
} |
96 |
+ |
|
97 |
+.dumpToFile <- function(sequences, file){ |
|
98 |
+ write.table(sequences, |
|
99 |
+ file=file, |
|
100 |
+ quote=FALSE, |
|
101 |
+ col.names=FALSE, |
|
102 |
+ row.names=FALSE) |
|
103 |
+} |
|
104 |
+ |
|
105 |
+ |
|
106 |
+ |
|
107 |
+ |
... | ... |
@@ -1,7 +1,13 @@ |
1 |
+#sys.argv[1] should be the path of the file with input sequences |
|
2 |
+#sys.argv[2] should be the path of the file where the scores will be saved |
|
1 | 3 |
import sys |
2 | 4 |
import azimuth.model_comparison |
3 | 5 |
import numpy as np |
4 | 6 |
|
5 | 7 |
def getAzimuth(sequences): |
6 | 8 |
predictions = azimuth.model_comparison.predict(sequences, None, None) |
7 |
- return predictions |
|
8 | 9 |
\ No newline at end of file |
10 |
+ return predictions |
|
11 |
+ |
|
12 |
+sequences = np.loadtxt(sys.argv[1], dtype="U34", ndmin=1) |
|
13 |
+scores = getAzimuth(sequences) |
|
14 |
+np.savetxt(sys.argv[2], scores) |
... | ... |
@@ -1,3 +1,5 @@ |
1 |
+#sys.argv[1] should be the path of the file with input sequences |
|
2 |
+#sys.argv[2] should be the path of the file where the scores will be saved |
|
1 | 3 |
import os |
2 | 4 |
import numpy as np |
3 | 5 |
import warnings |
... | ... |
@@ -8,4 +10,8 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' |
8 | 10 |
|
9 | 11 |
def getDeepCpf1(sequences): |
10 | 12 |
results = deepcpf1(sequences) |
11 |
- return results |
|
12 | 13 |
\ No newline at end of file |
14 |
+ return results |
|
15 |
+ |
|
16 |
+sequences = np.loadtxt(sys.argv[1], dtype="U34", ndmin=1) |
|
17 |
+scores = getDeepCpf1(sequences) |
|
18 |
+np.savetxt(sys.argv[2], scores) |
|
13 | 19 |
\ No newline at end of file |
... | ... |
@@ -58,6 +58,7 @@ supported at the moment. |
58 | 58 |
\details{ |
59 | 59 |
\code{tss_df} details: |
60 | 60 |
This must be a \code{data.frame} that contains the following columns: |
61 |
+* tss_id: string specifying name of the TSS. |
|
61 | 62 |
* gene_symbol: string specifying HGNC/HUGO gene identifier. |
62 | 63 |
* promoter: string specifying promoter ID (e.g. "P1" or "P2"). |
63 | 64 |
* transcripts: Ensembl transcript identifier. |