Browse code

Py2 scoring algorithms fixed

fortinj2 authored on 22/06/2022 23:58:51
Showing 7 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: crisprScore
2
-Version: 1.1.1
3
-Date: 2022-04-06
2
+Version: 1.1.2
3
+Date: 2022-06-22
4 4
 Title: On-Target and Off-Target Scoring Algorithms for CRISPR gRNAs
5 5
 Authors@R: c(
6 6
     person("Jean-Philippe", "Fortin", email = "[email protected]", role = c("aut", "cre", "cph")),
... ...
@@ -37,7 +37,6 @@ importFrom(reticulate,import_from_path)
37 37
 importFrom(reticulate,np_array)
38 38
 importFrom(reticulate,py_suppress_warnings)
39 39
 importFrom(reticulate,r_to_py)
40
-importFrom(reticulate,source_python)
41 40
 importFrom(stats,complete.cases)
42 41
 importFrom(stats,predict)
43 42
 importFrom(stats,quantile)
... ...
@@ -51,41 +51,45 @@ getAzimuthScores <- function(sequences, fork=FALSE){
51 51
         stop("Positions 26 and 27 of the sequences must be G",
52 52
              " nucleotides (canonical PAM sequences required).")
53 53
     }
54
-    results <- basiliskRun(env=env_azimuth,
55
-                           shared=FALSE,
56
-                           fork=fork,
57
-                           fun=.azimuth_python, 
58
-                           sequences=sequences)
59
-    return(results)
60
-}
61
-
62
-
63
-#' @importFrom reticulate source_python
64
-#' @importFrom reticulate np_array
65
-#' @importFrom reticulate import_from_path
66
-.azimuth_python <- function(sequences){
67
-
68
-    dir <- system.file("python",
69
-                       "azimuth",
70
-                       package="crisprScore",
71
-                       mustWork=TRUE)
72
-    azimuth <- import_from_path("getAzimuth", dir)
73 54
 
74 55
     df <- data.frame(sequence=sequences,
75 56
                      score=NA_real_,
76 57
                      stringsAsFactors=FALSE)
77 58
     good <- !grepl("N", sequences)
78 59
     sequences.valid <- sequences[good]
79
-    ns <- length(sequences.valid)
80
-    if (ns>0){
81
-        if (ns==1){
82
-            sequences.valid <- rep(sequences.valid,2)
83
-            scores <- azimuth$getAzimuth(np_array(sequences.valid))
60
+
61
+    #Saving to disk:
62
+    dir <- tempdir()
63
+    inputfile  <- file.path(dir, "input.txt")
64
+    outputfile <- file.path(dir, "output.txt")
65
+
66
+      # Ready to get the scores
67
+    env <- basilisk:::.obtainEnvironmentPath(env_azimuth)
68
+    basilisk.utils::activateEnvironment(env)
69
+    programFile <- system.file("python",
70
+                               "azimuth/getAzimuth.py",
71
+                               package="crisprScore",
72
+                               mustWork=TRUE)
73
+    cmd <- paste0("python ",
74
+                  programFile, " ",
75
+                  inputfile, " ",
76
+                  outputfile)
77
+
78
+    if (sum(good)>0){
79
+        if (sum(good)==1){
80
+            sequences.valid <- rep(sequences.valid, 2)
81
+        }
82
+        .dumpToFile(sequences.valid, inputfile)
83
+        system(cmd)
84
+        scores <- read.table(outputfile)[,1]
85
+        if (sum(good)==1){
84 86
             scores <- scores[1]
85
-        } else {
86
-            scores <- azimuth$getAzimuth(np_array(sequences.valid)) 
87 87
         }
88 88
         df$score[good] <- scores
89 89
     }
90
+
90 91
     return(df)
91 92
 }
93
+
94
+
95
+
... ...
@@ -59,35 +59,49 @@ getDeepCpf1Scores <- function(sequences,
59 59
             }, FUN.VALUE="character")
60 60
         }
61 61
     }
62
-    results <- basiliskRun(env=env_deepcpf1,
63
-                           shared=FALSE,
64
-                           fork=fork,
65
-                           fun=.deepcpf1_python, 
66
-                           sequences=sequences)
67
-    return(results)
68
-}
69
-
70
-#' @importFrom reticulate import_from_path
71
-#' @importFrom reticulate np_array
72
-#' @importFrom reticulate py_suppress_warnings
73
-.deepcpf1_python <- function(sequences){
74
-
75
-    dir <- system.file("python",
76
-                       "deepcpf1",
77
-                       package="crisprScore",
78
-                       mustWork=TRUE)
79
-    deepcpf1 <- import_from_path("getDeepCpf1", path=dir)
80
-    
62
+  
63
+    #Output data.frame
81 64
     df <- data.frame(sequence=sequences,
82 65
                      score=NA_real_,
83 66
                      stringsAsFactors=FALSE)
84 67
     good <- !grepl("N", sequences)
85 68
     sequences.valid <- sequences[good]
86
-    if (length(sequences.valid)>0){
87
-        sequences_array <- np_array(sequences.valid)
88
-        scores <- py_suppress_warnings(deepcpf1$getDeepCpf1(sequences_array))
69
+
70
+    #Saving to disk:
71
+    dir <- tempdir()
72
+    inputfile  <- file.path(dir, "input.txt")
73
+    outputfile <- file.path(dir, "output.txt")
74
+   
75
+    # Ready to get the scores
76
+    env <- basilisk:::.obtainEnvironmentPath(env_deepcpf1)
77
+    basilisk.utils::activateEnvironment(env)
78
+    programFile <- system.file("python",
79
+                               "deepcpf1/getDeepCpf1.py",
80
+                               package="crisprScore",
81
+                               mustWork=TRUE)
82
+    cmd <- paste0("python ",
83
+                  programFile, " ",
84
+                  inputfile, " ",
85
+                  outputfile)
86
+    if (sum(good)>0){
87
+        .dumpToFile(sequences.valid, inputfile)
88
+        system(cmd)
89
+        scores <- read.table(outputfile)[,1]
89 90
         scores <- scores/100
90 91
         df$score[good] <- scores
91
-    } 
92
+    }
93
+
92 94
     return(df)
93 95
 }
96
+
97
+.dumpToFile <- function(sequences, file){
98
+    write.table(sequences,
99
+              file=file,
100
+              quote=FALSE,
101
+              col.names=FALSE,
102
+              row.names=FALSE)
103
+}
104
+
105
+
106
+
107
+
... ...
@@ -1,7 +1,13 @@
1
+#sys.argv[1] should be the path of the file with input sequences
2
+#sys.argv[2] should be the path of the file where the predicted scores are saved
1 3
 import sys
2 4
 import azimuth.model_comparison
3 5
 import numpy as np 
4 6
 
5 7
 def getAzimuth(sequences):
6 8
 	predictions = azimuth.model_comparison.predict(sequences, None, None)
7
-	return predictions
8 9
\ No newline at end of file
10
+	return predictions
11
+
12
+sequences = np.loadtxt(sys.argv[1], dtype="U34", ndmin=1)
13
+scores = getAzimuth(sequences)
14
+np.savetxt(sys.argv[2], scores)
... ...
@@ -1,3 +1,5 @@
1
+#sys.argv[1] should be the path of the file with input sequences
2
+#sys.argv[2] should be the path of the file where the predicted scores are saved
1 3
 import os
2 4
 import numpy as np
3 5
 import warnings
... ...
@@ -8,4 +10,8 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
8 10
 
9 11
 def getDeepCpf1(sequences):	
10 12
 	results = deepcpf1(sequences)
11
-	return results
12 13
\ No newline at end of file
14
+	return results
15
+
16
+sequences = np.loadtxt(sys.argv[1], dtype="U34", ndmin=1)
17
+scores = getDeepCpf1(sequences)
18
+np.savetxt(sys.argv[2], scores)
13 19
\ No newline at end of file
... ...
@@ -58,6 +58,7 @@ supported at the moment.
58 58
 \details{
59 59
 \code{tss_df} details:
60 60
 This must be a \code{data.frame} that contains the following columns:
61
+* tss_id: string specifying name of the TSS.
61 62
 * gene_symbol: string specifying HGNC/HUGO gene identifier.
62 63
 * promoter: string specifying promoter ID (e.g. "P1" or "P2").
63 64
 * transcripts: Ensembl transcript identifier.