Browse code

Linting

shians authored on 29/02/2024 05:18:23
Showing 1 changed files
... ...
@@ -15,7 +15,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
15 15
             chr = factor(reads$rname),
16 16
             pos = reads$pos,
17 17
             width = Biostrings::width(reads$seq),
18
-            center = as.integer(.data$pos + .data$width/2),
18
+            center = as.integer(.data$pos + .data$width / 2),
19 19
             cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp),
20 20
             crossings = purrr::map_int(reads$tag$ML, count_crossings),
21 21
             entropy = .data$crossings / .data$cg_count
Browse code

Updated tests

shians authored on 13/10/2023 00:31:06
Showing 1 changed files
... ...
@@ -1,4 +1,5 @@
1 1
 #' @importFrom Biostrings width
2
+# nocov start
2 3
 get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
3 4
     # helper functions ----
4 5
     count_crossings <- function(numbers) {
... ...
@@ -48,3 +49,4 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
48 49
 
49 50
     bind_rows(df_list)
50 51
 }
52
+# nocov end
Browse code

Added dependencies

Shians authored on 27/06/2023 02:00:44
Showing 1 changed files
... ...
@@ -1,3 +1,4 @@
1
+#' @importFrom Biostrings width
1 2
 get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
2 3
     # helper functions ----
3 4
     count_crossings <- function(numbers) {
... ...
@@ -13,10 +14,10 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
13 14
             chr = factor(reads$rname),
14 15
             pos = reads$pos,
15 16
             width = Biostrings::width(reads$seq),
16
-            center = as.integer(pos + width/2),
17
+            center = as.integer(.data$pos + .data$width/2),
17 18
             cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp),
18 19
             crossings = purrr::map_int(reads$tag$ML, count_crossings),
19
-            entropy = crossings / cg_count
20
+            entropy = .data$crossings / .data$cg_count
20 21
         )
21 22
     }
22 23
 
Browse code

Refactored modbam reading

Shians authored on 22/06/2023 07:40:28
Showing 1 changed files
... ...
@@ -5,10 +5,6 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
5 5
         return(crossings)
6 6
     }
7 7
 
8
-    read_chunk <- function(bam_file) {
9
-        Rsamtools::scanBam(bam_file, param = modbam_param())[[1]]
10
-    }
11
-
12 8
     parse_chunk <- function(reads, sample) {
13 9
         reads <- reads[!is.null(reads$seq)]
14 10
         tibble::tibble(
... ...
@@ -42,7 +38,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
42 38
     df_list <- list()
43 39
     open(bam_file)
44 40
     while (Rsamtools::isIncomplete(bam_file)) {
45
-        reads <- read_chunk(bam_file)
41
+        reads <- read_bam(bam_file)[[1]]
46 42
         df_list[[i]] <- parse_chunk(reads, sample)
47 43
         i <- i + 1
48 44
         cli::cli_progress_update(length(reads[[1]]))
Browse code

Fixed stuck progress bar

Shians authored on 09/06/2023 06:53:24
Showing 1 changed files
... ...
@@ -45,7 +45,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
45 45
         reads <- read_chunk(bam_file)
46 46
         df_list[[i]] <- parse_chunk(reads, sample)
47 47
         i <- i + 1
48
-        cli::cli_progress_update(length(reads[[1]][[1]]))
48
+        cli::cli_progress_update(length(reads[[1]]))
49 49
     }
50 50
     close(bam_file)
51 51
 
Browse code

Fixed incorrect function name

Shians authored on 09/06/2023 06:31:11
Showing 1 changed files
... ...
@@ -18,7 +18,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
18 18
             pos = reads$pos,
19 19
             width = Biostrings::width(reads$seq),
20 20
             center = as.integer(pos + width/2),
21
-            cg_count = purrr::map_int(as.character(reads$seq), count_cg),
21
+            cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp),
22 22
             crossings = purrr::map_int(reads$tag$ML, count_crossings),
23 23
             entropy = crossings / cg_count
24 24
         )
Browse code

Added missing namespace

Shians authored on 09/06/2023 06:25:46
Showing 1 changed files
... ...
@@ -18,7 +18,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
18 18
             pos = reads$pos,
19 19
             width = Biostrings::width(reads$seq),
20 20
             center = as.integer(pos + width/2),
21
-            cg_count = map_int(as.character(reads$seq), count_cg),
21
+            cg_count = purrr::map_int(as.character(reads$seq), count_cg),
22 22
             crossings = purrr::map_int(reads$tag$ML, count_crossings),
23 23
             entropy = crossings / cg_count
24 24
         )
Browse code

Added get_read_entropy feature

Shians authored on 09/06/2023 03:02:40
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,53 @@
1
+get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
2
+    # helper functions ----
3
+    count_crossings <- function(numbers) {
4
+        crossings <- sum(((numbers[-1] - 127.5) * (numbers[-length(numbers)] - 127.5)) < 0)
5
+        return(crossings)
6
+    }
7
+
8
+    read_chunk <- function(bam_file) {
9
+        Rsamtools::scanBam(bam_file, param = modbam_param())[[1]]
10
+    }
11
+
12
+    parse_chunk <- function(reads, sample) {
13
+        reads <- reads[!is.null(reads$seq)]
14
+        tibble::tibble(
15
+            sample = factor(sample),
16
+            read_name = reads$qname,
17
+            chr = factor(reads$rname),
18
+            pos = reads$pos,
19
+            width = Biostrings::width(reads$seq),
20
+            center = as.integer(pos + width/2),
21
+            cg_count = map_int(as.character(reads$seq), count_cg),
22
+            crossings = purrr::map_int(reads$tag$ML, count_crossings),
23
+            entropy = crossings / cg_count
24
+        )
25
+    }
26
+
27
+    # function body ----
28
+    fname <- fs::path_file(bam_path)
29
+        cli::cli_progress_bar(
30
+            glue::glue("Parsing file: {fname}"),
31
+            total = get_bam_total_reads(bam_path),
32
+            format_done = paste0(
33
+                "{.alert-success Data parsed: ", fname, " {.timestamp {cli::pb_elapsed}}}"),
34
+            format_failed = paste0(
35
+                "{.alert-danger Data parsing failed: ", fname, " {.timestamp {cli::pb_elapsed}}}"),
36
+            clear = FALSE
37
+        )
38
+
39
+    bam_file <- Rsamtools::BamFile(bam_path, yieldSize = 15000)
40
+
41
+    i <- 1
42
+    df_list <- list()
43
+    open(bam_file)
44
+    while (Rsamtools::isIncomplete(bam_file)) {
45
+        reads <- read_chunk(bam_file)
46
+        df_list[[i]] <- parse_chunk(reads, sample)
47
+        i <- i + 1
48
+        cli::cli_progress_update(length(reads[[1]][[1]]))
49
+    }
50
+    close(bam_file)
51
+
52
+    bind_rows(df_list)
53
+}