... | ... |
@@ -15,7 +15,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
15 | 15 |
chr = factor(reads$rname), |
16 | 16 |
pos = reads$pos, |
17 | 17 |
width = Biostrings::width(reads$seq), |
18 |
- center = as.integer(.data$pos + .data$width/2), |
|
18 |
+ center = as.integer(.data$pos + .data$width / 2), |
|
19 | 19 |
cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp), |
20 | 20 |
crossings = purrr::map_int(reads$tag$ML, count_crossings), |
21 | 21 |
entropy = .data$crossings / .data$cg_count |
... | ... |
@@ -1,4 +1,5 @@ |
1 | 1 |
#' @importFrom Biostrings width |
2 |
+# nocov start |
|
2 | 3 |
get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
3 | 4 |
# helper functions ---- |
4 | 5 |
count_crossings <- function(numbers) { |
... | ... |
@@ -48,3 +49,4 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
48 | 49 |
|
49 | 50 |
bind_rows(df_list) |
50 | 51 |
} |
52 |
+# nocov end |
... | ... |
@@ -1,3 +1,4 @@ |
1 |
+#' @importFrom Biostrings width |
|
1 | 2 |
get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
2 | 3 |
# helper functions ---- |
3 | 4 |
count_crossings <- function(numbers) { |
... | ... |
@@ -13,10 +14,10 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
13 | 14 |
chr = factor(reads$rname), |
14 | 15 |
pos = reads$pos, |
15 | 16 |
width = Biostrings::width(reads$seq), |
16 |
- center = as.integer(pos + width/2), |
|
17 |
+ center = as.integer(.data$pos + .data$width/2), |
|
17 | 18 |
cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp), |
18 | 19 |
crossings = purrr::map_int(reads$tag$ML, count_crossings), |
19 |
- entropy = crossings / cg_count |
|
20 |
+ entropy = .data$crossings / .data$cg_count |
|
20 | 21 |
) |
21 | 22 |
} |
22 | 23 |
|
... | ... |
@@ -5,10 +5,6 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
5 | 5 |
return(crossings) |
6 | 6 |
} |
7 | 7 |
|
8 |
- read_chunk <- function(bam_file) { |
|
9 |
- Rsamtools::scanBam(bam_file, param = modbam_param())[[1]] |
|
10 |
- } |
|
11 |
- |
|
12 | 8 |
parse_chunk <- function(reads, sample) { |
13 | 9 |
reads <- reads[!is.null(reads$seq)] |
14 | 10 |
tibble::tibble( |
... | ... |
@@ -42,7 +38,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
42 | 38 |
df_list <- list() |
43 | 39 |
open(bam_file) |
44 | 40 |
while (Rsamtools::isIncomplete(bam_file)) { |
45 |
- reads <- read_chunk(bam_file) |
|
41 |
+ reads <- read_bam(bam_file)[[1]] |
|
46 | 42 |
df_list[[i]] <- parse_chunk(reads, sample) |
47 | 43 |
i <- i + 1 |
48 | 44 |
cli::cli_progress_update(length(reads[[1]])) |
... | ... |
@@ -45,7 +45,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
45 | 45 |
reads <- read_chunk(bam_file) |
46 | 46 |
df_list[[i]] <- parse_chunk(reads, sample) |
47 | 47 |
i <- i + 1 |
48 |
- cli::cli_progress_update(length(reads[[1]][[1]])) |
|
48 |
+ cli::cli_progress_update(length(reads[[1]])) |
|
49 | 49 |
} |
50 | 50 |
close(bam_file) |
51 | 51 |
|
... | ... |
@@ -18,7 +18,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
18 | 18 |
pos = reads$pos, |
19 | 19 |
width = Biostrings::width(reads$seq), |
20 | 20 |
center = as.integer(pos + width/2), |
21 |
- cg_count = purrr::map_int(as.character(reads$seq), count_cg), |
|
21 |
+ cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp), |
|
22 | 22 |
crossings = purrr::map_int(reads$tag$ML, count_crossings), |
23 | 23 |
entropy = crossings / cg_count |
24 | 24 |
) |
... | ... |
@@ -18,7 +18,7 @@ get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
18 | 18 |
pos = reads$pos, |
19 | 19 |
width = Biostrings::width(reads$seq), |
20 | 20 |
center = as.integer(pos + width/2), |
21 |
- cg_count = map_int(as.character(reads$seq), count_cg), |
|
21 |
+ cg_count = purrr::map_int(as.character(reads$seq), count_cg), |
|
22 | 22 |
crossings = purrr::map_int(reads$tag$ML, count_crossings), |
23 | 23 |
entropy = crossings / cg_count |
24 | 24 |
) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
1 |
+get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) { |
|
2 |
+ # helper functions ---- |
|
3 |
+ count_crossings <- function(numbers) { |
|
4 |
+ crossings <- sum(((numbers[-1] - 127.5) * (numbers[-length(numbers)] - 127.5)) < 0) |
|
5 |
+ return(crossings) |
|
6 |
+ } |
|
7 |
+ |
|
8 |
+ read_chunk <- function(bam_file) { |
|
9 |
+ Rsamtools::scanBam(bam_file, param = modbam_param())[[1]] |
|
10 |
+ } |
|
11 |
+ |
|
12 |
+ parse_chunk <- function(reads, sample) { |
|
13 |
+ reads <- reads[!is.null(reads$seq)] |
|
14 |
+ tibble::tibble( |
|
15 |
+ sample = factor(sample), |
|
16 |
+ read_name = reads$qname, |
|
17 |
+ chr = factor(reads$rname), |
|
18 |
+ pos = reads$pos, |
|
19 |
+ width = Biostrings::width(reads$seq), |
|
20 |
+ center = as.integer(pos + width/2), |
|
21 |
+ cg_count = map_int(as.character(reads$seq), count_cg), |
|
22 |
+ crossings = purrr::map_int(reads$tag$ML, count_crossings), |
|
23 |
+ entropy = crossings / cg_count |
|
24 |
+ ) |
|
25 |
+ } |
|
26 |
+ |
|
27 |
+ # function body ---- |
|
28 |
+ fname <- fs::path_file(bam_path) |
|
29 |
+ cli::cli_progress_bar( |
|
30 |
+ glue::glue("Parsing file: {fname}"), |
|
31 |
+ total = get_bam_total_reads(bam_path), |
|
32 |
+ format_done = paste0( |
|
33 |
+ "{.alert-success Data parsed: ", fname, " {.timestamp {cli::pb_elapsed}}}"), |
|
34 |
+ format_failed = paste0( |
|
35 |
+ "{.alert-danger Data parsing failed: ", fname, " {.timestamp {cli::pb_elapsed}}}"), |
|
36 |
+ clear = FALSE |
|
37 |
+ ) |
|
38 |
+ |
|
39 |
+ bam_file <- Rsamtools::BamFile(bam_path, yieldSize = 15000) |
|
40 |
+ |
|
41 |
+ i <- 1 |
|
42 |
+ df_list <- list() |
|
43 |
+ open(bam_file) |
|
44 |
+ while (Rsamtools::isIncomplete(bam_file)) { |
|
45 |
+ reads <- read_chunk(bam_file) |
|
46 |
+ df_list[[i]] <- parse_chunk(reads, sample) |
|
47 |
+ i <- i + 1 |
|
48 |
+ cli::cli_progress_update(length(reads[[1]][[1]])) |
|
49 |
+ } |
|
50 |
+ close(bam_file) |
|
51 |
+ |
|
52 |
+ bind_rows(df_list) |
|
53 |
+} |