... | ... |
@@ -105,7 +105,7 @@ smooth_cigar (const std::vector<CigarPair> &cigar, int threshold) |
105 | 105 |
std::string printCigarPairs(const std::vector<CigarPair> &cigar) { |
106 | 106 |
std::stringstream ss; |
107 | 107 |
ss << cigar[0].len << "-" << cigar[0].op; |
108 |
- for (int i =1; i < cigar.size(); i++) { |
|
108 |
+ for (size_t i =1; i < cigar.size(); i++) { |
|
109 | 109 |
ss << "," << cigar[i].len << "-" << cigar[i].op; |
110 | 110 |
} |
111 | 111 |
return ss.str(); |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,112 @@ |
1 |
+#include "cigars.h" |
|
2 |
+ |
|
3 |
+#include <string> |
|
4 |
+#include <vector> |
|
5 |
+#include <sstream> |
|
6 |
+ |
|
7 |
+#include "bam.h" |
|
8 |
+ |
|
9 |
+/* take a bam entry, |
|
10 |
+ populate a vector with all of its CIGAR operations |
|
11 |
+ this is to mimic the output of bamnostic's default cigar from bamfile.fetch |
|
12 |
+*/ |
|
13 |
+std::vector<CigarPair> |
|
14 |
+generate_cigar_pairs(const bam1_t *b) |
|
15 |
+{ |
|
16 |
+ std::vector<CigarPair> |
|
17 |
+ cigar_pairs; |
|
18 |
+ |
|
19 |
+ // iterate over the cigar |
|
20 |
+ const uint32_t *cigar = bam1_cigar(b); |
|
21 |
+ for (int k = 0; k < (int)bam1_cigar_len(b); k++) { |
|
22 |
+ cigar_pairs.push_back((CigarPair){ |
|
23 |
+ (int)bam_cigar_op(cigar[k]), |
|
24 |
+ (int)bam_cigar_oplen(cigar[k]) |
|
25 |
+ }); |
|
26 |
+ } |
|
27 |
+ return cigar_pairs; |
|
28 |
+} |
|
29 |
+ |
|
30 |
+/* |
|
31 |
+cigar specification: |
|
32 |
+(lifted from the original sc_longread.py) |
|
33 |
+ |
|
34 |
+M BAM_CMATCH 0 |
|
35 |
+I BAM_CINS 1 |
|
36 |
+D BAM_CDEL 2 |
|
37 |
+N BAM_CREF_SKIP 3 |
|
38 |
+S BAM_CSOFT_CLIP 4 |
|
39 |
+H BAM_CHARD_CLIP 5 |
|
40 |
+P BAM_CPAD 6 |
|
41 |
+= BAM_CEQUAL 7 |
|
42 |
+X BAM_CDIFF 8 |
|
43 |
+B BAM_CBACK 9 |
|
44 |
+ |
|
45 |
+*/ |
|
46 |
+ |
|
47 |
+std::string |
|
48 |
+generate_cigar (const std::vector<CigarPair> &cigar) |
|
49 |
+{ |
|
50 |
+ /* |
|
51 |
+ takes a cigar as a vector of pairs of ints, |
|
52 |
+ converts it to CIGAR string format |
|
53 |
+ */ |
|
54 |
+ |
|
55 |
+ const char CIGAR_CODE[] = "MIDNSHP=XB"; |
|
56 |
+ std::string result = ""; |
|
57 |
+ |
|
58 |
+ for (const auto & pair : cigar) { |
|
59 |
+ result.append(std::to_string(pair.len)); |
|
60 |
+ result.push_back(CIGAR_CODE[pair.op]); |
|
61 |
+ } |
|
62 |
+ return result; |
|
63 |
+} |
|
64 |
+ |
|
65 |
+/* |
|
66 |
+takes a cigar as a vector of int pairs, |
|
67 |
+smooths it down, |
|
68 |
+returns the smooth cigar as a vector of int pairs |
|
69 |
+*/ |
|
70 |
+std::vector<CigarPair> |
|
71 |
+smooth_cigar (const std::vector<CigarPair> &cigar, int threshold) |
|
72 |
+{ |
|
73 |
+ std::vector<CigarPair> new_cigar = {cigar[0]}; |
|
74 |
+ |
|
75 |
+ for (int i = 1; i < (int)cigar.size(); i++) { |
|
76 |
+ if (new_cigar.back().op != 0) { |
|
77 |
+ new_cigar.push_back(cigar[i]); |
|
78 |
+ continue; |
|
79 |
+ } |
|
80 |
+ |
|
81 |
+ switch(cigar[i].op) { |
|
82 |
+ case 0: |
|
83 |
+ new_cigar.back().len += cigar[i].len; |
|
84 |
+ break; |
|
85 |
+ case 1: |
|
86 |
+ if (cigar[i].len > threshold) { |
|
87 |
+ new_cigar.push_back(cigar[i]); |
|
88 |
+ } |
|
89 |
+ break; |
|
90 |
+ case 2: |
|
91 |
+ if (cigar[i].len <= threshold) { |
|
92 |
+ new_cigar.back().len += cigar[i].len; |
|
93 |
+ } else { |
|
94 |
+ new_cigar.push_back(cigar[i]); |
|
95 |
+ } |
|
96 |
+ break; |
|
97 |
+ default: |
|
98 |
+ new_cigar.push_back(cigar[i]); |
|
99 |
+ } |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ return new_cigar; |
|
103 |
+} |
|
104 |
+ |
|
105 |
+std::string printCigarPairs(const std::vector<CigarPair> &cigar) { |
|
106 |
+ std::stringstream ss; |
|
107 |
+ ss << cigar[0].len << "-" << cigar[0].op; |
|
108 |
+ for (int i =1; i < cigar.size(); i++) { |
|
109 |
+ ss << "," << cigar[i].len << "-" << cigar[i].op; |
|
110 |
+ } |
|
111 |
+ return ss.str(); |
|
112 |
+} |
|
0 | 113 |
\ No newline at end of file |