Browse code

get_transcript_seq c++

Oliver Voogd authored on 29/06/2023 08:54:34
Showing 1 changed files
... ...
@@ -105,7 +105,7 @@ smooth_cigar (const std::vector<CigarPair> &cigar, int threshold)
105 105
 std::string printCigarPairs(const std::vector<CigarPair> &cigar) {
106 106
     std::stringstream ss;
107 107
     ss << cigar[0].len << "-" << cigar[0].op;
108
-    for (int i =1; i < cigar.size(); i++) {
108
+    for (size_t i =1; i < cigar.size(); i++) {
109 109
         ss << "," << cigar[i].len << "-" << cigar[i].op;
110 110
     }
111 111
     return ss.str();
Browse code

added utility files, removed duplicates

Oliver Voogd authored on 29/06/2023 08:17:24
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,112 @@
1
+#include "cigars.h"
2
+
3
+#include <string>
4
+#include <vector>
5
+#include <sstream>
6
+
7
+#include "bam.h"
8
+
9
+/*  take a bam entry,
10
+    populate a vector with all of its CIGAR operations
11
+    this is to mimic the output of bamnostic's default cigar from bamfile.fetch
12
+*/
13
+std::vector<CigarPair>
14
+generate_cigar_pairs(const bam1_t *b)
15
+{
16
+    std::vector<CigarPair>
17
+    cigar_pairs;
18
+
19
+    // iterate over the cigar
20
+    const uint32_t *cigar = bam1_cigar(b);
21
+    for (int k = 0; k < (int)bam1_cigar_len(b); k++) {
22
+        cigar_pairs.push_back((CigarPair){
23
+            (int)bam_cigar_op(cigar[k]),
24
+            (int)bam_cigar_oplen(cigar[k])
25
+        });
26
+    }
27
+    return cigar_pairs;
28
+}
29
+
30
+/*
31
+cigar specification:
32
+(lifted from the original sc_longread.py)
33
+
34
+M   BAM_CMATCH  0
35
+I   BAM_CINS    1
36
+D   BAM_CDEL    2
37
+N   BAM_CREF_SKIP   3
38
+S   BAM_CSOFT_CLIP  4
39
+H   BAM_CHARD_CLIP  5
40
+P   BAM_CPAD    6
41
+=   BAM_CEQUAL  7
42
+X   BAM_CDIFF   8
43
+B   BAM_CBACK   9
44
+
45
+*/
46
+
47
+std::string
48
+generate_cigar (const std::vector<CigarPair> &cigar)
49
+{
50
+  /*
51
+    takes a cigar as a vector of pairs of ints,
52
+    converts it to CIGAR string format
53
+  */
54
+
55
+  const char CIGAR_CODE[] = "MIDNSHP=XB";
56
+  std::string result = "";
57
+
58
+  for (const auto & pair : cigar) {
59
+    result.append(std::to_string(pair.len));
60
+    result.push_back(CIGAR_CODE[pair.op]);
61
+  }
62
+  return result;
63
+}
64
+
65
+/*
66
+takes a cigar as a vector of int pairs,
67
+smooths it down,
68
+returns the smooth cigar as a vector of int pairs
69
+*/
70
+std::vector<CigarPair>
71
+smooth_cigar (const std::vector<CigarPair> &cigar, int threshold)
72
+{
73
+    std::vector<CigarPair> new_cigar = {cigar[0]};
74
+
75
+    for (int i = 1; i < (int)cigar.size(); i++) {
76
+        if (new_cigar.back().op != 0) {
77
+            new_cigar.push_back(cigar[i]);
78
+            continue;
79
+        } 
80
+        
81
+        switch(cigar[i].op) {
82
+            case 0:
83
+                new_cigar.back().len += cigar[i].len;
84
+                break;
85
+            case 1:
86
+                if (cigar[i].len > threshold) {
87
+                    new_cigar.push_back(cigar[i]);
88
+                } 
89
+                break;
90
+            case 2:
91
+                if (cigar[i].len <= threshold) {
92
+                    new_cigar.back().len += cigar[i].len;
93
+                } else {
94
+                    new_cigar.push_back(cigar[i]);
95
+                }
96
+                break;
97
+            default:
98
+                new_cigar.push_back(cigar[i]);
99
+        }
100
+    }
101
+
102
+    return new_cigar;
103
+}
104
+
105
+std::string printCigarPairs(const std::vector<CigarPair> &cigar) {
106
+    std::stringstream ss;
107
+    ss << cigar[0].len << "-" << cigar[0].op;
108
+    for (int i =1; i < cigar.size(); i++) {
109
+        ss << "," << cigar[i].len << "-" << cigar[i].op;
110
+    }
111
+    return ss.str();
112
+}
0 113
\ No newline at end of file