src/utility/cigars.cpp
6882b281
 #include "cigars.h"
 
 #include <string>
 #include <vector>
 #include <sstream>
 
 #include "bam.h"
 
 /*  take a bam entry,
     populate a vector with all of its CIGAR operations
     this is to mimic the output of bamnostic's default cigar from bamfile.fetch
 */
 std::vector<CigarPair>
 generate_cigar_pairs(const bam1_t *b)
 {
     std::vector<CigarPair>
     cigar_pairs;
 
     // iterate over the cigar
     const uint32_t *cigar = bam1_cigar(b);
     for (int k = 0; k < (int)bam1_cigar_len(b); k++) {
         cigar_pairs.push_back((CigarPair){
             (int)bam_cigar_op(cigar[k]),
             (int)bam_cigar_oplen(cigar[k])
         });
     }
     return cigar_pairs;
 }
 
 /*
 cigar specification:
 (lifted from the original sc_longread.py)
 
 M   BAM_CMATCH  0
 I   BAM_CINS    1
 D   BAM_CDEL    2
 N   BAM_CREF_SKIP   3
 S   BAM_CSOFT_CLIP  4
 H   BAM_CHARD_CLIP  5
 P   BAM_CPAD    6
 =   BAM_CEQUAL  7
 X   BAM_CDIFF   8
 B   BAM_CBACK   9
 
 */
 
 std::string
 generate_cigar (const std::vector<CigarPair> &cigar)
 {
   /*
     takes a cigar as a vector of pairs of ints,
     converts it to CIGAR string format
   */
 
   const char CIGAR_CODE[] = "MIDNSHP=XB";
   std::string result = "";
 
   for (const auto & pair : cigar) {
     result.append(std::to_string(pair.len));
     result.push_back(CIGAR_CODE[pair.op]);
   }
   return result;
 }
 
 /*
 takes a cigar as a vector of int pairs,
 smooths it down,
 returns the smooth cigar as a vector of int pairs
 */
 std::vector<CigarPair>
 smooth_cigar (const std::vector<CigarPair> &cigar, int threshold)
 {
     std::vector<CigarPair> new_cigar = {cigar[0]};
 
     for (int i = 1; i < (int)cigar.size(); i++) {
         if (new_cigar.back().op != 0) {
             new_cigar.push_back(cigar[i]);
             continue;
         } 
         
         switch(cigar[i].op) {
             case 0:
                 new_cigar.back().len += cigar[i].len;
                 break;
             case 1:
                 if (cigar[i].len > threshold) {
                     new_cigar.push_back(cigar[i]);
                 } 
                 break;
             case 2:
                 if (cigar[i].len <= threshold) {
                     new_cigar.back().len += cigar[i].len;
                 } else {
                     new_cigar.push_back(cigar[i]);
                 }
                 break;
             default:
                 new_cigar.push_back(cigar[i]);
         }
     }
 
     return new_cigar;
 }
 
 std::string printCigarPairs(const std::vector<CigarPair> &cigar) {
     std::stringstream ss;
     ss << cigar[0].len << "-" << cigar[0].op;
eb33725e
     for (size_t i =1; i < cigar.size(); i++) {
6882b281
         ss << "," << cigar[i].len << "-" << cigar[i].op;
     }
     return ss.str();
 }