/// Build an Alignment from a BAM record.
///
/// Copies the read's bases (decoded from the packed 4-bit encoding), its
/// quality bytes (copied as stored in the record, no offset applied here), its
/// name, and its secondary flag into a new Alignment.
///
/// @param b          the BAM record to convert.
/// @param rg_sample  map from read group ID to sample name. When it is
///                   non-empty and the record carries an RG tag found in the
///                   map with a non-empty sample name, the sample name and
///                   read group are set on the result. The map is not
///                   modified.
/// @return the populated Alignment.
Alignment bam_to_alignment(const bam1_t *b, map<string, string>& rg_sample) {

    Alignment alignment;

    // get the sequence and qual
    const int32_t lqseq = b->core.l_qseq;

    // quality bytes are taken verbatim from the record
    const uint8_t* qualptr = bam_get_qual(b);
    string quality;
    quality.assign((const char*) qualptr, lqseq);

    // process the sequence into chars: each base is a 4-bit code that indexes
    // into the nucleotide lookup table
    string sequence;
    sequence.resize(lqseq);
    const uint8_t* seqptr = bam_get_seq(b);
    for (int32_t i = 0; i < lqseq; ++i) {
        sequence[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(seqptr, i)];
    }

    // get the read group and sample name
    // bam_aux_get yields a null pointer when the record has no RG tag, so the
    // tag value must not be touched in that case. When present, the value
    // string starts one byte past the returned pointer (after the type byte).
    uint8_t* rgptr = bam_aux_get(b, "RG");
    char* rg = (rgptr != nullptr) ? (char*) (rgptr + 1) : nullptr;

    string sname;
    if (rg != nullptr && !rg_sample.empty()) {
        // use find() rather than operator[] so that unknown read groups do not
        // add empty entries to the caller's map
        auto found = rg_sample.find(string(rg));
        if (found != rg_sample.end()) {
            sname = found->second;
        }
    }

    // Now name the read after the scaffold
    string read_name = bam_get_qname(b);

    // Decide if we are a first read (/1) or second (last) read (/2)
    if (b->core.flag & BAM_FREAD1) {
        read_name += "/1";
    }
    if (b->core.flag & BAM_FREAD2) {
        read_name += "/2";
    }

    // If we are marked as both first and last we get /1/2, and if we are marked
    // as neither the scaffold name comes through unchanged as the read name.
    // TODO: produce correct names for intermediate reads on >2 read scaffolds.

    // add features to the alignment
    alignment.set_name(read_name);
    alignment.set_sequence(sequence);
    alignment.set_quality(quality);

    // TODO: htslib doesn't wrap this flag for some reason.
    alignment.set_is_secondary((b->core.flag & BAM_FSECONDARY) != 0);

    // sname can only be non-empty here if rg is non-null
    if (!sname.empty()) {
        alignment.set_sample_name(sname);
        alignment.set_read_group(rg);
    }

    return alignment;
}
bool get_next_alignment_from_fastq(gzFile fp, char* buffer, size_t len, Alignment& alignment) { alignment.Clear(); // handle name if (0!=gzgets(fp,buffer,len)) { buffer[strlen(buffer)-1] = '\0'; string name = buffer; name = name.substr(1); // trim off leading @ // keep trailing /1 /2 alignment.set_name(name); } else { return false; } // handle sequence if (0!=gzgets(fp,buffer,len)) { buffer[strlen(buffer)-1] = '\0'; alignment.set_sequence(buffer); } else { cerr << "[vg::alignment.cpp] error: incomplete fastq record" << endl; exit(1); } // handle "+" sep if (0!=gzgets(fp,buffer,len)) { } else { cerr << "[vg::alignment.cpp] error: incomplete fastq record" << endl; exit(1); } // handle quality if (0!=gzgets(fp,buffer,len)) { buffer[strlen(buffer)-1] = '\0'; string quality = string_quality_char_to_short(buffer); //cerr << string_quality_short_to_char(quality) << endl; alignment.set_quality(quality); } else { cerr << "[vg::alignment.cpp] error: incomplete fastq record" << endl; exit(1); } return true; }
/// Replace the alignment's quality string, in place, with the result of
/// passing it through string_quality_char_to_short.
void alignment_quality_char_to_short(Alignment& alignment) {
    // convert first, then store the converted copy back on the alignment
    auto converted = string_quality_char_to_short(alignment.quality());
    alignment.set_quality(converted);
}