void SamWriterPrivate::Write(const BamRecord& record) { #if PBBAM_AUTOVALIDATE Validator::Validate(record); #endif const auto rawRecord = internal::BamRecordMemory::GetRawData(record); // store bin number // min_shift=14 & n_lvls=5 are SAM/BAM "magic numbers" rawRecord->core.bin = hts_reg2bin(rawRecord->core.pos, bam_endpos(rawRecord.get()), 14, 5); // write record to file const int ret = sam_write1(file_.get(), header_.get(), rawRecord.get()); if (ret <= 0) throw std::runtime_error("could not write record"); }
bool BamRecordBuilder::BuildInPlace(BamRecord& record) const { // initialize with basic 'core data' PBBAM_SHARED_PTR<bam1_t> recordRawData = internal::BamRecordMemory::GetRawData(record); /* record.impl_.RawData().get();*/ if (!recordRawData || !recordRawData->data) throw std::runtime_error("BamRecord memory in invalid state"); recordRawData->core = core_; // setup variable length data const std::vector<uint8_t> encodedTags = BamTagCodec::Encode(tags_); const size_t nameLength = name_.size() + 1; const size_t numCigarOps = cigar_.size(); const size_t cigarLength = numCigarOps * sizeof(uint32_t); const size_t seqLength = sequence_.size(); const size_t qualLength = seqLength; const size_t tagLength = encodedTags.size(); const size_t dataLength = nameLength + cigarLength + seqLength + qualLength + tagLength; // realloc if necessary uint8_t* varLengthDataBlock = recordRawData->data; if (!varLengthDataBlock) throw std::runtime_error("BamRecord memory in invalid state"); size_t allocatedDataLength = recordRawData->m_data; if (allocatedDataLength < dataLength) { allocatedDataLength = dataLength; kroundup32(allocatedDataLength); varLengthDataBlock = (uint8_t*)realloc(varLengthDataBlock, allocatedDataLength); } recordRawData->data = varLengthDataBlock; recordRawData->l_data = dataLength; recordRawData->m_data = allocatedDataLength; size_t index = 0; // name memcpy(&varLengthDataBlock[index], name_.c_str(), nameLength); index += nameLength; // cigar if (cigarLength > 0) { std::vector<uint32_t> encodedCigar(numCigarOps); for (size_t i = 0; i < numCigarOps; ++i) { const CigarOperation& op = cigar_.at(i); encodedCigar[i] = op.Length() << BAM_CIGAR_SHIFT; const uint8_t type = static_cast<uint8_t>(op.Type()); if (type >= 8) throw std::runtime_error("invalid CIGAR op type: " + std::to_string(type)); encodedCigar[i] |= type; } memcpy(&varLengthDataBlock[index], &encodedCigar[0], cigarLength); index += cigarLength; // update bin after we've calculated cigar info const int32_t endPosition = bam_cigar2rlen(recordRawData->core.n_cigar, &encodedCigar[0]); recordRawData->core.bin = hts_reg2bin(core_.pos, endPosition, 14, 5); } // seq & qual if (seqLength > 0) { uint8_t* s = &varLengthDataBlock[index]; for (size_t i = 0; i < seqLength; ++i) s[i>>1] |= ( seq_nt16_table[static_cast<int>(sequence_.at(i))] << ((~i&1)<<2) ); index += seqLength; uint8_t* q = &varLengthDataBlock[index]; if (!qualities_.empty()) memset(q, 0xFF, seqLength); else { for (size_t i = 0; i < seqLength; ++i) q[i] = qualities_.at(i) - 33; } index += seqLength; }