Exemplo n.º 1
0
/// Writes one record to the SAM output.
///
/// The record's bin field is recomputed from its position and end position
/// before the record is handed to sam_write1.
///
/// \param record  record to write; its underlying raw data has its
///                core.bin field updated as part of this call
///
/// \throws std::runtime_error if sam_write1 returns a value <= 0
///
void SamWriterPrivate::Write(const BamRecord& record)
{
#if PBBAM_AUTOVALIDATE
    Validator::Validate(record);
#endif

    // SAM/BAM "magic numbers" used for the binning scheme
    constexpr int minShift = 14;
    constexpr int numLevels = 5;

    const auto rawData = internal::BamRecordMemory::GetRawData(record);
    auto* const b = rawData.get();

    // refresh the bin number so it matches the record's current span
    b->core.bin = hts_reg2bin(b->core.pos, bam_endpos(b), minShift, numLevels);

    // hand the record to htslib; a non-positive result is treated as failure
    if (sam_write1(file_.get(), header_.get(), b) <= 0)
        throw std::runtime_error("could not write record");
}
Exemplo n.º 2
0
bool BamRecordBuilder::BuildInPlace(BamRecord& record) const
{
    // initialize with basic 'core data'
    PBBAM_SHARED_PTR<bam1_t> recordRawData = internal::BamRecordMemory::GetRawData(record); /*   record.impl_.RawData().get();*/
    if (!recordRawData || !recordRawData->data)
        throw std::runtime_error("BamRecord memory in invalid state");    
    recordRawData->core = core_;

    // setup variable length data
    const std::vector<uint8_t> encodedTags = BamTagCodec::Encode(tags_);

    const size_t nameLength  = name_.size() + 1;
    const size_t numCigarOps = cigar_.size();
    const size_t cigarLength = numCigarOps * sizeof(uint32_t);
    const size_t seqLength   = sequence_.size();
    const size_t qualLength  = seqLength;
    const size_t tagLength   = encodedTags.size();
    const size_t dataLength  = nameLength + cigarLength + seqLength + qualLength + tagLength;

    // realloc if necessary
    uint8_t* varLengthDataBlock = recordRawData->data;
    if (!varLengthDataBlock)
        throw std::runtime_error("BamRecord memory in invalid state");
    size_t allocatedDataLength = recordRawData->m_data;
    if (allocatedDataLength < dataLength) {
        allocatedDataLength = dataLength;
        kroundup32(allocatedDataLength);
        varLengthDataBlock = (uint8_t*)realloc(varLengthDataBlock, allocatedDataLength);
    }
    recordRawData->data = varLengthDataBlock;
    recordRawData->l_data = dataLength;
    recordRawData->m_data = allocatedDataLength;

    size_t index = 0;

    // name
    memcpy(&varLengthDataBlock[index], name_.c_str(), nameLength);
    index += nameLength;

    // cigar
    if (cigarLength > 0) {
        std::vector<uint32_t> encodedCigar(numCigarOps);
        for (size_t i = 0; i < numCigarOps; ++i) {
            const CigarOperation& op = cigar_.at(i);
            encodedCigar[i] = op.Length() << BAM_CIGAR_SHIFT;
            const uint8_t type = static_cast<uint8_t>(op.Type());
            if (type >= 8)
                throw std::runtime_error("invalid CIGAR op type: " + std::to_string(type));
            encodedCigar[i] |= type;
        }
        memcpy(&varLengthDataBlock[index], &encodedCigar[0], cigarLength);
        index += cigarLength;

        // update bin after we've calculated cigar info
        const int32_t endPosition = bam_cigar2rlen(recordRawData->core.n_cigar, &encodedCigar[0]);
        recordRawData->core.bin = hts_reg2bin(core_.pos, endPosition, 14, 5);
    }

    // seq & qual
    if (seqLength > 0) {

        uint8_t* s = &varLengthDataBlock[index];
        for (size_t i = 0; i < seqLength; ++i)
            s[i>>1] |= ( seq_nt16_table[static_cast<int>(sequence_.at(i))] << ((~i&1)<<2) );
        index += seqLength;

        uint8_t* q = &varLengthDataBlock[index];
        if (!qualities_.empty())
            memset(q, 0xFF, seqLength);
        else {
            for (size_t i = 0; i < seqLength; ++i)
                q[i] = qualities_.at(i) - 33;
        }
        index += seqLength;
    }