// NOTE: We keep 2 different sets of offsets because the copied, new file differs in size from the existing one.
//
//       It is unclear which combination of write parameters was used on the original. Things like thread count,
//       compression level, etc. can affect compression ratio, BGZF block sizes, etc. even though the BAM record
//       content itself is equal. So we'll just track these index values separately, for now at least.
//
// Returns the shared core index data with fileOffset_ set to the values
// expected for the already-existing test file.
static
PbiRawData Test2Bam_ExistingIndex(void)
{
    auto expectedIndex = Test2Bam_CoreIndexData();

    auto& basicData = expectedIndex.BasicData();
    basicData.fileOffset_ = {
        33816576, 33825163, 33831333, 33834264, 33836542,
        33838065, 33849818, 33863499, 33874621, 1392836608
    };

    return expectedIndex;
}
// Returns the shared core index data with fileOffset_ set to the values
// expected for the newly written (copied) test file.
static
PbiRawData Test2Bam_NewIndex(void)
{
    auto expectedIndex = Test2Bam_CoreIndexData();

    auto& basicData = expectedIndex.BasicData();
    basicData.fileOffset_ = {
        33816576,  236126208, 391315456,  469106688,  537067520,
        587792384, 867303424, 1182793728, 1449787392, 1582628864
    };

    return expectedIndex;
}
Example #3
0
    // Returns true only if the row's read group ID, hole number (ZMW), and
    // (qStart, qEnd) pair are all present in the nested lookup_ structure.
    // Each level is checked in turn; the first miss rejects the row.
    bool Accepts(const PbiRawData& idx, const size_t row) const
    {
        const auto& basic = idx.BasicData();

        // level 1: read group ID
        const auto rgIter = lookup_.find(basic.rgId_.at(row));
        if (rgIter == lookup_.end())
            return false;

        // level 2: hole number within that read group
        const auto& zmwLookup = rgIter->second;
        const auto zmwIter = zmwLookup->find(basic.holeNumber_.at(row));
        if (zmwIter == zmwLookup->end())
            return false;

        // level 3: query interval within that ZMW
        // (CCS names already covered in lookup construction phase)
        const auto& knownIntervals = zmwIter->second;
        const auto start = basic.qStart_.at(row);
        const auto end   = basic.qEnd_.at(row);
        const bool intervalKnown =
            knownIntervals.find(std::make_pair(start, end)) != knownIntervals.end();
        return intervalKnown;
    }
Example #4
0
// Accepts the row when its query length (qEnd - qStart) passes CompareHelper.
bool PbiQueryLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    const auto& basic = idx.BasicData();

    const auto queryStart = basic.qStart_.at(row);
    const auto queryEnd   = basic.qEnd_.at(row);

    return CompareHelper(queryEnd - queryStart);
}
Example #5
0
// Accepts the row when its aligned length (aEnd - aStart) passes CompareHelper.
bool PbiAlignedLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    const auto& mapped = idx.MappedData();

    const auto alignedEnd   = mapped.aEnd_.at(row);
    const auto alignedStart = mapped.aStart_.at(row);

    return CompareHelper(alignedEnd - alignedStart);
}
Example #6
0
// Constructs the private index from raw index data: every member is
// initialized from the matching accessor on rawIndex (filename, version,
// section flags, read count, and the basic/mapped/reference/barcode data).
PbiIndexPrivate::PbiIndexPrivate(const PbiRawData& rawIndex)
    : filename_(rawIndex.Filename()),
      version_(rawIndex.Version()),
      sections_(rawIndex.FileSections()),
      numReads_(rawIndex.NumReads()),
      basicData_(rawIndex.BasicData()),
      mappedData_(rawIndex.MappedData()),
      referenceData_(rawIndex.ReferenceData()),
      barcodeData_(rawIndex.BarcodeData())
{
}
Example #7
0
// Accepts the row when its identity passes CompareHelper, where
//   identity = 1 - (mismatches + deleted bases + inserted bases) / (qEnd - qStart)
bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    // numerator: every base that is not a match
    const auto& mapped = idx.MappedData();
    const auto& mismatches = mapped.nMM_.at(row);
    const auto& indelCounts = mapped.NumDeletedAndInsertedBasesAt(row);  // (deleted, inserted)
    const auto nonMatches = mismatches + indelCounts.first + indelCounts.second;

    // denominator: query span of the read
    const auto& basic = idx.BasicData();
    const auto& queryStart = basic.qStart_.at(row);
    const auto& queryEnd   = basic.qEnd_.at(row);
    const auto readLength  = queryEnd - queryStart;

    const float identity = 1.0 - (static_cast<float>(nonMatches)/static_cast<float>(readLength));
    return CompareHelper(identity);
}
Example #8
0
void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const
{
    const auto pbiFilename = idx.Filename();
    const auto bamFilename = pbiFilename.substr(0, pbiFilename.length() - 4);
    const auto bamFile = BamFile{ bamFilename };

    // single-value
    if (rnameWhitelist_ == boost::none) {
        const auto tId = bamFile.ReferenceId(rname_);
        subFilter_ = PbiReferenceIdFilter{ tId, cmp_ };
    }

    // multi-value whitelist
    else {
        subFilter_ = PbiFilter(PbiFilter::UNION);
        for (const auto& rname : rnameWhitelist_.get())
            subFilter_.Add(PbiReferenceIdFilter{ bamFile.ReferenceId(rname) });
    }
    initialized_ = true;
}
// Builds the raw index fixture shared by the Test2Bam_* helpers:
// version 3.0.1, BASIC | MAPPED | REFERENCE sections, 10 reads.
static
PbiRawData Test2Bam_CoreIndexData(void)
{
    PbiRawData index;

    // header
    index.Version(PbiFile::Version_3_0_1);
    index.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE);
    index.NumReads(10);

    // per-read basic data
    auto& basic = index.BasicData();
    basic.rgId_       = { -1197849594, -1197849594, -1197849594, -1197849594, -1197849594,
                          -1197849594, -1197849594, -1197849594, -1197849594, -1197849594 };
    basic.qStart_     = { 48, 387, 0, 9936, 10232, 7468, 5557, 7285, 426, 7064 };
    basic.qEnd_       = { 1132, 1134, 344, 10187, 10394, 8906, 7235, 8657, 1045, 7421 };
    basic.holeNumber_ = { 49050, 32328, 32328, 6469, 6469, 30983, 13473, 13473, 19915, 30983 };
    basic.readQual_   = { 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6 };
    basic.ctxtFlag_   = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    basic.fileOffset_ = { 33816576, 33825163, 33831333, 33834264, 33836542,
                          33838065, 33849818, 33863499, 33874621, 1392836608 };

    // per-read mapped data
    auto& mapped = index.MappedData();
    mapped.tId_       = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    mapped.tStart_    = { 0, 302, 675, 2170, 2203, 3572, 4506, 4507, 4592, 4669 };
    mapped.tEnd_      = { 471, 1019, 1026, 2397, 2326, 5015, 6125, 5850, 5203, 5011 };
    mapped.aStart_    = { 653, 395, 1, 9960, 10271, 7468, 5574, 7285, 441, 7075 };
    mapped.aEnd_      = { 1129, 1134, 344, 10185, 10394, 8906, 7235, 8647, 1040, 7418 };
    mapped.revStrand_ = { 0, 1, 0, 1, 0, 1, 1, 0, 1, 0 };
    mapped.nM_        = { 460, 704, 339, 216, 118, 1394, 1581, 1313, 583, 333 };
    mapped.nMM_       = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    mapped.mapQV_     = { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 };

    // reference entries
    auto& reference = index.ReferenceData();
    reference.entries_ = {
        PbiReferenceEntry{ 0, 0, 10 },
        PbiReferenceEntry{ 4294967295, 4294967295, 4294967295 }
    };

    return index;
}
static
void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual)
{
    // header data
    EXPECT_EQ(expected.Version(),      actual.Version());
    EXPECT_EQ(expected.FileSections(), actual.FileSections());
    EXPECT_EQ(expected.NumReads(),     actual.NumReads());

    // subread data
    const PbiRawBasicData& e = expected.BasicData();
    const PbiRawBasicData& a = actual.BasicData();
    EXPECT_EQ(e.rgId_,       a.rgId_);
    EXPECT_EQ(e.qStart_,     a.qStart_);
    EXPECT_EQ(e.qEnd_,       a.qEnd_);
    EXPECT_EQ(e.holeNumber_, a.holeNumber_);
    EXPECT_EQ(e.readQual_,   a.readQual_);
    EXPECT_EQ(e.ctxtFlag_,   a.ctxtFlag_);
    EXPECT_EQ(e.fileOffset_, a.fileOffset_);

    // mapped data
    EXPECT_EQ(expected.HasMappedData(), actual.HasMappedData());
    if (expected.HasMappedData() && actual.HasMappedData()) {
        const PbiRawMappedData& e = expected.MappedData();
        const PbiRawMappedData& a = actual.MappedData();
        EXPECT_EQ(e.tId_,       a.tId_);
        EXPECT_EQ(e.tStart_,    a.tStart_);
        EXPECT_EQ(e.tEnd_,      a.tEnd_);
        EXPECT_EQ(e.aStart_,    a.aStart_);
        EXPECT_EQ(e.aEnd_,      a.aEnd_);
        EXPECT_EQ(e.revStrand_, a.revStrand_);
        EXPECT_EQ(e.nM_,        a.nM_);
        EXPECT_EQ(e.nMM_,       a.nMM_);
        EXPECT_EQ(e.mapQV_,     a.mapQV_);
    }

    // reference data
    EXPECT_EQ(expected.HasReferenceData(), actual.HasReferenceData());
    if (expected.HasReferenceData() && actual.HasReferenceData()) {
        const PbiRawReferenceData& e = expected.ReferenceData();
        const PbiRawReferenceData& a = actual.ReferenceData();
        EXPECT_EQ(e.entries_, a.entries_);
    }

    // barcode data
    EXPECT_EQ(expected.HasBarcodeData(), actual.HasBarcodeData());
    if (expected.HasBarcodeData() && actual.HasBarcodeData()) {
        const PbiRawBarcodeData& e = expected.BarcodeData();
        const PbiRawBarcodeData& a = actual.BarcodeData();
        EXPECT_EQ(e.bcForward_,   a.bcForward_);
        EXPECT_EQ(e.bcReverse_,  a.bcReverse_);
        EXPECT_EQ(e.bcQual_,   a.bcQual_);
    }
}