Ejemplo n.º 1
0
/**
 * \brief Stores one fragment of a template in the collector buffer.
 *
 * Initializes the index record for the fragment's cluster id and read index, records the
 * fragment's forward-strand reference position, stores its BCL and CIGAR data and finally
 * fills in the fragment header.
 *
 * \param bamTemplate   template owning the fragment; must have at most two fragments
 * \param fragmentIndex index of the fragment within bamTemplate
 * \param barcodeIdx    barcode index forwarded to the fragment header
 */
void FragmentCollector::add(
    const alignment::BamTemplate &bamTemplate,
    const unsigned fragmentIndex,
    const unsigned barcodeIdx)
{
    ISAAC_ASSERT_MSG(2 >= bamTemplate.getFragmentCount(), "Expected paired or single-ended data");

    const alignment::FragmentMetadata &fragment = bamTemplate.getFragmentMetadata(fragmentIndex);
    ISAAC_THREAD_CERR_DEV_TRACE_CLUSTER_ID(fragment.getCluster().getId(), "FragmentCollector::add: " << fragment);

    FragmentBuffer::IndexRecord &record =
        buffer_.initialize(fragment.getCluster().getId(), fragment.getReadIndex());
    record.fStrandPos_ = fragment.getFStrandReferencePosition();
    storeBclAndCigar(fragment, record);

    if (2 != bamTemplate.getFragmentCount())
    {
        // No mate: use the header flavour that takes no mate information.
        record.fragmentHeader() = io::FragmentHeader(bamTemplate, fragment, barcodeIdx);
        return;
    }

    const alignment::FragmentMetadata &mate = bamTemplate.getMateFragmentMetadata(fragment);
    if (!fragment.isNoMatch())
    {
        // The header records the storage bin that the mate's position maps to.
        const unsigned mateStorageBin = binIndexMap_.getBinIndex(mate.getFStrandReferencePosition());
        record.fragmentHeader() = io::FragmentHeader(bamTemplate, fragment, mate, barcodeIdx,
                                                     mateStorageBin);
        return;
    }

    // A no-match fragment is only accepted together with a no-match mate; the mate bin is 0 then.
    ISAAC_ASSERT_MSG(mate.isNoMatch(), "If mate is not a no-match, fragment must be a shadow. fragment: "
                     << fragment << " mate:" << mate);
    record.fragmentHeader() = io::FragmentHeader(bamTemplate, fragment, mate, barcodeIdx, 0);
}
Ejemplo n.º 2
0
/**
 * \brief Builds the k-mer lookup table for an adapter sequence.
 *
 * Every k-mer of adapterMatchBasesMin_ bases produced by the generator gets the offset reported
 * by the generator stored in kmerPositions_. A k-mer that is produced more than once is marked
 * with NON_UNIQUE_KMER_POSITION; k-mers that never occur keep UNINITIALIZED_POSITION.
 *
 * \param adapterMetadata description of the adapter (sequence, clip length, boundedness)
 */
SequencingAdapter::SequencingAdapter(const flowcell::SequencingAdapterMetadata &adapterMetadata) :
    adapterMetadata_(adapterMetadata),
    kmerPositions_(oligo::getMaxKmer<unsigned>(adapterMatchBasesMin_) + 1, char(UNINITIALIZED_POSITION))
{
    const std::string &sequence = adapterMetadata_.getSequence();

    // Offsets are stored as char, so the sequence length must fit into one.
    ISAAC_ASSERT_MSG(sequence.size() < unsigned(std::numeric_limits<char>::max()), "Adapter sequence is too long");
    ISAAC_ASSERT_MSG((adapterMetadata_.isUnbounded() ||
                     sequence.size() <= adapterMetadata_.getClipLength()),
                     "Clip length cannot be shorter than the adapter sequence");

    oligo::KmerGenerator<unsigned short, std::string::const_iterator> kmerGenerator(
        sequence.begin(), sequence.end(), adapterMatchBasesMin_);

    std::string::const_iterator kmerPosition = sequence.begin();
    for (unsigned short kmer = 0; kmerGenerator.next(kmer, kmerPosition); )
    {
        char &slot = kmerPositions_.at(kmer);
        if (UNINITIALIZED_POSITION == slot)
        {
            // first occurrence: remember where it was seen
            slot = static_cast<char>(std::distance(sequence.begin(), kmerPosition));
        }
        else
        {
            // seen before (or already flagged): the k-mer cannot identify a unique offset
            slot = NON_UNIQUE_KMER_POSITION;
        }
    }
}
Ejemplo n.º 3
0
/**
 * \brief Formats the path of a per-cycle bcl.bgzf file (or of its .bci index) into result.
 *
 * The path is built as <baseCallsPath>/L<lane, 3 digits>/<cycle, 4 digits>.bcl.bgzf[.bci].
 *
 * \param baseCallsPath path of the BaseCalls folder
 * \param lane          lane number, must not exceed bclBgzf::LANE_NUMBER_MAX
 * \param cycle         cycle number, must not exceed bclBgzf::CYCLE_NUMBER_MAX
 * \param bci           when true, the ".bci" index file path is produced instead of the data file
 * \param result        receives the formatted path. Per the original author's note it is expected
 *                      to be pre-sized by the caller so that formatting does not have to allocate.
 */
static void getBclBgzfCycleFilePath(
    const boost::filesystem::path &baseCallsPath,
    const unsigned lane,
    const unsigned cycle,
    const bool bci,
    boost::filesystem::path &result)
{
    ISAAC_ASSERT_MSG(lane <= bclBgzf::LANE_NUMBER_MAX, "Lane number " << lane << " must not exceed " << bclBgzf::LANE_NUMBER_MAX);
    ISAAC_ASSERT_MSG(cycle <= bclBgzf::CYCLE_NUMBER_MAX, "Cycle number " << cycle << " must not exceed " <<
                     bclBgzf::CYCLE_NUMBER_MAX);

    // Fixed-size stack buffers are used for the path components to keep formatting off the heap.
    // lane and cycle are unsigned, hence %u rather than %d.
    char laneFolder[100];
    snprintf(laneFolder, sizeof(laneFolder), "%cL%03u", common::getDirectorySeparatorChar(), lane);

    char bclFileName[100];
    snprintf(bclFileName, sizeof(bclFileName),
             bci ? "%c%04u.bcl.bgzf.bci" : "%c%04u.bcl.bgzf", common::getDirectorySeparatorChar(), cycle);

    // Both components already start with the directory separator.
    result = baseCallsPath.c_str();
    result /= laneFolder;
    result /= bclFileName;
}
Ejemplo n.º 4
0
/**
 * \brief Counts, per read and per tile, the clusters for which seeds will be loaded.
 *
 * \param unprocessedTiles    non-empty list of tiles to count for, ordered by tile index
 * \param barcodeMetadataList barcodes, forwarded to willLoadSeeds
 * \param readMetadataList    reads to count for; determines the outer dimension of the result
 * \param foundMatches        per-tile cluster information; each tile entry must hold exactly
 *                            as many clusters as the tile metadata declares
 * \return table indexed as [readIndex][tileIndex]. The inner dimension is the index of the last
 *         unprocessed tile plus one; entries of tiles not listed in unprocessedTiles stay 0.
 */
const std::vector<std::vector<unsigned > > SeedMemoryManager<KmerT>::getNotFoundMatchesCount(
    const flowcell::TileMetadataList &unprocessedTiles,
    const flowcell::BarcodeMetadataList &barcodeMetadataList,
    const ReadMetadataList &readMetadataList,
    const matchFinder::TileClusterInfo &foundMatches) const
{
    // front()/back() below are undefined on an empty list
    ISAAC_ASSERT_MSG(!unprocessedTiles.empty(), "Expected at least one unprocessed tile");
    ISAAC_ASSERT_MSG(unprocessedTiles.front().getIndex() <= unprocessedTiles.back().getIndex(),
                     "Expected tiles ordered by index");
    std::vector<std::vector<unsigned > > ret(readMetadataList.size(), std::vector<unsigned>(
        unprocessedTiles.back().getIndex() + 1));

    // Iterate the same list that sized ret, so that ret.at(readIndex) cannot be driven out of
    // range by a differently sized member list.
    BOOST_FOREACH(const flowcell::ReadMetadata &readMetadata, readMetadataList)
    {
        const unsigned readIndex = readMetadata.getIndex();
        BOOST_FOREACH(const flowcell::TileMetadata &tileMetadata, unprocessedTiles)
        {
            const unsigned tileIndex = tileMetadata.getIndex();
            const std::vector<matchFinder::ClusterInfo> &oneTileInfo = foundMatches.at(tileIndex);
            // match only clusters where no matches were found so far
            ISAAC_ASSERT_MSG(oneTileInfo.size() == tileMetadata.getClusterCount(), "allTiles and foundMatches geometries must match");
            ret.at(readIndex).at(tileIndex) =
                std::count_if(oneTileInfo.begin(), oneTileInfo.end(),
                              boost::bind(&willLoadSeeds,
                                          boost::ref(barcodeMetadataList), _1, readIndex));
        }
    }
    return ret;
}
Ejemplo n.º 5
0
/**
 * \brief Fills bclData with the transposed base calls, pf flags and (optionally) positions of a tile.
 *
 * \param tileMetadata     tile being converted; its cluster count decides how many clusters are added
 * \param bclData          destination; clusters are appended via addMoreClusters, pf values and
 *                         positions are appended through back inserters
 * \param useLocsPositions when positions are stored, selects locsMapper_ (true) or clocsMapper_ (false)
 */
void BclBaseCallsSource::bclToClusters(
    const flowcell::TileMetadata &tileMetadata,
    alignment::BclClusters &bclData,
    const bool useLocsPositions) const
{

    ISAAC_THREAD_CERR << "Transposing Bcl data for " << tileMetadata.getClusterCount() << " bcl clusters" << std::endl;
    const clock_t startTranspose = clock();
    bclMapper_.transpose(bclData.addMoreClusters(tileMetadata.getClusterCount()));
    // clock() ticks are CLOCKS_PER_SEC per second; convert explicitly instead of assuming 1000 ticks per ms
    const long transposeMs = static_cast<long>((clock() - startTranspose) * 1000.0 / CLOCKS_PER_SEC);
    ISAAC_THREAD_CERR << "Transposing Bcl data done for " << bclData.getClusterCount() << " bcl clusters in " << transposeMs << "ms" << std::endl;

    ISAAC_THREAD_CERR << "Extracting Pf values for " << tileMetadata.getClusterCount() << " bcl clusters" << std::endl;
    // gcc 4.4 has trouble figuring out which assignment implementation to use with back insert iterators
    filtersMapper_.getPf(std::back_inserter(bclData.pf()));

    ISAAC_ASSERT_MSG(bclData.pf().size() == bclData.getClusterCount(), "Mismatch between data " << bclData.getClusterCount() << " and pf " << bclData.pf().size() << " counts");
    ISAAC_THREAD_CERR << "Extracting Pf values done for " << bclData.getClusterCount() << " bcl clusters" << std::endl;

    if (bclData.storeXy())
    {
        ISAAC_THREAD_CERR << "Extracting Positions values for " << tileMetadata.getClusterCount() << " bcl clusters" << std::endl;
        if (useLocsPositions)
        {
            locsMapper_.getPositions(std::back_inserter(bclData.xy()));
        }
        else
        {
            clocsMapper_.getPositions(std::back_inserter(bclData.xy()));
        }
        ISAAC_ASSERT_MSG(bclData.xy().size() == bclData.getClusterCount(), "Mismatch between data " << bclData.getClusterCount() << " and position " << bclData.xy().size() << " counts");
        ISAAC_THREAD_CERR << "Extracting Positions values done for " << bclData.getClusterCount() << " bcl clusters" << std::endl;
    }

}
Ejemplo n.º 6
0
/**
 * \brief Extends the seeds of one tile with the base call of one cycle.
 *
 * Maps the tile/cycle through threadBclMapper and then, for every seed in
 * [cycleSeedsBegin, cycleSeedsEnd) and every cluster of the tile, consumes the next destination
 * seed of the cluster's reference and either appends the cycle's base to its k-mer or turns it
 * into an N-seed when the base quality is below seedBaseQualityMin_.
 *
 * \param tileClusterBarcode per-tile cluster information (barcode index, read completeness)
 * \param threadBclMapper    mapper used to access the base calls of the tile/cycle
 * \param destinationBegins  per-reference iterators to the next seed to update; each is advanced
 *                           once per processed (seed, cluster) pair
 * \param tile               tile being loaded
 * \param cycle              cycle being loaded
 * \param cycleSeedsBegin    first seed covering this cycle; the range must be non-empty
 * \param cycleSeedsEnd      end of the seeds covering this cycle; the first and the last seed of
 *                           the range are asserted to belong to the same read
 */
void ParallelSeedLoader<ReaderT, KmerT>::loadTileCycle(
    const matchFinder::TileClusterInfo &tileClusterBarcode,
    rta::SingleCycleBclMapper<ReaderT> &threadBclMapper,
    std::vector<typename std::vector<Seed<KmerT> >::iterator> &destinationBegins,
    const flowcell::TileMetadata &tile,
    const unsigned cycle,
    std::vector<SeedMetadata>::const_iterator cycleSeedsBegin,
    const std::vector<SeedMetadata>::const_iterator cycleSeedsEnd)
{
    ISAAC_ASSERT_MSG(cycleSeedsEnd > cycleSeedsBegin, "Seed list cannot be empty");
    ISAAC_ASSERT_MSG((cycleSeedsEnd -1)->getReadIndex() == cycleSeedsBegin->getReadIndex(), "All seeds must belong to the same read");
    const unsigned readIndex = cycleSeedsBegin->getReadIndex();

    const std::vector<matchFinder::ClusterInfo> &clustersToDiscard = tileClusterBarcode.at(tile.getIndex());
    ISAAC_ASSERT_MSG(tile.getClusterCount() == clustersToDiscard.size(), "Found matches from a wrong tile/read");

    threadBclMapper.mapTileCycle(BaseT::flowcellLayout_, tile, cycle);

    // outer loop: one pass over all clusters of the tile for every seed covering this cycle
    while (cycleSeedsEnd != cycleSeedsBegin)
    {
        for (unsigned int clusterId = 0; tile.getClusterCount() > clusterId; ++clusterId)
        {
            const unsigned barcodeIndex = clustersToDiscard.at(clusterId).getBarcodeIndex();
            const unsigned referenceIndex = BaseT::barcodeMetadataList_.at(barcodeIndex).getReferenceIndex();
            char base = 0;
            threadBclMapper.get(clusterId, &base);
            // clusters whose barcode has no mapped reference produce no seeds
            if (flowcell::BarcodeMetadata::UNMAPPED_REFERENCE_INDEX != referenceIndex)
            {
                // clusters already complete for this read are skipped without consuming a destination seed
                if (!clustersToDiscard.at(clusterId).isReadComplete(readIndex))
                {
                    // take the next destination seed of this reference and advance the iterator
                    Seed<KmerT> & forwardSeed = *destinationBegins[referenceIndex]++;
                    // skip those previously found to contain Ns
                    if (!forwardSeed.isNSeed())
                    {
                        if (oligo::getQuality(base) >= seedBaseQualityMin_)
                        {
                            KmerT forward = forwardSeed.getKmer();
                            const KmerT forwardBaseValue(base & oligo::BITS_PER_BASE_MASK);

                            // append the new base at the low end of the k-mer
                            forward <<= oligo::BITS_PER_BASE;
                            forward |= forwardBaseValue;

                            forwardSeed = Seed<KmerT>(forward, SeedId(tile.getIndex(), barcodeIndex, clusterId, cycleSeedsBegin->getIndex(), 0));
                        }
                        else
                        {
                            // we can't have holes. The Ns must be stored in such a way that
                            // they will be easy to remove later (after sorting)
                            forwardSeed = makeNSeed<KmerT>(tile.getIndex(), barcodeIndex, clusterId, 0 == cycleSeedsBegin->getIndex());
                        }
                    }
                }
            }
        }
        ++cycleSeedsBegin;
    }
}
Ejemplo n.º 7
0
/**
 * \brief Captures the metadata lists the manager works with.
 *
 * \param barcodeMetadataList barcodes, stored in barcodeMetadataList_
 * \param readMetadataList    reads, stored in readMetadataList_; must not be empty
 * \param seedMetadataList    seeds, stored in seedMetadataList_; must not be empty
 * \param allTiles            not used by this constructor
 */
SeedMemoryManager<KmerT>::SeedMemoryManager(
    const flowcell::BarcodeMetadataList &barcodeMetadataList,
    const ReadMetadataList &readMetadataList,
    const SeedMetadataList &seedMetadataList,
    const flowcell::TileMetadataList &allTiles
    ) :
    barcodeMetadataList_(barcodeMetadataList),
    readMetadataList_(readMetadataList),
    seedMetadataList_(seedMetadataList),
    notFoundMatchesCount_()
{
    // Both lists are required up front; fail early if either is missing.
    ISAAC_ASSERT_MSG(!readMetadataList_.empty(), "Empty readMetadataList is not allowed");
    ISAAC_ASSERT_MSG(!seedMetadataList_.empty(), "Empty seedMetadataList is not allowed");
}
Ejemplo n.º 8
0
/**
 * \brief Produces the lane-level .bci file path of a bcl-bgzf flowcell.
 *
 * \param lane   lane number
 * \param result receives the path formatted by getLaneBciFilePath
 */
void Layout::getLaneAttribute<Layout::BclBgzf, BciFilePathAttributeTag>(
    const unsigned lane, boost::filesystem::path &result) const
{
    // this attribute exists for the bcl-bgzf format only
    ISAAC_ASSERT_MSG(BclBgzf == format_, BciFilePathAttributeTag() << " is only allowed for bcl-bgzf flowcells");

    getLaneBciFilePath(getBaseCallsPath(), lane, result);
}
Ejemplo n.º 9
0
/**
 * \brief Formats the path of the cluster positions (.locs) file into result.
 *
 * Patterned flowcells use a single <baseCallsPath>/../s.locs file; otherwise the per-lane file
 * <baseCallsPath>/../L<lane, 3 digits>/s_<lane>.locs is used.
 *
 * \param baseCallsPath     path of the BaseCalls folder
 * \param patternedFlowcell selects the single shared s.locs file
 * \param lane              lane number; checked against bclBgzf::LANE_NUMBER_MAX for
 *                          non-patterned flowcells only
 * \param result            receives the formatted path. Per the original author's note it is
 *                          expected to be pre-sized so that formatting does not have to allocate.
 */
static void getPositionsFilePath(
    const boost::filesystem::path &baseCallsPath,
    const bool patternedFlowcell,
    const unsigned lane,
    boost::filesystem::path &result)
{
    if (patternedFlowcell)
    {
        result = baseCallsPath.c_str();
        result /= "..";
        result /= "s.locs";

        return;
    }

    ISAAC_ASSERT_MSG(lane <= bclBgzf::LANE_NUMBER_MAX, "Lane number " << lane << " must not exceed " << bclBgzf::LANE_NUMBER_MAX);

    // Fixed-size stack buffers are used for the path components to keep formatting off the heap.
    // snprintf bounds the writes; lane is unsigned, hence %u rather than %d.
    char laneFolder[100];
    // assuming Intensities folder is one level above BaseCalls folder
    snprintf(laneFolder, sizeof(laneFolder), "%c..%cL%03u",
             common::getDirectorySeparatorChar(), common::getDirectorySeparatorChar(), lane);

    char filterFileName[100];
    snprintf(filterFileName, sizeof(filterFileName), "%cs_%u.locs", common::getDirectorySeparatorChar(), lane);

    result = baseCallsPath.c_str();
    result /= laneFolder;
    result /= filterFileName;
}
Ejemplo n.º 10
0
/**
 * \brief Builds an ungapped alignment (soft clip / align / soft clip) for a fragment.
 *
 * The read is clipped by the adapter clipper, by read masking and by the reference boundaries.
 * The resulting CIGAR operations are appended to cigarBuffer and the fragment is updated via
 * updateFragmentCigar. The alignment is deliberately not reset first, to preserve the
 * seed-based anchors; only the clipping is reset.
 *
 * \param fragmentMetadata   fragment to align; must not be aligned yet
 * \param cigarBuffer        buffer receiving the CIGAR operations of this fragment
 * \param readMetadata       forwarded to updateFragmentCigar
 * \param adapterClipper     clips sequencing adapters off the read
 * \param contigList         reference contigs; indexed by fragmentMetadata.contigId
 * \param contigAnnotations  forwarded to updateFragmentCigar
 * \return value returned by updateFragmentCigar; when it is 0 the fragment is set unaligned
 */
unsigned UngappedAligner::alignUngapped(
    FragmentMetadata &fragmentMetadata,
    Cigar &cigarBuffer,
    const flowcell::ReadMetadata &readMetadata,
    const matchSelector::FragmentSequencingAdapterClipper &adapterClipper,
    const reference::ContigList &contigList,
    const isaac::reference::ContigAnnotations &contigAnnotations) const
{
    // operations of this fragment start where the buffer currently ends
    const unsigned cigarOffset = cigarBuffer.size();

    ISAAC_ASSERT_MSG(!fragmentMetadata.isAligned(), "alignUngapped is expected to be performend on a clean fragment");
    fragmentMetadata.resetClipping();

    const reference::Contig &contig = contigList[fragmentMetadata.contigId];
    const Read &read = fragmentMetadata.getRead();
    const std::vector<char> &sequence = read.getStrandSequence(fragmentMetadata.reverse);

    // [mappedBegin, mappedEnd) shrinks as the clipping steps are applied
    std::vector<char>::const_iterator mappedBegin = sequence.begin();
    std::vector<char>::const_iterator mappedEnd = sequence.end();

    adapterClipper.clip(contig, fragmentMetadata, mappedBegin, mappedEnd);
    clipReadMasking(read, fragmentMetadata, mappedBegin, mappedEnd);
    clipReference(contig.size(), fragmentMetadata, mappedBegin, mappedEnd);

    const unsigned leadingClip = std::distance(sequence.begin(), mappedBegin);
    if (0 != leadingClip)
    {
        cigarBuffer.addOperation(leadingClip, Cigar::SOFT_CLIP);
    }

    const unsigned alignedBases = std::distance(mappedBegin, mappedEnd);
    if (0 != alignedBases)
    {
        cigarBuffer.addOperation(alignedBases, Cigar::ALIGN);
    }

    const unsigned trailingClip = std::distance(mappedEnd, sequence.end());
    if (0 != trailingClip)
    {
        cigarBuffer.addOperation(trailingClip, Cigar::SOFT_CLIP);
    }

    const unsigned ret = updateFragmentCigar(
        readMetadata, contigList, contigAnnotations, fragmentMetadata,
        fragmentMetadata.reverse, fragmentMetadata.contigId, fragmentMetadata.position, cigarBuffer, cigarOffset);

    if (0 == ret)
    {
        fragmentMetadata.setUnaligned();
    }

    return ret;
}
Ejemplo n.º 11
0
/**
 * \brief Gives access to the maximum number of tiles per lane of a bcl-bgzf flowcell.
 *
 * \param result not used; the value is returned by reference to the stored flowcell data
 * \return reference to tilesPerLaneMax_ of the BclFlowcellData held in formatSpecificData_
 */
const unsigned& Layout::getAttribute<Layout::BclBgzf, TilesPerLaneMaxAttributeTag>(
    unsigned &result) const
{
    // this attribute exists for the bcl-bgzf format only
    ISAAC_ASSERT_MSG(BclBgzf == format_, TilesPerLaneMaxAttributeTag() << " is only allowed for bcl-bgzf flowcells");

    return boost::get<BclFlowcellData>(formatSpecificData_).tilesPerLaneMax_;
}
Ejemplo n.º 12
0
/**
 * \brief Produces the cluster positions file path for a lane of a bcl-bgzf flowcell.
 *
 * \param lane   lane number
 * \param result receives the path formatted by flowcell::getPositionsFilePath
 */
void Layout::getLaneAttribute<Layout::BclBgzf, PositionsFilePathAttributeTag>(
    const unsigned lane, boost::filesystem::path &result) const
{
    // this attribute exists for the bcl-bgzf format only
    ISAAC_ASSERT_MSG(BclBgzf == format_, PositionsFilePathAttributeTag() << " is only allowed for bcl-bgzf flowcells");

    // the file location depends on whether the flowcell is patterned
    const bool patternedFlowcell = boost::get<BclFlowcellData>(formatSpecificData_).patternedFlowcell_;
    flowcell::getPositionsFilePath(getBaseCallsPath(), patternedFlowcell, lane, result);
}
Ejemplo n.º 13
0
/**
 * \brief Tells whether a bcl-bgzf flowcell is patterned.
 *
 * \param result not used; the value is returned by reference to the stored flowcell data
 * \return reference to patternedFlowcell_ of the BclFlowcellData held in formatSpecificData_
 */
const bool &Layout::getAttribute<Layout::BclBgzf, PatternedFlowcellAttributeTag>(
    bool &result) const
{
    // this attribute exists for the bcl-bgzf format only
    ISAAC_ASSERT_MSG(BclBgzf == format_, PatternedFlowcellAttributeTag() << " is only allowed for bcl-bgzf flowcells");

    return boost::get<BclFlowcellData>(formatSpecificData_).patternedFlowcell_;
}
Ejemplo n.º 14
0
/**
 * \brief Reads sorted reference metadata from an xml reader.
 *
 * Reads the FormatVersion and rejects files outside of the
 * [OLDEST_SUPPORTED_REFERENCE_FORMAT_VERSION, CURRENT_REFERENCE_FORMAT_VERSION] range, then reads
 * the optional Contigs and Permutations sections, derives defaultMaskWidth_ from the first mask
 * file (0 when there are none) and finally sets formatVersion_ to the current version.
 *
 * NOTE(review): the XmlReader operators used here (+=, prefix/postfix ++, [] and bool conversion)
 * are defined elsewhere; they presumably move the reader to a named/next element and read
 * attributes - confirm against XmlReader before changing the statement order.
 *
 * \param reader                  xml reader supplying the metadata
 * \param sortedReferenceMetadata object to fill
 * \param version                 must equal SortedReferenceMetadata::CURRENT_REFERENCE_FORMAT_VERSION
 * \throws xml::XmlReaderException when the format version is unsupported or a permutation other
 *         than ABCD is found
 */
void serialize<xml::XmlReader>(xml::XmlReader &reader, SortedReferenceMetadata &sortedReferenceMetadata, const unsigned int version)
{
    ISAAC_ASSERT_MSG(version == SortedReferenceMetadata::CURRENT_REFERENCE_FORMAT_VERSION, "Unexpected version requested: " << version);

    sortedReferenceMetadata.formatVersion_ = (reader+="SortedReference").nextChildElement("FormatVersion").readElementText();
    // only files within the supported version range can be read
    if (SortedReferenceMetadata::CURRENT_REFERENCE_FORMAT_VERSION < sortedReferenceMetadata.formatVersion_ ||
        SortedReferenceMetadata::OLDEST_SUPPORTED_REFERENCE_FORMAT_VERSION > sortedReferenceMetadata.formatVersion_)
    {
        BOOST_THROW_EXCEPTION(xml::XmlReaderException(
            (boost::format("Unexpected sorted reference FormatVersion: %s. FormatVersion must be in range [%d,%d]") %
            reader.getValue().string() % SortedReferenceMetadata::OLDEST_SUPPORTED_REFERENCE_FORMAT_VERSION %
            SortedReferenceMetadata::CURRENT_REFERENCE_FORMAT_VERSION).str()));
    }

    // SoftwareVersion is optional for older xml files
    if (reader++.checkName("SoftwareVersion"))
    {
        reader++;
    }

    // Contigs may not be present
    if (reader.checkName("Contigs"))
    {
        serialize(reader, sortedReferenceMetadata.contigs_, version);
        // advance if possible
        ++reader;
    }

    // Permutations may not be present
    if (reader && reader.checkName("Permutations"))
    {
        // only ABCD permutation is supported
        reader += "Permutation";
        if (reader["Name"] != "ABCD")
        {
            BOOST_THROW_EXCEPTION(xml::XmlReaderException(std::string("Only ABCD permutation masks are supported")));
        }

        serialize(reader, sortedReferenceMetadata.maskFiles_, version);
    }

    // the default mask width is taken from the first mask file of the first entry, if any
    if (!sortedReferenceMetadata.maskFiles_.empty())
    {
        sortedReferenceMetadata.defaultMaskWidth_ = sortedReferenceMetadata.maskFiles_.begin()->second.at(0).maskWidth;
    }
    else
    {
        sortedReferenceMetadata.defaultMaskWidth_ = 0;
    }

    // As we were able to successfully read the file, bump format version up to the current to avoid confusion
    // when stored or merged
    sortedReferenceMetadata.formatVersion_ = SortedReferenceMetadata::CURRENT_REFERENCE_FORMAT_VERSION;
}
Ejemplo n.º 15
0
/**
 * \brief Returns the lane number shared by all tiles of the list.
 *
 * \param tiles non-empty list of tiles that must all belong to the same lane
 * \return lane of the first tile
 */
inline unsigned getLaneNumber(const flowcell::TileMetadataList& tiles)
{
    // front() is undefined on an empty list, so reject it explicitly
    ISAAC_ASSERT_MSG(!tiles.empty(), "Expected at least one tile");
    // no tile may report a lane different from the lane of the first tile
    ISAAC_ASSERT_MSG(
        tiles.end()
        == std::find_if(
            tiles.begin(),
            tiles.end(),
            boost::bind(&flowcell::TileMetadata::getLane, _1)
            != tiles.front().getLane()),
        "Expected all tiles to belong to the same lane");
    return tiles.front().getLane();
}
Ejemplo n.º 16
0
/**
 * \brief Runs the match finding transition with the k-mer type matching seedLength_.
 *
 * Seed lengths 16, 32 and 64 select oligo::ShortKmerType, oligo::KmerType and
 * oligo::LongKmerType respectively; any other seed length triggers an assertion.
 *
 * \param foundMatches receives the metadata of the matches found
 */
void AlignWorkflow::findMatches(alignWorkflow::FoundMatchesMetadata &foundMatches) const
{
    alignWorkflow::FindMatchesTransition findMatchesTransition(
        flowcellLayoutList_,
        barcodeMetadataList_,
        allowVariableFastqLength_,
        cleanupIntermediary_,
        ignoreMissingBcls_,
        firstPassSeeds_,
        availableMemory_,
        clustersAtATimeMax_,
        tempDirectory_,
        demultiplexingStatsXmlPath_,
        coresMax_,
        repeatThreshold_,
        neighborhoodSizeThreshold_,
        ignoreNeighbors_,
        ignoreRepeats_,
        inputLoadersMax_,
        tempSaversMax_,
        memoryControl_,
        clusterIdList_,
        sortedReferenceMetadataList_);

    // dispatch on the configured seed length
    switch (seedLength_)
    {
    case 16:
        findMatchesTransition.perform<isaac::oligo::ShortKmerType>(foundMatches);
        break;
    case 32:
        findMatchesTransition.perform<oligo::KmerType>(foundMatches);
        break;
    case 64:
        findMatchesTransition.perform<oligo::LongKmerType>(foundMatches);
        break;
    default:
        ISAAC_ASSERT_MSG(false, "Unexpected seed length " << seedLength_);
        break;
    }
}
Ejemplo n.º 17
0
/**
 * \brief Decodes the alignment position encoded in the name of a read.
 *
 * The name is obtained through getReadName for the (0-based) read. As parsed below, a name starts
 * with a one-character tag ('u' is not expected here, 'r' sets the last ReferencePosition
 * argument to true), followed by one more character, a number, a ':' and a second number.
 *
 * \param readNumber 1-based read number
 * \param fragment   fragment whose cluster name is decoded
 * \return TooManyMatch position when the name is empty, otherwise the decoded position
 */
reference::ReferencePosition getAlignmentPositionFromName(const std::size_t readNumber, const FragmentMetadata &fragment)
{
    // numbers are 1-based
    const auto name = getReadName(readNumber - 1, fragment);

    if (name.second == name.first)
    {
        return reference::ReferencePosition(reference::ReferencePosition::TooManyMatch);
    }

    if ('u' == *name.first)
    {
        ISAAC_ASSERT_MSG(false, common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << " " << fragment);
        return reference::ReferencePosition(reference::ReferencePosition::NoMatch);
    }

    // The numbers start two characters into the name; make sure the name is that long before
    // forming name.first + 2.
    ISAAC_ASSERT_MSG(2 < (name.second - name.first), "Read name is too short to contain a position: " <<
                     common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << " " << fragment);

    // Without this check a missing ':' would make the code dereference the end iterator.
    const auto colon = std::find(name.first + 2, name.second, ':');
    ISAAC_ASSERT_MSG(name.second != colon, "Read name does not contain ':' separated position: " <<
                     common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << " " << fragment);

    return reference::ReferencePosition(
        std::atol(&*name.first + 2),
        std::atol(&*colon + 1),
        false,
        'r' == *name.first);
}
Ejemplo n.º 18
0
/**
 * \brief Formats the path of the per-lane filter file into result.
 *
 * The path is built as <baseCallsPath>/L<lane, 3 digits>/s_<lane>.filter.
 *
 * \param baseCallsPath path of the BaseCalls folder
 * \param lane          lane number, must not exceed bclBgzf::LANE_NUMBER_MAX
 * \param result        receives the formatted path. Per the original author's note it is expected
 *                      to be pre-sized so that formatting does not have to allocate.
 */
static void getFiltersFilePath(
    const boost::filesystem::path &baseCallsPath,
    const unsigned lane,
    boost::filesystem::path &result)
{
    ISAAC_ASSERT_MSG(lane <= bclBgzf::LANE_NUMBER_MAX, "Lane number " << lane << " must not exceed " << bclBgzf::LANE_NUMBER_MAX);

    // Fixed-size stack buffers are used for the path components to keep formatting off the heap.
    // snprintf bounds the writes; lane is unsigned, hence %u rather than %d.
    char laneFolder[100];
    snprintf(laneFolder, sizeof(laneFolder), "%cL%03u", common::getDirectorySeparatorChar(), lane);

    char filterFileName[100];
    snprintf(filterFileName, sizeof(filterFileName), "%cs_%u.filter", common::getDirectorySeparatorChar(), lane);

    // Build the path through the public path interface, like the other path formatters here,
    // instead of writing into the path's internal string through a const_cast.
    result = baseCallsPath.c_str();
    result /= laneFolder;
    result /= filterFileName;
}
Ejemplo n.º 19
0
/**
 * \brief Scans all mask files of the sorted reference and dumps the neighbor/high-repeat flags.
 *
 * Allocates one flag per genome position for neighbors (initially false) and, when
 * highRepeatsFilePath_ is set, one per position for high repeats (initially true). Every mask
 * file is then processed by scanMaskFile and the flags are handed to dumpResults.
 *
 * \throws isaac::common::PreConditionException when the reference has no mask files for the k-mer length
 */
void ExtractNeighborsWorkflow::run()
{
    const reference::SortedReferenceMetadata::MaskFiles &maskFiles =
        xml_.getMaskFileList(oligo::KmerTraits<KmerT>::KMER_BASES);
    if (maskFiles.empty())
    {
        BOOST_THROW_EXCEPTION(isaac::common::PreConditionException("No mask files in " + sortedReferenceMetadata_.string()));
    }

    // Fetch the contigs once and use this copy for everything below.
    const reference::SortedReferenceMetadata::Contigs contigs = xml_.getContigs();
    const std::vector<uint64_t> contigOffsets = reference::computeContigOffsets(contigs);

    std::vector<unsigned> karyotypes;
    karyotypes.reserve(contigs.size());
    std::transform(contigs.begin(), contigs.end(), std::back_inserter(karyotypes),
                   boost::bind(&reference::SortedReferenceMetadata::Contig::karyotypeIndex_, _1));

    // computed once, used for both flag vectors
    const uint64_t genomeSize = reference::genomeLength(contigs);

    std::vector<bool> neighbors(genomeSize, false);
    std::vector<bool> highRepeats(highRepeatsFilePath_.empty() ? 0 : genomeSize, true);

    // there could be multiple mask widths in the xml. Just fail if there are.
    unsigned maskWidth = -1U;

    BOOST_FOREACH(const reference::SortedReferenceMetadata::MaskFile &maskFile, maskFiles)
    {
        if (-1U == maskWidth)
        {
            // the first mask file defines the width all others must match
            maskWidth = maskFile.maskWidth;
        }
        ISAAC_ASSERT_MSG(maskWidth == maskFile.maskWidth, "Mixed mask widths are not supported");
        scanMaskFile<KmerT>(maskFile, contigOffsets, karyotypes, neighbors, highRepeats);
    }

    dumpResults(neighbors, highRepeats);
}
Ejemplo n.º 20
0
/**
 * \brief Formats a range of this cigar's operations as a string.
 *
 * \param offset index of the first operation to format
 * \param length number of operations to format; offset + length must not exceed size()
 * \return string produced by the iterator-range toString overload
 */
std::string Cigar::toString(const unsigned offset, const unsigned length) const
{
    // Compare without forming offset + length, which could wrap around in unsigned arithmetic
    // and let an out-of-range request pass the check.
    ISAAC_ASSERT_MSG(this->size() >= offset && this->size() - offset >= length, "Requested end is outside of cigarBuffer");
    return toString(begin() + offset, begin() + offset + length);
}
Ejemplo n.º 21
0
/**
 * \brief Formats a range of operations of an external cigar buffer as a string.
 *
 * \param cigarBuffer buffer holding the encoded operations
 * \param offset      index of the first operation to format
 * \param length      number of operations to format; offset + length must not exceed the buffer size
 * \return string produced by the iterator-range toString overload
 */
std::string Cigar::toString(const std::vector<uint32_t> &cigarBuffer, unsigned offset, unsigned length)
{
    // Compare without forming offset + length, which could wrap around in unsigned arithmetic
    // and let an out-of-range request pass the check.
    ISAAC_ASSERT_MSG(cigarBuffer.size() >= offset && cigarBuffer.size() - offset >= length, "Requested end is outside of cigarBuffer");
    return toString(cigarBuffer.begin() + offset, cigarBuffer.begin() + offset + length);
}