void ExtractNeighborsWorkflow::run()
{
    const reference::SortedReferenceMetadata::MaskFiles &maskFiles =
        xml_.getMaskFileList(oligo::KmerTraits<KmerT>::KMER_BASES);
    if (maskFiles.empty())
    {
        BOOST_THROW_EXCEPTION(isaac::common::PreConditionException("No mask files in " + sortedReferenceMetadata_.string()));
    }

    const reference::SortedReferenceMetadata::Contigs contigs = xml_.getKaryotypeOrderedContigs();
    const std::vector<unsigned long> contigOffsets = reference::computeContigOffsets(contigs);
    std::vector<bool> neighbors(reference::genomeLength(contigs), false);
    std::vector<bool> highRepeats(highRepeatsFilePath_.empty() ? 0 : reference::genomeLength(contigs), true);

    // there could be mutliple mask widths in the xml. Just pick one.
    const unsigned maskWidth = xml_.getDefaultMaskWidth();

    BOOST_FOREACH(const reference::SortedReferenceMetadata::MaskFile &maskFile, maskFiles)
    {
        //Don't reprocess redundant mask files of different widths
        if (maskWidth == maskFile.maskWidth)
        {
            scanMaskFile<KmerT>(maskFile, contigOffsets, neighbors, highRepeats);
        }
    }

    dumpResults(neighbors, highRepeats);
}
예제 #2
0
// Scans every mask file listed by xml_ for oligo::KmerTraits<KmerT>::KMER_BASES,
// filling the neighbors and highRepeats flag vectors, then passes both to dumpResults().
// Throws isaac::common::PreConditionException when no mask files are listed.
// All mask files must share one mask width; a mismatch trips ISAAC_ASSERT_MSG.
void ExtractNeighborsWorkflow::run()
{
    const reference::SortedReferenceMetadata::MaskFiles &maskFiles =
        xml_.getMaskFileList(oligo::KmerTraits<KmerT>::KMER_BASES);
    if (maskFiles.empty())
    {
        BOOST_THROW_EXCEPTION(isaac::common::PreConditionException("No mask files in " + sortedReferenceMetadata_.string()));
    }

    const reference::SortedReferenceMetadata::Contigs contigs = xml_.getContigs();
    const std::vector<uint64_t> contigOffsets = reference::computeContigOffsets(contigs);

    // Karyotype index of each contig, in the same order as 'contigs'.
    // The local copy is used on purpose: taking begin() and end() from separate
    // xml_.getContigs() calls is only valid if the accessor returns a reference.
    std::vector<unsigned> karyotypes;
    karyotypes.reserve(contigs.size());
    std::transform(contigs.begin(), contigs.end(), std::back_inserter(karyotypes),
                   boost::bind(&reference::SortedReferenceMetadata::Contig::karyotypeIndex_, _1));

    // Both flag vectors are sized by the genome length; the high repeats vector stays
    // empty when no high repeats file path is configured.
    std::vector<bool> neighbors(reference::genomeLength(contigs), false);
    std::vector<bool> highRepeats(highRepeatsFilePath_.empty() ? 0 : reference::genomeLength(contigs), true);

    // there could be multiple mask widths in the xml. Just fail if there are.
    // UNSET_MASK_WIDTH marks "no mask file seen yet"; the first mask file fixes the expected width.
    const unsigned UNSET_MASK_WIDTH = -1U;
    unsigned maskWidth = UNSET_MASK_WIDTH;

    BOOST_FOREACH(const reference::SortedReferenceMetadata::MaskFile &maskFile, maskFiles)
    {
        if (UNSET_MASK_WIDTH == maskWidth)
        {
            maskWidth = maskFile.maskWidth;
        }
        ISAAC_ASSERT_MSG(maskWidth == maskFile.maskWidth, "Mixed mask widths are not supported");
        scanMaskFile<KmerT>(maskFile, contigOffsets, karyotypes, neighbors, highRepeats);
    }

    dumpResults(neighbors, highRepeats);
}