void ExtractNeighborsWorkflow::run() { const reference::SortedReferenceMetadata::MaskFiles &maskFiles = xml_.getMaskFileList(oligo::KmerTraits<KmerT>::KMER_BASES); if (maskFiles.empty()) { BOOST_THROW_EXCEPTION(isaac::common::PreConditionException("No mask files in " + sortedReferenceMetadata_.string())); } const reference::SortedReferenceMetadata::Contigs contigs = xml_.getKaryotypeOrderedContigs(); const std::vector<unsigned long> contigOffsets = reference::computeContigOffsets(contigs); std::vector<bool> neighbors(reference::genomeLength(contigs), false); std::vector<bool> highRepeats(highRepeatsFilePath_.empty() ? 0 : reference::genomeLength(contigs), true); // there could be mutliple mask widths in the xml. Just pick one. const unsigned maskWidth = xml_.getDefaultMaskWidth(); BOOST_FOREACH(const reference::SortedReferenceMetadata::MaskFile &maskFile, maskFiles) { //Don't reprocess redundant mask files of different widths if (maskWidth == maskFile.maskWidth) { scanMaskFile<KmerT>(maskFile, contigOffsets, neighbors, highRepeats); } } dumpResults(neighbors, highRepeats); }
void ExtractNeighborsWorkflow::run() { const reference::SortedReferenceMetadata::MaskFiles &maskFiles = xml_.getMaskFileList(oligo::KmerTraits<KmerT>::KMER_BASES); if (maskFiles.empty()) { BOOST_THROW_EXCEPTION(isaac::common::PreConditionException("No mask files in " + sortedReferenceMetadata_.string())); } const reference::SortedReferenceMetadata::Contigs contigs = xml_.getContigs(); const std::vector<uint64_t> contigOffsets = reference::computeContigOffsets(contigs); std::vector<unsigned> karyotypes; karyotypes.reserve(xml_.getContigs().size()); std::transform(xml_.getContigs().begin(), xml_.getContigs().end(), std::back_inserter(karyotypes), boost::bind(&reference::SortedReferenceMetadata::Contig::karyotypeIndex_, _1)); std::vector<bool> neighbors(reference::genomeLength(contigs), false); std::vector<bool> highRepeats(highRepeatsFilePath_.empty() ? 0 : reference::genomeLength(contigs), true); // there could be multiple mask widths in the xml. Just fail if there are. unsigned maskWidth = -1U; BOOST_FOREACH(const reference::SortedReferenceMetadata::MaskFile &maskFile, maskFiles) { if (-1U == maskWidth) { maskWidth = maskFile.maskWidth; } ISAAC_ASSERT_MSG(maskWidth == maskFile.maskWidth, "Mixed mask widths are not supported"); scanMaskFile<KmerT>(maskFile, contigOffsets, karyotypes, neighbors, highRepeats); } dumpResults(neighbors, highRepeats); }