/**
 * Mutation operator: walks every gene of every chromosome in the population
 * and, whenever a random draw from vg1 falls below the threshold `r`
 * (a member declared outside this block), flips one randomly chosen bit of
 * the gene's Gray-coded integer representation.
 *
 * @param p  Population to mutate. Genes are modified in place.
 *
 * NOTE(review): the exact range semantics of vg::variate_generator(lo, hi)
 * (inclusive/exclusive bounds) are not visible here. The bit index is drawn
 * from vg2(1, n-1), so bit 0 is presumably never flipped -- confirm that
 * this is intended.
 */
void operator()( population& p ) const
{
    // Width, in bits, of the integer encoding of a gene.
    const auto n = sizeof(integer_type) << 3;
    vg::variate_generator<real_type> vg1(0, 1);
    vg::variate_generator<integer_type> vg2(1, n-1);

    // Iterate by (forwarding) reference: iterating by value would apply the
    // mutation to temporary copies and leave the population untouched.
    for ( auto&& ch : p ) //for all chromosomes in the population
    {
        for ( auto&& g : ch ) //for all genes in the chromosome
        {
            if ( vg1() < r ) //case mutate
            {
                g.encode(); //real -> integer
                auto c = gene_to_gray()(g.code()); // c -> gray
                // Build the mask in integer_type, not int: the shift count
                // may be as large as n-1, which overflows a plain int when
                // integer_type is wider than int.
                c ^= ( static_cast<integer_type>(1) << vg2() ); //reverse a random bit
                g.code() = gray_to_gene()(c); // gray -> code
                g.decode(); //integer -> real
            }
        }
    }
}
int main(int argc, char** argv) { if(argc == 1) { // Print the help help_main(argv); return 1; } size_t kmerSize = 0; size_t edgeMax = 0; // Should we only merge on kmers and skip paths? bool kmersOnly = false; optind = 1; // Start at first real argument bool optionsRemaining = true; while(optionsRemaining) { static struct option longOptions[] = { {"kmer-size", required_argument, 0, 'k'}, {"edge-max", required_argument, 0, 'e'}, {"kmers-only", no_argument, 0, 'o'}, {"threads", required_argument, 0, 't'}, {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int optionIndex = 0; switch(getopt_long(argc, argv, "k:e:t:h", longOptions, &optionIndex)) { // Option value is in global optarg case -1: optionsRemaining = false; break; case 'k': // Set the kmer size kmerSize = atol(optarg); break; case 'e': // Set the edge max parameter for kmer enumeration edgeMax = atol(optarg); break; case 'o': // Only merge on kmers kmersOnly = true; break; case 't': // Set the openmp threads omp_set_num_threads(atoi(optarg)); break; case 'h': // When the user asks for help case '?': // When we get options we can't parse help_main(argv); exit(1); break; default: // TODO: keep track of the option std::cerr << "Illegal option" << std::endl; exit(1); } } if(argc - optind < 2) { // We don't have two positional arguments // Print the help help_main(argv); return 1; } if(kmersOnly && kmerSize == 0) { // We need a kmer size to use kmers throw std::runtime_error("Can't merge only on kmers with no kmer size"); } // Pull out the VG file names std::string vgFile1 = argv[optind++]; std::string vgFile2 = argv[optind++]; // Guess index names (TODO: add options) std::string indexDir1 = vgFile1 + ".index"; std::string indexDir2 = vgFile2 + ".index"; // Open the files std::ifstream vgStream1(vgFile1); if(!vgStream1.good()) { std::cerr << "Could not read " << vgFile1 << std::endl; exit(1); } std::ifstream vgStream2(vgFile2); if(!vgStream2.good()) { std::cerr << "Could not read " << vgFile2 << std::endl; exit(1); 
} // We may have indexes. We need to use pointers because destructing an index // that was never opened segfaults. TODO: fix vg vg::Index* index1 = nullptr; vg::Index* index2 = nullptr; if(kmerSize) { // Only go looking for indexes if we want to merge on kmers. index1 = new vg::Index(); index1->open_read_only(indexDir1); index2 = new vg::Index(); index2->open_read_only(indexDir2); } // Load up the first VG file vg::VG vg1(vgStream1); // And the second vg::VG vg2(vgStream2); // Make a way to track IDs int64_t nextId = 1; std::function<int64_t(void)> getId = [&]() { return nextId++; }; // Make a thread set auto threadSet = stPinchThreadSet_construct(); // Make a place to keep track of the thread sequences. // This will only contain sequences for threads that aren't staples. // TODO: should this be by pointer instead? std::map<int64_t, std::string> threadSequences; // Add in each vg graph to the thread set coregraph::EmbeddedGraph embedding1(vg1, threadSet, threadSequences, getId, vgFile1); coregraph::EmbeddedGraph embedding2(vg2, threadSet, threadSequences, getId, vgFile2); if(!kmersOnly) { // We want to merge on shared paths in addition to kmers // Complain if any of the graphs is not completely covered by paths if(!embedding1.isCoveredByPaths()) { std::cerr << "WARNING: " << embedding1.getName() << " contains nodes with no paths!" << std::endl; } if(!embedding2.isCoveredByPaths()) { std::cerr << "WARNING: " << embedding2.getName() << " contains nodes with no paths!" << std::endl; } // Trace the paths and merge the embedded graphs. std::cerr << "Pinching graphs on shared paths..." << std::endl; embedding1.pinchWith(embedding2); } if(kmerSize > 0) { // Merge on kmers that are unique in both graphs. std::cerr << "Pinching graphs on shared " << kmerSize << "-mers..." << std::endl; embedding1.pinchOnKmers(*index1, embedding2, *index2, kmerSize, edgeMax); } // Fix trivial joins so we don't produce more vg nodes than we really need to. 
stPinchThreadSet_joinTrivialBoundaries(threadSet); // Make another vg graph from the thread set vg::VG core = pinchToVG(threadSet, threadSequences); // Spit it out to standard output core.serialize_to_ostream(std::cout); // Tear everything down. TODO: can we somehow run this destruction function // after all our other, potentially depending locals are destructed? stPinchThreadSet_destruct(threadSet); return 0; }