int main(int argc, char* argv[]) { try { size_t kmerSize = 11; cost char* seq = "AGGCGCTAGGGTAGAGGATGATGA"; std::cout << "The initial sequence is '" << seq << "', kmer size is" << kmerSize << std::endl; // We create the graph from a given sequence, and for a given kmer size Graph graph = Graph::create( new BankStrings( seq, NULL), "-kmer-size %d -abundance-min 1 -verbose 0", kmerSize ); // Get the first node Node node = graph.buildNode(seq); // Create a node iterator that iterates over all the simple nodes from // the first node. Rememeber that a simple node has inDeg = outDeg = 1 Graph::Iterator<Node> path = graph.simplePath<Node> (node, DIR_OUTCOMING); // Iterate over the simple path: for (path.start(); !path.isDone(); path.next()) { std::cout << " [" << path.rank() << "] current item is " << graph.toString(path.item()) << std::endl; } std::cout << "The simple path was " << path.rank() << " long" << std::endl; } catch(Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }
/**
 * Benchmark functor, templated on the kmer span.
 *
 * operator() builds two graphs from the same parameters (a Graph and a
 * GraphTemplate specialised on NodeFast/EdgeFast/GraphDataVariantFast), then
 * times a series of per-node operations over all nodes of each graph and
 * prints the results on cout.
 *
 * Timing scheme used throughout: start_t/end_t are taken around a loop over
 * all nodes, diff_wtime(start_t, end_t) is divided by `unit`, and for the
 * "actual benchmark" lines a previously measured baseline (the same loop
 * with an empty body or a *Dummy call) is subtracted before printing.
 *
 * NOTE(review): statement order and even the mere presence of code matter
 * here (see the "confuse_mphf" comment below), so be careful when
 * restructuring this function.
 */
template<size_t span> struct debruijn_mphf_bench
{
    /**
     * Runs the whole benchmark.
     * \param params reads params.k (kmer size), params.seq (optional input
     *        sequence; empty string means "build from params.args only") and
     *        params.args (option string handed to the graph factories).
     */
    void operator () (Parameter params)
    {
        typedef NodeFast<span> NodeFastT;
        typedef GraphTemplate<NodeFastT,EdgeFast<span>,GraphDataVariantFast<span>> GraphFast;

        size_t kmerSize = params.k;

        Graph graph;
        GraphFast graphFast;

        /* Build both graph flavors from the same input: either from the option
         * string alone, or from a BankStrings wrapping the given sequence. */
        if (params.seq == "")
        {
            graph = Graph::create (params.args.c_str());
            graphFast = GraphFast::create (params.args.c_str());
        }
        else
        {
            graph = Graph::create (new BankStrings (params.seq.c_str(), 0), params.args.c_str());
            graphFast = GraphFast::create (new BankStrings (params.seq.c_str(), 0), params.args.c_str());
        }

        cout << "graph built, benchmarking.." << endl;

        int miniSize = 8;               /* minimizer length given to ModelMini */
        int NB_REPETITIONS = 2000000;   /* not referenced anywhere else in this function */
        double unit = 1000000000;       /* divisor applied to diff_wtime() results; presumably nanoseconds -> seconds, confirm against diff_wtime */

        /* Print durations in fixed notation with 3 decimals. */
        cout.setf(ios_base::fixed);
        cout.precision(3);

        Graph::Iterator<Node> nodes = graph.iterator();
        typename GraphFast::template Iterator<NodeFastT> nodesFast = graphFast.iterator();
        nodes.first ();

        /** We get the first node. */
        Node node = nodes.item();

        typedef typename Kmer<span>::Type Type;
        typedef typename Kmer<span>::ModelCanonical ModelCanonical;
        typedef typename Kmer<span>::ModelDirect ModelDirect;   /* not used below in this function */
        typedef typename Kmer<span>::template ModelMinimizer <ModelCanonical> ModelMini;
        typedef typename ModelMini::Kmer KmerType;              /* not used below in this function */

        ModelMini modelMini (kmerSize, miniSize);
        ModelCanonical modelCanonical (kmerSize);

        // For some reason, if *compiled*, the code in the scope below makes later MPHF queries 3x slower.
        // Really? Yes. Try to replace "if (confuse_mphf)" by "if (confuse_mphf && 0)" and re-run, you will see.
        {
            bool confuse_mphf = false;
            if (confuse_mphf)
            {
                //Type b; b.setVal(0);
                //modelCanonical.emphf_hasher(modelCanonical.adaptor(b));
                //typedef std::pair<u_int8_t const*, u_int8_t const*> byte_range_t;
                //int c = 0;
                //byte_range_t brange( reinterpret_cast <u_int8_t const*> (&c), reinterpret_cast <u_int8_t const*> (&c) + 2 );
                //byte_range_t brange( (u_int8_t const*) 1,(u_int8_t const*)33);
                //auto hashes = modelCanonical.empfh_hasher(brange);
            }

            /* The loop bound is 0, so this body never executes at run time;
             * it is only compiled. */
            for (int i = 0; i < 0; i++)
            {
                auto start_tt=chrono::system_clock::now();
                for (nodes.first(); !nodes.isDone(); nodes.next())
                    modelCanonical.EMPHFhash(nodes.item().kmer.get<Type>());
                auto end_tt=chrono::system_clock::now();

                cout << "time to do " << nodes.size() << " computing EMPHFhash of kmers on all nodes (" << kmerSize << "-mers) : "
                     << (diff_wtime(start_tt, end_tt) / unit) << " seconds" << endl;
            }
            // it's slow. i don't understand why. see above for the "confuse mphf" part
            //return; //FIXME
        }

        /** We get the value of the first node (just an example, it's not used later). */
        Type kmer = node.kmer.get<Type>();

        /* Time points reused by every measurement below. */
        auto start_t=chrono::system_clock::now();
        auto end_t=chrono::system_clock::now();

        cout << "----\non all nodes of the graph\n-----\n";

        /* disable node state (because we don't want to pay the price for the overhead of checking
         * whether a node is deleted or not in contain()) */
        std::cout<< "PAY ATTENTION: this neighbor() benchmark, in the Bloom flavor, is without performing a MPHF query for each found node" << std::endl;
        graph.disableNodeState();
        graphFast.disableNodeState();

        /* compute baseline times (= overheads we're not interested in) */

        /* Baseline: bare enumeration of the Graph nodes. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
        {}
        end_t=chrono::system_clock::now();
        auto baseline_graph_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph nodes enumeration (" << nodes.size() << " nodes) : "
             << baseline_graph_time << " seconds" << endl;

        /* Baseline: bare enumeration of the GraphFast nodes.
         * NOTE(review): here and in every NodeFast message below, the printed count is nodes.size(),
         * not nodesFast.size() - presumably both graphs hold the same number of nodes; confirm. */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
        {}
        end_t=chrono::system_clock::now();
        auto baseline_graphfast_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph NodeFast enumeration (" << nodes.size() << " nodes) : "
             << baseline_graphfast_time << " seconds" << endl;

        /* Baseline: enumeration + getMinimizerValueDummy() on each kmer. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            modelMini.getMinimizerValueDummy(nodes.item().kmer.get<Type>());
        end_t=chrono::system_clock::now();
        auto baseline_minim_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph nodes enumeration and minimizer computation setup (" << nodes.size() << " nodes) : "
             << baseline_minim_time << " seconds" << endl;

        /* Baseline: enumeration + extraction of the typed kmer from each node. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            nodes.item().getKmer<Type>();
        end_t=chrono::system_clock::now();
        auto baseline_hash_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph nodes enumeration and hash computation setup (" << nodes.size() << " nodes) : "
             << baseline_hash_time << " seconds" << endl;

        /* Baseline: enumeration + access to the kmer member of each NodeFast
         * (the expression result is discarded). */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            nodesFast.item().kmer;
        end_t=chrono::system_clock::now();
        auto baseline_hashfast_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph NodeFast enumeration and hash computation setup (" << nodes.size() << " nodes) : "
             << baseline_hashfast_time << " seconds" << endl;

        /* Baseline: enumeration + nodeMPHFIndexDummy() on the Graph. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.nodeMPHFIndexDummy(nodes.item());
        end_t=chrono::system_clock::now();
        auto baseline_mphf_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph nodes enumeration and mphf query setup (" << nodes.size() << " nodes) : "
             << baseline_mphf_time << " seconds" << endl;

        /* Baseline: enumeration + nodeMPHFIndexDummy() on the GraphFast. */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.nodeMPHFIndexDummy(nodesFast.item());
        end_t=chrono::system_clock::now();
        auto baseline_mphffast_time = diff_wtime(start_t, end_t) / unit;
        cout << "baseline overhead for graph NodeFast enumeration and mphf query setup (" << nodes.size() << " nodes) : "
             << baseline_mphffast_time << " seconds" << endl;

        /* do actual benchmark */

        /* Minimizer computation, reported minus baseline_minim_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            modelMini.getMinimizerValue(nodes.item().kmer.get<Type>(), true);
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computations of minimizers (fast method) of length " << miniSize
             << " on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_minim_time << " seconds" << endl;

        /* MPHF index query on Graph, reported minus baseline_mphf_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.nodeMPHFIndex(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computations of MPHF index on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_mphf_time << " seconds" << endl;

        /* MPHF index query on GraphFast, reported minus baseline_mphffast_time. */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.nodeMPHFIndex(nodesFast.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computations of MPHF index on all NodeFast (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_mphffast_time << " seconds" << endl;

        /* getHash() on each Graph kmer, reported minus baseline_hash_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            modelCanonical.getHash(nodes.item().kmer.get<Type>());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computing hash1 of kmers on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_hash_time << " seconds" << endl;

        /* getHash2() on each Graph kmer, reported minus baseline_hash_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            modelCanonical.getHash2(nodes.item().kmer.get<Type>());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computing hash2 of kmers on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_hash_time << " seconds" << endl;

        /* getHash2() on each NodeFast kmer, reported minus baseline_hashfast_time. */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            modelCanonical.getHash2(nodesFast.item().kmer);
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computing hash2 of kmers on all NodeFast (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_hashfast_time << " seconds" << endl;

        /* EMPHFhash() on each Graph kmer, reported minus baseline_hash_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            modelCanonical.EMPHFhash(nodes.item().kmer.get<Type>());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " computing EMPHFhash of kmers on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_hash_time << " seconds" << endl;
        // it's slow. i don't understand why. see above for the "confuse mphf" part

        /* neighborsDummy() on Graph, reported minus baseline_graph_time. */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.neighborsDummy(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " dummy neighbors() query on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graph_time << " seconds" << endl;

        /* neighbors() on Graph (before adjacency precomputation). */
        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.neighbors(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " neighbors() query on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graph_time << " seconds" << endl;

        /* neighbors() on GraphFast (before adjacency precomputation). */
        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.neighbors(nodesFast.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " neighbors() query on all NodeFast (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graphfast_time << " seconds" << endl;

        /* isBranching */

        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.isBranching(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " isBranching() query on all nodes (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graph_time << " seconds" << endl;

        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.isBranching(nodesFast.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " isBranching() query on all NodeFast (" << kmerSize << "-mers) : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graphfast_time << " seconds" << endl;

        /* now, compute adjacency! The neighbors()/isBranching() measurements are then repeated. */
        graph.precomputeAdjacency();
        graphFast.precomputeAdjacency();

        cout << "adjacency precomputed" << endl;

        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.neighbors(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " neighbors() query on all nodes (" << kmerSize << "-mers) using adjacency : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graph_time << " seconds" << endl;

        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.neighbors(nodesFast.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " fast neighbors() query on all NodeFast (" << kmerSize << "-mers) using adjacency : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graphfast_time << " seconds" << endl;

        /* isBranching */

        start_t=chrono::system_clock::now();
        for (nodes.first(); !nodes.isDone(); nodes.next())
            graph.isBranching(nodes.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " isBranching() query on all nodes (" << kmerSize << "-mers) using adjacency : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graph_time << " seconds" << endl;

        start_t=chrono::system_clock::now();
        for (nodesFast.first(); !nodesFast.isDone(); nodesFast.next())
            graphFast.isBranching(nodesFast.item());
        end_t=chrono::system_clock::now();
        cout << "time to do " << nodes.size() << " fast isBranching() query on all NodeFast (" << kmerSize << "-mers) using adjacency : "
             << (diff_wtime(start_t, end_t) / unit) - baseline_graphfast_time << " seconds" << endl;

        /** We remove the graph. */
        //graph.remove ();
        //graphFast.remove ();
        // no actually, I want to keep the .h5 file
    }