int main(int argc, char* argv[]) { size_t kmerSize = 7; const char* sequences[] = { "AGGCGCTAGGGAGAGGATGATGAAA", "AGGCGCTCGGGAGAGGATGATGAAA", "AGGCGCTTGGGAGAGGATGATGAAA" }; try { // Create the graph Graph graph = Graph::create ( new BankStrings (sequences, ARRAY_SIZE(sequences)), "-kmer-size %d -abundance-min 1 -verbose 0", kmerSize ); // Take the first node, which should be a branching node Node node = graph.buildNode (sequences[0]); // Retrieve the branching neighbors for the node (as BRANCHING EDGES!) Graph::Vector<BranchingEdge> branchingNeighbors = graph.successors<BranchingEdge> (node); std::cout << "We found " << branchingNeighbors.size() << " branching neighbors from node " << graph.toString(node) << std::endl; // Iterate over the branching neighbors to print them for (size_t i = 0; i < branchingNeighbors.size(); i++) { // Note; we don't display all the transition nucleotides, only the // first transition nucleotide. We also display the number of // transitions needed to link the two branching nodes. std::cout << graph.toString (branchingNeighbors[i]) << std::endl; } } catch(Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser; parser.push_back (new OptionOneParam (STR_URI_INPUT, "graph file", true)); IProperties* params = 0; try { /** We parse the user options. */ params = parser.parse (argc, argv); } catch (OptionFailure& e) { e.getParser().displayErrors (stdout); e.getParser().displayHelp (stdout); return EXIT_FAILURE; } // We create the graph with the bank and other options Graph graph = Graph::load (params->getStr(STR_URI_INPUT)); // We create a graph marker. GraphMarker<BranchingNode> marker (graph); // We create an object for Breadth First Search for the de Bruijn graph. BFS<BranchingNode> bfs (graph); // We want to compute the distribution of connected components of the branching nodes. // - key is a connected component class (for a given number of branching nodes for this component) // - value is the number of times this component class occurs in the branching sub graph map<size_t,Entry> distrib; // We get an iterator for all nodes of the graph. We use a progress iterator to get some progress feedback ProgressGraphIterator<BranchingNode,ProgressTimer> itBranching (graph.iterator<BranchingNode>(), "statistics"); // We want to know the number of connected components size_t nbConnectedComponents = 0; // We define some kind of unique identifier for a couple (indegree,outdegree) map <InOut_t, size_t> topology; size_t simplePathSizeMin = ~0; size_t simplePathSizeMax = 0; // We want time duration of the iteration TimeInfo ti; ti.start ("compute"); // We loop the branching nodes for (itBranching.first(); !itBranching.isDone(); itBranching.next()) { // We get branching nodes neighbors for the current branching node. Graph::Vector<BranchingEdge> successors = graph.successors <BranchingEdge> (*itBranching); Graph::Vector<BranchingEdge> predecessors = graph.predecessors<BranchingEdge> (*itBranching); // We increase the occurrences number for the current couple (in/out) neighbors topology [make_pair(predecessors.size(), successors.size())] ++; // We loop the in/out neighbors and update min/max simple path size for (size_t i=0; i<successors.size(); i++) { simplePathSizeMax = std::max (simplePathSizeMax, successors[i].distance); simplePathSizeMin = std::min (simplePathSizeMin, successors[i].distance); } for (size_t i=0; i<predecessors.size(); i++) { simplePathSizeMax = std::max (simplePathSizeMax, predecessors[i].distance); simplePathSizeMin = std::min (simplePathSizeMin, predecessors[i].distance); } // We skip already visited nodes. if (marker.isMarked (*itBranching)) { continue; } // We launch the breadth first search; we get as a result the set of branching nodes in this component const set<BranchingNode>& component = bfs.run (*itBranching); // We mark the nodes for this connected component marker.mark (component); // We update our distribution distrib[component.size()].nbOccurs += 1; // We update the number of connected components. nbConnectedComponents++; } ti.stop ("compute"); // We compute the total number of branching nodes in all connected components. size_t sumOccurs = 0; size_t sumKmers = 0; for (map<size_t,Entry>::iterator it = distrib.begin(); it != distrib.end(); it++) { sumOccurs += it->first*it->second.nbOccurs; sumKmers += it->second.nbKmers; } // We sort the statistics by decreasing occurrence numbers. Since map have its own ordering, we need to put all // the data into a vector and sort it with our own sorting criteria. vector < pair<InOut_t,size_t> > stats; for (map <InOut_t, size_t>::iterator it = topology.begin(); it != topology.end(); it++) { stats.push_back (*it); } sort (stats.begin(), stats.end(), CompareFct); // Note: it must be equal to the number of branching nodes of the graph assert (sumOccurs == itBranching.size()); // We aggregate the computed information Properties props ("topology"); props.add (1, "graph"); props.add (2, "name", "%s", graph.getName().c_str()); props.add (2, "db_input", "%s", graph.getInfo().getStr("input").c_str()); props.add (2, "db_nb_seq", "%d", graph.getInfo().getInt("sequences_number")); props.add (2, "db_size", "%d", graph.getInfo().getInt("sequences_size")); props.add (2, "kmer_size", "%d", graph.getInfo().getInt("kmer_size")); props.add (2, "kmer_nks", "%d", graph.getInfo().getInt("nks")); props.add (2, "nb_nodes", "%d", graph.getInfo().getInt("kmers_nb_solid")); props.add (2, "nb_branching_nodes", "%d", graph.getInfo().getInt("nb_branching")); props.add (2, "percent_branching_nodes", "%.1f", graph.getInfo().getInt("kmers_nb_solid") > 0 ? 100.0 * (float)graph.getInfo().getInt("nb_branching") / (float) graph.getInfo().getInt("kmers_nb_solid") : 0 ); props.add (1, "branching_nodes"); props.add (2, "simple_path"); props.add (3, "size_min", "%d", simplePathSizeMin); props.add (3, "size_max", "%d", simplePathSizeMax); props.add (2, "neighborhoods"); for (size_t i=0; i<stats.size(); i++) { props.add (3, "neighborhood", "in=%d out=%d", stats[i].first.first, stats[i].first.second); props.add (4, "nb_bnodes", "%d", stats[i].second); props.add (4, "percentage", "%5.2f", itBranching.size() > 0 ? 100.0*(float)stats[i].second / (float)itBranching.size() : 0 ); } props.add (2, "connected_components"); props.add (3, "nb_classes", "%d", distrib.size()); props.add (3, "nb_components", "%d", nbConnectedComponents); for (map<size_t,Entry>::iterator it = distrib.begin(); it!=distrib.end(); it++) { props.add (3, "component_class"); props.add (4, "nb_occurs", "%d", it->second.nbOccurs); props.add (4, "nb_bnodes", "%d", it->first); props.add (4, "freq_bnodes", "%f", sumOccurs > 0 ? 100.0*(float)(it->first*it->second.nbOccurs) / (float)sumOccurs : 0 ); } props.add (1, ti.getProperties("time")); // We dump the results in a XML file in the current directory XmlDumpPropertiesVisitor v (graph.getName() + ".xml", false); props.accept (&v); return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("GraphStats"); parser.push_back (new OptionOneParam (STR_URI_GRAPH, "graph input", true)); parser.push_back (new OptionOneParam (STR_NB_CORES, "nb cores", false, "0")); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); // We load the graph Graph graph = Graph::load (options->getStr(STR_URI_GRAPH)); // We set the number of cores to be used. Use all available cores if set to 0. size_t nbCores = options->getInt(STR_NB_CORES); // We get an iterator for branching nodes of the graph. // We use a progress iterator to get some progress feedback ProgressGraphIterator<BranchingNode,ProgressTimer> itBranching (graph.iterator<BranchingNode>(), "statistics"); // We define some kind of unique identifier for a couple (indegree,outdegree) typedef pair<size_t,size_t> InOut_t; // We want to gather some statistics during the iteration. // Note the use of ThreadObject: this object will be cloned N times (one object per thread) and each clone will // be reachable within the iteration block through ThreadObject::operator() ThreadObject <map <InOut_t, size_t> > topology; // We dispatch the iteration on several cores. Note the usage of lambda expression here. IDispatcher::Status status = Dispatcher(nbCores).iterate (itBranching, [&] (const BranchingNode& node) { // We retrieve the current instance of map <InOut_t,size_t> for the current running thread. map <InOut_t,size_t>& localTopology = topology(); // We get branching nodes neighbors for the current branching node. Graph::Vector<BranchingEdge> successors = graph.successors <BranchingEdge> (node); Graph::Vector<BranchingEdge> predecessors = graph.predecessors<BranchingEdge> (node); // We increase the occurrences number for the current couple (in/out) neighbors localTopology [make_pair(predecessors.size(), successors.size())] ++; }); // Now, the parallel processing is done. We want now to aggregate the information retrieved // in each thread in a single map. // We get each map<InOut_t,size_t> object filled in each thread, and we add its data into the "global" map. // The global map is reachable through the ThreadObject::operator*. The "topology.foreach" will loop over // all cloned object used in the threads. topology.foreach ([&] (const map <InOut_t, size_t>& t) { // We update the occurrence of the current couple (in/out) for_each (t.begin(), t.end(), [&] (const pair<InOut_t, size_t>& p) { (*topology)[p.first] += p.second; }); }); // We sort the statistics by decreasing occurrence numbers. Since map have its own ordering, we need to put all // the data into a vector and sort it with our own sorting criteria. vector < pair<InOut_t,size_t> > stats; for (auto it = topology->begin(); it != topology->end(); it++) { stats.push_back (*it); } sort (stats.begin(), stats.end(), [=] (const pair<InOut_t,size_t>& a, const pair<InOut_t,size_t>& b) { return a.second > b.second; }); printf ("\nThere are %d branching nodes with the following distribution: \n", itBranching.size()); size_t sum=0; for (size_t i=0; i<stats.size(); i++) { sum += stats[i].second; printf (" [in=%d out=%d] nb=%7d percent=%5.2f distrib=%5.2f\n", stats[i].first.first, stats[i].first.second, stats[i].second, 100.0*(float)stats[i].second / (float)itBranching.size(), 100.0*(float)sum / (float)itBranching.size() ); } printf ("\nDone on %d cores in %.2f sec\n\n", status.nbCores, (float)status.time/1000.0); } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }