void parseArgs(){ _options = getInput(); //_sketchSize = _options->getInt(STR_SIMKA_SKETCH_SIZE); _nbCores = _options->getInt(STR_NB_CORES); _inputFilename1 = _options->getStr(STR_SIMKA_URI_INPUT_1); _inputFilename2 = _options->getStr(STR_SIMKA_URI_INPUT_2); _outputDir = _options->getStr(STR_URI_OUTPUT); _start_i = _options->getInt("-start-i"); _start_j = _options->getInt("-start-j"); _n_i = _options->getInt("-n-i"); _n_j = _options->getInt("-n-j"); //_kmerSize = _options->getInt(STR_KMER_SIZE); if(!System::file().doesExist(_outputDir)){ int ok = System::file().mkdir(_outputDir, -1); if(ok != 0){ std::cerr << "Error: can't create output directory (" << _outputDir << ")" << std::endl; exit(1); } } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("bankgen"); const char* OUTPUT_PREFIX = "-out"; const char* SEQ_LEN = "-seq-len"; const char* READ_LEN = "-read-len"; const char* OVERLAP_LEN = "-overlap-len"; const char* COVERAGE = "-coverage"; parser.push_back (new OptionOneParam (OUTPUT_PREFIX, "output prefix", true)); parser.push_back (new OptionOneParam (SEQ_LEN, "sequence length", false, "1000000")); parser.push_back (new OptionOneParam (READ_LEN, "read length", false, "150" )); parser.push_back (new OptionOneParam (OVERLAP_LEN, "overlap between two reads", false, "50" )); parser.push_back (new OptionOneParam (COVERAGE, "coverage", false, "3" )); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** We create the random sequence. */ IBank* randomBank = new BankRandom (1, options->getInt(SEQ_LEN)); LOCAL (randomBank); /** We create the reads bank. */ IBank* readsBank = new BankSplitter ( randomBank, options->getInt(READ_LEN), options->getInt(OVERLAP_LEN), options->getInt(COVERAGE) ); LOCAL (readsBank); /** We save the random bank. */ SaveAsFasta (randomBank, options->getStr(OUTPUT_PREFIX) + "_sequence.fa"); /** We save the reads bank. */ SaveAsFasta (readsBank, options->getStr(OUTPUT_PREFIX) + "_reads.fa"); } catch (OptionFailure& e) { e.getParser().displayErrors (stdout); e.getParser().displayHelp (stdout); return EXIT_FAILURE; } return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("GraphStats"); parser.push_back (new OptionOneParam (STR_URI_GRAPH, "graph input", true)); parser.push_back (new OptionOneParam (STR_NB_CORES, "nb cores", false, "0")); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); // We load the graph Graph graph = Graph::load (options->getStr(STR_URI_GRAPH)); // We set the number of cores to be used. Use all available cores if set to 0. size_t nbCores = options->getInt(STR_NB_CORES); // We get an iterator for branching nodes of the graph. // We use a progress iterator to get some progress feedback ProgressGraphIterator<BranchingNode,ProgressTimer> itBranching (graph.iterator<BranchingNode>(), "statistics"); // We define some kind of unique identifier for a couple (indegree,outdegree) typedef pair<size_t,size_t> InOut_t; // We want to gather some statistics during the iteration. // Note the use of ThreadObject: this object will be cloned N times (one object per thread) and each clone will // be reachable within the iteration block through ThreadObject::operator() ThreadObject <map <InOut_t, size_t> > topology; // We dispatch the iteration on several cores. Note the usage of lambda expression here. IDispatcher::Status status = Dispatcher(nbCores).iterate (itBranching, [&] (const BranchingNode& node) { // We retrieve the current instance of map <InOut_t,size_t> for the current running thread. map <InOut_t,size_t>& localTopology = topology(); // We get branching nodes neighbors for the current branching node. Graph::Vector<BranchingEdge> successors = graph.successors <BranchingEdge> (node); Graph::Vector<BranchingEdge> predecessors = graph.predecessors<BranchingEdge> (node); // We increase the occurrences number for the current couple (in/out) neighbors localTopology [make_pair(predecessors.size(), successors.size())] ++; }); // Now, the parallel processing is done. We want now to aggregate the information retrieved // in each thread in a single map. // We get each map<InOut_t,size_t> object filled in each thread, and we add its data into the "global" map. // The global map is reachable through the ThreadObject::operator*. The "topology.foreach" will loop over // all cloned object used in the threads. topology.foreach ([&] (const map <InOut_t, size_t>& t) { // We update the occurrence of the current couple (in/out) for_each (t.begin(), t.end(), [&] (const pair<InOut_t, size_t>& p) { (*topology)[p.first] += p.second; }); }); // We sort the statistics by decreasing occurrence numbers. Since map have its own ordering, we need to put all // the data into a vector and sort it with our own sorting criteria. vector < pair<InOut_t,size_t> > stats; for (auto it = topology->begin(); it != topology->end(); it++) { stats.push_back (*it); } sort (stats.begin(), stats.end(), [=] (const pair<InOut_t,size_t>& a, const pair<InOut_t,size_t>& b) { return a.second > b.second; }); printf ("\nThere are %d branching nodes with the following distribution: \n", itBranching.size()); size_t sum=0; for (size_t i=0; i<stats.size(); i++) { sum += stats[i].second; printf (" [in=%d out=%d] nb=%7d percent=%5.2f distrib=%5.2f\n", stats[i].first.first, stats[i].first.second, stats[i].second, 100.0*(float)stats[i].second / (float)itBranching.size(), 100.0*(float)sum / (float)itBranching.size() ); } printf ("\nDone on %d cores in %.2f sec\n\n", status.nbCores, (float)status.time/1000.0); } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("KmerTest"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input", true)); parser.push_back (new OptionOneParam (STR_KMER_SIZE, "kmer size", true)); parser.push_back (new OptionOneParam (STR_MINIMIZER_SIZE, "minimizer size", true)); parser.push_back (new OptionNoParam (STR_VERBOSE, "display kmers", false)); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); // We get the kmer and minimizer sizes. size_t kmerSize = options->getInt(STR_KMER_SIZE); size_t mmerSize = options->getInt(STR_MINIMIZER_SIZE); // We define a try/catch block in case some method fails (bad filename for instance) u_int64_t nbKmers = 0; bool display = options->get(STR_VERBOSE) != 0; // We declare a Bank instance defined by a list of filenames IBank* bank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (bank); // We declare a kmer model and a minimizer model Model model (kmerSize, mmerSize); // We get a reference on the minimizer model, which will be useful for dumping const ModelMinimizer::Model& modelMinimizer = model.getMmersModel(); Kmer<span>::Type checksum; size_t nbChanged = 0; size_t nbInvalid = 0; // We define an iterator that encapsulates the sequences iterator with progress feedback ProgressIterator<Sequence> iter (*bank, "iterate bank"); // We loop over sequences. for (iter.first(); !iter.isDone(); iter.next()) { // Shortcut Sequence& seq = iter.item(); //! [snippet1_iterate] // We iterate the kmers (and minimizers) of the current sequence. model.iterate (seq.getData(), [&] (const Model::Kmer& kmer, size_t idx) { nbKmers ++; if (kmer.hasChanged() == true) { nbChanged++; } if (kmer.isValid() == false) { nbInvalid++; } checksum += kmer.minimizer().value(); }); //! [snippet1_iterate] } cout << "nbKmers : " << nbKmers << endl; cout << "nbInvalid : " << nbInvalid << endl; cout << "nbChanged : " << nbChanged << endl; cout << "ratio : " << (nbChanged > 0 ? (double)nbKmers / (double)nbChanged : 0) << endl; cout << "checksum : " << checksum << endl; } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankSplitter"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank reference", true)); parser.push_back (new OptionOneParam (STR_MAX_INPUT_SIZE, "average db size per split", true)); parser.push_back (new OptionOneParam (STR_URI_OUTPUT_DIR, "output directory", false, ".")); parser.push_back (new OptionNoParam (STR_OUTPUT_FASTQ, "fastq output", false)); parser.push_back (new OptionNoParam (STR_OUTPUT_GZ, "gzip output", false)); // We define a try/catch block in case some method fails (bad filename for instance) try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** Shortcuts. */ u_int64_t maxDbSize = options->getInt(STR_MAX_INPUT_SIZE); // We declare an input Bank IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inputBank); // We get the basename of the input bank. string inputBasename = System::file().getBaseName (options->getStr(STR_URI_INPUT)); /** We set the name of the output directory. */ stringstream ss; ss << inputBasename << "_S" << maxDbSize; string outputDirName = ss.str(); /** We create the output directory. */ string outputDir = options->getStr(STR_URI_OUTPUT_DIR) + "/" + outputDirName; System::file().mkdir (outputDir, S_IRWXU); // We create the album bank. BankAlbum album (outputDir + "/album.txt"); /** We get estimations about the bank. */ u_int64_t number, totalSize, maxSize; inputBank->estimate (number, totalSize, maxSize); u_int64_t estimationNbSeqToIterate = number; // We create an iterator over the input bank ProgressIterator<Sequence> itSeq (*inputBank, "split"); // We loop over sequences to get the exact number of sequences. int64_t nbBanksOutput = -1; u_int64_t nbSequences = 0; u_int64_t dbSize = ~0; bool isFastq = options->get(STR_OUTPUT_FASTQ) != 0; bool isGzipped = options->get(STR_OUTPUT_GZ) != 0; IBank* currentBank = 0; for (itSeq.first(); !itSeq.isDone(); itSeq.next()) { if (dbSize > maxDbSize) { if (currentBank != 0) { currentBank->flush(); currentBank->finalize(); } nbBanksOutput ++; /** We build the uri of the current bank. */ stringstream ss; ss << inputBasename << "_" << nbBanksOutput << (isFastq ? ".fastq" : ".fasta"); if (isGzipped) { ss << ".gz"; } /** We create a new bank and put it in the album. */ currentBank = album.addBank (outputDir, ss.str(), isFastq, isGzipped); /** We reinit the db size counter. */ dbSize = 0; } dbSize += itSeq->getDataSize(); /** We insert the sequence into the current output bank. */ currentBank->insert (*itSeq); } if (currentBank != 0) { currentBank->flush(); } } catch (OptionFailure& e) { return e.displayErrors (cout); } catch (Exception& e) { cerr << "EXCEPTION: " << e.getMessage() << endl; } }