int main (int argc, char* argv[]) { if (argc < 2) { std::cerr << "you must provide a bank." << std::endl; return EXIT_FAILURE; } // We define a try/catch block in case some method fails try { // We declare an input Bank and use it locally IBank* inputBank = Bank::open (argv[1]); LOCAL (inputBank); // We create a sequence iterator for the bank with progress information ProgressIterator<Sequence> iter (*inputBank, "Iterating sequences"); // We loop over sequences. for (iter.first(); !iter.isDone(); iter.next()) { // Note that we do nothing inside the sequence iterating loop } } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankStats"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input", true)); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); // We get information about the bank. u_int64_t nbSequences=0, dataSize=0, seqMaxSize=0, seqMinSize=~0; // We declare an input Bank and use it locally IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inputBank); ProgressIterator<Sequence> it (*inputBank, "iterate"); for (it.first(); !it.isDone(); it.next()) { Data& data = it.item().getData(); nbSequences ++; if (data.size() > seqMaxSize) { seqMaxSize = data.size(); } if (data.size() < seqMinSize) { seqMinSize = data.size(); } dataSize += data.size (); } std::cout << "data size : " << dataSize << std::endl; std::cout << "sequence number : " << nbSequences << std::endl; std::cout << "sequence max size : " << seqMaxSize << std::endl; std::cout << "sequence min size : " << seqMinSize << std::endl; } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("KmerTest"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input", true)); parser.push_back (new OptionOneParam (STR_KMER_SIZE, "kmer size", true)); parser.push_back (new OptionOneParam (STR_MINIMIZER_SIZE, "minimizer size", true)); parser.push_back (new OptionNoParam (STR_VERBOSE, "display kmers", false)); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); // We get the kmer and minimizer sizes. size_t kmerSize = options->getInt(STR_KMER_SIZE); size_t mmerSize = options->getInt(STR_MINIMIZER_SIZE); // We define a try/catch block in case some method fails (bad filename for instance) u_int64_t nbKmers = 0; bool display = options->get(STR_VERBOSE) != 0; // We declare a Bank instance defined by a list of filenames IBank* bank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (bank); // We declare a kmer model and a minimizer model Model model (kmerSize, mmerSize); // We get a reference on the minimizer model, which will be useful for dumping const ModelMinimizer::Model& modelMinimizer = model.getMmersModel(); Kmer<span>::Type checksum; size_t nbChanged = 0; size_t nbInvalid = 0; // We define an iterator that encapsulates the sequences iterator with progress feedback ProgressIterator<Sequence> iter (*bank, "iterate bank"); // We loop over sequences. for (iter.first(); !iter.isDone(); iter.next()) { // Shortcut Sequence& seq = iter.item(); //! [snippet1_iterate] // We iterate the kmers (and minimizers) of the current sequence. model.iterate (seq.getData(), [&] (const Model::Kmer& kmer, size_t idx) { nbKmers ++; if (kmer.hasChanged() == true) { nbChanged++; } if (kmer.isValid() == false) { nbInvalid++; } checksum += kmer.minimizer().value(); }); //! [snippet1_iterate] } cout << "nbKmers : " << nbKmers << endl; cout << "nbInvalid : " << nbInvalid << endl; cout << "nbChanged : " << nbChanged << endl; cout << "ratio : " << (nbChanged > 0 ? (double)nbKmers / (double)nbChanged : 0) << endl; cout << "checksum : " << checksum << endl; } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } return EXIT_SUCCESS; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankSplitter"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank reference", true)); parser.push_back (new OptionOneParam (STR_MAX_INPUT_SIZE, "average db size per split", true)); parser.push_back (new OptionOneParam (STR_URI_OUTPUT_DIR, "output directory", false, ".")); parser.push_back (new OptionNoParam (STR_OUTPUT_FASTQ, "fastq output", false)); parser.push_back (new OptionNoParam (STR_OUTPUT_GZ, "gzip output", false)); // We define a try/catch block in case some method fails (bad filename for instance) try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** Shortcuts. */ u_int64_t maxDbSize = options->getInt(STR_MAX_INPUT_SIZE); // We declare an input Bank IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inputBank); // We get the basename of the input bank. string inputBasename = System::file().getBaseName (options->getStr(STR_URI_INPUT)); /** We set the name of the output directory. */ stringstream ss; ss << inputBasename << "_S" << maxDbSize; string outputDirName = ss.str(); /** We create the output directory. */ string outputDir = options->getStr(STR_URI_OUTPUT_DIR) + "/" + outputDirName; System::file().mkdir (outputDir, S_IRWXU); // We create the album bank. BankAlbum album (outputDir + "/album.txt"); /** We get estimations about the bank. */ u_int64_t number, totalSize, maxSize; inputBank->estimate (number, totalSize, maxSize); u_int64_t estimationNbSeqToIterate = number; // We create an iterator over the input bank ProgressIterator<Sequence> itSeq (*inputBank, "split"); // We loop over sequences to get the exact number of sequences. int64_t nbBanksOutput = -1; u_int64_t nbSequences = 0; u_int64_t dbSize = ~0; bool isFastq = options->get(STR_OUTPUT_FASTQ) != 0; bool isGzipped = options->get(STR_OUTPUT_GZ) != 0; IBank* currentBank = 0; for (itSeq.first(); !itSeq.isDone(); itSeq.next()) { if (dbSize > maxDbSize) { if (currentBank != 0) { currentBank->flush(); currentBank->finalize(); } nbBanksOutput ++; /** We build the uri of the current bank. */ stringstream ss; ss << inputBasename << "_" << nbBanksOutput << (isFastq ? ".fastq" : ".fasta"); if (isGzipped) { ss << ".gz"; } /** We create a new bank and put it in the album. */ currentBank = album.addBank (outputDir, ss.str(), isFastq, isGzipped); /** We reinit the db size counter. */ dbSize = 0; } dbSize += itSeq->getDataSize(); /** We insert the sequence into the current output bank. */ currentBank->insert (*itSeq); } if (currentBank != 0) { currentBank->flush(); } } catch (OptionFailure& e) { return e.displayErrors (cout); } catch (Exception& e) { cerr << "EXCEPTION: " << e.getMessage() << endl; } }