int main (int argc, char* argv[]) { // We get the file name from the user arguments const char* filename = argc >= 2 ? argv[1] : ""; // We get information about the bank. u_int64_t nbSequences=0, dataSize=0, seqMaxSize=0, seqMinSize=~0; // We declare a Bank instance. IBank* bank = Bank::open (filename); LOCAL (bank); // IN A NEAR FUTURE, WE WILL HAVE STL LIKE ITERATORS. #if 0 for (BankFasta::iterator it = bank->begin(); it != bank->end(); ++it) { Sequence& seq = *it; Data& data = seq.getData(); nbSequences ++; if (data.size() > seqMaxSize) { seqMaxSize = data.size(); } if (data.size() < seqMinSize) { seqMinSize = data.size(); } dataSize += data.size (); } #endif std::cout << "data size : " << dataSize << std::endl; std::cout << "sequence number : " << nbSequences << std::endl; std::cout << "sequence max size : " << seqMaxSize << std::endl; std::cout << "sequence min size : " << seqMinSize << std::endl; }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankFilter"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank reference", true)); parser.push_back (new OptionOneParam (STR_URI_SEQ_IDS, "file holding indexes of bank", true)); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** We read the list of indexes. */ set<size_t> indexes; FILE* file = fopen (options->getStr(STR_URI_SEQ_IDS).c_str(), "r"); if (file != 0) { char buffer[128]; while (fgets (buffer, sizeof(buffer), file)) { indexes.insert (atoi(buffer)); } fclose (file); } cout << "found " << indexes.size() << " indexes" << endl; /** We open the output bank. */ string outputBankUri = options->getStr(STR_URI_INPUT) + "_" + System::file().getBaseName (options->getStr(STR_URI_SEQ_IDS)); IBank* outputBank = Bank::open (outputBankUri); LOCAL (outputBank); /** We loop the input bank. */ IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inputBank); /** We use another iterator for filtering out some sequences. */ FilterIterator<Sequence,FilterFunctor> itSeq (inputBank->iterator(), FilterFunctor(indexes)); /** We loop the sequences. */ for (itSeq.first(); !itSeq.isDone(); itSeq.next()) { outputBank->insert (itSeq.item()); } /** We flush the output bank. */ outputBank->flush(); } catch (OptionFailure& e) { return e.displayErrors (cout); } catch (Exception& e) { cerr << "EXCEPTION: " << e.getMessage() << endl; } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankFilter"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input", true)); parser.push_back (new OptionOneParam (STR_FILTER_RATIO, "skip a sequence if 'good letters number / seq.len > X'", false, "0.8")); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** Shortcuts. */ double percentThreshold = options->getDouble(STR_FILTER_RATIO); /** We open the input bank. */ IBank* inBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inBank); /** We create the output inBank. */ IBank* outBank = new BankFasta (options->getStr(STR_URI_INPUT) + "_filtered"); LOCAL (outBank); /** We iterate the inBank. NOTE: WE USE A LAMBDA EXPRESSION HERE. */ inBank->iterate ([&] (Sequence& s) { /** Shortcut. */ char* data = s.getDataBuffer(); size_t nbOK = 0; for (size_t i=0; i<s.getDataSize(); i++) { if (data[i]=='A' || data[i]=='C' || data[i]=='G' || data[i]=='T') { nbOK++; } } if ((double)nbOK / (double)s.getDataSize() > percentThreshold) { outBank->insert (s); } }); /** We flush the output bank. */ outBank->flush(); } catch (OptionFailure& e) { return e.displayErrors (cout); } catch (Exception& e) { cerr << "EXCEPTION: " << e.getMessage() << endl; } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankStats"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank input", true)); try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); std::string filename = options->getStr(STR_URI_INPUT); //! [snippet16_bank] // We get an instance of IBank from the URI. IBank* bank = Bank::open (filename); //! [snippet16_seq] // We create an iterator on the bank Iterator<Sequence>* it = bank->iterator(); // We iterate the sequences of the bank for (it->first(); !it->isDone(); it->next()) { // We get a shortcut on the current sequence and its data Sequence& seq = it->item(); Data& data = seq.getData(); // We dump some information about the sequence. std::cout << "comment " << seq.getComment() << std::endl; // We dump each nucleotide. NOTE: the output depends on the data encoding for (size_t i=0; i<data.size(); i++) { std::cout << data[i]; } std::cout << std::endl; } //! [snippet16_seq] // The bank and the iterator have been allocated on the heap, so we have to delete them delete it; delete bank; //! [snippet16_bank] } catch (OptionFailure& e) { return e.displayErrors (std::cout); } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } }
int main (int argc, char* argv[]) { if (argc < 2) { std::cerr << "you must provide a bank." << std::endl; return EXIT_FAILURE; } // We declare an input Bank and use it locally IBank* inputBank = Bank::open (argv[1]); LOCAL (inputBank); // We create an iterator over this bank. Iterator<Sequence>* it = inputBank->iterator(); LOCAL (it); // We loop over sequences in a "push" fashion (a functor is called for each sequence) Functor fct; it->iterate (fct); }
// START Application int main (int argc, char* argv[]) { // We check that the user provides at least one option: a Fasta/FastQ file. // Online GATB-Tutorial: this argument is automatically filled in with an // appropriate file. if (argc < 2) { std::cerr << "Please, provide a sequence file." << std::endl; return EXIT_FAILURE; } // We define a try/catch block in case some method fails (bad filename for instance) try { // We declare an input Bank and use it locally IBank* inputBank = Bank::open (argv[1]); LOCAL (inputBank); // We create an iterator over this bank using some filtering system FilterIterator<Sequence,QualityFilter> it (inputBank->iterator(), QualityFilter()); // We loop over sequences. for (it.first(); !it.isDone(); it.next()) { // Shortcut Sequence& seq = it.item(); // We dump the sequence quality std::cout << "[" << seq.getQuality() << "] " << computeMeanPhredScore(seq.getQuality()) << std::endl; } } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } }
int main (int argc, char* argv[]) { if (argc < 2) { std::cerr << "you must provide a bank." << std::endl; return EXIT_FAILURE; } // We define a try/catch block in case some method fails try { // We declare an input Bank and use it locally IBank* inputBank = Bank::open (argv[1]); LOCAL (inputBank); // Note also that we have to parameterize the SubjectIterator by the kind of iterated // items (Sequence) and the processing that has to be done on each iteration (ProgressFunctor). SubjectIterator<Sequence> iter (inputBank->iterator(), 10); // We create some listener to be notified every 10 iterations and attach it to the iterator. iter.addObserver (new ProgressFunctor()); // We loop over sequences. for (iter.first(); !iter.isDone(); iter.next()) { // Note that we do nothing inside the sequence iterating loop about the progression management. // In other words, we don't "pollute" the code inside this loop by presentation concerns and // we can therefore focus on the job to be done on the iterated sequences. } } catch (Exception& e) { std::cerr << "EXCEPTION: " << e.getMessage() << std::endl; } }
int main (int argc, char* argv[]) { /** We create a command line parser. */ OptionsParser parser ("BankSplitter"); parser.push_back (new OptionOneParam (STR_URI_INPUT, "bank reference", true)); parser.push_back (new OptionOneParam (STR_MAX_INPUT_SIZE, "average db size per split", true)); parser.push_back (new OptionOneParam (STR_URI_OUTPUT_DIR, "output directory", false, ".")); parser.push_back (new OptionNoParam (STR_OUTPUT_FASTQ, "fastq output", false)); parser.push_back (new OptionNoParam (STR_OUTPUT_GZ, "gzip output", false)); // We define a try/catch block in case some method fails (bad filename for instance) try { /** We parse the user options. */ IProperties* options = parser.parse (argc, argv); /** Shortcuts. */ u_int64_t maxDbSize = options->getInt(STR_MAX_INPUT_SIZE); // We declare an input Bank IBank* inputBank = Bank::open (options->getStr(STR_URI_INPUT)); LOCAL (inputBank); // We get the basename of the input bank. string inputBasename = System::file().getBaseName (options->getStr(STR_URI_INPUT)); /** We set the name of the output directory. */ stringstream ss; ss << inputBasename << "_S" << maxDbSize; string outputDirName = ss.str(); /** We create the output directory. */ string outputDir = options->getStr(STR_URI_OUTPUT_DIR) + "/" + outputDirName; System::file().mkdir (outputDir, S_IRWXU); // We create the album bank. BankAlbum album (outputDir + "/album.txt"); /** We get estimations about the bank. */ u_int64_t number, totalSize, maxSize; inputBank->estimate (number, totalSize, maxSize); u_int64_t estimationNbSeqToIterate = number; // We create an iterator over the input bank ProgressIterator<Sequence> itSeq (*inputBank, "split"); // We loop over sequences to get the exact number of sequences. int64_t nbBanksOutput = -1; u_int64_t nbSequences = 0; u_int64_t dbSize = ~0; bool isFastq = options->get(STR_OUTPUT_FASTQ) != 0; bool isGzipped = options->get(STR_OUTPUT_GZ) != 0; IBank* currentBank = 0; for (itSeq.first(); !itSeq.isDone(); itSeq.next()) { if (dbSize > maxDbSize) { if (currentBank != 0) { currentBank->flush(); currentBank->finalize(); } nbBanksOutput ++; /** We build the uri of the current bank. */ stringstream ss; ss << inputBasename << "_" << nbBanksOutput << (isFastq ? ".fastq" : ".fasta"); if (isGzipped) { ss << ".gz"; } /** We create a new bank and put it in the album. */ currentBank = album.addBank (outputDir, ss.str(), isFastq, isGzipped); /** We reinit the db size counter. */ dbSize = 0; } dbSize += itSeq->getDataSize(); /** We insert the sequence into the current output bank. */ currentBank->insert (*itSeq); } if (currentBank != 0) { currentBank->flush(); } } catch (OptionFailure& e) { return e.displayErrors (cout); } catch (Exception& e) { cerr << "EXCEPTION: " << e.getMessage() << endl; } }