int main (int argc, char* argv[]) { // We get the number of cores to be used. If we don't give any number, // we set to 0 which implies the usage of all available cores size_t nbCores = (argc >=2 ? atoi(argv[1]) : 0); // We create an iterator over an integer range int nmax = 10000; Range<int>::Iterator it (1,nmax); // We create a dispatcher configured for 'nbCores' cores. // The second argument tells how many consecutive values will be received by // each thread. The second argument tells how to group items per thread (set // here to 1 to emphasize concurrent access issue). Dispatcher dispatcher (nbCores, 1); // The idea here is to sum the integers of our range with an iteration. // (Note: we know that the result is N*(N+1)/2) int sum1=0, sum2=0; ////////////////////////////////////////////////// // First iteration: WRONG WAY ////////////////////////////////////////////////// // Our first attempt is to use an integer variable to sum the iterated value. // This variable will be shared by all the threads and, since they access to it // without caution wrt concurrent accesses, the sum result should be wrong (unless // you use one core only) dispatcher.iterate (it, [&] (int i) { sum1 += i; }); ////////////////////////////////////////////////// // Second iteration: CORRECT WAY ////////////////////////////////////////////////// // As previously, our second attempt will share the same integer variable. // But now, we take care about concurrent accesses with the use of the // __sync_fetch_and_add intrinsic instruction. This instruction ensures that // the shared integer can be modified by only one thread at one time. dispatcher.iterate (it, [&] (int i) { __sync_fetch_and_add (&sum2, i); }); ////////////////////////////////////////////////// // CONCLUSION ////////////////////////////////////////////////// cout << "First iteration: sum=" << sum1 << " (result should be " << nmax*(nmax+1)/2 << ")" << endl; cout << "Second iteration: sum=" << sum2 << " (result should be " << nmax*(nmax+1)/2 << ")" << endl; // Parallelization of Iterator is pretty simple with the Dispatcher class. // Moreover, usage of lambda expressions make the whole thing easy to write. // Note that the instruction block of the lambda expression doesn't even know that // it may be executed in different threads. In other words, the block doesn't refer // any stuff related to thread management; it just receives one of the item of the // iteration and process some action on it. // IMPORTANT ! As we have seen here, the user has to be aware that a shared resource (one // integer here) can be modified by several threads at the same time, so the user must use // some kind of synchronization for modifying the shared resource. We will see in other // examples that GATB provides mechanisms for this purpose. }
void SRC_counter::parse_query_sequences (){ std::string bank_filename = getInput()->getStr(STR_URI_BANK_INPUT).substr(getInput()->getStr(STR_URI_BANK_INPUT).find_last_of("/\\") + 1); BankAlbum banks (getInput()->getStr(STR_URI_QUERY_INPUT)); const std::vector<IBank*>& banks_of_queries = banks.getBanks(); const int number_of_read_sets = banks_of_queries.size(); FILE * pFile; pFile = fopen (getInput()->getStr(STR_OUT_FILE).c_str(), "wb"); cout<<"Query "<<kmer_size<<"-mers from "<<getInput()->getStr(STR_URI_QUERY_INPUT)<<endl; for( int bank_id=0;bank_id<number_of_read_sets;bank_id++){ // iterate each bank IBank* bank=banks_of_queries[bank_id]; LOCAL (bank); // BooleanVector bv; // unsigned long bank_size = get_bank_nb_items(bank); // bv.init_false(bank_size); // quick and dirty. Todo: implement a realocation of the bv in case the estimation is too low. // bv.set_comment(string("Reads from "+bank->getId()+" in "+getInput()->getStr(STR_URI_BANK_INPUT)+" with threshold "+to_string(threshold))); string message("#query_read_id (from bank "+bank->getId()+") mean median min max percentage_shared_positions -- number of shared "+to_string(kmer_size)+"mers with banq "+getInput()->getStr(STR_URI_BANK_INPUT)+"\n"); fwrite((message).c_str(), sizeof(char), message.size(), pFile); string progressMessage("Querying read set "+bank->getId()); ProgressIterator<Sequence> itSeq (*bank, progressMessage.c_str()); ISynchronizer* synchro = System::thread().newSynchronizer(); Dispatcher dispatcher (nbCores, 10000); dispatcher.iterate (itSeq, FunctorQuery(synchro,pFile, kmer_size,&quasiDico, keep_low_complexity, threshold, windows_size));//, &bv)); delete synchro; std::string query_filename = bank->getId().substr(bank->getId().find_last_of("/\\") + 1); // cout<<bv.nb_one()<<" reads in out_"+query_filename+"_in_"+bank_filename+".bv"<<endl; // bv.print("out_"+query_filename+"_in_"+bank_filename+".bv"); } fclose (pFile); }
int main (int argc, char* argv[]) { // We get the number of cores to be used. If we don't give any number, // we set to 0 which implies the usage of all available cores size_t nbCores = (argc >=2 ? atoi(argv[1]) : 0); // We create an iterator over an integer range int nmax = 1000; Range<int>::Iterator it (1,nmax); // We open a file. This will be our shared resource between threads. fstream file ("out", std::fstream::out); // For our file, we can't use intrinsics like we did for integer addition, // so we need a general synchronization mechanism that will be shared by the threads. ISynchronizer* synchro = System::thread().newSynchronizer(); // We create a dispatcher configured for 'nbCores' cores. Dispatcher dispatcher (nbCores, 1); // We iterate the range. NOTE: we could also use lambda expression (easing the code readability) dispatcher.iterate (it, Functor(synchro,file)); // We close the file file.close(); // We get rid of the synchronizer delete synchro; }
int main (int argc, char* argv[]) { // We get the number of cores to be used. If we don't give any number, // we set to 0 which implies the usage of all available cores size_t nbCores = (argc >=2 ? atoi(argv[1]) : 0); // We create an iterator over an integer range Range<int>::Iterator it (1,20); // We create a dispatcher configured for 'nbCores' cores. Dispatcher dispatcher (nbCores); // We dispatch the range iteration with the dispatcher. // This will create nbCores threads and each thread will be fed with // one value of the defined range // NOTE: we could also use lambda expression (easing the code readability) // Note: third argument is set to groupSize of 1 instead of 1000 (default), // to avoid that 1000 tasks are batched in the same thread. // In practice, when iterating over a large set of elements, set a reasonable // groupSize value, because a groupSize=1 will incur significant overhead // if Functor() is a very quick task. IDispatcher::Status status = dispatcher.iterate (it, Functor(), 1); // We dump some information about the dispatching cout << "nbCores=" << status.nbCores << " time=" << status.time << endl; // IMPORTANT: usage of Dispatcher has sense only if the iterated items // can be processed independently from each other. // The point to understand with the Dispatcher is that it can // iterate any instance of Iterator class. If you have any set of items // that can be enumerated through an Iterator implementation, then you // can parallelize the iteration with a Dispatcher instance }
void SRC_counter::create_and_fill_quasi_dictionary (){ // const int display = getInput()->getInt (STR_VERBOSE); // We get a handle on the HDF5 storage object. // Note that we use an auto pointer since the StorageFactory dynamically allocates an instance auto_ptr<Storage> storage (StorageFactory(STORAGE_HDF5).load (getInput()->getStr(STR_URI_GRAPH))); // We get the group for dsk Group& dskGroup = storage->getGroup("dsk"); kmer_size = atoi(dskGroup.getProperty("kmer_size").c_str()); // We get the solid kmers collection 1) from the 'dsk' group 2) from the 'solid' collection Partition<Kmer<>::Count>& solidKmers = dskGroup.getPartition<Kmer<>::Count> ("solid"); nbSolidKmers = solidKmers.getNbItems(); if(nbSolidKmers==0){cout<<"No solid kmers in bank -- exit"<<endl;exit(0);} IteratorKmerH5Wrapper iteratorOnKmers (solidKmers.iterator()); quasiDico = quasidictionaryKeyGeneric<IteratorKmerH5Wrapper, unsigned char> (nbSolidKmers, iteratorOnKmers, fingerprint_size, nbCores, gamma_value); cout<<"Empty quasi-ictionary memory usage (MB) = "<<System::info().getMemorySelfUsed()/1024<<endl; ProgressIterator<Kmer<>::Count> itKmers (solidKmers.iterator(), "Indexing solid kmers counts", nbSolidKmers); Dispatcher dispatcher (nbCores, 10000); dispatcher.iterate (itKmers, FunctorIndexer(quasiDico, kmer_size, keep_low_complexity)); cout<<"Filled quasi-ictionary memory usage (MB) = "<<System::info().getMemorySelfUsed()/1024<<endl; }