Beispiel #1
0
/** Example: summing an integer range in parallel, without and with synchronization.
 *
 * \param argc number of command line arguments
 * \param argv argv[1] (optional) is the number of cores to use; 0, a negative
 *             value or a non numeric value means "all available cores"
 */
int main (int argc, char* argv[])
{
    // We get the number of cores to be used.  If we don't give any number,
    // we set to 0 which implies the usage of all available cores.
    // A negative value is mapped to 0 as well: converting a negative int
    // directly to size_t would wrap around to a huge number of cores.
    const int requestedCores = (argc >= 2 ? atoi(argv[1]) : 0);
    const size_t nbCores = (requestedCores > 0 ? static_cast<size_t>(requestedCores) : 0);

    // We create an iterator over an integer range
    const int nmax = 10000;
    Range<int>::Iterator it (1,nmax);

    // We create a dispatcher configured for 'nbCores' cores.
    // The second argument tells how to group items per thread, i.e. how many
    // consecutive values will be received by each thread (set here to 1 to
    // emphasize concurrent access issue).
    Dispatcher dispatcher (nbCores, 1);

    // The idea here is to sum the integers of our range with an iteration.
    // (Note: we know that the result is N*(N+1)/2)
    const int expectedSum = nmax*(nmax+1)/2;
    int sum1 = 0;
    int sum2 = 0;

    //////////////////////////////////////////////////
    // First iteration: WRONG WAY
    //////////////////////////////////////////////////
    // Our first attempt is to use an integer variable to sum the iterated value.
    // This variable will be shared by all the threads and, since they access to it
    // without caution wrt concurrent accesses, the sum result should be wrong (unless
    // you use one core only). This unsynchronized access is deliberate: it is the
    // point of the example.
    dispatcher.iterate (it, [&] (int i)  {  sum1 += i;  });

    //////////////////////////////////////////////////
    // Second iteration: CORRECT WAY
    //////////////////////////////////////////////////
    // As previously, our second attempt will share the same integer variable.
    // But now, we take care about concurrent accesses with the use of the
    // __sync_fetch_and_add intrinsic instruction. This instruction ensures that
    // the shared integer can be modified by only one thread at one time.
    dispatcher.iterate (it, [&] (int i)  {  __sync_fetch_and_add (&sum2, i);  });

    //////////////////////////////////////////////////
    // CONCLUSION
    //////////////////////////////////////////////////
    cout << "First iteration:  sum=" << sum1 << "  (result should be " << expectedSum << ")" << endl;
    cout << "Second iteration: sum=" << sum2 << "  (result should be " << expectedSum << ")" << endl;

    // Parallelization of Iterator is pretty simple with the Dispatcher class.
    // Moreover, usage of lambda expressions make the whole thing easy to write.
    // Note that the instruction block of the lambda expression doesn't even know that
    // it may be executed in different threads. In other words, the block doesn't refer
    // any stuff related to thread management; it just receives one of the item of the
    // iteration and process some action on it.

    // IMPORTANT ! As we have seen here, the user has to be aware that a shared resource (one
    // integer here) can be modified by several threads at the same time, so the user must use
    // some kind of synchronization for modifying the shared resource. We will see in other
    // examples that GATB provides mechanisms for this purpose.
}
void SRC_counter::parse_query_sequences (){
    std::string bank_filename = getInput()->getStr(STR_URI_BANK_INPUT).substr(getInput()->getStr(STR_URI_BANK_INPUT).find_last_of("/\\") + 1);

    BankAlbum banks (getInput()->getStr(STR_URI_QUERY_INPUT));
    const std::vector<IBank*>& banks_of_queries = banks.getBanks();
    const int number_of_read_sets = banks_of_queries.size();
    
	FILE * pFile;
	pFile = fopen (getInput()->getStr(STR_OUT_FILE).c_str(), "wb");
    
    
	cout<<"Query "<<kmer_size<<"-mers from "<<getInput()->getStr(STR_URI_QUERY_INPUT)<<endl;
    for( int bank_id=0;bank_id<number_of_read_sets;bank_id++){ // iterate each bank
        
        IBank* bank=banks_of_queries[bank_id];
        LOCAL (bank);
//        BooleanVector bv;
//        unsigned long bank_size = get_bank_nb_items(bank);
//        bv.init_false(bank_size); // quick and dirty. Todo: implement a realocation of the bv in case the estimation is too low.
//        bv.set_comment(string("Reads from "+bank->getId()+" in "+getInput()->getStr(STR_URI_BANK_INPUT)+" with threshold "+to_string(threshold)));
        
        string message("#query_read_id (from bank "+bank->getId()+") mean median min max percentage_shared_positions -- number of shared "+to_string(kmer_size)+"mers with banq "+getInput()->getStr(STR_URI_BANK_INPUT)+"\n");
        fwrite((message).c_str(), sizeof(char), message.size(), pFile);
        string progressMessage("Querying read set "+bank->getId());
        ProgressIterator<Sequence> itSeq (*bank, progressMessage.c_str());
        ISynchronizer* synchro = System::thread().newSynchronizer();
        Dispatcher dispatcher (nbCores, 10000);
        dispatcher.iterate (itSeq, FunctorQuery(synchro,pFile, kmer_size,&quasiDico, keep_low_complexity, threshold, windows_size));//, &bv));
        delete synchro;
        std::string query_filename = bank->getId().substr(bank->getId().find_last_of("/\\") + 1);
//        cout<<bv.nb_one()<<" reads in out_"+query_filename+"_in_"+bank_filename+".bv"<<endl;
//        bv.print("out_"+query_filename+"_in_"+bank_filename+".bv");
    }
    fclose (pFile);
}
/** Example: sharing a file between threads through a synchronizer.
 *
 * \param argc number of command line arguments
 * \param argv argv[1] (optional) is the number of cores to use; 0, a negative
 *             value or a non numeric value means "all available cores"
 */
int main (int argc, char* argv[])
{
    // We get the number of cores to be used.  If we don't give any number,
    // we set to 0 which implies the usage of all available cores.
    // A negative value is mapped to 0 as well: converting a negative int
    // directly to size_t would wrap around to a huge number of cores.
    const int requestedCores = (argc >= 2 ? atoi(argv[1]) : 0);
    const size_t nbCores = (requestedCores > 0 ? static_cast<size_t>(requestedCores) : 0);

    // We create an iterator over an integer range
    const int nmax = 1000;
    Range<int>::Iterator it (1,nmax);

    // We open a file. This will be our shared resource between threads.
    // NOTE(review): the stream state is not checked after opening -- confirm
    // whether a failure to create "out" should be reported here.
    fstream file ("out", std::fstream::out);

    // For our file, we can't use intrinsics like we did for integer addition,
    // so we need a general synchronization mechanism that will be shared by the threads.
    ISynchronizer* synchro = System::thread().newSynchronizer();

    // We create a dispatcher configured for 'nbCores' cores.
    Dispatcher dispatcher (nbCores, 1);

    // We iterate the range.  NOTE: we could also use lambda expression (easing the code readability)
    dispatcher.iterate (it, Functor(synchro,file));

    // We close the file
    file.close();

    // We get rid of the synchronizer
    delete synchro;
}
Beispiel #4
0
/** Example: basic parallel iteration of an integer range with a Dispatcher.
 *
 * \param argc number of command line arguments
 * \param argv argv[1] (optional) is the number of cores to use; 0, a negative
 *             value or a non numeric value means "all available cores"
 */
int main (int argc, char* argv[])
{
    // We get the number of cores to be used.  If we don't give any number,
    // we set to 0 which implies the usage of all available cores.
    // A negative value is mapped to 0 as well: converting a negative int
    // directly to size_t would wrap around to a huge number of cores.
    const int requestedCores = (argc >= 2 ? atoi(argv[1]) : 0);
    const size_t nbCores = (requestedCores > 0 ? static_cast<size_t>(requestedCores) : 0);

    // We create an iterator over an integer range
    Range<int>::Iterator it (1,20);

    // We create a dispatcher configured for 'nbCores' cores.
    Dispatcher dispatcher (nbCores);

    // We dispatch the range iteration with the dispatcher.
    // This will create nbCores threads and each thread will be fed with
    // one value of the defined range

    // NOTE: we could also use lambda expression (easing the code readability)
    // Note: third argument is set to groupSize of 1 instead of 1000 (default),
    // to avoid that 1000 tasks are batched in the same thread.
    // In practice, when iterating over a large set of elements, set a reasonable
    // groupSize value, because a groupSize=1 will incur significant overhead
    // if Functor() is a very quick task.
    IDispatcher::Status status = dispatcher.iterate (it, Functor(), 1);

    // We dump some information about the dispatching
    cout << "nbCores=" << status.nbCores << "  time=" << status.time << endl;

    // IMPORTANT: usage of Dispatcher has sense only if the iterated items
    // can be processed independently from each other.

    // The point to understand with the Dispatcher is that it can
    // iterate any instance of Iterator class. If you have any set of items
    // that can be enumerated through an Iterator implementation, then you
    // can parallelize the iteration with a Dispatcher instance
}
void SRC_counter::create_and_fill_quasi_dictionary (){
//	const int display = getInput()->getInt (STR_VERBOSE);
	// We get a handle on the HDF5 storage object.
	// Note that we use an auto pointer since the StorageFactory dynamically allocates an instance
	auto_ptr<Storage> storage (StorageFactory(STORAGE_HDF5).load (getInput()->getStr(STR_URI_GRAPH)));
	// We get the group for dsk
	Group& dskGroup = storage->getGroup("dsk");
	kmer_size = atoi(dskGroup.getProperty("kmer_size").c_str());
	// We get the solid kmers collection 1) from the 'dsk' group  2) from the 'solid' collection
	Partition<Kmer<>::Count>& solidKmers = dskGroup.getPartition<Kmer<>::Count> ("solid");
	nbSolidKmers = solidKmers.getNbItems();
	if(nbSolidKmers==0){cout<<"No solid kmers in bank -- exit"<<endl;exit(0);}
	IteratorKmerH5Wrapper iteratorOnKmers (solidKmers.iterator());
	quasiDico = quasidictionaryKeyGeneric<IteratorKmerH5Wrapper, unsigned char> (nbSolidKmers, iteratorOnKmers, fingerprint_size, nbCores, gamma_value);
    
    cout<<"Empty quasi-ictionary memory usage (MB) = "<<System::info().getMemorySelfUsed()/1024<<endl;
    
    
	ProgressIterator<Kmer<>::Count> itKmers (solidKmers.iterator(), "Indexing solid kmers counts", nbSolidKmers);
	Dispatcher dispatcher (nbCores, 10000);
	dispatcher.iterate (itKmers, FunctorIndexer(quasiDico, kmer_size, keep_low_complexity));
    
    cout<<"Filled quasi-ictionary memory usage (MB) = "<<System::info().getMemorySelfUsed()/1024<<endl;
}