Пример #1
0
void *thr_writer(void *_count)
{
	unsigned long long *count = _count;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"writer", (unsigned long) pthread_self(),
			(unsigned long) gettid());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		pthread_rwlock_wrlock(&lock);
		test_array.a = 0;
		test_array.a = 8;
		if (caa_unlikely(wduration))
			loop_sleep(wduration);
		pthread_rwlock_unlock(&lock);
		URCU_TLS(nr_writes)++;
		if (caa_unlikely(!test_duration_write()))
			break;
		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
	}

	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
			"writer", (unsigned long) pthread_self(),
			(unsigned long) gettid());
	*count = URCU_TLS(nr_writes);
	return ((void*)2);
}
Пример #2
0
void *thr_writer(void *data)
{
	unsigned long wtidx = (unsigned long)data;

	printf_verbose("thread_begin %s, tid %lu\n",
			"writer", urcu_get_thread_id());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		pthread_mutex_lock(&lock);
		test_array.a = 0;
		test_array.a = 8;
		if (caa_unlikely(wduration))
			loop_sleep(wduration);
		pthread_mutex_unlock(&lock);
		URCU_TLS(nr_writes)++;
		if (caa_unlikely(!test_duration_write()))
			break;
		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
	}

	printf_verbose("thread_end %s, tid %lu\n",
			"writer", urcu_get_thread_id());
	tot_nr_writes[wtidx] = URCU_TLS(nr_writes);
	return ((void*)2);
}
Пример #3
0
void *thr_reader(void *data)
{
	unsigned long tidx = (unsigned long)data;

	printf_verbose("thread_begin %s, tid %lu\n",
			"reader", urcu_get_thread_id());

	set_affinity();

	while (!test_go)
	{
	}

	for (;;) {
		pthread_mutex_lock(&per_thread_lock[tidx].lock);
		assert(test_array.a == 8);
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
		pthread_mutex_unlock(&per_thread_lock[tidx].lock);
		URCU_TLS(nr_reads)++;
		if (caa_unlikely(!test_duration_read()))
			break;
	}

	tot_nr_reads[tidx] = URCU_TLS(nr_reads);
	printf_verbose("thread_end %s, tid %lu\n",
			"reader", urcu_get_thread_id());
	return ((void*)1);

}
Пример #4
0
void *thr_reader(void *_count)
{
	unsigned long long *count = _count;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"reader", (unsigned long) pthread_self(),
			(unsigned long) gettid());

	set_affinity();

	while (!test_go)
	{
	}

	for (;;) {
		pthread_rwlock_rdlock(&lock);
		assert(test_array.a == 8);
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
		pthread_rwlock_unlock(&lock);
		URCU_TLS(nr_reads)++;
		if (caa_unlikely(!test_duration_read()))
			break;
	}

	*count = URCU_TLS(nr_reads);
	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
			"reader", (unsigned long) pthread_self(),
			(unsigned long) gettid());
	return ((void*)1);

}
Пример #5
0
void *thr_reader(void *data)
{
	unsigned long tidx = (unsigned long)data;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"reader", pthread_self(), (unsigned long)gettid());

	set_affinity();

	while (!test_go)
	{
	}

	for (;;) {
		pthread_mutex_lock(&lock);
		assert(test_array.a == 8);
		if (unlikely(rduration))
			loop_sleep(rduration);
		pthread_mutex_unlock(&lock);
		nr_reads++;
		if (unlikely(!test_duration_read()))
			break;
	}

	tot_nr_reads[tidx] = nr_reads;
	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
			"reader", pthread_self(), (unsigned long)gettid());
	return ((void*)1);

}
Пример #6
0
void *test_hash_rw_thr_reader(void *_count)
{
	unsigned long long *count = _count;
	struct lfht_test_node *node;
	struct cds_lfht_iter iter;

	printf_verbose("thread_begin %s, tid %lu\n",
			"reader", urcu_get_thread_id());

	URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);

	set_affinity();

	rcu_register_thread();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		rcu_read_lock();
		cds_lfht_test_lookup(test_ht,
			(void *)(((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % lookup_pool_size) + lookup_pool_offset),
			sizeof(void *), &iter);
		node = cds_lfht_iter_get_test_node(&iter);
		if (node == NULL) {
			if (validate_lookup) {
				printf("[ERROR] Lookup cannot find initial node.\n");
				exit(-1);
			}
			URCU_TLS(lookup_fail)++;
		} else {
			URCU_TLS(lookup_ok)++;
		}
		rcu_debug_yield_read();
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
		rcu_read_unlock();
		URCU_TLS(nr_reads)++;
		if (caa_unlikely(!test_duration_read()))
			break;
		if (caa_unlikely((URCU_TLS(nr_reads) & ((1 << 10) - 1)) == 0))
			rcu_quiescent_state();
	}

	rcu_unregister_thread();

	*count = URCU_TLS(nr_reads);
	printf_verbose("thread_end %s, tid %lu\n",
			"reader", urcu_get_thread_id());
	printf_verbose("read tid : %lx, lookupfail %lu, lookupok %lu\n",
			urcu_get_thread_id(),
			URCU_TLS(lookup_fail),
			URCU_TLS(lookup_ok));
	return ((void*)1);

}
Пример #7
0
int main()
{
	cycles_t time1, time2;

	time1 = caa_get_cycles();
	loop_sleep(NR_LOOPS);
	time2 = caa_get_cycles();
	printf("CPU clock cycles per loop: %g\n", (time2 - time1) /
						  (double)NR_LOOPS);
	return 0;
}
Пример #8
0
void *thr_dequeuer(void *_count)
{
	unsigned long long *count = _count;
	int ret;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"dequeuer", pthread_self(), (unsigned long)gettid());

	set_affinity();

	ret = rcu_defer_register_thread();
	if (ret) {
		printf("Error in rcu_defer_register_thread\n");
		exit(-1);
	}
	rcu_register_thread();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		struct cds_lfq_node_rcu *qnode;
		struct test *node;

		rcu_read_lock();
		qnode = cds_lfq_dequeue_rcu(&q);
		node = caa_container_of(qnode, struct test, list);
		rcu_read_unlock();

		if (node) {
			call_rcu(&node->rcu, free_node_cb);
			nr_successful_dequeues++;
		}

		nr_dequeues++;
		if (caa_unlikely(!test_duration_dequeue()))
			break;
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
	}

	rcu_unregister_thread();
	rcu_defer_unregister_thread();
	printf_verbose("dequeuer thread_end, thread id : %lx, tid %lu, "
		       "dequeues %llu, successful_dequeues %llu\n",
		       pthread_self(), (unsigned long)gettid(), nr_dequeues,
		       nr_successful_dequeues);
	count[0] = nr_dequeues;
	count[1] = nr_successful_dequeues;
	return ((void*)2);
}
void *thr_writer(void *data)
{
	unsigned long wtidx = (unsigned long)data;
	long tidx;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"writer", (unsigned long) pthread_self(),
			(unsigned long) gettid());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		for (tidx = 0; tidx < nr_readers; tidx++) {
			pthread_mutex_lock(&per_thread_lock[tidx].lock);
		}
		test_array.a = 0;
		test_array.a = 8;
		if (caa_unlikely(wduration))
			loop_sleep(wduration);
		for (tidx = (long)nr_readers - 1; tidx >= 0; tidx--) {
			pthread_mutex_unlock(&per_thread_lock[tidx].lock);
		}
		URCU_TLS(nr_writes)++;
		if (caa_unlikely(!test_duration_write()))
			break;
		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
	}

	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
			"writer", (unsigned long) pthread_self(),
			(unsigned long) gettid());
	tot_nr_writes[wtidx] = URCU_TLS(nr_writes);
	return ((void*)2);
}
Пример #10
0
static void *thr_dequeuer(void *_count)
{
	unsigned long long *count = _count;
	unsigned int counter = 0;

	printf_verbose("thread_begin %s, tid %lu\n",
			"dequeuer", urcu_get_thread_id());

	set_affinity();

	rcu_register_thread();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	assert(test_pop || test_pop_all);

	for (;;) {
		if (test_pop && test_pop_all) {
			/* both pop and pop all */
			if (counter & 1)
				do_test_pop(test_sync);
			else
				do_test_pop_all(test_sync);
			counter++;
		} else {
			if (test_pop)
				do_test_pop(test_sync);
			else
				do_test_pop_all(test_sync);
		}

		if (caa_unlikely(!test_duration_dequeue()))
			break;
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
	}

	rcu_unregister_thread();

	printf_verbose("dequeuer thread_end, tid %lu, "
			"dequeues %llu, successful_dequeues %llu\n",
			urcu_get_thread_id(),
			URCU_TLS(nr_dequeues),
			URCU_TLS(nr_successful_dequeues));
	count[0] = URCU_TLS(nr_dequeues);
	count[1] = URCU_TLS(nr_successful_dequeues);
	return ((void*)2);
}
Пример #11
0
static void *thr_enqueuer(void *_count)
{
	unsigned long long *count = _count;
	bool was_nonempty;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"enqueuer", (unsigned long) pthread_self(),
			(unsigned long) gettid());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		struct cds_wfs_node *node = malloc(sizeof(*node));
		if (!node)
			goto fail;
		cds_wfs_node_init(node);
		was_nonempty = cds_wfs_push(&s, node);
		URCU_TLS(nr_successful_enqueues)++;
		if (!was_nonempty)
			URCU_TLS(nr_empty_dest_enqueues)++;

		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
fail:
		URCU_TLS(nr_enqueues)++;
		if (caa_unlikely(!test_duration_enqueue()))
			break;
	}

	uatomic_inc(&test_enqueue_stopped);
	count[0] = URCU_TLS(nr_enqueues);
	count[1] = URCU_TLS(nr_successful_enqueues);
	count[2] = URCU_TLS(nr_empty_dest_enqueues);
	printf_verbose("enqueuer thread_end, thread id : %lx, tid %lu, "
		       "enqueues %llu successful_enqueues %llu, "
		       "empty_dest_enqueues %llu\n",
		       pthread_self(),
			(unsigned long) gettid(),
		       URCU_TLS(nr_enqueues),
		       URCU_TLS(nr_successful_enqueues),
		       URCU_TLS(nr_empty_dest_enqueues));
	return ((void*)1);

}
Пример #12
0
void *thr_dequeuer(void *_count)
{
    unsigned long long *count = _count;

    printf_verbose("thread_begin %s, tid %lu\n",
                   "dequeuer", urcu_get_thread_id());

    set_affinity();

    rcu_register_thread();

    while (!test_go)
    {
    }
    cmm_smp_mb();

    for (;;) {
        struct cds_lfq_node_rcu *qnode;

        rcu_read_lock();
        qnode = cds_lfq_dequeue_rcu(&q);
        rcu_read_unlock();

        if (qnode) {
            struct test *node;

            node = caa_container_of(qnode, struct test, list);
            call_rcu(&node->rcu, free_node_cb);
            URCU_TLS(nr_successful_dequeues)++;
        }

        URCU_TLS(nr_dequeues)++;
        if (caa_unlikely(!test_duration_dequeue()))
            break;
        if (caa_unlikely(rduration))
            loop_sleep(rduration);
    }

    rcu_unregister_thread();
    printf_verbose("dequeuer thread_end, tid %lu, "
                   "dequeues %llu, successful_dequeues %llu\n",
                   urcu_get_thread_id(),
                   URCU_TLS(nr_dequeues),
                   URCU_TLS(nr_successful_dequeues));
    count[0] = URCU_TLS(nr_dequeues);
    count[1] = URCU_TLS(nr_successful_dequeues);
    return ((void*)2);
}
Пример #13
0
void *thr_enqueuer(void *_count)
{
    unsigned long long *count = _count;

    printf_verbose("thread_begin %s, tid %lu\n",
                   "enqueuer", urcu_get_thread_id());

    set_affinity();

    rcu_register_thread();

    while (!test_go)
    {
    }
    cmm_smp_mb();

    for (;;) {
        struct test *node = malloc(sizeof(*node));
        if (!node)
            goto fail;
        cds_lfq_node_init_rcu(&node->list);
        rcu_read_lock();
        cds_lfq_enqueue_rcu(&q, &node->list);
        rcu_read_unlock();
        URCU_TLS(nr_successful_enqueues)++;

        if (caa_unlikely(wdelay))
            loop_sleep(wdelay);
fail:
        URCU_TLS(nr_enqueues)++;
        if (caa_unlikely(!test_duration_enqueue()))
            break;
    }

    rcu_unregister_thread();

    count[0] = URCU_TLS(nr_enqueues);
    count[1] = URCU_TLS(nr_successful_enqueues);
    printf_verbose("enqueuer thread_end, tid %lu, "
                   "enqueues %llu successful_enqueues %llu\n",
                   urcu_get_thread_id(),
                   URCU_TLS(nr_enqueues),
                   URCU_TLS(nr_successful_enqueues));
    return ((void*)1);

}
Пример #14
0
int main(int argc, char **argv)
{
	unsigned long i;
	caa_cycles_t time1, time2;
	caa_cycles_t time_tot = 0;
	double cpl;

	for (i = 0; i < TESTS; i++) {
		time1 = caa_get_cycles();
		loop_sleep(LOOPS);
		time2 = caa_get_cycles();
		time_tot += time2 - time1;
	}
	cpl = ((double)time_tot) / (double)TESTS / (double)LOOPS;

	printf("CALIBRATION : %g cycles per loop\n", cpl);
	printf("time_tot = %llu, LOOPS = %d, TESTS = %d\n",
	       (unsigned long long) time_tot, LOOPS, TESTS);

	return 0;
}
Пример #15
0
void *thr_enqueuer(void *_count)
{
	unsigned long long *count = _count;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"enqueuer", pthread_self(), (unsigned long)gettid());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		struct cds_wfq_node *node = malloc(sizeof(*node));
		if (!node)
			goto fail;
		cds_wfq_node_init(node);
		cds_wfq_enqueue(&q, node);
		nr_successful_enqueues++;

		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
fail:
		nr_enqueues++;
		if (caa_unlikely(!test_duration_enqueue()))
			break;
	}

	count[0] = nr_enqueues;
	count[1] = nr_successful_enqueues;
	printf_verbose("enqueuer thread_end, thread id : %lx, tid %lu, "
		       "enqueues %llu successful_enqueues %llu\n",
		       pthread_self(), (unsigned long)gettid(), nr_enqueues,
		       nr_successful_enqueues);
	return ((void*)1);

}
Пример #16
0
void *thr_dequeuer(void *_count)
{
	unsigned long long *count = _count;

	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
			"dequeuer", (unsigned long) pthread_self(),
			(unsigned long) gettid());

	set_affinity();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		struct cds_wfq_node *node = cds_wfq_dequeue_blocking(&q);

		if (node) {
			free(node);
			URCU_TLS(nr_successful_dequeues)++;
		}

		URCU_TLS(nr_dequeues)++;
		if (caa_unlikely(!test_duration_dequeue()))
			break;
		if (caa_unlikely(rduration))
			loop_sleep(rduration);
	}

	printf_verbose("dequeuer thread_end, thread id : %lx, tid %lu, "
		       "dequeues %llu, successful_dequeues %llu\n",
		       pthread_self(),
			(unsigned long) gettid(),
		       URCU_TLS(nr_dequeues), URCU_TLS(nr_successful_dequeues));
	count[0] = URCU_TLS(nr_dequeues);
	count[1] = URCU_TLS(nr_successful_dequeues);
	return ((void*)2);
}
Пример #17
0
void *test_hash_rw_thr_writer(void *_count)
{
	struct lfht_test_node *node;
	struct cds_lfht_node *ret_node;
	struct cds_lfht_iter iter;
	struct wr_count *count = _count;
	int ret;

	printf_verbose("thread_begin %s, tid %lu\n",
			"writer", urcu_get_thread_id());

	URCU_TLS(rand_lookup) = urcu_get_thread_id() ^ time(NULL);

	set_affinity();

	rcu_register_thread();

	while (!test_go)
	{
	}
	cmm_smp_mb();

	for (;;) {
		if ((addremove == AR_ADD || add_only)
				|| (addremove == AR_RANDOM && rand_r(&URCU_TLS(rand_lookup)) & 1)) {
			node = malloc(sizeof(struct lfht_test_node));
			lfht_test_node_init(node,
				(void *)(((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset),
				sizeof(void *));
			rcu_read_lock();
			if (add_unique) {
				ret_node = cds_lfht_add_unique(test_ht,
					test_hash(node->key, node->key_len, TEST_HASH_SEED),
					test_match, node->key, &node->node);
			} else {
				if (add_replace)
					ret_node = cds_lfht_add_replace(test_ht,
							test_hash(node->key, node->key_len, TEST_HASH_SEED),
							test_match, node->key, &node->node);
				else
					cds_lfht_add(test_ht,
						test_hash(node->key, node->key_len, TEST_HASH_SEED),
						&node->node);
			}
			rcu_read_unlock();
			if (add_unique && ret_node != &node->node) {
				free(node);
				URCU_TLS(nr_addexist)++;
			} else {
				if (add_replace && ret_node) {
					call_rcu(&to_test_node(ret_node)->head,
							free_node_cb);
					URCU_TLS(nr_addexist)++;
				} else {
					URCU_TLS(nr_add)++;
				}
			}
		} else {
			/* May delete */
			rcu_read_lock();
			cds_lfht_test_lookup(test_ht,
				(void *)(((unsigned long) rand_r(&URCU_TLS(rand_lookup)) % write_pool_size) + write_pool_offset),
				sizeof(void *), &iter);
			ret = cds_lfht_del(test_ht, cds_lfht_iter_get_node(&iter));
			rcu_read_unlock();
			if (ret == 0) {
				node = cds_lfht_iter_get_test_node(&iter);
				call_rcu(&node->head, free_node_cb);
				URCU_TLS(nr_del)++;
			} else
				URCU_TLS(nr_delnoent)++;
		}
#if 0
		//if (URCU_TLS(nr_writes) % 100000 == 0) {
		if (URCU_TLS(nr_writes) % 1000 == 0) {
			rcu_read_lock();
			if (rand_r(&URCU_TLS(rand_lookup)) & 1) {
				ht_resize(test_ht, 1);
			} else {
				ht_resize(test_ht, -1);
			}
			rcu_read_unlock();
		}
#endif //0
		URCU_TLS(nr_writes)++;
		if (caa_unlikely(!test_duration_write()))
			break;
		if (caa_unlikely(wdelay))
			loop_sleep(wdelay);
		if (caa_unlikely((URCU_TLS(nr_writes) & ((1 << 10) - 1)) == 0))
			rcu_quiescent_state();
	}

	rcu_unregister_thread();

	printf_verbose("thread_end %s, tid %lu\n",
			"writer", urcu_get_thread_id());
	printf_verbose("info tid %lu: nr_add %lu, nr_addexist %lu, nr_del %lu, "
			"nr_delnoent %lu\n", urcu_get_thread_id(),
			URCU_TLS(nr_add),
			URCU_TLS(nr_addexist),
			URCU_TLS(nr_del),
			URCU_TLS(nr_delnoent));
	count->update_ops = URCU_TLS(nr_writes);
	count->add = URCU_TLS(nr_add);
	count->add_exist = URCU_TLS(nr_addexist);
	count->remove = URCU_TLS(nr_del);
	return ((void*)2);
}
Пример #18
0
int main(int argc, char** argv)
{
  utils::mpi_world mpi_world(argc, argv);
  
  const int mpi_rank = MPI::COMM_WORLD.Get_rank();
  const int mpi_size = MPI::COMM_WORLD.Get_size();
  
  try {
    if (getoptions(argc, argv) != 0) 
      return 1;

    if (command.empty())
      throw std::runtime_error("no command?");

    typedef MapReduce map_reduce_type;
      
    typedef map_reduce_type::id_type         id_type;
    typedef map_reduce_type::value_type      value_type;
    typedef map_reduce_type::queue_type      queue_type;
    typedef map_reduce_type::queue_id_type   queue_id_type;
    typedef map_reduce_type::subprocess_type subprocess_type;

    typedef Mapper  mapper_type;
    typedef Reducer reducer_type;
    
    typedef Consumer consumer_type;
    typedef Merger   merger_type;
    
    if (mpi_rank == 0) {
      subprocess_type subprocess(command);
      
      queue_type    queue_is(mpi_size);
      queue_type    queue_send(1);
      queue_id_type queue_id;
      queue_type    queue_recv;
      
      const bool flush_output = (output_file == "-"
				 || (boost::filesystem::exists(output_file)
				     && ! boost::filesystem::is_regular_file(output_file)));
      
      utils::compress_istream is(input_file, 1024 * 1024);
      utils::compress_ostream os(output_file, 1024 * 1024 * (! flush_output));

      boost::thread consumer(consumer_type(queue_is, is));
      boost::thread merger(merger_type(queue_recv, os, mpi_size));

      boost::thread mapper(mapper_type(queue_send, queue_id, subprocess));
      boost::thread reducer(reducer_type(queue_recv, queue_id, subprocess));
      
      typedef utils::mpi_ostream        ostream_type;
      typedef utils::mpi_istream_simple istream_type;

      typedef boost::shared_ptr<ostream_type> ostream_ptr_type;
      typedef boost::shared_ptr<istream_type> istream_ptr_type;
      
      typedef std::vector<ostream_ptr_type, std::allocator<ostream_ptr_type> > ostream_ptr_set_type;
      typedef std::vector<istream_ptr_type, std::allocator<istream_ptr_type> > istream_ptr_set_type;
      
      ostream_ptr_set_type ostream(mpi_size);
      istream_ptr_set_type istream(mpi_size);
      
      for (int rank = 1; rank < mpi_size; ++ rank) {
	ostream[rank].reset(new ostream_type(rank, line_tag, 4096));
	istream[rank].reset(new istream_type(rank, line_tag, 4096));
      }
      
      std::string line;
      value_type value(0, std::string());
      value_type value_recv(0, std::string());
      
      int non_found_iter = 0;
      
      while (value.first != id_type(-1)) {
	bool found = false;
	
	for (int rank = 1; rank < mpi_size && value.first != id_type(-1); ++ rank)
	  if (ostream[rank]->test() && queue_is.pop(value, true) && value.first != id_type(-1)) {
	    ostream[rank]->write(utils::lexical_cast<std::string>(value.first) + ' ' + value.second);
	    
	    found = true;
	  }
	
	if (queue_send.empty() && queue_is.pop(value, true) && value.first != id_type(-1)) {
	  queue_send.push(value);
	  
	  found = true;
	}
	
	// reduce...
	for (int rank = 1; rank < mpi_size; ++ rank)
	  if (istream[rank] && istream[rank]->test()) {
	    if (istream[rank]->read(line)) {
	      tokenize(line, value_recv);
	      
	      queue_recv.push_swap(value_recv);
	    } else {
	      queue_recv.push(std::make_pair(id_type(-1), std::string()));
	      istream[rank].reset();
	    }
	    
	    found = true;
	  }
	
	non_found_iter = loop_sleep(found, non_found_iter);
      }
      
      bool terminated = false;
      
      for (;;) {
	bool found = false;
	
	if (! terminated && queue_send.push(std::make_pair(id_type(-1), std::string()), true)) {
	  terminated = true;
	  
	  found = true;
	}
	
	// termination...
	for (int rank = 1; rank < mpi_size; ++ rank)
	  if (ostream[rank] && ostream[rank]->test()) {
	    if (! ostream[rank]->terminated())
	      ostream[rank]->terminate();
	    else
	      ostream[rank].reset();
	    
	    found = true;
	  }
	
	// reduce...
	for (int rank = 1; rank < mpi_size; ++ rank)
	  if (istream[rank] && istream[rank]->test()) {
	    if (istream[rank]->read(line)) {
	      tokenize(line, value_recv);
	      
	      queue_recv.push_swap(value_recv);
	    } else {
	      queue_recv.push(std::make_pair(id_type(-1), std::string()));
	      istream[rank].reset();
	    }
	    
	    found = true;
	  }
	
	// termination condition!
	if (std::count(istream.begin(), istream.end(), istream_ptr_type()) == mpi_size
	    && std::count(ostream.begin(), ostream.end(), ostream_ptr_type()) == mpi_size
	    && terminated) break;
	
	non_found_iter = loop_sleep(found, non_found_iter);
      }
      
      mapper.join();
      reducer.join();
      consumer.join();
      merger.join();
    } else {
      subprocess_type subprocess(command);
      
      queue_type    queue_send(1);
      queue_id_type queue_id;
      queue_type    queue_recv;
      
      boost::thread mapper(mapper_type(queue_send, queue_id, subprocess));
      boost::thread reducer(reducer_type(queue_recv, queue_id, subprocess));

      typedef utils::mpi_istream        istream_type;
      typedef utils::mpi_ostream_simple ostream_type;
      
      boost::shared_ptr<istream_type> is(new istream_type(0, line_tag, 4096));
      boost::shared_ptr<ostream_type> os(new ostream_type(0, line_tag, 4096));
      
      std::string line;
      value_type value;

      bool terminated = false;
      
      int non_found_iter = 0;
      for (;;) {
	bool found = false;
	
	if (is && is->test() && queue_send.empty()) {
	  if (is->read(line))
	    tokenize(line, value);
	  else {
	    value.first  = id_type(-1);
	    value.second = std::string();
	    
	    is.reset();
	  }
	  
	  queue_send.push_swap(value);
	  
	  found = true;
	}
	
	if (! terminated) {
	  if (os && os->test() && queue_recv.pop_swap(value, true)) {
	    if (value.first == id_type(-1))
	      terminated = true;
	    else
	      os->write(utils::lexical_cast<std::string>(value.first) + ' ' + value.second);
	    
	    found = true;
	  }
	} else {
	  if (os && os->test()) {
	    if (! os->terminated())
	      os->terminate();
	    else
	      os.reset();
	    found = true;
	  }
	}
	
	if (! is && ! os) break;
	
	non_found_iter = loop_sleep(found, non_found_iter);
      }
      
      mapper.join();
      reducer.join();
    }
    
    // synchronize...
    if (mpi_rank == 0) {
      std::vector<MPI::Request, std::allocator<MPI::Request> > request_recv(mpi_size);
      std::vector<MPI::Request, std::allocator<MPI::Request> > request_send(mpi_size);
      std::vector<bool, std::allocator<bool> > terminated_recv(mpi_size, false);
      std::vector<bool, std::allocator<bool> > terminated_send(mpi_size, false);
    
      terminated_recv[0] = true;
      terminated_send[0] = true;
      for (int rank = 1; rank != mpi_size; ++ rank) {
	request_recv[rank] = MPI::COMM_WORLD.Irecv(0, 0, MPI::INT, rank, notify_tag);
	request_send[rank] = MPI::COMM_WORLD.Isend(0, 0, MPI::INT, rank, notify_tag);
      }
    
      int non_found_iter = 0;
      for (;;) {
	bool found = false;
      
	for (int rank = 1; rank != mpi_size; ++ rank)
	  if (! terminated_recv[rank] && request_recv[rank].Test()) {
	    terminated_recv[rank] = true;
	    found = true;
	  }
      
	for (int rank = 1; rank != mpi_size; ++ rank)
	  if (! terminated_send[rank] && request_send[rank].Test()) {
	    terminated_send[rank] = true;
	    found = true;
	  }
      
	if (std::count(terminated_send.begin(), terminated_send.end(), true) == mpi_size
	    && std::count(terminated_recv.begin(), terminated_recv.end(), true) == mpi_size) break;
      
	non_found_iter = loop_sleep(found, non_found_iter);
      }
    } else {
      MPI::Request request_send = MPI::COMM_WORLD.Isend(0, 0, MPI::INT, 0, notify_tag);
      MPI::Request request_recv = MPI::COMM_WORLD.Irecv(0, 0, MPI::INT, 0, notify_tag);
    
      bool terminated_send = false;
      bool terminated_recv = false;
    
      int non_found_iter = 0;
      for (;;) {
	bool found = false;
      
	if (! terminated_send && request_send.Test()) {
	  terminated_send = true;
	  found = true;
	}
      
	if (! terminated_recv && request_recv.Test()) {
	  terminated_recv = true;
	  found = true;
	}
      
	if (terminated_send && terminated_recv) break;
      
	non_found_iter = loop_sleep(found, non_found_iter);
      }
    }
  }
  catch (const std::exception& err) {
    std::cerr << "error: " << argv[0] << " "<< err.what() << std::endl;
    MPI::COMM_WORLD.Abort(1);
    return 1;
  }
  return 0;
}