void mg_mpi(int* data, const unsigned long n, const int rank, const int size, const MPI_Comm comm) {
  // root tells each process how much data to expect.
  unsigned long partition_size;
  if(rank == 0) {
    partition_size = n / size; // TODO handle invalid multiplicity (eg 100 / 8 = 12.5)
  }
  MPI_Bcast( &partition_size, 1, MPI_UNSIGNED_LONG, 0, comm );
  log(LOG_DEBUG, "part size: %lu\n", partition_size);

  // allocate a partition of the correct size to work in.
  int* partition = (int*)malloc( partition_size * sizeof(int) );

  // scatter data to the partitions
  MPI_Scatter( data, (int)partition_size, MPI_INT, partition, (int)partition_size, MPI_INT, 0, comm );

  // clear initial data, to avoid confusion.
  if(rank == 0) {
    memset(data, 0, n * sizeof(int));
  }

  // sort individual pieces using mergesort
  sort_partition(&partition, partition_size);

  // gather results up the tree, merging 2 pieces at each step, until results
  // end up in the root. they are stored in data.
  collect_results(&data, n, &partition, &partition_size, rank, size, comm);

  // partition buffer is no longer needed so free it.
  free( partition );
}
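mg_mpi is a collective call: every rank enters it, but only the root needs to hold the full array. A minimal driver sketch under that assumption follows; the choice of n, the random fill, and passing NULL for data on non-root ranks (legal because MPI_Scatter ignores the send buffer off the root) are illustration choices, not part of the routine above.

// Minimal driver sketch (assumptions: n divides evenly by the process count,
// non-root ranks pass NULL for data, input is filled with rand() on the root).
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

void mg_mpi(int* data, unsigned long n, int rank, int size, MPI_Comm comm);

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const unsigned long n = 1024; // assumed divisible by size
  int* data = NULL;
  if (rank == 0) {
    data = (int*)malloc( n * sizeof(int) );
    for (unsigned long i = 0; i < n; ++i)
      data[i] = rand(); // unsorted input lives only on the root
  }

  mg_mpi(data, n, rank, size, MPI_COMM_WORLD);

  if (rank == 0) {
    printf("smallest: %d, largest: %d\n", data[0], data[n - 1]);
    free(data);
  }

  MPI_Finalize();
  return 0;
}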
void benchmark(struct benchmark_config *config)
{
    int i;
    struct worker_info *producers;
    struct worker_info *consumers;
    struct work_queue queue_to_producer;
    struct work_queue queue_to_consumer;
    struct work_queue trash_queue;
    unsigned long long start, elapsed;

    /* two-stage pipeline: main -> producers -> consumers -> trash_queue */
    work_queue_init(&queue_to_producer);
    work_queue_init(&queue_to_consumer);
    work_queue_init(&trash_queue);

    producers = create_workers(config, config->producer_thnum, config->producer,
                               &queue_to_producer, &queue_to_consumer);
    consumers = create_workers(config, config->consumer_thnum, config->consumer,
                               &queue_to_consumer, &trash_queue);

    start = stopwatch_start();

    /* push one zero-initialized work item per iteration, each with its own seed */
    for (i = 0; i < config->num_works; i++) {
        struct work *work = xmalloc(sizeof(*work));
        memset(work, 0, sizeof(*work));
        work->seed = config->seed_offset + i;
        work_queue_push(&queue_to_producer, work);
    }

    /* close queues in pipeline order and wait for each stage to drain */
    work_queue_close(&queue_to_producer);
    join_workers(producers, config->producer_thnum);
    work_queue_close(&queue_to_consumer);
    join_workers(consumers, config->consumer_thnum);
    work_queue_close(&trash_queue);

    elapsed = stopwatch_stop(start);
    collect_results(config, &trash_queue, start, elapsed);

    destroy_workers(consumers, config->consumer_thnum);
    destroy_workers(producers, config->producer_thnum);
    work_queue_destroy(&queue_to_producer);
    work_queue_destroy(&queue_to_consumer);
    work_queue_destroy(&trash_queue);
}
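benchmark() itself only reads a handful of configuration fields, so a driver can stay small. The sketch below is hypothetical: the header name, the exact field types, and the example_producer/example_consumer callbacks are assumptions; only the fields shown being set are ones benchmark() actually uses.

/* Hypothetical driver; fills only the config fields read by benchmark(). */
#include <string.h>
#include "benchmark.h" /* assumed project header declaring struct benchmark_config */

int main(void) {
    struct benchmark_config config;

    memset(&config, 0, sizeof(config));
    config.producer_thnum = 4;                 /* threads in the first pipeline stage  */
    config.consumer_thnum = 4;                 /* threads in the second pipeline stage */
    config.num_works      = 100000;            /* work items pushed by benchmark()     */
    config.seed_offset    = 0;
    config.producer       = example_producer;  /* hypothetical stage callbacks; their  */
    config.consumer       = example_consumer;  /* signatures are defined by the harness */

    benchmark(&config);
    return 0;
}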
/// Driver routine for testing library mode with partitioned MPI_Comm.
/// This test fixture requires MPI and can be run on 3--8 processors.
int main(int argc, char* argv[])
{
  MPI_Init(&argc, &argv);

  // manage MPI split
  MPI_Comm my_comm;
  int color;
  manage_mpi(my_comm, color);

  // cleanup output files; avoid race condition
  std::remove("dakota.o.1");
  std::remove("dakota.o.2"); // remove("dakota.o");
  std::remove("dakota.e.1");
  std::remove("dakota.e.2"); //remove("dakota.e");
#ifdef _WIN32
  Sleep(1000); // milliseconds
#else
  sleep(1);    // seconds
#endif

  // colors 1 and 2 run DAKOTA
  if (color != 0) {
    std::string input;
    gen_dakota_input(color, input);
    run_dakota(my_comm, input, color);
  }
  // ideally color 0 would do something concurrently...
  MPI_Barrier(MPI_COMM_WORLD);

  if (color == 0)
    collect_results();
  // ideally color 0 would do something concurrently...
  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Comm_free(&my_comm);
  MPI_Finalize();

  return 0;
}
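manage_mpi() is not shown in this excerpt. A plausible sketch follows, assuming its only job is to divide MPI_COMM_WORLD into a color-0 remainder plus two groups (colors 1 and 2) that each receive their own communicator for a DAKOTA instance; the specific color-assignment rule is an assumption chosen to fit the 3--8 processor note above.

// Hypothetical implementation of manage_mpi(); the color split below is an
// illustrative assumption, not the test's actual partitioning logic.
#include <mpi.h>

void manage_mpi(MPI_Comm& my_comm, int& color)
{
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // rank 0 sits out (color 0); the remaining ranks are divided between the
  // two DAKOTA runs (colors 1 and 2).
  if (rank == 0)
    color = 0;
  else
    color = (rank <= (size - 1) / 2) ? 1 : 2;

  // every rank gets a communicator for its color, so the MPI_Comm_free in
  // main is valid on all ranks.
  MPI_Comm_split(MPI_COMM_WORLD, color, rank, &my_comm);
}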
// Collect all results by delegating to the overload that takes an explicit
// list of observable names.
mcbase::results_type mcbase::collect_results() const {
    return collect_results(result_names());
}