int main (int argc, char *argv[]) { MPI_Init (&argc, &argv); int nProc, iProc; MPI_Comm_rank (MPI_COMM_WORLD, &iProc); MPI_Comm_size (MPI_COMM_WORLD, &nProc); // number of threads const int NTHREADS = 6; // number of buffers const int NWAY = 2; // left neighbour const int left = LEFT(iProc, nProc); // right neighbour const int right = RIGHT(iProc, nProc); // allocate array of for local vector, left halo and right halo double* array = malloc (NWAY * (NTHREADS+2) * 2 * VLEN * sizeof (double)); ASSERT (array != 0); // initial buffer id int buffer_id = 0; // initialize data data_init (NTHREADS, iProc, buffer_id, array); omp_set_num_threads (NTHREADS); MPI_Barrier (MPI_COMM_WORLD); double time = -now(); #pragma omp parallel default (shared) firstprivate (buffer_id) { const int tid = omp_get_thread_num(); for (int k = 0; k < NITER; ++k) { for ( int i = 0; i < nProc * NTHREADS; ++i ) { const int slice_id = tid + 1; const int left_halo = 0; const int right_halo = NTHREADS+1; if (tid == 0) { MPI_Request send_req[2]; MPI_Request recv_req[2]; // post recv MPI_Irecv ( &array_ELEM_right (buffer_id, left_halo, 0), VLEN, MPI_DOUBLE , left, i, MPI_COMM_WORLD, &recv_req[0]); // post recv MPI_Irecv ( &array_ELEM_left (buffer_id, right_halo, 0), VLEN, MPI_DOUBLE , right, i, MPI_COMM_WORLD, &recv_req[1]); // issue send MPI_Isend ( &array_ELEM_right (buffer_id, right_halo - 1, 0), VLEN, MPI_DOUBLE , right, i, MPI_COMM_WORLD, &send_req[0]); // issue send MPI_Isend ( &array_ELEM_left (buffer_id, left_halo + 1, 0), VLEN, MPI_DOUBLE , left, i, MPI_COMM_WORLD, &send_req[1]); // free send request MPI_Request_free(&send_req[0]); MPI_Request_free(&send_req[1]); // wait for Irecv, Isend MPI_Waitall (2, recv_req, MPI_STATUSES_IGNORE); } #pragma omp barrier // compute data, read from id "buffer_id", write to id "1 - buffer_id" data_compute (NTHREADS, array, 1 - buffer_id, buffer_id, slice_id); #pragma omp barrier // alternate the buffer buffer_id = 1 - buffer_id; } } } time += now(); data_verify (NTHREADS, iProc, ( NITER * nProc * NTHREADS ) % NWAY, array); printf ("# mpi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); MPI_Finalize(); free (array); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE (gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE (gaspi_proc_num (&nProc)); // number of threads const int NTHREADS = 2; // number of buffers const int NWAY = 2; gaspi_segment_id_t const segment_id = 0; // allocate segment for array for local vector, left halo and right halo SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double) , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_pointer_t array; SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) ); // initial buffer id int buffer_id = 0; // set notification values gaspi_notification_id_t left_data_available[NWAY]; gaspi_notification_id_t right_data_available[NWAY]; for (gaspi_notification_id_t id = 0; id < NWAY; ++id) { left_data_available[id] = id; right_data_available[id] = NWAY + id; } // set queue id gaspi_queue_id_t queue_id = 0; // initialize data data_init (NTHREADS, iProc, buffer_id, array); omp_set_num_threads (NTHREADS); double time = -now(); #pragma omp parallel default (shared) firstprivate (buffer_id) { const int tid = omp_get_thread_num(); for (int k = 0; k < NITER; ++k) { for ( int i = 0; i < nProc * NTHREADS; ++i ) { const int left_halo = 0; const int slice_id = tid + 1; const int right_halo = NTHREADS+1; if (tid == 0) { // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc) , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double) , right_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc) , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double) , left_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // wait for data notification wait_or_die (segment_id, right_data_available[buffer_id], 1 + i); // wait for data notification wait_or_die (segment_id, left_data_available[buffer_id], 1 + i); } #pragma omp barrier // compute data, read from id "buffer_id", write to id "1 - buffer_id" data_compute ( NTHREADS, array, 1 - buffer_id, buffer_id, slice_id); #pragma omp barrier // alternate the buffer buffer_id = 1 - buffer_id; } } } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array); printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }