/*
 * Switch to the next GASPI queue and wait on it.
 *
 * queue_id: in/out. On return it holds the successor of the queue passed
 *           in, wrapping around at the number of queues reported by
 *           gaspi_queue_num.
 *
 * Unlike a fill-level check, this helper rotates unconditionally: the
 * current queue's size is never inspected. The function returns only after
 * the blocking gaspi_wait on the newly selected queue has succeeded; any
 * GASPI failure is handled by SUCCESS_OR_DIE.
 */
void wait_for_queue_entries_for_read_notify (gaspi_queue_id_t* queue_id)
{
  gaspi_number_t queue_num;

  SUCCESS_OR_DIE (gaspi_queue_num (&queue_num));

  /* round-robin over all available queues */
  *queue_id = (*queue_id + 1) % queue_num;

  SUCCESS_OR_DIE (gaspi_wait (*queue_id, GASPI_BLOCK));
}
/*
 * Wait, one queue after the other, on every GASPI queue.
 *
 * The number of queues is taken from gaspi_queue_num; each queue id from 0
 * up to (but excluding) that number is passed to a blocking gaspi_wait.
 * Any GASPI failure is handled by SUCCESS_OR_DIE.
 */
void wait_for_flush_queues ()
{
  gaspi_number_t queue_num;
  SUCCESS_OR_DIE (gaspi_queue_num (&queue_num));

  for (gaspi_queue_id_t queue = 0; queue < queue_num; ++queue)
  {
    SUCCESS_OR_DIE (gaspi_wait (queue, GASPI_BLOCK));
  }
}
/*
 * Minimal GASPI program: start up, print this process' rank and the total
 * number of ranks, shut down again.
 *
 * argc and argv are not used. Returns EXIT_SUCCESS; every GASPI call is
 * checked through SUCCESS_OR_DIE.
 */
int main(int argc, char *argv[])
{
  gaspi_rank_t rank;
  gaspi_rank_t num;

  /* bring up the GASPI runtime before querying anything */
  SUCCESS_OR_DIE( gaspi_proc_init(GASPI_BLOCK) );

  SUCCESS_OR_DIE( gaspi_proc_rank(&rank) );
  SUCCESS_OR_DIE( gaspi_proc_num(&num) );

  gaspi_printf("Hello world from rank %d of %d\n",rank, num);

  SUCCESS_OR_DIE( gaspi_proc_term(GASPI_BLOCK) );

  return EXIT_SUCCESS;
}
/*
 * Make sure there is room for wanted_entries more requests.
 *
 * queue:          in/out. Left untouched when the current queue still has
 *                 room; otherwise advanced to the next queue (wrapping
 *                 around at gaspi_queue_num).
 * wanted_entries: number of entries the caller is about to post.
 *
 * When the current queue would exceed gaspi_queue_size_max, the function
 * switches queues and blocks in gaspi_wait on the new one. All three GASPI
 * queries are issued up front, regardless of the outcome of the check.
 */
static void wait_for_queue_entries (gaspi_queue_id_t* queue, int wanted_entries)
{
  gaspi_number_t queue_size_max;
  gaspi_number_t queue_size;
  gaspi_number_t queue_num;

  SUCCESS_OR_DIE (gaspi_queue_size_max (&queue_size_max));
  SUCCESS_OR_DIE (gaspi_queue_size (*queue, &queue_size));
  SUCCESS_OR_DIE (gaspi_queue_num (&queue_num));

  /* enough room on the current queue: nothing to do */
  if (queue_size + wanted_entries <= queue_size_max)
  {
    return;
  }

  /* current queue is too full: move on and wait on the next one */
  *queue = (*queue + 1) % queue_num;
  SUCCESS_OR_DIE (gaspi_wait (*queue, GASPI_BLOCK));
}
/*
 * Block until one specific notification has arrived, then consume it.
 *
 * segment_id:      segment the notification belongs to.
 * notification_id: the single notification id to wait for.
 * expected:        value the notification must carry.
 *
 * The wait covers exactly one id (range length 1), so the id reported back
 * is asserted to be notification_id. The notification is then reset and
 * the value read back by the reset is asserted to equal expected.
 */
void wait_or_die
  ( gaspi_segment_id_t segment_id
  , gaspi_notification_id_t notification_id
  , gaspi_notification_t expected
  )
{
  gaspi_notification_id_t id;
  gaspi_notification_t value;

  SUCCESS_OR_DIE (gaspi_notify_waitsome (segment_id, notification_id, 1, &id, GASPI_BLOCK));
  ASSERT (id == notification_id);

  /* resetting hands back the value that was stored in the notification */
  SUCCESS_OR_DIE (gaspi_notify_reset (segment_id, id, &value));
  ASSERT (value == expected);
}
int main(int argc, char *argv[]) { static const int VLEN = 1 << 2; SUCCESS_OR_DIE( gaspi_proc_init(GASPI_BLOCK) ); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE( gaspi_proc_rank(&iProc) ); SUCCESS_OR_DIE( gaspi_proc_num(&nProc) ); gaspi_segment_id_t const segment_id = 0; gaspi_size_t const segment_size = VLEN * sizeof (double); SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, segment_size , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED ) ); gaspi_pointer_t array; SUCCESS_OR_DIE( gaspi_segment_ptr (segment_id, &array) ); for (int j = 0; j < VLEN; ++j) { ( (double *)array )[j]= (double)( iProc * VLEN + j ); printf("rank %d elem %d: %f \n", iProc,j,( (double *)array )[j] ); } SUCCESS_OR_DIE( gaspi_proc_term(GASPI_BLOCK) ); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE (gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE (gaspi_proc_num (&nProc)); // number of threads const int NTHREADS = 2; // number of buffers const int NWAY = 2; gaspi_segment_id_t const segment_id = 0; // allocate segment for array for local vector, left halo and right halo SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double) , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_pointer_t array; SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) ); // initial buffer id int buffer_id = 0; // set notification values gaspi_notification_id_t left_data_available[NWAY]; gaspi_notification_id_t right_data_available[NWAY]; for (gaspi_notification_id_t id = 0; id < NWAY; ++id) { left_data_available[id] = id; right_data_available[id] = NWAY + id; } // set queue id gaspi_queue_id_t queue_id = 0; // initialize data data_init (NTHREADS, iProc, buffer_id, array); omp_set_num_threads (NTHREADS); double time = -now(); #pragma omp parallel default (shared) firstprivate (buffer_id) { const int tid = omp_get_thread_num(); for (int k = 0; k < NITER; ++k) { for ( int i = 0; i < nProc * NTHREADS; ++i ) { const int left_halo = 0; const int slice_id = tid + 1; const int right_halo = NTHREADS+1; if (tid == 0) { // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc) , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double) , right_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc) , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double) , 
left_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // wait for data notification wait_or_die (segment_id, right_data_available[buffer_id], 1 + i); // wait for data notification wait_or_die (segment_id, left_data_available[buffer_id], 1 + i); } #pragma omp barrier // compute data, read from id "buffer_id", write to id "1 - buffer_id" data_compute ( NTHREADS, array, 1 - buffer_id, buffer_id, slice_id); #pragma omp barrier // alternate the buffer buffer_id = 1 - buffer_id; } } } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array); printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE (gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE (gaspi_proc_num (&nProc)); // number of threads const int NTHREADS = 2; // number of buffers const int NWAY = 2; // allocate segment for array for local vector, left halo and right halo gaspi_segment_id_t const segment_id = 0; SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double) , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_pointer_t array; SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) ); // initial buffer id int buffer_id = 0; // set notification values gaspi_notification_id_t left_data_available[NWAY]; gaspi_notification_id_t right_data_available[NWAY]; for (gaspi_notification_id_t id = 0; id < NWAY; ++id) { left_data_available[id] = id; right_data_available[id] = NWAY + id; } // set queue id gaspi_queue_id_t queue_id = 0; // initialize slice data structures slice *ssl = (slice *) malloc (NTHREADS * sizeof (slice)); ASSERT (ssl); init_slices (ssl, NTHREADS); // initialize data data_init (NTHREADS,iProc, buffer_id, array); const int right_halo = NTHREADS+1; const int left_halo = 0; // issue initial write to left ngb wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc) , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double) , right_data_available[buffer_id], 1, queue_id, GASPI_BLOCK)); // issue initial write to right ngb wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc) , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double) , left_data_available[buffer_id], 1, queue_id, GASPI_BLOCK)); // set total number of iterations per slice const int num = nProc * NTHREADS 
* NITER; omp_set_num_threads (NTHREADS); double time = -now(); #pragma omp parallel default (none) firstprivate (buffer_id, queue_id) \ shared (array, left_data_available, right_data_available, ssl, stderr) { slice* sl; while ((sl = get_slice_and_lock (ssl, NTHREADS, num))) { handle_slice ( sl, array, left_data_available, right_data_available , segment_id, queue_id, NWAY, NTHREADS, num); /* TODO ==== - Which functionality do we need in 'handle_slice' ? (asynchronous dataflow for 1-D halo-exchange) - Discuss. - Bonus question: Can we be at different iteration stages for left and right halo ? if yes: Why ? */ omp_unset_lock (&sl->lock); } #pragma omp barrier } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array); printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { static const int VLEN = 1 << 2; SUCCESS_OR_DIE(gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE(gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE(gaspi_proc_num (&nProc)); gaspi_segment_id_t const segment_id = 0; gaspi_size_t const segment_size = 2 * VLEN * sizeof(double); SUCCESS_OR_DIE(gaspi_segment_create (segment_id, segment_size, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED )); gaspi_pointer_t array; SUCCESS_OR_DIE(gaspi_segment_ptr (segment_id, &array)); double * src_array = (double *) (array); double * rcv_array = src_array + VLEN; for (int j = 0; j < VLEN; ++j) { src_array[j] = (double) (iProc * VLEN + j); } gaspi_notification_id_t data_available = 0; gaspi_queue_id_t queue_id = 0; gaspi_offset_t loc_off = 0; gaspi_offset_t rem_off = VLEN * sizeof(double); /* TODO ==== - check queue. - gaspi_write_notify 1-dim array of size VLEN to right neighbour - wait for notification from left neighbour */ for (int j = 0; j < VLEN; ++j) { printf ("rank %d rcv elem %d: %f \n", iProc, j, rcv_array[j]); } /* TODO ==== - why do we have to wait for the queue here ? */ SUCCESS_OR_DIE(gaspi_wait (queue_id, GASPI_BLOCK)); SUCCESS_OR_DIE(gaspi_proc_term (GASPI_BLOCK)); return EXIT_SUCCESS; }