int main(int argc, char *argv[]) { int ntimes = 10; gaspi_rank_t rank, nprocs; gaspi_notification_id_t id; gaspi_notification_t val; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT(gaspi_proc_num(&nprocs)); ASSERT (gaspi_proc_rank(&rank)); const gaspi_rank_t right = (rank + nprocs + 1) % nprocs; do { ASSERT (gaspi_segment_create(0, 1024, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); ASSERT( gaspi_write_notify(0, 0, right, 0, 0, 8, 0, 1, 0, GASPI_BLOCK) ); ASSERT( gaspi_wait( 0, GASPI_BLOCK) ); ASSERT(gaspi_notify_waitsome(0, 0, 1, &id, GASPI_BLOCK)); ASSERT( gaspi_notify_reset(0, id, &val)); ASSERT (gaspi_segment_delete(0)); ASSERT (gaspi_segment_create(0, 2048, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); ASSERT( gaspi_write_notify(0, 0, right, 0, 0, 8, 0, 1, 0, GASPI_BLOCK) ); ASSERT( gaspi_wait( 0, GASPI_BLOCK) ); ASSERT(gaspi_notify_waitsome(0, 0, 1, &id, GASPI_BLOCK)); ASSERT( gaspi_notify_reset(0, id, &val)); ASSERT (gaspi_segment_delete(0)); ntimes--; } while(ntimes > 0); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int i, iter; int ret = 0; gaspi_rank_t myrank, numranks; gaspi_size_t mem_size = 0UL, j; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_proc_num(&numranks)); if( numranks < 2 ) { return EXIT_SUCCESS; } mem_size = 2 * SLOT_SIZE * (numranks - 1); if(myrank == 0) { printf("Mem size: %lu (%.2f MB)\nProcs: %u Max Slot size %lu Iterations %d\n", mem_size, mem_size * 1.0f / 1024/ 1024, numranks, (gaspi_size_t) SLOT_SIZE, MAX_ITERATIONS); #ifdef WITH_SYNC printf("Using notifications only\n"); #endif } ASSERT (gaspi_segment_create(0, mem_size, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); float *mptr = (float *) _vptr; //generate random srand((unsigned)time(0)); srand48((unsigned) time(0)); gaspi_size_t cur_slot_size = SLOT_SIZE; for(cur_slot_size = SLOT_SIZE; cur_slot_size >= sizeof(float); cur_slot_size/=2) { if(myrank == 0) printf("===== Slot Size %lu ====\n", cur_slot_size); for(iter = 0; iter < MAX_ITERATIONS; iter++) { if(myrank == 0) printf("iteration %3d... ", iter); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); /* fill slots with randoms */ for(j = 0; j < (mem_size / sizeof(float) / 2); j++) { mptr[j]= drand48() + (myrank*1.0); } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); gaspi_offset_t offset_in = 0, offset_out = mem_size / 2; /* rank 0 write to all others */ if(myrank == 0) { for (i = 1; i < numranks; i++) { offset_in = (i - 1) * cur_slot_size; #ifdef WITH_SYNC ASSERT (gaspi_write_notify(0, offset_in, i, 0, 0, cur_slot_size, 0, 1, 0, GASPI_BLOCK)); #else ASSERT (gaspi_write(0, offset_in, i, 0, 0, cur_slot_size, 0, GASPI_BLOCK)); #endif } ASSERT(gaspi_wait(0, GASPI_BLOCK)); } #ifdef WITH_SYNC else { gaspi_notification_id_t id; gaspi_notification_t val; ASSERT(gaspi_notify_waitsome(0, 0, 1, &id, GASPI_BLOCK)); ASSERT(gaspi_notify_reset(0, id, &val)); assert(val == 1); } #endif #ifndef WITH_SYNC ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); #endif /* other ranks all write back to 0 */ if(myrank != 0) { offset_in = 0; offset_out = (mem_size / 2) + (cur_slot_size * (myrank - 1)); #ifdef WITH_SYNC ASSERT (gaspi_write_notify(0, offset_in, 0, 0, offset_out, cur_slot_size, myrank, 1, 0, GASPI_BLOCK)); #else ASSERT (gaspi_write(0, offset_in, i, 0, offset_out, cur_slot_size, 0, GASPI_BLOCK)); #endif ASSERT(gaspi_wait(0, GASPI_BLOCK)); } #ifdef WITH_SYNC else { gaspi_notification_id_t id; gaspi_notification_t val; int notification_counter = 0; do { ASSERT(gaspi_notify_waitsome(0, 1, numranks - 1, &id, GASPI_BLOCK)); ASSERT(gaspi_notify_reset(0, id, &val)); assert(val == 1); notification_counter++; } while( notification_counter < numranks - 1); } #endif #ifndef WITH_SYNC ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); #endif if(myrank == 0) { /* check correctness */ float *in = (float *) _vptr; float *out = (float *) ((char *) _vptr + mem_size / 2); const gaspi_size_t total_elems = (cur_slot_size * (numranks - 1) / sizeof(float)); for(j = 0; j < total_elems; j++) { if(in[j] != out[j]) { printf("Different values at pos %lu: %f %f (iterations %d)\n", j, in[j], out[j], iter); ret = -1; goto end; } } printf("All fine!\n"); } } } end: ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return ret; }
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); gaspi_notification_id_t n=0; gaspi_rank_t rank, nprocs, i; const gaspi_segment_id_t seg_id = 0; gaspi_offset_t offset; gaspi_number_t queue_size; gaspi_number_t queue_max; ASSERT (gaspi_queue_size_max(&queue_max)); ASSERT(gaspi_proc_num(&nprocs)); ASSERT (gaspi_proc_rank(&rank)); ASSERT (gaspi_segment_create(seg_id, nprocs * sizeof(int), GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); offset = rank * sizeof(int); //set memory gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); int *mem = (int *) _vptr; for(i = 0; i < nprocs; i++) { mem[i] = (int) rank; } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); //go for(i = 0; i < nprocs; i++) { if (i == rank) continue; ASSERT (gaspi_queue_size(0, &queue_size)); if(queue_size > queue_max - 1) ASSERT (gaspi_wait(0, GASPI_BLOCK)); ASSERT (gaspi_write_notify( seg_id, offset, i, seg_id, offset, sizeof(int), (gaspi_notification_id_t) rank, 1, 0, GASPI_BLOCK)); } do { gaspi_notification_id_t id; ASSERT (gaspi_notify_waitsome(seg_id, 0, (gaspi_notification_id_t) nprocs , &id, GASPI_BLOCK)); gaspi_notification_t notification_val; ASSERT( gaspi_notify_reset(seg_id, id, ¬ification_val)); assert(notification_val == 1); n++; } while(n < (nprocs - 1)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { const int num_elems = 1024; TSUITE_INIT( argc, argv ); ASSERT( gaspi_proc_init(GASPI_BLOCK) ); gaspi_rank_t rank, nprocs; ASSERT( gaspi_proc_num(&nprocs) ); ASSERT( gaspi_proc_rank(&rank) ); const gaspi_rank_t left = (rank + nprocs - 1 ) % nprocs; const gaspi_rank_t right = (rank + nprocs + 1) % nprocs; /* Create and fill buffer */ int * const buf = (int *) malloc(num_elems * sizeof(int)); assert( buf != NULL); int i; for (i = 0; i < num_elems; i++) { buf[i] = rank; } ASSERT( gaspi_segment_use( 0, buf, num_elems * sizeof(int), GASPI_GROUP_ALL, GASPI_BLOCK, 0) ); ASSERT( gaspi_segment_create( 1, num_elems * sizeof(int), GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED) ); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); /* write data to neighbour ( from seg 0 to seg 1) */ ASSERT( gaspi_write_notify( 0, 0, right, 1, 0, num_elems * sizeof(int), 0, 1, 0, GASPI_BLOCK) ); gaspi_notification_id_t id; ASSERT( gaspi_notify_waitsome( 1, 0, 1, &id, GASPI_BLOCK ) ); ASSERT( gaspi_wait( 0, GASPI_BLOCK ) ); /* Check data as segment */ gaspi_pointer_t seg1_ptr; ASSERT( gaspi_segment_ptr( 1, &seg1_ptr ) ); int * recv_buf = (int *) seg1_ptr; for (i = 0; i < num_elems; i++) { assert(recv_buf[i] == left); } ASSERT( gaspi_segment_delete(0)); ASSERT( gaspi_segment_delete(1)); /* Check data in buffer */ for (i = 0; i < num_elems; i++) { assert(buf[i] == rank); } ASSERT( gaspi_barrier( GASPI_GROUP_ALL, GASPI_BLOCK ) ); ASSERT( gaspi_proc_term( GASPI_BLOCK ) ); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { gaspi_proc_init(GASPI_BLOCK); gaspi_rank_t myRank; gaspi_rank_t nProc; gaspi_proc_rank(&myRank); gaspi_proc_num(&nProc); if(nProc < 2) goto end; gaspi_number_t queue_size; gaspi_number_t queue_max; gaspi_queue_size_max(&queue_max); if (myRank == 0) gaspi_printf("Queue max is %d\n", queue_max); gaspi_printf("Rank %i of %i started.\n", myRank, nProc); const gaspi_segment_id_t segment_id = 0; const gaspi_size_t nrReads = NR_OF_READS; gaspi_group_commit(GASPI_GROUP_ALL,GASPI_BLOCK); gaspi_segment_create(segment_id, nrReads * (RAWREADLENGTH) * sizeof(gaspi_char),GASPI_GROUP_ALL,GASPI_BLOCK,GASPI_ALLOC_DEFAULT); gaspi_pointer_t _vptr; //pointer to the segment if(gaspi_segment_ptr(segment_id, &_vptr) != GASPI_SUCCESS) printf("gaspi_segment_ptr failed\n"); gaspi_char * shared_ptr = (gaspi_char *) _vptr; // initialize and print segment initReads(shared_ptr, nrReads, READLENGTH, myRank); gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK); //push the reads from the master to the slaves gaspi_size_t r = 0; int rawReadSize = RAWREADLENGTH * sizeof(gaspi_char); int nrWorkers = nProc - 1; int toRank; gaspi_notification_id_t notif_id; if (myRank == 0) { for (r = 0; r < nrReads; r++) { gaspi_queue_size(0, &queue_size); if(queue_size > queue_max - 1) gaspi_wait(0, GASPI_BLOCK); //wait for queue to become free again... (note: max is 1024) toRank = (r % nrWorkers) + 1; // notif_id = r + 1; notif_id = ((r / nrWorkers) + 1); if ( gaspi_write_notify( segment_id, // from segment r*rawReadSize, // from offset toRank, // to-rank segment_id, // to segment // ((int)(r/nrWorkers))*rawReadSize, // to-offset r * rawReadSize, rawReadSize, // size notif_id, // notification id r+1, // notification value (> 0!) (gaspi_queue_id_t) 0, // notification queue GASPI_BLOCK) == GASPI_SUCCESS) // block until written gaspi_printf("Sending read %d from %d to rank %d with id %d\n", r, myRank, toRank, notif_id); if (toRank == 2) print_read(shared_ptr, r, READLENGTH, myRank); } } //ranks receive reads from the master rank if (myRank != 0) { gaspi_notification_id_t fid; gaspi_notification_t notification_value; int nrOfReceives = (int)(nrReads / (nProc-1)); if (myRank <= nrReads % nrWorkers) nrOfReceives++; gaspi_printf("Rank %d -- listening for %d events...\n", myRank, nrOfReceives); int complete = 0; while (complete < nrOfReceives) { if(gaspi_notify_waitsome( segment_id, // segment 1, // id of first notification to wait for // nrReads, nrOfReceives, // id of last notification to wait for (alternative) &fid, // identifier (output parameter with the identifier of a received notification (?)) GASPI_TEST // immediately return (GASPI_TEST) ) == GASPI_SUCCESS) { if(gaspi_notify_reset( segment_id, // segment fid, // notification identifier ¬ification_value // notification value ) == GASPI_SUCCESS) { complete++ ; gaspi_printf("Rank %d -- got notification: read %d received (%d completed)\n", myRank, notification_value-1, complete); if (myRank == 2) print_read(shared_ptr, notification_value-1, READLENGTH, myRank); } } } } // all values received ! print ! gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK); gaspi_printf("Printing reads\n"); print_char_array_segment(shared_ptr, nrReads, READLENGTH, myRank); // print_read(shared_ptr, 0, READLENGTH, myRank); gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK); gaspi_printf("Rank %d done\n", myRank); //block and exit end: gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK); gaspi_proc_term(GASPI_BLOCK); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); gaspi_rank_t numranks, myrank; ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); int rankSend = (myrank + 1) % numranks; ASSERT(gaspi_segment_create(0, _1MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_size_t segSize; ASSERT( gaspi_segment_size(0, myrank, &segSize)); unsigned char * pGlbMem; gaspi_pointer_t _vptr; ASSERT(gaspi_segment_ptr(0, &_vptr)); pGlbMem = ( unsigned char *) _vptr; gaspi_number_t queueSize, qmax ; ASSERT (gaspi_queue_size_max(&qmax)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); const unsigned long localOff = 0; const unsigned long remOff = 0; /* write_notify */ do { ASSERT(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); /* write */ do { ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); /* write + write_notify */ do { ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax - 1); EXPECT_FAIL_WITH(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK)); /* read */ do { ASSERT(gaspi_read(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_read(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_read(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); /* write_list_notify */ { const gaspi_number_t nListElems = 255; gaspi_number_t n; gaspi_segment_id_t localSegs[nListElems]; gaspi_offset_t localOffs[nListElems]; const gaspi_rank_t rank2send = (myrank + 1) % numranks; gaspi_segment_id_t remSegs[nListElems]; gaspi_offset_t remOffs[nListElems]; gaspi_size_t sizes[nListElems]; const unsigned int bytes = sizeof(int); gaspi_offset_t initLocOff = 0; gaspi_offset_t initRemOff = (bytes * nListElems + 64); for(n = 0; n < nListElems; n++) { sizes[n] = bytes; localSegs[n] = 0; localOffs[n] = initLocOff; initLocOff += bytes; remSegs[n] = 0; remOffs[n] = initRemOff; initRemOff += bytes; } do { ASSERT( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK)); gaspi_queue_size(0, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(0, GASPI_BLOCK)); ASSERT( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK)); } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE (gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE (gaspi_proc_num (&nProc)); // number of threads const int NTHREADS = 2; // number of buffers const int NWAY = 2; gaspi_segment_id_t const segment_id = 0; // allocate segment for array for local vector, left halo and right halo SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double) , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_pointer_t array; SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) ); // initial buffer id int buffer_id = 0; // set notification values gaspi_notification_id_t left_data_available[NWAY]; gaspi_notification_id_t right_data_available[NWAY]; for (gaspi_notification_id_t id = 0; id < NWAY; ++id) { left_data_available[id] = id; right_data_available[id] = NWAY + id; } // set queue id gaspi_queue_id_t queue_id = 0; // initialize data data_init (NTHREADS, iProc, buffer_id, array); omp_set_num_threads (NTHREADS); double time = -now(); #pragma omp parallel default (shared) firstprivate (buffer_id) { const int tid = omp_get_thread_num(); for (int k = 0; k < NITER; ++k) { for ( int i = 0; i < nProc * NTHREADS; ++i ) { const int left_halo = 0; const int slice_id = tid + 1; const int right_halo = NTHREADS+1; if (tid == 0) { // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc) , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double) , right_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // issue write wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc) , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double) , left_data_available[buffer_id], 1 + i, queue_id, GASPI_BLOCK)); // wait for data notification wait_or_die (segment_id, right_data_available[buffer_id], 1 + i); // wait for data notification wait_or_die (segment_id, left_data_available[buffer_id], 1 + i); } #pragma omp barrier // compute data, read from id "buffer_id", write to id "1 - buffer_id" data_compute ( NTHREADS, array, 1 - buffer_id, buffer_id, slice_id); #pragma omp barrier // alternate the buffer buffer_id = 1 - buffer_id; } } } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array); printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { SUCCESS_OR_DIE (gaspi_proc_init (GASPI_BLOCK)); gaspi_rank_t iProc, nProc; SUCCESS_OR_DIE (gaspi_proc_rank (&iProc)); SUCCESS_OR_DIE (gaspi_proc_num (&nProc)); // number of threads const int NTHREADS = 2; // number of buffers const int NWAY = 2; // allocate segment for array for local vector, left halo and right halo gaspi_segment_id_t const segment_id = 0; SUCCESS_OR_DIE ( gaspi_segment_create ( segment_id, NWAY * (NTHREADS + 2) * 2 * VLEN * sizeof (double) , GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_pointer_t array; SUCCESS_OR_DIE ( gaspi_segment_ptr ( segment_id, &array) ); // initial buffer id int buffer_id = 0; // set notification values gaspi_notification_id_t left_data_available[NWAY]; gaspi_notification_id_t right_data_available[NWAY]; for (gaspi_notification_id_t id = 0; id < NWAY; ++id) { left_data_available[id] = id; right_data_available[id] = NWAY + id; } // set queue id gaspi_queue_id_t queue_id = 0; // initialize slice data structures slice *ssl = (slice *) malloc (NTHREADS * sizeof (slice)); ASSERT (ssl); init_slices (ssl, NTHREADS); // initialize data data_init (NTHREADS,iProc, buffer_id, array); const int right_halo = NTHREADS+1; const int left_halo = 0; // issue initial write to left ngb wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_left (buffer_id, left_halo + 1, 0), LEFT(iProc, nProc) , segment_id, array_OFFSET_left (buffer_id, right_halo, 0), VLEN * sizeof (double) , right_data_available[buffer_id], 1, queue_id, GASPI_BLOCK)); // issue initial write to right ngb wait_for_queue_max_half (&queue_id); SUCCESS_OR_DIE ( gaspi_write_notify ( segment_id, array_OFFSET_right (buffer_id, right_halo - 1, 0), RIGHT(iProc, nProc) , segment_id, array_OFFSET_right (buffer_id, left_halo, 0), VLEN * sizeof (double) , left_data_available[buffer_id], 1, queue_id, GASPI_BLOCK)); // set total number of iterations per slice const int num = nProc * NTHREADS * NITER; omp_set_num_threads (NTHREADS); double time = -now(); #pragma omp parallel default (none) firstprivate (buffer_id, queue_id) \ shared (array, left_data_available, right_data_available, ssl, stderr) { slice* sl; while ((sl = get_slice_and_lock (ssl, NTHREADS, num))) { handle_slice ( sl, array, left_data_available, right_data_available , segment_id, queue_id, NWAY, NTHREADS, num); /* TODO ==== - Which functionality do we need in 'handle_slice' ? (asynchronous dataflow for 1-D halo-exchange) - Discuss. - Bonus question: Can we be at different iteration stages for left and right halo ? if yes: Why ? */ omp_unset_lock (&sl->lock); } #pragma omp barrier } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc * NTHREADS) % NWAY, array); printf ("# gaspi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }
gaspi_return_t pgaspi_gpu_write_notify(const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId < 0 || size <= GASPI_GPU_DIRECT_MAX ) { return gaspi_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size,notification_id, notification_value, queue, timeout_ms); } if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; char *host_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_ptr+NOTIFY_OFFSET+offset_local); char* device_ptr =(char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr+offset_local); gaspi_gpu* agpu = _gaspi_find_gpu(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId); if( !agpu ) { gaspi_print_error("No GPU found or not initialized (gaspi_init_GPUs)."); unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } int copy_size = 0; int gpu_offset = 0; int size_left = size; int BLOCK_SIZE= GASPI_GPU_BUFFERED; const gaspi_cycles_t s0 = gaspi_get_cycles (); while(size_left > 0) { int i; for(i = 0; i < GASPI_CUDA_EVENTS; i++) { if(size_left > BLOCK_SIZE) copy_size = BLOCK_SIZE; else copy_size = size_left; if(cudaMemcpyAsync(host_ptr+gpu_offset, device_ptr + gpu_offset, copy_size, cudaMemcpyDeviceToHost, agpu->streams[queue])) { unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } glb_gaspi_ctx.ne_count_c[queue]++; agpu->events[queue][i].segment_remote = segment_id_remote; agpu->events[queue][i].segment_local = segment_id_local; agpu->events[queue][i].size = copy_size; agpu->events[queue][i].rank = rank; agpu->events[queue][i].offset_local = offset_local+gpu_offset; agpu->events[queue][i].offset_remote = offset_remote+gpu_offset; agpu->events[queue][i].in_use = 1; cudaError_t err = cudaEventRecord(agpu->events[queue][i].event,agpu->streams[queue]); if(err != cudaSuccess) { unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } /* Thats not beautiful at all, however, else we have a overflow soon in the queue */ if(agpu->events[queue][i].ib_use) { struct ibv_wc wc; int ne; do { ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc); glb_gaspi_ctx.ne_count_c[queue] -= ne; if (ne == 0) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } } while(ne == 0); agpu->events[queue][i].ib_use = 0; } gpu_offset += copy_size; size_left -= copy_size; if(size_left == 0) break; } for(i = 0; i < GASPI_CUDA_EVENTS; i++) { cudaError_t error; if (agpu->events[queue][i].in_use == 1 ) { do { error = cudaEventQuery(agpu->events[queue][i].event ); if( cudaSuccess == error ) { if (_gaspi_event_send(&agpu->events[queue][i],queue) ) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } agpu->events[queue][i].in_use = 0; } else if(error == cudaErrorNotReady) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } else { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } } while(error != cudaSuccess); } } } struct ibv_send_wr *bad_wr; struct ibv_sge slistN; struct ibv_send_wr swrN; slistN.addr = (uintptr_t)(glb_gaspi_ctx.nsrc.buf + notification_id * sizeof(gaspi_notification_id_t)); *((unsigned int *) slistN.addr) = notification_value; slistN.length = sizeof(gaspi_notification_id_t); slistN.lkey =((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey; if((glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0)) { swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_addr + notification_id * sizeof(gaspi_notification_id_t)); swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_rkey; } else { swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + notification_id * sizeof(gaspi_notification_id_t)); swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey; } swrN.sg_list = &slistN; swrN.num_sge = 1; swrN.wr_id = rank; swrN.opcode = IBV_WR_RDMA_WRITE; swrN.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;; swrN.next = NULL; if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swrN, &bad_wr)) { glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT; unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } glb_gaspi_ctx.ne_count_c[queue]++; unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_SUCCESS; }
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); gaspi_rank_t numranks, myrank; ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); int rankSend = (myrank + 1) % numranks; gaspi_printf("Seg size: %lu MB\n", _2GB / 1024 / 1024); ASSERT(gaspi_segment_create(0, _2GB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_size_t segSize; ASSERT( gaspi_segment_size(0, myrank, &segSize)); unsigned char * pGlbMem; gaspi_pointer_t _vptr; ASSERT(gaspi_segment_ptr(0, &_vptr)); pGlbMem = ( unsigned char *) _vptr; gaspi_number_t qmax ; ASSERT (gaspi_queue_size_max(&qmax)); unsigned long i; unsigned long size = 1800; for(i = 0; i < size / sizeof(unsigned char); i++) pGlbMem[i] = myrank; gaspi_printf("Queue max: %lu\n", qmax); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); unsigned long localOff = 0; unsigned long remOff = size; ASSERT(gaspi_write_notify(0, localOff, rankSend, 0, remOff, size, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK)); gaspi_rank_t rankGet = (myrank + numranks - 1) % numranks; gaspi_notification_t got_val; gaspi_notification_id_t got; ASSERT(gaspi_notify_waitsome(0, (gaspi_notification_id_t) rankGet, 1, &got, GASPI_BLOCK)); ASSERT(gaspi_notify_reset(0, got, &got_val)); ASSERT (gaspi_wait(1, GASPI_BLOCK)); /* check */ for(i = size; i < 2 * size / sizeof(unsigned char); i++) assert(pGlbMem[i] == rankGet); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); gaspi_rank_t rank, nprocs, i; const gaspi_segment_id_t seg_id = 0; const gaspi_offset_t offset = 0; const gaspi_size_t transfer_size = 8192; gaspi_number_t queue_size; gaspi_number_t queue_max; ASSERT (gaspi_queue_size_max(&queue_max)); ASSERT(gaspi_proc_num(&nprocs)); ASSERT (gaspi_proc_rank(&rank)); if( nprocs < 2 ) { return EXIT_SUCCESS; } ASSERT (gaspi_segment_create(seg_id, nprocs * 2 * transfer_size, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_UNINITIALIZED)); gaspi_number_t max_notifications; ASSERT(gaspi_notification_num(&max_notifications)); gaspi_number_t avail_notifications = max_notifications / nprocs; max_notifications = avail_notifications * nprocs; gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); int *mem = (int *) _vptr; for(i = 0; i < nprocs; i++) { mem[i] = (int) rank; } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); for(i = 0; i < nprocs; i++) { gaspi_notification_id_t not; for(not = 0; not < avail_notifications; not++) { ASSERT (gaspi_queue_size(0, &queue_size)); if( queue_size > queue_max - 1 ) { ASSERT (gaspi_wait(0, GASPI_BLOCK)); } gaspi_notification_id_t the_notification = (gaspi_notification_id_t) (rank * avail_notifications + not); ASSERT( gaspi_write_notify( seg_id, offset, i, seg_id, offset, transfer_size, the_notification, 1, 0, GASPI_BLOCK)); } } gaspi_notification_id_t n = 0; do { gaspi_notification_id_t id; ASSERT (gaspi_notify_waitsome(seg_id, 0, max_notifications - 1 , &id, GASPI_BLOCK)); gaspi_notification_t notification_val; ASSERT( gaspi_notify_reset(seg_id, id, ¬ification_val)); assert(notification_val == 1); n++; } while(n < max_notifications - 1); ASSERT(gaspi_wait(0, GASPI_BLOCK)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }