int main(int argc, char *argv[]) { unsigned long i; gaspi_pointer_t _vptr; gaspi_rank_t num_ranks, myrank; gaspi_number_t qmax ; gaspi_number_t queueSize; gaspi_rank_t left_rank, right_rank; const unsigned long N = (1 << 13); TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_num(&num_ranks)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT(gaspi_segment_create(0, _2MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); ASSERT(gaspi_segment_ptr(0, &_vptr)); ASSERT (gaspi_queue_size_max(&qmax)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); left_rank = (myrank + num_ranks - 1 ) % num_ranks; right_rank = (myrank + num_ranks + 1) % num_ranks; ASSERT( gaspi_write(0, //seg 0, //local off left_rank, //rank 0, //seg rem 0, //remote off 1, //size 32KB 0, //queue GASPI_BLOCK)); ASSERT( gaspi_write(0, //seg 0, //local off right_rank, //rank 0, //seg rem 0, //remote off 1, //size 32KB 0, //queue GASPI_BLOCK)); ASSERT (gaspi_wait(0, GASPI_BLOCK)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, 5000)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); printf("Rank %d: Finish\n", myrank); fflush(stdout); return EXIT_SUCCESS; }
void work(int tid) { gaspi_rank_t rankSend; gaspi_offset_t localOff= 0; gaspi_offset_t remOff = 0; gaspi_number_t queueSize, qmax; gaspi_size_t commSize ; ASSERT (gaspi_queue_size_max(&qmax)); for(commSize= 1; commSize < _500MB; commSize*=2 ) for(rankSend = 0; rankSend < numranks; rankSend++) { gaspi_printf("thread %d rank to send: %d - %lu bytes\n", tid, rankSend, commSize); gaspi_queue_size(1, &queueSize); if (queueSize > qmax - 100) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_write(0, localOff, rankSend, 0, remOff, commSize, 1, GASPI_BLOCK)); } ASSERT (gaspi_wait(1, GASPI_BLOCK)); gaspi_threads_sync(); }
void work(int tid) { gaspi_rank_t rankSend; gaspi_offset_t localOff = 81478066; gaspi_offset_t remOff = 81478246; gaspi_offset_t size = 1800; gaspi_number_t queueSize, qmax; ASSERT (gaspi_queue_size_max(&qmax)); for(rankSend = 0; rankSend < numranks; rankSend++) { gaspi_printf("thread %d rank to send: %d\n", tid, rankSend); gaspi_queue_size(1, &queueSize); if (queueSize > qmax - 24) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_write(0, localOff, rankSend, 0, remOff, size, 1, GASPI_BLOCK)); } ASSERT (gaspi_wait(1, GASPI_BLOCK)); gaspi_threads_sync(); }
int main(int argc, char *argv[]) { gaspi_rank_t numranks, myrank; gaspi_rank_t rankSend; gaspi_size_t segSize; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_segment_create(0, _2GB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); ASSERT( gaspi_segment_size(0, myrank, &segSize)); gaspi_printf("seg size %lu MB \n", segSize/1024/1024); // ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); gaspi_offset_t localOff= 814780664; gaspi_offset_t remOff = 81478246; gaspi_offset_t size = 1800; gaspi_number_t queueSize, qmax; ASSERT (gaspi_queue_size_max(&qmax)); for(rankSend = 0; rankSend < numranks; rankSend++) { gaspi_printf("rank to send: %d\n", rankSend); gaspi_queue_size(1, &queueSize); if (queueSize > qmax - 24) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_write(0, localOff, rankSend, 0, remOff, size, 1, GASPI_BLOCK)); } ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { gaspi_rank_t numranks, myrank; gaspi_rank_t rankSend; gaspi_size_t segSize; const gaspi_offset_t localOff= 0; const gaspi_offset_t remOff = 0; gaspi_number_t queueSize, qmax; gaspi_size_t commSize ; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_segment_create(0, _8MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_queue_size_max(&qmax)); for(commSize= 1; commSize <= _8MB; commSize*=2 ) { for(rankSend = 0; rankSend < numranks; rankSend++) { gaspi_printf("rank to send: %d - %lu bytes\n", rankSend, commSize); gaspi_queue_size(1, &queueSize); if (queueSize > qmax - 24) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_write(0, localOff, rankSend, 0, remOff, commSize, 1, GASPI_BLOCK)); } } ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { gaspi_configuration_t config = { 0 }; //argc, argv, "", (1UL << 30) gaspi_proc_init (config, GASPI_BLOCK); // 1 GiB DMA enabled memory per node gaspi_rank_t iProc, nProc; gaspi_proc_rank (&iProc); gaspi_proc_num (&nProc); void* temp_ptr; gaspi_segment_ptr(GPI_SEGMENT, &temp_ptr); int *mem = (int *) temp_ptr; // begin of DMA enabled memory int *src = mem; // offset 0 int *dst = mem + nProc; // offset nProc * sizeof(int) for (gaspi_rank_t p = 0; p < nProc; ++p) { src[p] = iProc * nProc + p; const unsigned long locOff = p * sizeof (int); const unsigned long remOff = (nProc + iProc) * sizeof (int); gaspi_write(GPI_SEGMENT, locOff, p, GPI_SEGMENT, remOff, sizeof (int), 0, GASPI_BLOCK); } gaspi_wait (0, GASPI_BLOCK); gaspi_barrier (GASPI_GROUP_ALL, GASPI_BLOCK); dump (src, iProc, nProc, "src"); dump (dst, iProc, nProc, "dst"); gaspi_proc_term (GASPI_BLOCK); return EXIT_SUCCESS; }
/*
 * Probe all ranks and refresh the health vector.
 *
 * Posts a sizeof(int) write on the health-check segment to every rank that is
 * not flagged in avoid_list, waits on the queue, then fetches the state
 * vector with gaspi_state_vec_get().  Every rank whose entry in health_vec
 * equals 1 is flagged in avoid_list so it is skipped by later probes.
 *
 * health_vec: state vector to fill; indexed by rank.
 * avoid_list: per-rank flags (1 = do not probe); updated in place.
 *
 * Relies on the file-scope numprocs, myrank, queue_id and
 * gm_seg_health_chk_array_id.  The result of gaspi_wait() is intentionally
 * not asserted.
 */
void send_global_msg_to_check_state(gaspi_state_vector_t health_vec, gaspi_rank_t *avoid_list)
{
  /* To detect several simultaneous failures the probe would have to be
     repeated; currently a single round is performed. */
  const int rounds = 1;
  gaspi_timeout_t probe_timeout = GASPI_BLOCK;
  int round, rank;

  gaspi_printf("Checking global health state\n");

  for (round = 0; round < rounds; ++round)
    {
      for (rank = 0; rank < numprocs; ++rank)
        {
          if (avoid_list[rank] == 1)
            {
              continue;
            }

          ASSERT(gaspi_write(gm_seg_health_chk_array_id, myrank, rank,
                             gm_seg_health_chk_array_id, myrank, sizeof(int),
                             queue_id, probe_timeout));
        }

      gaspi_wait(queue_id, probe_timeout);

      ASSERT(gaspi_state_vec_get(health_vec));

      /* Remember ranks reported with state 1 so they are not probed again. */
      for (rank = 0; rank < numprocs; ++rank)
        {
          if (health_vec[rank] == 1)
            {
              avoid_list[rank] = 1;
            }
        }
    }

  print_health_vec(health_vec);
}
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); const unsigned long N = (1 << 13); gaspi_rank_t P, myrank; ASSERT (gaspi_proc_num(&P)); ASSERT (gaspi_proc_rank(&myrank)); gaspi_printf("P = %d N = %lu\n", P, N); gaspi_printf("Seg size: %lu MB\n", MAX (_4GB, 2 * ((N/P) * N * 2 * sizeof (double)))/1024/1024); if(gaspi_segment_create(0, _1GB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED) != GASPI_SUCCESS){ gaspi_printf("Failed to create segment\n"); return -1; } gaspi_pointer_t _vptr; if(gaspi_segment_ptr(0, &_vptr) != GASPI_SUCCESS) printf("gaspi_segment_ptr failed\n"); gaspi_number_t qmax ; ASSERT (gaspi_queue_size_max(&qmax)); gaspi_printf("Queue max: %lu\n", qmax); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); int i; gaspi_number_t queueSize; int rankSend = (myrank + 1) % P; gaspi_printf("rank to: %d\n", rankSend); for (i = 0; i < 2 * N; i ++) { gaspi_queue_size(1, &queueSize); if (queueSize > qmax - 24) { gaspi_return_t ret; do { ret = gaspi_wait(1, GASPI_TEST); assert (ret != GASPI_ERROR); } while(ret != GASPI_SUCCESS); gaspi_queue_size(1, &queueSize); assert(queueSize == 0); } ASSERT (gaspi_write(0, 4, rankSend, 0, 6, 32768, 1, GASPI_TEST)); } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { gaspi_rank_t nprocs, myrank, i; int j, n; gaspi_rank_t *avoid_list; gaspi_group_t survivors; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT(gaspi_proc_num(&nprocs)); ASSERT(gaspi_proc_rank(&myrank)); ASSERT(gaspi_segment_create(0, _4MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); avoid_list = (gaspi_rank_t *) malloc(nprocs * sizeof(gaspi_rank_t)); assert (avoid_list != NULL); memset(avoid_list, 0, nprocs * sizeof(gaspi_rank_t)); gaspi_state_vector_t vec = (gaspi_state_vector_t) malloc(nprocs); ASSERT(gaspi_state_vec_get(vec)); //check that everyone is healthy for(i = 0; i < nprocs; i++) { assert(vec[i] == 0); } //sync ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); //now last rank dies if(myrank == nprocs - 1) exit(-1); else { //create group of survivors ASSERT(gaspi_group_create(&survivors)); for(i = 0; i < nprocs - 1; i++) ASSERT(gaspi_group_add(survivors, i)); ASSERT(gaspi_group_commit(survivors, GASPI_BLOCK)); gaspi_printf("Done with groups\n"); sleep(2); } //the others communicate gaspi_return_t retval; for(j = 0; j < 10; j++) { gaspi_printf("Iteration %d\n", j); for(i = 0; i < nprocs; i++) { if( avoid_list[i] != 1 ) ASSERT(gaspi_write(0, 0, i, 0, 0, sizeof(int), 0, GASPI_BLOCK)); } retval = gaspi_wait(0, GASPI_BLOCK); //problem found -> recover if(retval != GASPI_SUCCESS) { ASSERT(gaspi_state_vec_get(vec)); for(n = 0; n < nprocs; n++) { if(vec[n] != GASPI_STATE_HEALTHY) { gaspi_printf("Problem with node %d detected\n", n); assert(n == (nprocs - 1)); ASSERT(recover()); avoid_list[n] = 1; } } } } ASSERT (gaspi_barrier(survivors, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); gaspi_printf("exiting\n"); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int i, iter; int ret = 0; gaspi_rank_t myrank, numranks; gaspi_size_t mem_size = 0UL, j; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_proc_num(&numranks)); if( numranks < 2 ) { return EXIT_SUCCESS; } mem_size = 2 * SLOT_SIZE * (numranks - 1); if(myrank == 0) { printf("Mem size: %lu (%.2f MB)\nProcs: %u Max Slot size %lu Iterations %d\n", mem_size, mem_size * 1.0f / 1024/ 1024, numranks, (gaspi_size_t) SLOT_SIZE, MAX_ITERATIONS); #ifdef WITH_SYNC printf("Using notifications only\n"); #endif } ASSERT (gaspi_segment_create(0, mem_size, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); float *mptr = (float *) _vptr; //generate random srand((unsigned)time(0)); srand48((unsigned) time(0)); gaspi_size_t cur_slot_size = SLOT_SIZE; for(cur_slot_size = SLOT_SIZE; cur_slot_size >= sizeof(float); cur_slot_size/=2) { if(myrank == 0) printf("===== Slot Size %lu ====\n", cur_slot_size); for(iter = 0; iter < MAX_ITERATIONS; iter++) { if(myrank == 0) printf("iteration %3d... 
", iter); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); /* fill slots with randoms */ for(j = 0; j < (mem_size / sizeof(float) / 2); j++) { mptr[j]= drand48() + (myrank*1.0); } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); gaspi_offset_t offset_in = 0, offset_out = mem_size / 2; /* rank 0 write to all others */ if(myrank == 0) { for (i = 1; i < numranks; i++) { offset_in = (i - 1) * cur_slot_size; #ifdef WITH_SYNC ASSERT (gaspi_write_notify(0, offset_in, i, 0, 0, cur_slot_size, 0, 1, 0, GASPI_BLOCK)); #else ASSERT (gaspi_write(0, offset_in, i, 0, 0, cur_slot_size, 0, GASPI_BLOCK)); #endif } ASSERT(gaspi_wait(0, GASPI_BLOCK)); } #ifdef WITH_SYNC else { gaspi_notification_id_t id; gaspi_notification_t val; ASSERT(gaspi_notify_waitsome(0, 0, 1, &id, GASPI_BLOCK)); ASSERT(gaspi_notify_reset(0, id, &val)); assert(val == 1); } #endif #ifndef WITH_SYNC ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); #endif /* other ranks all write back to 0 */ if(myrank != 0) { offset_in = 0; offset_out = (mem_size / 2) + (cur_slot_size * (myrank - 1)); #ifdef WITH_SYNC ASSERT (gaspi_write_notify(0, offset_in, 0, 0, offset_out, cur_slot_size, myrank, 1, 0, GASPI_BLOCK)); #else ASSERT (gaspi_write(0, offset_in, i, 0, offset_out, cur_slot_size, 0, GASPI_BLOCK)); #endif ASSERT(gaspi_wait(0, GASPI_BLOCK)); } #ifdef WITH_SYNC else { gaspi_notification_id_t id; gaspi_notification_t val; int notification_counter = 0; do { ASSERT(gaspi_notify_waitsome(0, 1, numranks - 1, &id, GASPI_BLOCK)); ASSERT(gaspi_notify_reset(0, id, &val)); assert(val == 1); notification_counter++; } while( notification_counter < numranks - 1); } #endif #ifndef WITH_SYNC ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); #endif if(myrank == 0) { /* check correctness */ float *in = (float *) _vptr; float *out = (float *) ((char *) _vptr + mem_size / 2); const gaspi_size_t total_elems = (cur_slot_size * (numranks - 1) / sizeof(float)); for(j = 0; j < total_elems; j++) { if(in[j] != out[j]) { 
printf("Different values at pos %lu: %f %f (iterations %d)\n", j, in[j], out[j], iter); ret = -1; goto end; } } printf("All fine!\n"); } } } end: ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return ret; }
int main(int argc, char *argv[]) { int k = 0; int ret = 0; unsigned long j; const gaspi_size_t size = 4096; const gaspi_size_t memSize = _4GB; gaspi_offset_t offset_write = 0; gaspi_offset_t offset_read = _2GB; gaspi_offset_t offset_check = 3221225472; gaspi_number_t qmax ; gaspi_number_t queueSize; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_queue_size_max(&qmax)); ASSERT (gaspi_segment_create(0, memSize, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); /* get memory area pointer */ float *mptr_f = (float *) _vptr; char *mptr_c = (char *) _vptr; gaspi_rank_t myrank, highestnode; ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_proc_num(&highestnode)); while(k <= RUNS) { //generate random srand((unsigned)time(0)); srand48((unsigned) time(0)); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); //clean for(j = 0; j < memSize; j++) mptr_c[j]= 0; /* fill randoms up to 1GB */ for(j = 0; j < (GB / sizeof(float)); j++) { mptr_f[j]= drand48() + (myrank * 1.0); } #ifdef DEBUG gaspi_printf("random value in pos 0 %f\n", mptr_f[0]); #endif ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); gaspi_printf("\n....Running iteration %d of %d...\n",k, RUNS); const unsigned long packets = (GB / size); for(j = 0; j < packets; j++) { ASSERT(gaspi_queue_size(0, &queueSize)); if (queueSize > qmax - 24) { ASSERT(gaspi_wait(0, GASPI_BLOCK)); } ASSERT (gaspi_write(0, offset_write, (myrank + 1) % highestnode, 0, offset_read, size, 0, GASPI_BLOCK)); offset_write += size; offset_read += size; } offset_write=0; offset_read = _2GB; #ifdef DEBUG gaspi_printf("%d bytes written!\n", packets * size); #endif /* notify remote that data is written */ ASSERT (gaspi_notify( 0, (myrank + 1) % highestnode, 0, 1, 0, GASPI_BLOCK)); gaspi_notification_id_t recv_id; ASSERT(gaspi_notify_waitsome(0, 0, 1, &recv_id, GASPI_BLOCK)); assert(recv_id == 0); gaspi_notification_t notification_val; ASSERT( 
gaspi_notify_reset(0, recv_id, ¬ification_val)); /* notify remote that data has arrived */ ASSERT (gaspi_notify( 0, (myrank + highestnode - 1) % highestnode, 1, 1, 0, GASPI_BLOCK)); gaspi_notification_id_t ack_id; ASSERT(gaspi_notify_waitsome(0, 1, 1, &ack_id, GASPI_BLOCK)); assert(ack_id == 1); ASSERT( gaspi_notify_reset(0, ack_id, ¬ification_val)); /* check if data was written successfully */ ASSERT (gaspi_read(0, offset_check, (myrank + 1) % highestnode, 0, offset_read, GB / 2, 0, GASPI_BLOCK)); ASSERT (gaspi_read(0, offset_check + (GB / 2), (myrank + 1) % highestnode, 0, offset_read + (GB / 2), GB / 2, 0, GASPI_BLOCK)); ASSERT (gaspi_wait(0, GASPI_BLOCK)); #ifdef DEBUG gaspi_printf("Values %f %f %f \n", mptr_f[0], mptr_f[offset_read / sizeof(float)], mptr_f[offset_check / sizeof(float)]); #endif j = 0; while(j < GB / sizeof(float) ) { if(mptr_f[j] != mptr_f[offset_check / sizeof(float) + j]){ gaspi_printf("value incorrect %f-%f at %d \n", mptr_f[j], mptr_f[offset_check / sizeof(float) + j], j); ret = -1; goto out; } j++; } #ifdef DEBUG gaspi_printf("Check!\n"); #endif k++; } out: gaspi_printf("Waiting to finish...\n"); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return ret; }
int main(int argc, char *argv[]) { TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); gaspi_rank_t numranks, myrank; ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); int rankSend = (myrank + 1) % numranks; ASSERT(gaspi_segment_create(0, _1MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_size_t segSize; ASSERT( gaspi_segment_size(0, myrank, &segSize)); unsigned char * pGlbMem; gaspi_pointer_t _vptr; ASSERT(gaspi_segment_ptr(0, &_vptr)); pGlbMem = ( unsigned char *) _vptr; gaspi_number_t queueSize, qmax ; ASSERT (gaspi_queue_size_max(&qmax)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); const unsigned long localOff = 0; const unsigned long remOff = 0; /* write_notify */ do { ASSERT(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); /* write */ do { ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); /* write + write_notify */ do { ASSERT(gaspi_write(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax - 1); EXPECT_FAIL_WITH(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_write_notify(0, localOff, rankSend, 0, remOff, 1, (gaspi_notification_id_t) myrank, 1, 1, GASPI_BLOCK)); /* read */ do { ASSERT(gaspi_read(0, localOff, rankSend, 0, 
remOff, 1, 1, GASPI_BLOCK)); gaspi_queue_size(1, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH(gaspi_read(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT(gaspi_read(0, localOff, rankSend, 0, remOff, 1, 1, GASPI_BLOCK)); /* write_list_notify */ { const gaspi_number_t nListElems = 255; gaspi_number_t n; gaspi_segment_id_t localSegs[nListElems]; gaspi_offset_t localOffs[nListElems]; const gaspi_rank_t rank2send = (myrank + 1) % numranks; gaspi_segment_id_t remSegs[nListElems]; gaspi_offset_t remOffs[nListElems]; gaspi_size_t sizes[nListElems]; const unsigned int bytes = sizeof(int); gaspi_offset_t initLocOff = 0; gaspi_offset_t initRemOff = (bytes * nListElems + 64); for(n = 0; n < nListElems; n++) { sizes[n] = bytes; localSegs[n] = 0; localOffs[n] = initLocOff; initLocOff += bytes; remSegs[n] = 0; remOffs[n] = initRemOff; initRemOff += bytes; } do { ASSERT( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK)); gaspi_queue_size(0, &queueSize); } while(queueSize < qmax); EXPECT_FAIL_WITH( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK), GASPI_QUEUE_FULL); ASSERT (gaspi_wait(0, GASPI_BLOCK)); ASSERT( gaspi_write_list_notify( nListElems, localSegs, localOffs, rank2send, remSegs, remOffs, sizes, 0, myrank, 1, 0, GASPI_BLOCK)); } ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main(int argc, char* argv[]) { int i, j; gaspi_number_t gsize; int comm_state = WORKING; int num_failures = 0; int timesteps = 0; ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_proc_num(&numprocs)); read_params(argc, argv, ×teps, &numprocs_idle); numprocs_working = numprocs - numprocs_idle; numprocs_working_and_idle = numprocs_working + numprocs_idle; gaspi_rank_t *comm_main_ranks = malloc( numprocs_idle * sizeof(gaspi_rank_t)); init_array_2(comm_main_ranks, numprocs_working); /* contains info of all processes: which are working(0), broken(1) and idle(2). keeps updated all the time(iterations) */ int * status_processes = (int *) malloc(numprocs * sizeof(int)); init_array_3(status_processes, numprocs, WORKING); for(i = numprocs-1, j=0; j < numprocs_idle;--i,++j) { status_processes[i] = IDLE; // putting last processes to IDLE } // ===== GASPI group creation ===== if(status_processes[myrank]==WORKING) { ASSERT(gaspi_group_create(&COMM_MAIN)); gaspi_number_t i; for(i=0; i<numprocs; i++) { if(status_processes[i]==WORKING) { ASSERT(gaspi_group_add(COMM_MAIN, i)); ASSERT(gaspi_group_size(COMM_MAIN, &gsize)); } } ASSERT(gaspi_group_ranks (COMM_MAIN, comm_main_ranks)); ASSERT(gaspi_group_commit (COMM_MAIN, GASPI_BLOCK)); } /* ====== Init a SYNC FLAGS Segment ====== */ /* used to communicate the WORKING, BROKEN, or FINISHED_WORK status between the working and idle processes. 
*/ gaspi_size_t SYNC_global_mem_size; SYNC_global_mem_size = numprocs * sizeof(int); gaspi_pointer_t gm_ptr_sync=NULL; ASSERT(init_segment (gm_seg_sync_flags_id, SYNC_global_mem_size)); ASSERT(gaspi_segment_ptr (gm_seg_sync_flags_id, &gm_ptr_sync)); int * sync_flags = (int *) gm_ptr_sync; init_array_3(sync_flags, numprocs, WORKING); /* ====== Init a health check write FLAGS Segment ====== */ /* This array is used to send the gaspi_write message write before health_chk routine, which will then update the gaspi internal health vector */ gaspi_size_t health_chk_global_mem_size; health_chk_global_mem_size = numprocs*sizeof(int); gaspi_pointer_t gm_ptr_health_chk=NULL; ASSERT(init_segment (gm_seg_health_chk_array_id, health_chk_global_mem_size)); ASSERT(gaspi_segment_ptr (gm_seg_health_chk_array_id, &gm_ptr_health_chk)); gaspi_state_vector_t health_vec = (gaspi_state_vector_t) malloc(numprocs); ASSERT(gaspi_state_vec_get(health_vec)); gaspi_rank_t * avoid_list= (gaspi_rank_t *) malloc(numprocs * sizeof(gaspi_rank_t)); for(i = 0;i < numprocs; ++i) avoid_list[i] = (gaspi_rank_t) 0; gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK); /* ===== TIME-STEP LOOP ===== */ if(status_processes[myrank]==IDLE) { /* IDLE processes remain in this loop */ while(1) { gaspi_printf("%d.", myrank); if(sync_flags[0] == WORKING) { /* NO FAILURE REPORTED */ usleep(1000000); } if(sync_flags[0] == BROKEN) { /* FAILURE REPORTED */ gaspi_printf("myrank: %d Broken reported\n", myrank); comm_state=BROKEN; break; } if(sync_flags[0] == WORKFINISHED) { /* WORKFINISHED REPORTED */ gaspi_printf("myrank: %d WorkFinished reported\n", myrank); comm_state = WORKFINISHED; break; } } } int time_step; for(time_step=1; time_step <= timesteps && comm_state!=WORKFINISHED; time_step++) { gaspi_printf("== time_step: %d ==\n", time_step); if(comm_state==WORKING && status_processes[myrank]==WORKING) { gaspi_barrier(COMM_MAIN, GASPI_TIMEOUT_TIME); sleep(1); // NOTE: this is the work section. 
if(time_step == 5 && myrank== 1) { exit (-1); } } if(time_step<timesteps ) { send_global_msg_to_check_state(health_vec, avoid_list); num_failures = check_comm_health(status_processes, health_vec); gaspi_printf("%d NUM_FAILURES at timestep %d = %d\n", myrank, time_step, num_failures); if( num_failures != 0 ) { rescue_process = numprocs_working; if(myrank==0) { // message the IDLE process sync_flags[0]=BROKEN; for(i = 0 ; i < num_failures ; ++i) { /* TODO: multiple failures at the same time. */ gaspi_printf("messaging rescue_process: %d\n", rescue_process); ASSERT(gaspi_write(gm_seg_sync_flags_id, 0, rescue_process, gm_seg_sync_flags_id, 0, sizeof(int), 0, GASPI_BLOCK)); rescue_process++; } } if(myrank==0 || myrank==rescue_process) gaspi_printf("%d REPAIRING COMM_MAIN FLAG 1\n", myrank); update_status_processes_array(status_processes, health_vec); numprocs_working_and_idle = refresh_numprocs_working_and_idle(status_processes); if(myrank != rescue_process) { ASSERT(gaspi_group_delete(COMM_MAIN)); ASSERT(recover()); } ASSERT(gaspi_group_create(&COMM_MAIN_NEW)); for(i = 0; i < numprocs; i++) { if(status_processes[i]==WORKING) { ASSERT(gaspi_group_add(COMM_MAIN_NEW, i)); ASSERT(gaspi_group_size(COMM_MAIN_NEW, &gsize)); if(gsize == numprocs_working) break; } } gaspi_printf("%d: COMM_MAIN_NEW size is: %hi\n", myrank, gsize); ASSERT(gaspi_group_commit (COMM_MAIN_NEW, GASPI_BLOCK)); init_array_2(comm_main_ranks, numprocs_working); ASSERT(gaspi_group_ranks (COMM_MAIN_NEW, comm_main_ranks)); gaspi_printf("printing group_ranks_main: \n"); gaspi_printf_array(comm_main_ranks, numprocs_working); comm_state = WORKING; gaspi_printf("%d REPAIRING COMM_MAIN_NEW FLAG 2\n", myrank); if(status_processes[myrank] == WORKING) { ASSERT(gaspi_barrier(COMM_MAIN_NEW, GASPI_BLOCK)); ASSERT(gaspi_barrier(COMM_MAIN_NEW, GASPI_BLOCK)); } /* set things to work again */ COMM_MAIN = COMM_MAIN_NEW; time_step = 5; } } } if(myrank == 0) { gaspi_printf("finished successfully\n"); } gaspi_proc_term(10000); 
return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int j,i,k=0; int ret=0; const gaspi_size_t size=4096;//4k const gaspi_size_t memSize = 4294967296; //4GB gaspi_offset_t offset_write=0, offset_read = memSize / 2, offset_check = 3221225472 ; ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_segment_create(0, memSize, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED)); gaspi_pointer_t _vptr; ASSERT (gaspi_segment_ptr(0, &_vptr)); /* get memory area pointer */ #ifdef FLOAT float *mptr = (float *) _vptr; #else int *mptr = (int *) _vptr; #endif gaspi_rank_t myrank, highestnode; ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_proc_num(&highestnode)); while(k <= RUNS) { //generate random srand((unsigned)time(0)); #ifdef FLOAT srand48((unsigned) time(0)); #endif //clean for(j = 0; j < (memSize / 4); j++) mptr[j]= 0; //fill randoms up to 1GB for(j = 0; j < (memSize / 16); j++) { #ifdef FLOAT mptr[j]= drand48() + (myrank*1.0); #else mptr[j]= rand() + myrank; #endif } #ifdef DEBUG #ifdef FLOAT gaspi_printf("random value in pos 0 %f\n", mptr[0]); #else gaspi_printf("random value in pos 0 %d\n", mptr[0]); #endif #endif //DEBUG ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); gaspi_printf("\n....Running iteration %d of %d...\n",k, RUNS); for(i = 0; i < ITERATIONS; i++) { for(j = 0; j < ITERATIONS; j++) { ASSERT (gaspi_write(0, offset_write, (myrank + 1) % highestnode, 0, offset_read, size, 0, GASPI_BLOCK)); offset_write += size; offset_read += size; } ASSERT (gaspi_wait(0, GASPI_BLOCK)); } #ifdef DEBUG gaspi_printf("%d bytes written!\n", ITERATIONS * ITERATIONS * size); #endif //check if data was written successfully ASSERT (gaspi_read(0, offset_check, (myrank + 1) % highestnode, 0, memSize/2, GB, 0, GASPI_BLOCK)); ASSERT (gaspi_wait(0, GASPI_BLOCK)); #ifdef DEBUG gaspi_printf("%d bytes read!\n",GB); #endif j=0; #ifdef DEBUG #ifdef FLOAT gaspi_printf("Values %f %f %f \n", mptr[0], mptr[memSize/8], mptr[offset_check/4]); #else gaspi_printf("Values %d %d %d \n", mptr[0], 
mptr[memSize/8], mptr[offset_check/4]); #endif #endif//DEBUG while(j < GB / 4 ) { if(mptr[j] != mptr[offset_check / 4 + j]){ #ifdef FLOAT gaspi_printf("value incorrect %f-%f at %d \n",mptr[j],mptr[offset_check / 4],j); #else gaspi_printf("value incorrect %d-%d at %d \n",mptr[j],mptr[offset_check / 4],j); #endif ret = -1; goto out; } j++; } offset_write=0; offset_read = memSize / 2; #ifdef DEBUG gaspi_printf("Check!\n"); #endif k++; } out: gaspi_printf("Waiting to finish...\n"); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return ret; }
int main(int argc, char *argv[]) { gaspi_rank_t numranks, myrank; gaspi_rank_t rankSend; gaspi_size_t segSize; const gaspi_offset_t localOff_r= 0; const gaspi_offset_t remOff_r = 0; const gaspi_offset_t localOff_w = _128MB / 2 ; const gaspi_offset_t remOff_w = _128MB / 2; gaspi_number_t queueSize, qmax; const gaspi_size_t commSize = _8MB; int i; gaspi_gpu_t gpus[8]; gaspi_gpu_num nGPUs; TSUITE_INIT(argc, argv); ASSERT (gaspi_proc_init(GASPI_BLOCK)); ASSERT (gaspi_proc_num(&numranks)); ASSERT (gaspi_proc_rank(&myrank)); ASSERT (gaspi_init_GPUs()); ASSERT (gaspi_number_of_GPUs(&nGPUs)); ASSERT (gaspi_GPU_ids(gpus)); ASSERT (gaspi_segment_create(0, _128MB, GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED|GASPI_MEM_GPU)); ASSERT( gaspi_segment_size(0, myrank, &segSize)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_queue_size_max(&qmax)); for(i = 0; i < 100; i++ ) { for(rankSend = 0; rankSend < numranks; rankSend++) { if(rankSend == myrank) continue; gaspi_printf("partner rank: %d - %lu bytes (%d)\n", rankSend, commSize, i); ASSERT (gaspi_queue_size(1, &queueSize)); if (queueSize > qmax - 24) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_read(0, localOff_r, rankSend, 0, remOff_r, commSize, 1, GASPI_BLOCK)); } } for(i = 0; i < 100; i++ ) { for(rankSend = 0; rankSend < numranks; rankSend++) { if(rankSend == myrank) continue; ASSERT (gaspi_queue_size(1, &queueSize)); if (queueSize > qmax - 24) ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_write(0, localOff_r, rankSend, 0, remOff_r, commSize, 1, GASPI_BLOCK)); } } ASSERT (gaspi_wait(1, GASPI_BLOCK)); ASSERT (gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); ASSERT (gaspi_proc_term(GASPI_BLOCK)); return EXIT_SUCCESS; }
int main (int argc, char *argv[]) { int i, j, t; gaspi_rank_t myrank; char *ptr0; //on numa architectures you have to map this process to the numa node where nic is installed if (start_bench (2) != 0) { printf ("Initialization failed\n"); exit (-1); } // BENCH // gaspi_proc_rank (&myrank); if (gaspi_segment_ptr (0, (void **) &ptr0) != GASPI_SUCCESS) { printf ("gaspi_segment_ptr failed !\n"); exit (-1); } gaspi_float cpu_freq; gaspi_cpu_frequency(&cpu_freq); if (myrank < 2) { if(myrank == 0) { printf("-----------------------------------\n"); printf ("%12s\t%5s\n", "Bytes", "Lat(usecs)"); printf("-----------------------------------\n"); } int bytes = 2; volatile char *postBuf = (volatile char *) ptr0; for (i = 1; i < 24; i++) { volatile char *pollBuf = (volatile char *) (ptr0 + ( 2 * bytes -1 )); int rcnt = 0; int cnt = 0; gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK); for (j = 0; j < ITERATIONS; j++) { if (rcnt < ITERATIONS && !(cnt < 1 && myrank == 1)) { rcnt++; while (*pollBuf != (char) rcnt) { #ifdef MIC _mm_delay_32(32); #else _mm_pause(); #endif } } stamp[j] = get_mcycles (); postBuf[bytes - 1] = (char) ++cnt; gaspi_write (0, 0, myrank ^ 0x1, 0, bytes, bytes, 0, GASPI_BLOCK); gaspi_wait (0, GASPI_BLOCK); } for (t = 0; t < (ITERATIONS - 1); t++) delta[t] = stamp[t + 1] - stamp[t]; qsort (delta, (ITERATIONS - 1), sizeof *delta, mcycles_compare); const double div = 1.0 / cpu_freq; const double ts = (double) delta[ITERATIONS / 2] * div * 0.5; if(myrank == 0) printf ("%12d\t%4.2f\n", bytes, ts); bytes <<= 1; } } end_bench (); return 0; }
int main(int argc, char *argv[]) { int i; gaspi_rank_t rank, nprocs; gaspi_notification_id_t id; const int num_elems = 1024; TSUITE_INIT( argc, argv ); ASSERT( gaspi_proc_init(GASPI_BLOCK) ); ASSERT( gaspi_proc_num(&nprocs) ); ASSERT( gaspi_proc_rank(&rank) ); const gaspi_rank_t left = (rank + nprocs - 1 ) % nprocs; const gaspi_rank_t right = (rank + nprocs + 1) % nprocs; /* Create and fill buffer */ int * const buf = (int *) malloc(num_elems * sizeof(int)); assert( buf != NULL); for (i = 0; i < num_elems; i++) { buf[i] = rank; } ASSERT( gaspi_segment_use( 0, buf, num_elems * sizeof(int), GASPI_GROUP_ALL, GASPI_BLOCK, 0) ); ASSERT( gaspi_segment_create( 1, num_elems * sizeof(int), GASPI_GROUP_ALL, GASPI_BLOCK, GASPI_MEM_INITIALIZED) ); ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); /* write data to neighbour */ ASSERT( gaspi_write( 0, 0, right, 1, 0, num_elems * sizeof(int), 0, GASPI_BLOCK) ); ASSERT( gaspi_notify( 1, right, 0, 1, 0, GASPI_BLOCK ) ); ASSERT( gaspi_notify_waitsome( 1, 0, 1, &id, GASPI_BLOCK ) ); ASSERT( gaspi_wait( 0, GASPI_BLOCK ) ); /* Check data */ gaspi_pointer_t seg1_ptr; ASSERT( gaspi_segment_ptr( 1, &seg1_ptr ) ); int * recv_buf = (int *) seg1_ptr; for (i = 0; i < num_elems; i++) { assert(recv_buf[i] == left); } ASSERT( gaspi_segment_delete(0)); ASSERT( gaspi_segment_delete(1)); for (i = 0; i < num_elems; i++) { assert(buf[i] == rank); } ASSERT( gaspi_barrier( GASPI_GROUP_ALL, GASPI_BLOCK ) ); ASSERT( gaspi_proc_term( GASPI_BLOCK ) ); return EXIT_SUCCESS; }
gaspi_return_t pgaspi_gpu_write(const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { if( glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId < 0 || size <= GASPI_GPU_DIRECT_MAX ) { return gaspi_write(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, queue, timeout_ms); } if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; char* host_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_ptr + NOTIFY_OFFSET + offset_local); char* device_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr + offset_local); gaspi_gpu* agpu = _gaspi_find_gpu(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId); if( !agpu ) { gaspi_print_error("No GPU found or not initialized (gaspi_init_GPUs)."); return GASPI_ERROR; } int size_left = size; int copy_size = 0; int gpu_offset = 0; const int BLOCK_SIZE = GASPI_GPU_BUFFERED; const gaspi_cycles_t s0 = gaspi_get_cycles (); while(size_left > 0) { int i; for(i = 0; i < GASPI_CUDA_EVENTS; i++) { if(size_left > BLOCK_SIZE) copy_size = BLOCK_SIZE; else copy_size = size_left; if( cudaMemcpyAsync(host_ptr + gpu_offset, device_ptr + gpu_offset, copy_size, cudaMemcpyDeviceToHost, agpu->streams[queue])) { unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } glb_gaspi_ctx.ne_count_c[queue]++; agpu->events[queue][i].segment_remote = segment_id_remote; agpu->events[queue][i].segment_local = segment_id_local; agpu->events[queue][i].size = copy_size; agpu->events[queue][i].rank = rank; agpu->events[queue][i].offset_local = offset_local+gpu_offset; agpu->events[queue][i].offset_remote = offset_remote+gpu_offset; agpu->events[queue][i].in_use =1; cudaError_t err = 
cudaEventRecord(agpu->events[queue][i].event, agpu->streams[queue]); if(err != cudaSuccess) { glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT; unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } gpu_offset += copy_size; size_left -= copy_size; if(size_left == 0) break; if(agpu->events[queue][i].ib_use) { struct ibv_wc wc; int ne; do { ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc); glb_gaspi_ctx.ne_count_c[queue] -= ne; if (ne == 0) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } } while(ne==0); agpu->events[queue][i].ib_use = 0; } } for(i = 0; i < GASPI_CUDA_EVENTS; i++) { cudaError_t error; if ( agpu->events[queue][i].in_use == 1 ) { do { error = cudaEventQuery(agpu->events[queue][i].event ); if( cudaSuccess == error ) { if (_gaspi_event_send(&agpu->events[queue][i],queue)) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } agpu->events[queue][i].in_use = 0; } else if(error == cudaErrorNotReady) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } else { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } } while(error != cudaSuccess); } } } unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_SUCCESS; }