gaspi_return_t pgaspi_write_list_notify (const gaspi_number_t num, gaspi_segment_id_t * const segment_id_local, gaspi_offset_t * const offset_local, const gaspi_rank_t rank, gaspi_segment_id_t * const segment_id_remote, gaspi_offset_t * const offset_remote, gaspi_size_t * const size, const gaspi_segment_id_t segment_id_notification, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { if(num == 0) return GASPI_ERR_INV_NUM; if(notification_value == 0) return GASPI_ERR_INV_NOTIF_VAL; #ifdef DEBUG gaspi_verify_init("gaspi_write_list_notify"); gaspi_verify_queue(queue); gaspi_number_t n; for(n = 0; n < num; n++) { gaspi_verify_local_off(offset_local[n], segment_id_local[n]); gaspi_verify_remote_off(offset_remote[n], segment_id_remote[n], rank); gaspi_verify_comm_size(size[n], segment_id_local[n], segment_id_remote[n], rank, GASPI_MAX_TSIZE_C); } #endif gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_write_list_notify(num, segment_id_local, offset_local, rank, segment_id_remote, offset_remote, (unsigned int *)size, segment_id_notification, notification_id, notification_value, queue); glb_gaspi_ctx.ne_count_c[queue] += (int) (num + 1); endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
/*
 * Passive receive into (segment_id_local, offset_local).
 *
 * Serialised through glb_gaspi_ctx.lockPR; returns GASPI_TIMEOUT when that
 * lock cannot be taken via lock_gaspi_tout, otherwise the result of
 * pgaspi_dev_passive_receive (which also receives rem_rank and timeout_ms).
 */
gaspi_return_t
pgaspi_passive_receive (const gaspi_segment_id_t segment_id_local,
                        const gaspi_offset_t offset_local,
                        gaspi_rank_t * const rem_rank,
                        const gaspi_size_t size,
                        const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_passive_receive");
  gaspi_verify_local_off(offset_local, segment_id_local, size);
  gaspi_verify_comm_size(size, segment_id_local, segment_id_local, glb_gaspi_ctx.rank, GASPI_MAX_TSIZE_P);

  if( lock_gaspi_tout (&glb_gaspi_ctx.lockPR, timeout_ms) )
    {
      return GASPI_TIMEOUT;
    }

  const gaspi_return_t ret = pgaspi_dev_passive_receive(segment_id_local,
                                                        offset_local,
                                                        rem_rank,
                                                        size,
                                                        timeout_ms);

  unlock_gaspi (&glb_gaspi_ctx.lockPR);

  return ret;
}
/*
 * Reset notification `notification_id` of the local segment to zero and
 * optionally hand back the value it held.
 *
 * old_notification_val may be NULL, in which case the previous value is
 * discarded (a warning is printed in DEBUG builds).
 *
 * Always returns GASPI_SUCCESS once the verification macros have passed.
 */
gaspi_return_t
pgaspi_notify_reset (const gaspi_segment_id_t segment_id_local,
                     const gaspi_notification_id_t notification_id,
                     gaspi_notification_t * const old_notification_val)
{
  gaspi_verify_init("gaspi_notify_reset");
  gaspi_verify_segment(segment_id_local);
  gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_local]);

#ifdef DEBUG
  if( old_notification_val == NULL )
    {
      gaspi_print_warning("NULL pointer on parameter old_notification_val (gaspi_notify_reset).");
    }
#endif

  volatile unsigned char *segPtr;

#ifdef GPI2_CUDA
  if( glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId >= 0 )
    {
      segPtr = (volatile unsigned char *) glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_addr;
    }
  else
#endif
    {
      segPtr = (volatile unsigned char *) glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr;
    }

  /* The notification slots are addressed as unsigned ints from the start of
     the segment memory. */
  volatile unsigned int *p = (volatile unsigned int *) segPtr;

  /* Atomically fetch the current value and clear the slot in one step.
     A compare-and-swap against a separately read "expected" value can fail
     when the slot changes in between, leaving the notification set while
     still reporting a value to the caller. AND-ing with 0 always clears. */
  const unsigned int res = __sync_fetch_and_and (&p[notification_id], 0);

  if( old_notification_val != NULL )
    {
      *old_notification_val = res;
    }

  return GASPI_SUCCESS;
}
gaspi_return_t pgaspi_gpu_write_notify(const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_gpu_write_notify"); gaspi_verify_local_off(offset_local, segment_id_local, size); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank, size); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); if( notification_value == 0 ) { gaspi_printf("Zero is not allowed as notification value."); return GASPI_ERR_INV_NOTIF_VAL; } gaspi_return_t eret = GASPI_ERROR; gaspi_context_t * const gctx = &glb_gaspi_ctx; if(lock_gaspi_tout (&gctx->lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == gctx->ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_gpu_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, notification_id, notification_value, queue, timeout_ms); if( eret != GASPI_SUCCESS ) { /* gctx->qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT; */ goto endL; } /* GPI2_STATS_INC_COUNT(GASPI_STATS_COUNTER_NUM_WRITE_NOT, 1); */ /* GPI2_STATS_INC_COUNT(GASPI_STATS_COUNTER_BYTES_WRITE, size); */ endL: unlock_gaspi (&gctx->lockC[queue]); return eret; }
/*
 * Store the calling process' local rank in *local_rank.
 *
 * The value is taken from glb_gaspi_ctx.localSocket. Returns GASPI_SUCCESS
 * once the verification macros (initialisation, non-NULL output) pass.
 */
gaspi_return_t
pgaspi_proc_local_rank (gaspi_rank_t * const local_rank)
{
  gaspi_verify_init("gaspi_proc_local_rank");
  gaspi_verify_null_ptr(local_rank);

  const gaspi_rank_t value = (gaspi_rank_t) glb_gaspi_ctx.localSocket;

  *local_rank = value;

  return GASPI_SUCCESS;
}
/*
 * Store the calling process' rank (glb_gaspi_ctx.rank) in *rank.
 *
 * Returns GASPI_SUCCESS once the verification macros (initialisation,
 * non-NULL output) pass.
 */
gaspi_return_t
pgaspi_proc_rank (gaspi_rank_t * const rank)
{
  gaspi_verify_init("gaspi_proc_rank");
  gaspi_verify_null_ptr(rank);

  const gaspi_rank_t value = (gaspi_rank_t) glb_gaspi_ctx.rank;

  *rank = value;

  return GASPI_SUCCESS;
}
/*
 * Store the total number of processes (glb_gaspi_ctx.tnc) in *proc_num.
 *
 * Returns GASPI_SUCCESS once the verification macros (initialisation,
 * non-NULL output) pass.
 */
gaspi_return_t
pgaspi_proc_num (gaspi_rank_t * const proc_num)
{
  gaspi_verify_init("gaspi_proc_num");
  gaspi_verify_null_ptr(proc_num);

  const gaspi_rank_t value = (gaspi_rank_t) glb_gaspi_ctx.tnc;

  *proc_num = value;

  return GASPI_SUCCESS;
}
/*
 * Store the number of GPUs known to the context (glb_gaspi_ctx.gpu_count)
 * in *gpus.
 *
 * Returns GASPI_ERROR (after printing an error) when glb_gaspi_ctx.use_gpus
 * is 0, GASPI_SUCCESS otherwise.
 */
gaspi_return_t
gaspi_number_of_GPUs (gaspi_gpu_num *gpus)
{
  gaspi_verify_init("gaspi_number_of_GPUs");
  gaspi_verify_null_ptr(gpus);

  if( glb_gaspi_ctx.use_gpus != 0 )
    {
      *gpus = glb_gaspi_ctx.gpu_count;
      return GASPI_SUCCESS;
    }

  gaspi_print_error("GPUs are not initialized.");

  return GASPI_ERROR;
}
gaspi_return_t pgaspi_proc_ping (const gaspi_rank_t rank, const gaspi_timeout_t timeout_ms) { gaspi_return_t eret = GASPI_ERROR; gaspi_verify_init("gaspi_proc_ping"); gaspi_verify_rank(rank); if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms)) return GASPI_TIMEOUT; eret = gaspi_sn_command(GASPI_SN_PROC_PING, rank, timeout_ms, NULL); unlock_gaspi (&glb_gaspi_ctx_lock); return eret; }
/*
 * Store the number of processes sharing the caller's node in *local_num.
 *
 * Starting at the caller's own rank, the poff table is walked forward while
 * the next entry is non-zero; the result is poff[last] + 1.
 * (Presumably poff holds each rank's index within its node, with 0 marking
 * the first rank of a node - TODO confirm.)
 *
 * Returns GASPI_ERROR when the own rank cannot be obtained, GASPI_SUCCESS
 * otherwise.
 */
gaspi_return_t
pgaspi_proc_local_num (gaspi_rank_t * const local_num)
{
  gaspi_rank_t rank;

  gaspi_verify_init("gaspi_proc_local_num");
  gaspi_verify_null_ptr(local_num);

  if( pgaspi_proc_rank(&rank) != GASPI_SUCCESS )
    {
      return GASPI_ERROR;
    }

  /* Stop at the last global rank so poff[rank + 1] is never read beyond
     index tnc - 1 (poff is assumed to hold one entry per rank - TODO
     confirm). Without the bound, the walk runs off the table when the
     caller's node contains the highest rank. */
  while( (rank + 1 < glb_gaspi_ctx.tnc) && (glb_gaspi_ctx.poff[rank + 1] != 0) )
    {
      rank++;
    }

  *local_num = (gaspi_rank_t) (glb_gaspi_ctx.poff[rank] + 1);

  return GASPI_SUCCESS;
}
/*
 * Store the number of GPUs known to the context (glb_gaspi_ctx.gpu_count)
 * in *num_gpus.
 *
 * Returns GASPI_ERROR (after printing an error) when glb_gaspi_ctx.use_gpus
 * is 0, GASPI_SUCCESS otherwise.
 */
gaspi_return_t
gaspi_gpu_number (gaspi_number_t* num_gpus)
{
  gaspi_verify_init("gaspi_gpu_number");
  gaspi_verify_null_ptr(num_gpus);

  if( glb_gaspi_ctx.use_gpus != 0 )
    {
      *num_gpus = glb_gaspi_ctx.gpu_count;
      return GASPI_SUCCESS;
    }

  gaspi_print_error("GPUs are not initialized.");

  return GASPI_ERROR;
}
gaspi_return_t pgaspi_write_notify (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_write_notify"); gaspi_verify_local_off(offset_local, segment_id_local); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); if(notification_value == 0) return GASPI_ERR_INV_NOTIF_VAL; gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, notification_id, notification_value, queue); glb_gaspi_ctx.ne_count_c[queue] += 2; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
/* TODO: It is not clear why this function is needed. */
/*
 * Copy the device id of every known GPU into gpu_ids.
 *
 * glb_gaspi_ctx.gpu_count entries are written, so the caller must provide
 * an array of at least that many elements. Returns GASPI_ERROR (after
 * printing an error) when glb_gaspi_ctx.use_gpus is 0, GASPI_SUCCESS
 * otherwise.
 */
gaspi_return_t
gaspi_GPU_ids (gaspi_gpu_t *gpu_ids)
{
  gaspi_verify_init("gaspi_GPU_ids");
  gaspi_verify_null_ptr(gpu_ids);

  if( glb_gaspi_ctx.use_gpus == 0 )
    {
      gaspi_print_error("GPUs are not initialized.");
      return GASPI_ERROR;
    }

  int dev = 0;
  while( dev < glb_gaspi_ctx.gpu_count )
    {
      gpu_ids[dev] = gpus[dev].device_id;
      dev++;
    }

  return GASPI_SUCCESS;
}
gaspi_return_t pgaspi_proc_term (const gaspi_timeout_t timeout) { int i; gaspi_verify_init("gaspi_proc_term"); if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout)) return GASPI_TIMEOUT; pthread_kill(glb_gaspi_ctx.snt, SIGSTKFLT); if(glb_gaspi_ctx.sockfd != NULL) { for(i = 0;i < glb_gaspi_ctx.tnc; i++) { shutdown(glb_gaspi_ctx.sockfd[i],2); if(glb_gaspi_ctx.sockfd[i] > 0) close(glb_gaspi_ctx.sockfd[i]); } free(glb_gaspi_ctx.sockfd); } #ifdef GPI2_WITH_MPI if(glb_gaspi_ctx.rank == 0) { if(remove(glb_gaspi_ctx.mfile) < 0) { gaspi_print_error("Failed to remove tmp file (%s)", glb_gaspi_ctx.mfile); } } #endif if(pgaspi_cleanup_core() != GASPI_SUCCESS) goto errL; unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_SUCCESS; errL: unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_ERROR; }
/*
 * Wait on communication queue `queue`.
 *
 * Takes the queue lock (GASPI_TIMEOUT if lock_gaspi_tout fails) and hands
 * the queue's ne_count_c counter to pgaspi_dev_wait, whose result is
 * returned.
 */
gaspi_return_t
pgaspi_wait (const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init("gaspi_wait");
  gaspi_verify_queue(queue);

  if( lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms) )
    {
      return GASPI_TIMEOUT;
    }

  const gaspi_return_t ret = pgaspi_dev_wait(queue,
                                             &glb_gaspi_ctx.ne_count_c[queue],
                                             timeout_ms);

  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);

  return ret;
}
/*
 * Fill state_vector with one combined state per rank.
 *
 * The first glb_gaspi_ctx.tnc bytes of state_vector are zeroed, then for
 * each rank the entries of all GASPI_MAX_QP + 3 rows of
 * glb_gaspi_ctx.qp_state_vec are OR-ed into the rank's slot.
 *
 * Returns GASPI_SUCCESS once the verification macros pass.
 */
gaspi_return_t
pgaspi_state_vec_get (gaspi_state_vector_t state_vector)
{
  int r, q;

  gaspi_verify_null_ptr(state_vector);
  gaspi_verify_init("gaspi_state_vec_get");

  memset (state_vector, 0, (size_t) glb_gaspi_ctx.tnc);

  for( r = 0; r < glb_gaspi_ctx.tnc; r++ )
    {
      q = 0;
      while( q < (GASPI_MAX_QP + 3) )
        {
          state_vector[r] |= glb_gaspi_ctx.qp_state_vec[q][r];
          q++;
        }
    }

  return GASPI_SUCCESS;
}
gaspi_return_t pgaspi_passive_send (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_size_t size, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_passive_send"); gaspi_verify_local_off(offset_local, segment_id_local, size); gaspi_verify_comm_size(size, segment_id_local, segment_id_local, glb_gaspi_ctx.rank, GASPI_MAX_TSIZE_P); gaspi_verify_rank(rank); gaspi_return_t eret = GASPI_ERROR; if( lock_gaspi_tout (&glb_gaspi_ctx.lockPS, timeout_ms) ) { return GASPI_TIMEOUT; } if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if( eret != GASPI_SUCCESS ) { goto endL; } } eret = pgaspi_dev_passive_send(segment_id_local, offset_local, rank, size, glb_gaspi_ctx.ne_count_p, timeout_ms); if( eret == GASPI_ERROR ) { glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][rank] = GASPI_STATE_CORRUPT; } endL: unlock_gaspi (&glb_gaspi_ctx.lockPS); return eret; }
gaspi_return_t pgaspi_notify (const gaspi_segment_id_t segment_id_remote, const gaspi_rank_t rank, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_notify"); gaspi_verify_segment(segment_id_remote); gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_remote]); gaspi_verify_rank(rank); gaspi_verify_queue(queue); if(notification_value == 0) return GASPI_ERR_INV_NOTIF_VAL; gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_notify(segment_id_remote, rank, notification_id, notification_value, queue); glb_gaspi_ctx.ne_count_c[queue]++; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
gaspi_return_t pgaspi_read (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_read"); gaspi_verify_local_off(offset_local, segment_id_local); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_read(segment_id_local, offset_local, rank, segment_id_remote,offset_remote, (unsigned int) size, queue); glb_gaspi_ctx.ne_count_c[queue]++; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
/* TODO: Is this function really needed? If so, it should at least become
   part of the GPU interface so that clients are allowed to use it. */
/*
 * Copy the device id of every known GPU into gpu_ids.
 *
 * glb_gaspi_ctx.gpu_count entries are written, so the caller must provide
 * an array of at least that many elements. Returns GASPI_ERROR (after
 * printing an error) when glb_gaspi_ctx.use_gpus is 0, GASPI_SUCCESS
 * otherwise.
 */
gaspi_return_t
gaspi_gpu_ids (gaspi_gpu_id_t* gpu_ids)
{
  gaspi_verify_init("gaspi_gpu_ids");
  gaspi_verify_null_ptr(gpu_ids);

  if( glb_gaspi_ctx.use_gpus == 0 )
    {
      gaspi_print_error("GPUs are not found/initialized.");
      return GASPI_ERROR;
    }

  int dev = 0;
  while( dev < glb_gaspi_ctx.gpu_count )
    {
      gpu_ids[dev] = gpus[dev].device_id;
      dev++;
    }

  return GASPI_SUCCESS;
}
gaspi_return_t pgaspi_proc_kill (const gaspi_rank_t rank,const gaspi_timeout_t timeout_ms) { gaspi_return_t eret = GASPI_ERROR; gaspi_verify_init("gaspi_proc_kill"); gaspi_verify_rank(rank); if( rank == glb_gaspi_ctx.rank ) { gaspi_print_error("Invalid rank to kill"); return GASPI_ERR_INV_RANK; } if(lock_gaspi_tout(&glb_gaspi_ctx_lock, timeout_ms)) return GASPI_TIMEOUT; eret = gaspi_sn_command(GASPI_SN_PROC_KILL, rank, timeout_ms, NULL); unlock_gaspi(&glb_gaspi_ctx_lock); return eret; }
gaspi_return_t pgaspi_notify_waitsome (const gaspi_segment_id_t segment_id_local, const gaspi_notification_id_t notification_begin, const gaspi_number_t num, gaspi_notification_id_t * const first_id, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_notify_waitsome"); gaspi_verify_segment(segment_id_local); gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_local]); gaspi_verify_null_ptr(first_id); #ifdef DEBUG if( num >= GASPI_MAX_NOTIFICATION) return GASPI_ERR_INV_NUM; #endif volatile unsigned char *segPtr; int loop = 1; gaspi_notification_id_t n; if(num == 0) return GASPI_SUCCESS; #ifdef GPI2_CUDA if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId >=0 ) { segPtr = (volatile unsigned char*)glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_addr; } else #endif segPtr = (volatile unsigned char *) glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr; volatile unsigned int *p = (volatile unsigned int *) segPtr; if (timeout_ms == GASPI_BLOCK) { while (loop) { for (n = notification_begin; n < (notification_begin + num); n++) { if (p[n]) { *first_id = n; return GASPI_SUCCESS; } } gaspi_delay (); } } else if (timeout_ms == GASPI_TEST) { for (n = notification_begin; n < (notification_begin + num); n++) { if (p[n]) { *first_id = n; return GASPI_SUCCESS; } } return GASPI_TIMEOUT; } const gaspi_cycles_t s0 = gaspi_get_cycles (); while (loop) { for (n = notification_begin; n < (notification_begin + num); n++) { if (p[n]) { *first_id = n; loop = 0; break; } } const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { return GASPI_TIMEOUT; } gaspi_delay (); } return GASPI_SUCCESS; }