/* Posts a list of `num` writes to `rank` plus one trailing notification.
 *
 * Validates arguments (entry-wise in DEBUG builds), lazily connects the
 * endpoint if needed, delegates to the device layer and accounts for the
 * outstanding work requests on the queue (num writes + 1 notification).
 *
 * Returns GASPI_SUCCESS, GASPI_TIMEOUT (queue lock not acquired in time),
 * GASPI_ERR_INV_NUM / GASPI_ERR_INV_NOTIF_VAL on bad arguments, or an
 * error code from connect/device layer.
 */
gaspi_return_t
pgaspi_write_list_notify (const gaspi_number_t num,
                          gaspi_segment_id_t * const segment_id_local,
                          gaspi_offset_t * const offset_local,
                          const gaspi_rank_t rank,
                          gaspi_segment_id_t * const segment_id_remote,
                          gaspi_offset_t * const offset_remote,
                          gaspi_size_t * const size,
                          const gaspi_segment_id_t segment_id_notification,
                          const gaspi_notification_id_t notification_id,
                          const gaspi_notification_t notification_value,
                          const gaspi_queue_id_t queue,
                          const gaspi_timeout_t timeout_ms)
{
  /* An empty list and a zero notification value are invalid per spec. */
  if(num == 0)
    return GASPI_ERR_INV_NUM;

  if(notification_value == 0)
    return GASPI_ERR_INV_NOTIF_VAL;

#ifdef DEBUG
  /* Debug builds check every list entry; the verify macros return early
     on failure. */
  gaspi_verify_init("gaspi_write_list_notify");
  gaspi_verify_queue(queue);

  gaspi_number_t n;
  for(n = 0; n < num; n++)
    {
      gaspi_verify_local_off(offset_local[n], segment_id_local[n]);
      gaspi_verify_remote_off(offset_remote[n], segment_id_remote[n], rank);
      gaspi_verify_comm_size(size[n], segment_id_local[n], segment_id_remote[n], rank, GASPI_MAX_TSIZE_C);
    }
#endif

  gaspi_return_t eret = GASPI_ERROR;

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Lazy connection setup on first communication with this rank. */
  if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat )
    {
      eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms);
      if ( eret != GASPI_SUCCESS)
        {
          goto endL;
        }
    }

  eret = pgaspi_dev_write_list_notify(num, segment_id_local, offset_local, rank, segment_id_remote, offset_remote, (unsigned int *)size, segment_id_notification, notification_id, notification_value, queue);

  /* num data writes + 1 notification are now in flight on this queue.
     NOTE(review): incremented even when the device call failed — confirm
     this matches the device layer's posting behavior on error. */
  glb_gaspi_ctx.ne_count_c[queue] += (int) (num + 1);

endL:
  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return eret;
}
/* Blocking passive receive into a local segment.
 *
 * Verifies arguments, serializes passive receives via lockPR and hands the
 * actual receive to the device layer. The sender's rank is stored in
 * *rem_rank on success.
 */
gaspi_return_t
pgaspi_passive_receive (const gaspi_segment_id_t segment_id_local,
                        const gaspi_offset_t offset_local,
                        gaspi_rank_t * const rem_rank,
                        const gaspi_size_t size,
                        const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init ("gaspi_passive_receive");
  gaspi_verify_local_off (offset_local, segment_id_local, size);
  gaspi_verify_comm_size (size, segment_id_local, segment_id_local,
                          glb_gaspi_ctx.rank, GASPI_MAX_TSIZE_P);

  if (lock_gaspi_tout (&glb_gaspi_ctx.lockPR, timeout_ms))
    {
      return GASPI_TIMEOUT;
    }

  /* Delegate to the device layer while holding the passive-receive lock. */
  const gaspi_return_t ret =
    pgaspi_dev_passive_receive (segment_id_local, offset_local, rem_rank,
                                size, timeout_ms);

  unlock_gaspi (&glb_gaspi_ctx.lockPR);

  return ret;
}
/* Returns the accumulated wall time (ms) of timer `t`.
 * The value is read under the stats lock for consistency. */
float
gaspi_stats_get_timer_ms (enum gaspi_timer t)
{
  lock_gaspi (&gaspi_stats_lock);
  const float total_ms = _timers[t].ttotal_ms;
  unlock_gaspi (&gaspi_stats_lock);

  return total_ms;
}
gaspi_return_t pgaspi_gpu_write_notify(const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_gpu_write_notify"); gaspi_verify_local_off(offset_local, segment_id_local, size); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank, size); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); if( notification_value == 0 ) { gaspi_printf("Zero is not allowed as notification value."); return GASPI_ERR_INV_NOTIF_VAL; } gaspi_return_t eret = GASPI_ERROR; gaspi_context_t * const gctx = &glb_gaspi_ctx; if(lock_gaspi_tout (&gctx->lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == gctx->ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_gpu_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, notification_id, notification_value, queue, timeout_ms); if( eret != GASPI_SUCCESS ) { /* gctx->qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT; */ goto endL; } /* GPI2_STATS_INC_COUNT(GASPI_STATS_COUNTER_NUM_WRITE_NOT, 1); */ /* GPI2_STATS_INC_COUNT(GASPI_STATS_COUNTER_BYTES_WRITE, size); */ endL: unlock_gaspi (&gctx->lockC[queue]); return eret; }
int gaspi_sn_segment_register(const gaspi_cd_header snp) { if(!glb_gaspi_dev_init) return -1; if( snp.seg_id < 0 && snp.seg_id >= GASPI_MAX_MSEGS) return -1; lock_gaspi_tout(&gaspi_mseg_lock, GASPI_BLOCK); if(glb_gaspi_ctx.rrmd[snp.seg_id] == NULL) { glb_gaspi_ctx.rrmd[snp.seg_id] = (gaspi_rc_mseg *) calloc (glb_gaspi_ctx.tnc, sizeof (gaspi_rc_mseg)); if( glb_gaspi_ctx.rrmd[snp.seg_id] == NULL ) { unlock_gaspi(&gaspi_mseg_lock); return -1; } } /* TODO: don't allow re-registration? */ /* for now we allow re-registration */ /* if(glb_gaspi_ctx.rrmd[snp.seg_id][snp.rem_rank].size) -> re-registration error case */ glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].rkey = snp.rkey; glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].addr = snp.addr; glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].size = snp.size; #ifdef GPI2_CUDA glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].host_rkey = snp.host_rkey; glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].host_addr = snp.host_addr; if(snp.host_addr != 0) glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].cudaDevId = 1; else glb_gaspi_ctx.rrmd[snp.seg_id][snp.rank].cudaDevId = -1; #endif unlock_gaspi(&gaspi_mseg_lock); return 0; }
gaspi_return_t pgaspi_proc_term (const gaspi_timeout_t timeout) { int i; gaspi_verify_init("gaspi_proc_term"); if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout)) return GASPI_TIMEOUT; pthread_kill(glb_gaspi_ctx.snt, SIGSTKFLT); if(glb_gaspi_ctx.sockfd != NULL) { for(i = 0;i < glb_gaspi_ctx.tnc; i++) { shutdown(glb_gaspi_ctx.sockfd[i],2); if(glb_gaspi_ctx.sockfd[i] > 0) close(glb_gaspi_ctx.sockfd[i]); } free(glb_gaspi_ctx.sockfd); } #ifdef GPI2_WITH_MPI if(glb_gaspi_ctx.rank == 0) { if(remove(glb_gaspi_ctx.mfile) < 0) { gaspi_print_error("Failed to remove tmp file (%s)", glb_gaspi_ctx.mfile); } } #endif if(pgaspi_cleanup_core() != GASPI_SUCCESS) goto errL; unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_SUCCESS; errL: unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_ERROR; }
/* Starts timer `t` if it is not already running.
 * The `running` flag is checked outside the lock (cheap fast path, as in
 * the original); start time and flag are then set under the stats lock. */
void
gaspi_stats_start_timer (enum gaspi_timer t)
{
  if (_timers[t].running)
    {
      return;
    }

  lock_gaspi (&gaspi_stats_lock);

  _timers[t].tstart = gaspi_get_cycles ();
  _timers[t].running = 1;

  unlock_gaspi (&gaspi_stats_lock);
}
gaspi_return_t pgaspi_proc_ping (const gaspi_rank_t rank, const gaspi_timeout_t timeout_ms) { gaspi_return_t eret = GASPI_ERROR; gaspi_verify_init("gaspi_proc_ping"); gaspi_verify_rank(rank); if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms)) return GASPI_TIMEOUT; eret = gaspi_sn_command(GASPI_SN_PROC_PING, rank, timeout_ms, NULL); unlock_gaspi (&glb_gaspi_ctx_lock); return eret; }
gaspi_return_t pgaspi_write_notify (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_write_notify"); gaspi_verify_local_off(offset_local, segment_id_local); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); if(notification_value == 0) return GASPI_ERR_INV_NOTIF_VAL; gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, notification_id, notification_value, queue); glb_gaspi_ctx.ne_count_c[queue] += 2; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
/* Legacy SN ping: sends command 2 to `rank` via the SN datagram thread.
 * NOTE(review): the inner call uses GASPI_OP_TIMEOUT, not the caller's
 * timeout_ms, which only bounds the lock acquisition — confirm intended. */
gaspi_return_t
gaspi_sn_ping (const gaspi_rank_t rank, const gaspi_timeout_t timeout_ms)
{
  gaspi_sn_packet snp;

  if (!glb_gaspi_init)
    {
      return GASPI_ERROR;
    }

  if (lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms))
    {
      return GASPI_TIMEOUT;
    }

  snp.cmd = 2;
  const gaspi_return_t ret = gaspi_call_sn_threadDG (rank, snp, GASPI_OP_TIMEOUT);

  unlock_gaspi (&glb_gaspi_ctx_lock);

  return ret;
}
/* Waits until all outstanding requests on `queue` have completed, updating
 * the queue's outstanding-request counter via the device layer. */
gaspi_return_t
pgaspi_wait (const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms)
{
  gaspi_verify_init ("gaspi_wait");
  gaspi_verify_queue (queue);

  if (lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    {
      return GASPI_TIMEOUT;
    }

  const gaspi_return_t ret =
    pgaspi_dev_wait (queue, &glb_gaspi_ctx.ne_count_c[queue], timeout_ms);

  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);

  return ret;
}
gaspi_return_t pgaspi_notify (const gaspi_segment_id_t segment_id_remote, const gaspi_rank_t rank, const gaspi_notification_id_t notification_id, const gaspi_notification_t notification_value, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_notify"); gaspi_verify_segment(segment_id_remote); gaspi_verify_null_ptr(glb_gaspi_ctx.rrmd[segment_id_remote]); gaspi_verify_rank(rank); gaspi_verify_queue(queue); if(notification_value == 0) return GASPI_ERR_INV_NOTIF_VAL; gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_notify(segment_id_remote, rank, notification_id, notification_value, queue); glb_gaspi_ctx.ne_count_c[queue]++; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
gaspi_return_t pgaspi_passive_send (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_size_t size, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_passive_send"); gaspi_verify_local_off(offset_local, segment_id_local, size); gaspi_verify_comm_size(size, segment_id_local, segment_id_local, glb_gaspi_ctx.rank, GASPI_MAX_TSIZE_P); gaspi_verify_rank(rank); gaspi_return_t eret = GASPI_ERROR; if( lock_gaspi_tout (&glb_gaspi_ctx.lockPS, timeout_ms) ) { return GASPI_TIMEOUT; } if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if( eret != GASPI_SUCCESS ) { goto endL; } } eret = pgaspi_dev_passive_send(segment_id_local, offset_local, rank, size, glb_gaspi_ctx.ne_count_p, timeout_ms); if( eret == GASPI_ERROR ) { glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][rank] = GASPI_STATE_CORRUPT; } endL: unlock_gaspi (&glb_gaspi_ctx.lockPS); return eret; }
gaspi_return_t pgaspi_read (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { gaspi_verify_init("gaspi_read"); gaspi_verify_local_off(offset_local, segment_id_local); gaspi_verify_remote_off(offset_remote, segment_id_remote, rank); gaspi_verify_queue(queue); gaspi_verify_comm_size(size, segment_id_local, segment_id_remote, rank, GASPI_MAX_TSIZE_C); gaspi_return_t eret = GASPI_ERROR; if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; if( GASPI_ENDPOINT_DISCONNECTED == glb_gaspi_ctx.ep_conn[rank].cstat ) { eret = pgaspi_connect((gaspi_rank_t) rank, timeout_ms); if ( eret != GASPI_SUCCESS) { goto endL; } } eret = pgaspi_dev_read(segment_id_local, offset_local, rank, segment_id_remote,offset_remote, (unsigned int) size, queue); glb_gaspi_ctx.ne_count_c[queue]++; endL: unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return eret; }
gaspi_return_t pgaspi_proc_kill (const gaspi_rank_t rank,const gaspi_timeout_t timeout_ms) { gaspi_return_t eret = GASPI_ERROR; gaspi_verify_init("gaspi_proc_kill"); gaspi_verify_rank(rank); if( rank == glb_gaspi_ctx.rank ) { gaspi_print_error("Invalid rank to kill"); return GASPI_ERR_INV_RANK; } if(lock_gaspi_tout(&glb_gaspi_ctx_lock, timeout_ms)) return GASPI_TIMEOUT; eret = gaspi_sn_command(GASPI_SN_PROC_KILL, rank, timeout_ms, NULL); unlock_gaspi(&glb_gaspi_ctx_lock); return eret; }
/* Stops timer `t`, folding the elapsed cycles into the timer's own total
 * and into the aggregate GASPI_ALL_TIMER, converting both totals to ms.
 *
 * NOTE(review): the `running` flag is read outside the lock (same fast-path
 * pattern as gaspi_stats_start_timer), and GASPI_ALL_TIMER.running is
 * cleared here although this function never sets it — confirm intended.
 */
void gaspi_stats_stop_timer(enum gaspi_timer t) { gaspi_context_t const * const gctx = &glb_gaspi_ctx;
  /* Not running -> nothing to accumulate. */
  if( !_timers[t].running ) { return; }
  lock_gaspi(&gaspi_stats_lock);
  _timers[t].tend = gaspi_get_cycles();
  /* Accumulate raw cycles, then cache the ms conversion. */
  _timers[t].ttotal += (_timers[t].tend - _timers[t].tstart);
  _timers[t].ttotal_ms = (float) _timers[t].ttotal * gctx->cycles_to_msecs;
  _timers[t].running = 0;
  /* Same elapsed span also feeds the global aggregate timer. */
  _timers[GASPI_ALL_TIMER].ttotal += (_timers[t].tend - _timers[t].tstart);
  _timers[GASPI_ALL_TIMER].ttotal_ms = (float) _timers[GASPI_ALL_TIMER].ttotal * gctx->cycles_to_msecs;
  _timers[GASPI_ALL_TIMER].running = 0;
  unlock_gaspi(&gaspi_stats_lock); }
gaspi_return_t pgaspi_gpu_write(const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_segment_id_t segment_id_remote, const gaspi_offset_t offset_remote, const gaspi_size_t size, const gaspi_queue_id_t queue, const gaspi_timeout_t timeout_ms) { if( glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId < 0 || size <= GASPI_GPU_DIRECT_MAX ) { return gaspi_write(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size, queue, timeout_ms); } if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms)) return GASPI_TIMEOUT; char* host_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_ptr + NOTIFY_OFFSET + offset_local); char* device_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr + offset_local); gaspi_gpu* agpu = _gaspi_find_gpu(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId); if( !agpu ) { gaspi_print_error("No GPU found or not initialized (gaspi_init_GPUs)."); return GASPI_ERROR; } int size_left = size; int copy_size = 0; int gpu_offset = 0; const int BLOCK_SIZE = GASPI_GPU_BUFFERED; const gaspi_cycles_t s0 = gaspi_get_cycles (); while(size_left > 0) { int i; for(i = 0; i < GASPI_CUDA_EVENTS; i++) { if(size_left > BLOCK_SIZE) copy_size = BLOCK_SIZE; else copy_size = size_left; if( cudaMemcpyAsync(host_ptr + gpu_offset, device_ptr + gpu_offset, copy_size, cudaMemcpyDeviceToHost, agpu->streams[queue])) { unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } glb_gaspi_ctx.ne_count_c[queue]++; agpu->events[queue][i].segment_remote = segment_id_remote; agpu->events[queue][i].segment_local = segment_id_local; agpu->events[queue][i].size = copy_size; agpu->events[queue][i].rank = rank; agpu->events[queue][i].offset_local = offset_local+gpu_offset; agpu->events[queue][i].offset_remote = offset_remote+gpu_offset; agpu->events[queue][i].in_use =1; cudaError_t err = 
cudaEventRecord(agpu->events[queue][i].event, agpu->streams[queue]); if(err != cudaSuccess) { glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT; unlock_gaspi(&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } gpu_offset += copy_size; size_left -= copy_size; if(size_left == 0) break; if(agpu->events[queue][i].ib_use) { struct ibv_wc wc; int ne; do { ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc); glb_gaspi_ctx.ne_count_c[queue] -= ne; if (ne == 0) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } } while(ne==0); agpu->events[queue][i].ib_use = 0; } } for(i = 0; i < GASPI_CUDA_EVENTS; i++) { cudaError_t error; if ( agpu->events[queue][i].in_use == 1 ) { do { error = cudaEventQuery(agpu->events[queue][i].event ); if( cudaSuccess == error ) { if (_gaspi_event_send(&agpu->events[queue][i],queue)) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } agpu->events[queue][i].in_use = 0; } else if(error == cudaErrorNotReady) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_TIMEOUT; } } else { unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_ERROR; } } while(error != cudaSuccess); } } } unlock_gaspi (&glb_gaspi_ctx.lockC[queue]); return GASPI_SUCCESS; }
/* Initializes the local GASPI process.
 *
 * Steps: (1) once per process, set up timing, environment and the SN
 * thread; (2) on the master, parse the machinefile into hostname/offset
 * tables and prepare the SN socket array; (3) broadcast the topology,
 * initialize the core, and — depending on configuration — pre-connect
 * endpoints and create GASPI_GROUP_ALL.
 *
 * Returns GASPI_SUCCESS, GASPI_TIMEOUT, or a specific error code.
 *
 * NOTE(review), to be confirmed:
 *  - the lockPS/lockPR/lockC fields are zeroed and then the whole context
 *    is memset to 0 anyway, making the explicit zeroing redundant;
 *  - getline() returns ssize_t but is stored in an `int`;
 *  - after the mid-function unlock_gaspi, later failures jump to errL,
 *    which unlocks glb_gaspi_ctx_lock a second time;
 *  - the two machinefile passes use different length filters
 *    (`read > 64` vs `read >= 64`).
 */
gaspi_return_t pgaspi_proc_init (const gaspi_timeout_t timeout_ms)
{
  gaspi_return_t eret = GASPI_ERROR;
  int i;
  const int num_queues = (int) glb_gaspi_cfg.queue_num;

  if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms))
    return GASPI_TIMEOUT;

  /* One-time process setup (SN thread, timing, environment). */
  if(glb_gaspi_sn_init == 0)
    {
      glb_gaspi_ctx.lockPS.lock = 0;
      glb_gaspi_ctx.lockPR.lock = 0;
      for (i = 0; i < num_queues; i++)
        glb_gaspi_ctx.lockC[i].lock = 0;

      memset (&glb_gaspi_ctx, 0, sizeof (gaspi_context));

      struct utsname mbuf;
      if (uname (&mbuf) == 0)
        {
          snprintf (glb_gaspi_ctx.mtyp, 64, "%s", mbuf.machine);
        }

      //timing: cycle counter -> milliseconds conversion factor
      glb_gaspi_ctx.mhz = gaspi_get_cpufreq ();
      if (glb_gaspi_ctx.mhz == 0.0f)
        {
          gaspi_print_error ("Failed to get CPU frequency");
          goto errL;
        }
      glb_gaspi_ctx.cycles_to_msecs = 1.0f / (glb_gaspi_ctx.mhz * 1000.0f);

      //handle environment
      if(gaspi_handle_env(&glb_gaspi_ctx))
        {
          gaspi_print_error("Failed to handle environment");
          eret = GASPI_ERR_ENV;
          goto errL;
        }

      //start sn_backend
      if(pthread_create(&glb_gaspi_ctx.snt, NULL, gaspi_sn_backend, NULL) != 0)
        {
          gaspi_print_error("Failed to create SN thread");
          goto errL;
        }

      glb_gaspi_sn_init = 1;
    }//glb_gaspi_sn_init

  if(glb_gaspi_ctx.procType == MASTER_PROC)
    {
      if(glb_gaspi_dev_init == 0)
        {
          if(access (glb_gaspi_ctx.mfile, R_OK) == -1)
            {
              gaspi_print_error ("Incorrect permissions of machinefile");
              eret = GASPI_ERR_ENV;
              goto errL;
            }

          //read hostnames
          char *line = NULL;
          size_t len = 0;
          int read;
          FILE *fp = fopen (glb_gaspi_ctx.mfile, "r");
          if (fp == NULL)
            {
              gaspi_print_error("Failed to open machinefile");
              eret = GASPI_ERR_ENV;
              goto errL;
            }

          /* First pass: count the nodes. */
          glb_gaspi_ctx.tnc = 0;
          while ((read = getline (&line, &len, fp)) != -1)
            {
              //we assume a single hostname per line
              if ((read < 2) || (read > 64))
                continue;
              glb_gaspi_ctx.tnc++;
              if (glb_gaspi_ctx.tnc >= GASPI_MAX_NODES)
                break;
            }

          rewind (fp);

          /* Hostname table: 64 bytes per node + per-node offsets behind it. */
          free (glb_gaspi_ctx.hn_poff);
          glb_gaspi_ctx.hn_poff = (char *) calloc (glb_gaspi_ctx.tnc, 65);
          if(glb_gaspi_ctx.hn_poff == NULL)
            {
              gaspi_print_error("Debug: Failed to allocate memory");
              goto errL;
            }

          glb_gaspi_ctx.poff = glb_gaspi_ctx.hn_poff + glb_gaspi_ctx.tnc * 64;

          /* Second pass: record each hostname and how many earlier entries
             share it (process offset within a node). */
          int id = 0;
          while((read = getline (&line, &len, fp)) != -1)
            {
              //we assume a single hostname per line
              if((read < 2) || (read >= 64))
                continue;

              int inList = 0;
              for(i = 0; i < id; i++)
                {
                  //already in list ?
                  //TODO: 64? 63? Magic numbers -> just get cacheline from system or define as such
                  const int hnlen = MAX (strlen (glb_gaspi_ctx.hn_poff + i * 64), MIN (strlen (line) - 1, 63));
                  if(strncmp (glb_gaspi_ctx.hn_poff + i * 64, line, hnlen) == 0)
                    {
                      inList++;
                    }
                }

              glb_gaspi_ctx.poff[id] = inList;

              strncpy (glb_gaspi_ctx.hn_poff + id * 64, line, MIN (read - 1, 63));
              id++;
              if(id >= GASPI_MAX_NODES)
                break;
            }

          fclose (fp);
          free (line);

          //master is rank 0 by definition
          glb_gaspi_ctx.rank = 0;

          /* SN sockets, one per node, initialized to "not connected". */
          free(glb_gaspi_ctx.sockfd);
          glb_gaspi_ctx.sockfd = (int *) malloc (glb_gaspi_ctx.tnc * sizeof (int));
          if(glb_gaspi_ctx.sockfd == NULL)
            {
              gaspi_print_error("Failed to allocate memory");
              eret = GASPI_ERR_MEMALLOC;
              goto errL;
            }
          for(i = 0; i < glb_gaspi_ctx.tnc; i++)
            glb_gaspi_ctx.sockfd[i] = -1;
        }//glb_gaspi_dev_init
    }//MASTER_PROC
  else if(glb_gaspi_ctx.procType != WORKER_PROC)
    {
      gaspi_print_error ("Invalid node type (GASPI_TYPE)");
      eret = GASPI_ERR_ENV;
      goto errL;
    }

  if( 0 != gaspi_sn_broadcast_topology(&glb_gaspi_ctx, GASPI_BLOCK) )
    {
      gaspi_print_error("Failed topology broadcast");
      eret = GASPI_ERROR;
      goto errL;
    }

  if( (eret = pgaspi_init_core()) != GASPI_SUCCESS )
    {
      goto errL;
    }

  /* Unleash SN thread */
  __sync_fetch_and_add( &gaspi_master_topo_data, 1);

  gaspi_init_collectives();

  glb_gaspi_init = 1;

  unlock_gaspi (&glb_gaspi_ctx_lock);

  if(glb_gaspi_cfg.build_infrastructure)
    {
      /* configuration tells us to pre-connect */
      if( GASPI_TOPOLOGY_STATIC == glb_gaspi_cfg.build_infrastructure )
        {
          for(i = glb_gaspi_ctx.rank; i >= 0; i--)
            {
              if( (eret = pgaspi_connect((gaspi_rank_t) i, timeout_ms)) != GASPI_SUCCESS )
                {
                  goto errL;
                }
            }
        }

      eret = pgaspi_group_all_local_create(timeout_ms);
      if(eret == GASPI_SUCCESS)
        {
          eret = gaspi_barrier(GASPI_GROUP_ALL, timeout_ms);
        }
      else
        {
          gaspi_print_error("Failed to create GASPI_GROUP_ALL.");
        }
    }
  else /* dont build_infrastructure */
    {
      /* just reserve GASPI_GROUP_ALL */
      glb_gaspi_ctx.group_cnt = 1;
      glb_gaspi_group_ctx[GASPI_GROUP_ALL].id = -2;//disable
      eret = GASPI_SUCCESS;
    }

#ifdef GPI2_CUDA
  /* init GPU counts */
  glb_gaspi_ctx.use_gpus = 0;
  glb_gaspi_ctx.gpu_count = 0;
#endif

  return eret;

errL:
  unlock_gaspi (&glb_gaspi_ctx_lock);
  return eret;
}
/* Buffered GPU write followed by a notification (CUDA + InfiniBand path).
 *
 * Stages device memory to the pinned host buffer chunk-by-chunk and posts
 * each chunk; once all data is posted, writes the notification value with
 * an inline RDMA write to the remote notification area.
 *
 * Falls back to gaspi_write_notify for non-GPU segments or transfers small
 * enough for GPUDirect.
 *
 * NOTE(review): the notification payload uses
 * sizeof(gaspi_notification_id_t) for address/length — confirm it matches
 * sizeof(gaspi_notification_t) on all configurations.
 */
gaspi_return_t pgaspi_gpu_write_notify(const gaspi_segment_id_t segment_id_local,
                                       const gaspi_offset_t offset_local,
                                       const gaspi_rank_t rank,
                                       const gaspi_segment_id_t segment_id_remote,
                                       const gaspi_offset_t offset_remote,
                                       const gaspi_size_t size,
                                       const gaspi_notification_id_t notification_id,
                                       const gaspi_notification_t notification_value,
                                       const gaspi_queue_id_t queue,
                                       const gaspi_timeout_t timeout_ms)
{
  /* Not a GPU segment, or small enough for the direct path. */
  if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId < 0 || size <= GASPI_GPU_DIRECT_MAX )
    {
      return gaspi_write_notify(segment_id_local, offset_local, rank, segment_id_remote, offset_remote, size,notification_id, notification_value, queue, timeout_ms);
    }

  if(lock_gaspi_tout (&glb_gaspi_ctx.lockC[queue], timeout_ms))
    return GASPI_TIMEOUT;

  /* Host staging area (after the notification area) and device source. */
  char *host_ptr = (char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].host_ptr+NOTIFY_OFFSET+offset_local);
  char* device_ptr =(char*)(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr+offset_local);

  gaspi_gpu* agpu = _gaspi_find_gpu(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId);
  if( !agpu )
    {
      gaspi_print_error("No GPU found or not initialized (gaspi_init_GPUs).");
      unlock_gaspi(&glb_gaspi_ctx.lockC[queue]);
      return GASPI_ERROR;
    }

  int copy_size = 0;
  int gpu_offset = 0;
  int size_left = size;
  int BLOCK_SIZE= GASPI_GPU_BUFFERED;

  /* Start of the operation; used for the timeout checks below. */
  const gaspi_cycles_t s0 = gaspi_get_cycles ();

  while(size_left > 0)
    {
      int i;
      /* Issue up to GASPI_CUDA_EVENTS async device->host copies. */
      for(i = 0; i < GASPI_CUDA_EVENTS; i++)
        {
          if(size_left > BLOCK_SIZE)
            copy_size = BLOCK_SIZE;
          else
            copy_size = size_left;

          if(cudaMemcpyAsync(host_ptr+gpu_offset, device_ptr + gpu_offset, copy_size, cudaMemcpyDeviceToHost, agpu->streams[queue]))
            {
              unlock_gaspi(&glb_gaspi_ctx.lockC[queue]);
              return GASPI_ERROR;
            }

          glb_gaspi_ctx.ne_count_c[queue]++;

          /* Remember the chunk; _gaspi_event_send posts it once the copy
             has completed. */
          agpu->events[queue][i].segment_remote = segment_id_remote;
          agpu->events[queue][i].segment_local = segment_id_local;
          agpu->events[queue][i].size = copy_size;
          agpu->events[queue][i].rank = rank;
          agpu->events[queue][i].offset_local = offset_local+gpu_offset;
          agpu->events[queue][i].offset_remote = offset_remote+gpu_offset;
          agpu->events[queue][i].in_use = 1;

          cudaError_t err = cudaEventRecord(agpu->events[queue][i].event,agpu->streams[queue]);
          if(err != cudaSuccess)
            {
              unlock_gaspi(&glb_gaspi_ctx.lockC[queue]);
              return GASPI_ERROR;
            }

          /* Thats not beautiful at all, however, else we have a overflow
             soon in the queue: drain one IB completion before reusing an
             event slot that still has a request in flight. */
          if(agpu->events[queue][i].ib_use)
            {
              struct ibv_wc wc;
              int ne;
              do
                {
                  ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc);
                  glb_gaspi_ctx.ne_count_c[queue] -= ne;
                  if (ne == 0)
                    {
                      const gaspi_cycles_t s1 = gaspi_get_cycles ();
                      const gaspi_cycles_t tdelta = s1 - s0;
                      const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs;
                      if (ms > timeout_ms)
                        {
                          unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
                          return GASPI_TIMEOUT;
                        }
                    }
                } while(ne == 0);

              agpu->events[queue][i].ib_use = 0;
            }

          gpu_offset += copy_size;
          size_left -= copy_size;
          if(size_left == 0)
            break;
        }

      /* Wait for the issued copies and post each finished chunk. */
      for(i = 0; i < GASPI_CUDA_EVENTS; i++)
        {
          cudaError_t error;
          if (agpu->events[queue][i].in_use == 1 )
            {
              do
                {
                  error = cudaEventQuery(agpu->events[queue][i].event );
                  if( cudaSuccess == error )
                    {
                      if (_gaspi_event_send(&agpu->events[queue][i],queue) )
                        {
                          unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
                          return GASPI_ERROR;
                        }
                      agpu->events[queue][i].in_use = 0;
                    }
                  else if(error == cudaErrorNotReady)
                    {
                      const gaspi_cycles_t s1 = gaspi_get_cycles ();
                      const gaspi_cycles_t tdelta = s1 - s0;
                      const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs;
                      if (ms > timeout_ms)
                        {
                          unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
                          return GASPI_TIMEOUT;
                        }
                    }
                  else
                    {
                      unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
                      return GASPI_ERROR;
                    }
                } while(error != cudaSuccess);
            }
        }
    }

  /* All data posted: now write the notification value with an inline,
     signaled RDMA write to the remote notification area. */
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slistN;
  struct ibv_send_wr swrN;

  slistN.addr = (uintptr_t)(glb_gaspi_ctx.nsrc.buf + notification_id * sizeof(gaspi_notification_id_t));

  *((unsigned int *) slistN.addr) = notification_value;

  slistN.length = sizeof(gaspi_notification_id_t);
  slistN.lkey =((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey;

  /* GPU-resident remote segment: notify via its host-side mapping. */
  if((glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0))
    {
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_addr + notification_id * sizeof(gaspi_notification_id_t));
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_rkey;
    }
  else
    {
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + notification_id * sizeof(gaspi_notification_id_t));
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey;
    }

  swrN.sg_list = &slistN;
  swrN.num_sge = 1;
  swrN.wr_id = rank;
  swrN.opcode = IBV_WR_RDMA_WRITE;
  swrN.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;; /* NOTE(review): stray `;` */
  swrN.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swrN, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT;
      unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
      return GASPI_ERROR;
    }

  glb_gaspi_ctx.ne_count_c[queue]++;

  unlock_gaspi (&glb_gaspi_ctx.lockC[queue]);
  return GASPI_SUCCESS;
}
/* Passive receive (InfiniBand path): posts a receive on the shared receive
 * queue, waits for a completion event on the passive channel (bounded by
 * timeout_ms via select), polls the recv CQ and maps the completed QP
 * number back to the sender's rank in *rem_rank.
 *
 * Returns GASPI_SUCCESS, GASPI_TIMEOUT or GASPI_ERROR.
 */
gaspi_return_t
pgaspi_passive_receive (const gaspi_segment_id_t segment_id_local,
                        const gaspi_offset_t offset_local,
                        gaspi_rank_t * const rem_rank,
                        const gaspi_size_t size,
                        const gaspi_timeout_t timeout_ms)
{
#ifdef DEBUG
  if (glb_gaspi_ctx_ib.rrmd[segment_id_local] == NULL)
    {
      gaspi_printf("Debug: Invalid local segment (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if( rem_rank == NULL)
    {
      gaspi_printf("Debug: Invalid pointer parameter: rem_rank (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if( offset_local > glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].size)
    {
      gaspi_printf("Debug: Invalid offsets (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }

  if( size < 1 || size > GASPI_MAX_TSIZE_P )
    {
      gaspi_printf("Debug: Invalid size (gaspi_passive_receive)\n");
      return GASPI_ERROR;
    }
#endif

  struct ibv_recv_wr *bad_wr;
  struct ibv_wc wc_recv;
  struct ibv_sge rlist;
  struct ibv_recv_wr rwr;
  struct ibv_cq *ev_cq;
  void *ev_ctx;
  int i;
  fd_set rfds;
  struct timeval tout;

  /* BUG FIX: the return value of lock_gaspi_tout was ignored, so on a lock
     timeout the function proceeded WITHOUT holding lockPR (racing other
     passive receives) instead of reporting GASPI_TIMEOUT. */
  if (lock_gaspi_tout (&glb_gaspi_ctx.lockPR, timeout_ms))
    return GASPI_TIMEOUT;

  /* Post the receive buffer (past the notification area) on the SRQ. */
  rlist.addr =
    (uintptr_t) (glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr
                 + NOTIFY_OFFSET + offset_local);
  rlist.length = size;
  rlist.lkey = glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr->lkey;

  rwr.wr_id = glb_gaspi_ctx.rank;
  rwr.sg_list = &rlist;
  rwr.num_sge = 1;
  rwr.next = NULL;

  if (ibv_post_srq_recv (glb_gaspi_ctx_ib.srqP, &rwr, &bad_wr))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Wait (bounded) for a completion-channel event. */
  FD_ZERO (&rfds);
  FD_SET (glb_gaspi_ctx_ib.channelP->fd, &rfds);

  const long ts = (timeout_ms / 1000);
  const long tus = (timeout_ms - ts * 1000) * 1000;

  tout.tv_sec = ts;
  tout.tv_usec = tus;

  const int selret = select (FD_SETSIZE, &rfds, NULL, NULL, &tout);
  if (selret < 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }
  else if (selret == 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_TIMEOUT;
    }

  if (ibv_get_cq_event (glb_gaspi_ctx_ib.channelP, &ev_cq, &ev_ctx))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  ibv_ack_cq_events (ev_cq, 1);

  /* The event must belong to the passive receive CQ. */
  if (ev_cq != glb_gaspi_ctx_ib.rcqP)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Re-arm notifications before consuming the completion. */
  if (ibv_req_notify_cq (glb_gaspi_ctx_ib.rcqP, 0))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  int ne = 0;
  do
    {
      ne = ibv_poll_cq (glb_gaspi_ctx_ib.rcqP, 1, &wc_recv);
    }
  while (ne == 0);

  if ((ne < 0) || (wc_recv.status != IBV_WC_SUCCESS))
    {
      /* Mark the passive QP toward the failed peer as corrupt. */
      glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][wc_recv.wr_id] = 1;
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Map the completed QP number back to the sender's rank. */
  *rem_rank = 0xffff;
  for (i = 0; i < glb_gaspi_ctx.tnc; i++)
    {
      if (glb_gaspi_ctx_ib.qpP[i]->qp_num == wc_recv.qp_num)
        {
          *rem_rank = i;
          break;
        }
    }

  unlock_gaspi (&glb_gaspi_ctx.lockPR);
  return GASPI_SUCCESS;
}
void *gaspi_sn_backend(void *arg) { int esock, lsock, n, i; struct epoll_event ev; struct epoll_event *ret_ev; gaspi_mgmt_header *ev_mgmt, *mgmt; signal(SIGSTKFLT, gaspi_sn_cleanup); signal(SIGPIPE, SIG_IGN); while(gaspi_master_topo_data == 0) gaspi_delay(); lsock = socket(AF_INET, SOCK_STREAM, 0); if(lsock < 0) { gaspi_print_error("Failed to create socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; return NULL; } if( 0 != gaspi_sn_set_default_opts(lsock) ) { gaspi_print_error("Failed to modify socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } signal(SIGPIPE, SIG_IGN); struct sockaddr_in listeningAddress; listeningAddress.sin_family = AF_INET; listeningAddress.sin_port = htons((glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket)); listeningAddress.sin_addr.s_addr = htonl(INADDR_ANY); if(bind(lsock, (struct sockaddr*)(&listeningAddress), sizeof(listeningAddress)) < 0) { gaspi_print_error("Failed to bind socket (port %d)", glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERR_SN_PORT; close(lsock); return NULL; } if ( 0 != gaspi_sn_set_non_blocking(lsock) ) { gaspi_print_error("Failed to set socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } if(listen(lsock, SOMAXCONN) < 0) { gaspi_print_error("Failed to listen on socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } esock = epoll_create(GASPI_EPOLL_CREATE); if(esock < 0) { gaspi_print_error("Failed to create IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } /* add lsock to epoll instance */ ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) ); if(ev.data.ptr == NULL) { gaspi_print_error("Failed to allocate memory"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); 
return NULL; } ev_mgmt = ev.data.ptr; ev_mgmt->fd = lsock; ev.events = EPOLLIN; if(epoll_ctl(esock, EPOLL_CTL_ADD, lsock, &ev) < 0) { gaspi_print_error("Failed to modify IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } ret_ev = calloc(GASPI_EPOLL_MAX_EVENTS, sizeof(ev)); if(ret_ev == NULL) { gaspi_print_error("Failed to allocate memory"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } /* main events loop */ while(1) { n = epoll_wait(esock,ret_ev, GASPI_EPOLL_MAX_EVENTS, -1); /* loop over all triggered events */ for( i = 0; i < n; i++ ) { mgmt = ret_ev[i].data.ptr; if( (ret_ev[i].events & EPOLLERR) || (ret_ev[i].events & EPOLLHUP) || !((ret_ev[i].events & EPOLLIN) || (ret_ev[i].events & EPOLLOUT )) ) { /* an error has occured on this fd. close it => removed from event list. */ gaspi_print_error( "Erroneous event." ); shutdown(mgmt->fd, SHUT_RDWR); close(mgmt->fd); free(mgmt); continue; } else if(mgmt->fd == lsock) { /* process all new connections */ struct sockaddr in_addr; socklen_t in_len = sizeof(in_addr); int nsock = accept( lsock, &in_addr, &in_len ); if(nsock < 0) { if( (errno == EAGAIN) || (errno == EWOULDBLOCK) ) { /* we have processed incoming connection */ break; } else { /* at least check/fix open files limit */ int errsv = errno; if(errsv == EMFILE) { if( 0 == _gaspi_check_ofile_limit() ) { nsock = accept( lsock, &in_addr, &in_len ); } } /* still erroneous? => makes no sense to continue */ if(nsock < 0) { gaspi_print_error( "Failed to accept connection." ); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } } } /* new socket */ if( 0 != gaspi_sn_set_non_blocking( nsock ) ) { gaspi_print_error( "Failed to set socket options." 
); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } /* add nsock */ ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) ); if(ev.data.ptr == NULL) { gaspi_print_error("Failed to allocate memory."); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } ev_mgmt = ev.data.ptr; ev_mgmt->fd = nsock; ev_mgmt->blen = sizeof(gaspi_cd_header); ev_mgmt->bdone = 0; ev_mgmt->op = GASPI_SN_HEADER; ev.events = EPOLLIN ; /* read only */ if(epoll_ctl( esock, EPOLL_CTL_ADD, nsock, &ev ) < 0) { gaspi_print_error("Failed to modify IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } continue; }/* if new connection(s) */ else { /* read or write ops */ int io_err = 0; if( ret_ev[i].events & EPOLLIN ) { while( 1 ) { int rcount = 0; int rsize = mgmt->blen - mgmt->bdone; char *ptr = NULL; if( mgmt->op == GASPI_SN_HEADER ) { /* TODO: is it valid? */ ptr = (char *) &mgmt->cdh; rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize ); } else if( mgmt->op == GASPI_SN_CONNECT ) { while( !glb_gaspi_dev_init ) gaspi_delay(); ptr = pgaspi_dev_get_rrcd(mgmt->cdh.rank); rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize ); } /* errno==EAGAIN => we have read all data */ int errsv = errno; if(rcount < 0) { if (errsv == ECONNRESET || errsv == ENOTCONN) { gaspi_print_error(" Failed to read (op %d)", mgmt->op); } if(errsv != EAGAIN || errsv != EWOULDBLOCK) { gaspi_print_error(" Failed to read (op %d).", mgmt->op); io_err = 1; } break; } else if(rcount == 0) /* the remote side has closed the connection */ { io_err = 1; break; } else { mgmt->bdone += rcount; /* read all data? */ if(mgmt->bdone == mgmt->blen) { /* we got header, what do we have to do ? 
*/ if(mgmt->op == GASPI_SN_HEADER) { if(mgmt->cdh.op == GASPI_SN_PROC_KILL) { _exit(-1); } else if(mgmt->cdh.op == GASPI_SN_CONNECT) { GASPI_SN_RESET_EVENT( mgmt, mgmt->cdh.op_len, mgmt->cdh.op ); } else if(mgmt->cdh.op == GASPI_SN_PROC_PING) { GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_GRP_CHECK) { struct{gaspi_group_t group;int tnc, cs, ret;} gb; memset(&gb, 0, sizeof(gb)); gb.ret = -1; gb.cs = 0; const int group = mgmt->cdh.rank; const int tnc = mgmt->cdh.tnc; lock_gaspi_tout (&glb_gaspi_group_ctx[group].del, GASPI_BLOCK); if(glb_gaspi_group_ctx[group].id >= 0) { if(glb_gaspi_group_ctx[group].tnc == tnc) { int i; gb.ret = 0; gb.tnc = tnc; for(i = 0; i < tnc; i++) { if( NULL != glb_gaspi_group_ctx[group].rank_grp ) gb.cs ^= glb_gaspi_group_ctx[group].rank_grp[i]; } } } unlock_gaspi (&glb_gaspi_group_ctx[group].del); if(gaspi_sn_writen( mgmt->fd, &gb, sizeof(gb) ) < sizeof(gb) ) { gaspi_print_error("Failed response to group check."); io_err = 1; break; } GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_GRP_CONNECT) { while( !glb_gaspi_dev_init || ( glb_gaspi_group_ctx[mgmt->cdh.ret].id == -1) ) gaspi_delay(); /* TODO: check the pointer */ if(gaspi_sn_writen( mgmt->fd, &glb_gaspi_group_ctx[mgmt->cdh.ret].rrcd[glb_gaspi_ctx.rank], sizeof(gaspi_rc_mseg) ) < sizeof(gaspi_rc_mseg) ) { gaspi_print_error("Failed to connect group."); io_err = 1; break; } GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_SEG_REGISTER) { int rret = gaspi_sn_segment_register(mgmt->cdh); /* write back result of registration */ if(gaspi_sn_writen( mgmt->fd, &rret, sizeof(int) ) < sizeof(int) ) { gaspi_print_error("Failed response to segment register."); io_err = 1; break; } GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } }/* !header */ else if(mgmt->op == GASPI_SN_CONNECT) { /* TODO: to remove 
*/ while( !glb_gaspi_dev_init ) gaspi_delay(); const size_t len = pgaspi_dev_get_sizeof_rc(); char *ptr = NULL; gaspi_return_t eret = pgaspi_create_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK); if( eret == GASPI_SUCCESS ) { eret = pgaspi_connect_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK); if( eret == GASPI_SUCCESS) { ptr = pgaspi_dev_get_lrcd(mgmt->cdh.rank); } } if( eret != GASPI_SUCCESS ) { /* We set io_err, connection is closed and remote peer reads EOF */ io_err = 1; } else { if( NULL != ptr ) { if( gaspi_sn_writen( mgmt->fd, ptr, len ) < sizeof(len) ) { gaspi_print_error("Failed response to connection request from %u.", mgmt->cdh.rank); io_err = 1; } } } GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else { gaspi_print_error("Received unknown SN operation"); GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } break; } /* if all data */ }/* else */ }/* while(1) read */ }/* read in */ if( io_err ) { shutdown(mgmt->fd, SHUT_RDWR); close(mgmt->fd); free(mgmt); } } } /* for each event */ }/* event loop while(1) */ return NULL; }
gaspi_return_t pgaspi_passive_send (const gaspi_segment_id_t segment_id_local, const gaspi_offset_t offset_local, const gaspi_rank_t rank, const gaspi_size_t size, const gaspi_timeout_t timeout_ms) { #ifdef DEBUG if (glb_gaspi_ctx_ib.rrmd[segment_id_local] == NULL) { gaspi_printf("Debug: Invalid local segment (gaspi_passive_send)\n"); return GASPI_ERROR; } if( rank >= glb_gaspi_ctx.tnc) { gaspi_printf("Debug: Invalid rank (gaspi_passive_send)\n"); return GASPI_ERROR; } if( offset_local > glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].size) { gaspi_printf("Debug: Invalid offsets (gaspi_passive_send)\n"); return GASPI_ERROR; } if( size < 1 || size > GASPI_MAX_TSIZE_P ) { gaspi_printf("Debug: Invalid size (gaspi_passive_send)\n"); return GASPI_ERROR; } #endif struct ibv_send_wr *bad_wr; struct ibv_sge slist; struct ibv_send_wr swr; struct ibv_wc wc_send; gaspi_cycles_t s0; lock_gaspi_tout (&glb_gaspi_ctx.lockPS, timeout_ms); const int byte_id = rank >> 3; const int bit_pos = rank - (byte_id * 8); const unsigned char bit_cmp = 1 << bit_pos; if (glb_gaspi_ctx_ib.ne_count_p[byte_id] & bit_cmp) goto checkL; slist.addr = (uintptr_t) (glb_gaspi_ctx_ib. 
rrmd[segment_id_local][glb_gaspi_ctx.rank].addr + NOTIFY_OFFSET + offset_local); slist.length = size; slist.lkey = glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr->lkey; swr.sg_list = &slist; swr.num_sge = 1; swr.opcode = IBV_WR_SEND; swr.wr_id = rank; swr.send_flags = IBV_SEND_SIGNALED; swr.next = NULL; if (ibv_post_send (glb_gaspi_ctx_ib.qpP[rank], &swr, &bad_wr)) { glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][rank] = 1; unlock_gaspi (&glb_gaspi_ctx.lockPS); return GASPI_ERROR; } glb_gaspi_ctx_ib.ne_count_p[byte_id] |= bit_cmp; checkL: s0 = gaspi_get_cycles (); int ne = 0; do { ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqP, 1, &wc_send); if (ne == 0) { const gaspi_cycles_t s1 = gaspi_get_cycles (); const gaspi_cycles_t tdelta = s1 - s0; const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs; if (ms > timeout_ms) { unlock_gaspi (&glb_gaspi_ctx.lockPS); return GASPI_TIMEOUT; } } } while (ne == 0); if ((ne < 0) || (wc_send.status != IBV_WC_SUCCESS)) { glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][wc_send.wr_id] = 1; unlock_gaspi (&glb_gaspi_ctx.lockPS); return GASPI_ERROR; } glb_gaspi_ctx_ib.ne_count_p[byte_id] &= (~bit_cmp); unlock_gaspi (&glb_gaspi_ctx.lockPS); return GASPI_SUCCESS; }
gaspi_return_t pgaspi_atomic_fetch_add (const gaspi_segment_id_t segment_id, const gaspi_offset_t offset, const gaspi_rank_t rank, const gaspi_atomic_value_t val_add, gaspi_atomic_value_t * const val_old, const gaspi_timeout_t timeout_ms) { #ifdef DEBUG if (glb_gaspi_ctx_ib.rrmd[segment_id] == NULL) { gaspi_printf("Debug: Invalid segment (gaspi_atomic_fetch_add)\n"); return GASPI_ERROR; } if( rank >= glb_gaspi_ctx.tnc) { gaspi_printf("Debug: Invalid rank (gaspi_atomic_fetch_add)\n"); return GASPI_ERROR; } if( offset > glb_gaspi_ctx_ib.rrmd[segment_id][rank].size) { gaspi_printf("Debug: Invalid offsets (gaspi_atomic_fetch_add)\n"); return GASPI_ERROR; } if( val_old == NULL) { gaspi_printf("Debug: Invalid pointer in parameter val_old (gaspi_atomic_fetch_add)\n"); return GASPI_ERROR; } #endif struct ibv_send_wr *bad_wr; struct ibv_sge slist; struct ibv_send_wr swr; int i; if (offset & 0x7) { gaspi_print_error("Unaligned offset"); return GASPI_ERROR; } lock_gaspi_tout (&glb_gaspi_group_ib[0].gl, timeout_ms); slist.addr = (uintptr_t) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET); slist.length = 8; slist.lkey = glb_gaspi_group_ib[0].mr->lkey; swr.wr.atomic.remote_addr = glb_gaspi_ctx_ib.rrmd[segment_id][rank].addr + NOTIFY_OFFSET + offset; swr.wr.atomic.rkey = glb_gaspi_ctx_ib.rrmd[segment_id][rank].rkey; swr.wr.atomic.compare_add = val_add; swr.wr_id = rank; swr.sg_list = &slist; swr.num_sge = 1; swr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD; swr.send_flags = IBV_SEND_SIGNALED; swr.next = NULL; if (ibv_post_send (glb_gaspi_ctx_ib.qpGroups[rank], &swr, &bad_wr)) { glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = 1; unlock_gaspi (&glb_gaspi_group_ib[0].gl); return GASPI_ERROR; } glb_gaspi_ctx_ib.ne_count_grp++; int ne = 0; for (i = 0; i < glb_gaspi_ctx_ib.ne_count_grp; i++) { do { ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqGroups, 1, glb_gaspi_ctx_ib.wc_grp_send); } while (ne == 0); if ((ne < 0) || (glb_gaspi_ctx_ib.wc_grp_send[i].status != IBV_WC_SUCCESS)) { glb_gaspi_ctx. 
qp_state_vec[GASPI_COLL_QP][glb_gaspi_ctx_ib.wc_grp_send[i]. wr_id] = 1; unlock_gaspi (&glb_gaspi_group_ib[0].gl); return GASPI_ERROR; } } glb_gaspi_ctx_ib.ne_count_grp = 0; *val_old = *((gaspi_atomic_value_t *) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET)); unlock_gaspi (&glb_gaspi_group_ib[0].gl); return GASPI_SUCCESS; }