/* Pin the calling thread to the CPUs belonging to the given NUMA socket.
   Returns GASPI_SUCCESS on success, GASPI_ERROR otherwise. */
gaspi_return_t
pgaspi_set_socket_affinity (const gaspi_uchar socket)
{
  cpu_set_t cpus_of_socket;

  /* GPI-2 supports at most 4 NUMA sockets. */
  if (socket >= 4)
    {
      gaspi_print_error("GPI-2 only allows up to a maximum of 4 NUMA sockets");
      return GASPI_ERROR;
    }

  if (gaspi_get_affinity_mask (socket, &cpus_of_socket) < 0)
    {
      gaspi_print_error ("Failed to get affinity mask");
      return GASPI_ERROR;
    }

  if (sched_setaffinity (0, sizeof (cpu_set_t), &cpus_of_socket) != 0)
    {
      gaspi_print_error ("Failed to set affinity");
      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
static inline int _gaspi_sn_segment_register_command(const gaspi_rank_t rank, void * arg) { gaspi_segment_id_t segment_id = * (gaspi_segment_id_t *) arg; gaspi_cd_header cdh; memset(&cdh, 0, sizeof(gaspi_cd_header)); cdh.op_len = 0; /* in-place */ cdh.op = GASPI_SN_SEG_REGISTER; cdh.rank = glb_gaspi_ctx.rank; cdh.seg_id = segment_id; cdh.rkey = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].rkey; cdh.addr = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].addr; cdh.size = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].size; #ifdef GPI2_CUDA cdh.host_rkey = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].host_rkey; cdh.host_addr = glb_gaspi_ctx.rrmd[segment_id][glb_gaspi_ctx.rank].host_addr; #endif ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[rank], &cdh, sizeof(gaspi_cd_header)); if(ret != sizeof(gaspi_cd_header)) { gaspi_print_error("Failed to write to rank %u (args: %d %p %lu)", rank, glb_gaspi_ctx.sockfd[rank], &cdh, sizeof(gaspi_cd_header)); return -1; } int result = 1; ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[rank], &result, sizeof(int)); if( rret != sizeof(int) ) { gaspi_print_error("Failed to read from rank %u (args: %d %p %lu)", rank, glb_gaspi_ctx.sockfd[rank], &rret, sizeof(int)); return -1; } /* Registration failed on the remote side */ if( result != 0) return -1; return 0; }
/* TODO: deal with timeout */
/* Broadcast the topology (hostname/offset table) from the root (rank 0)
   to all ranks over a binomial tree: every non-root rank first receives
   from its tree parent, then forwards to its children.
   Returns 0 on success, -1 on failure. */
int gaspi_sn_broadcast_topology(gaspi_context *ctx, const gaspi_timeout_t timeout_ms)
{
  int mask = 0x1;
  int relative_rank;
  int dst, src;
  const int root = 0;

  /* Position of this rank in the tree, relative to the root. */
  relative_rank = (ctx->rank >= root) ? ctx->rank - root : ctx->rank - root + ctx->tnc;

  /* Phase 1: walk the mask up until the bit identifying our parent is
     found and receive from it. The root (relative_rank == 0) never
     takes this branch and receives nothing. */
  while(mask <= ctx->tnc)
    {
      if(relative_rank & mask)
	{
	  /* src computed for documentation/symmetry; the receive itself
	     accepts the incoming connection. */
	  src = ctx->rank - mask;
	  if(src < 0)
	    src += ctx->tnc;

	  if(gaspi_sn_recv_topology(ctx) != 0)
	    {
	      gaspi_print_error("Failed to receive topology.");
	      return -1;
	    }
	  break;
	}
      mask <<=1;
    }

  /* Phase 2: forward to all children below the bit we stopped at. */
  mask >>=1;
  while (mask > 0)
    {
      if(relative_rank + mask < ctx->tnc)
	{
	  dst = ctx->rank + mask;
	  if(dst >= ctx->tnc)
	    dst -= ctx->tnc;

	  if(gaspi_sn_send_topology(ctx, dst, timeout_ms) != 0)
	    {
	      gaspi_print_error("Failed to send topology to %d", dst);
	      return -1;
	    }
	}
      mask >>=1;
    }

  return 0;
}
/* Open a TCP connection to host hn on the given port and apply the
   default SN socket options.
   Returns the connected socket fd on success, -1 on failure, or -2 when
   the process is out of file descriptors (EMFILE) and the limit could
   not be raised. */
static int
gaspi_sn_connect2port_intern(const char *hn, const unsigned short port)
{
  int ret;
  int sockfd = -1;
  struct sockaddr_in Host;
  struct hostent *serverData;

  sockfd = socket ( AF_INET, SOCK_STREAM, 0 );
  if( -1 == sockfd )
    {
      /* at least deal with open files limit */
      int errsv = errno;
      if(errsv == EMFILE)
	{
	  /* Try to raise the open-files limit and retry once. */
	  if( 0 == _gaspi_check_ofile_limit() )
	    {
	      sockfd = socket(AF_INET,SOCK_STREAM,0);
	      if(sockfd == -1)
		return -1;
	    }
	  else
	    return -2; /* still over the fd limit */
	}
      else
	return -1;
    }

  Host.sin_family = AF_INET;
  Host.sin_port = htons(port);

  /* NOTE(review): gethostbyname() is obsolete and not thread-safe;
     getaddrinfo() would be the modern replacement — confirm SN threading
     assumptions before changing. */
  if((serverData = gethostbyname(hn)) == NULL)
    {
      close(sockfd);
      return -1;
    }

  memcpy(&Host.sin_addr, serverData->h_addr, serverData->h_length);

  /* TODO: we need to be able to distinguish between an initialization
     connection attemp and a connection attempt during run-time where
     the remote node is gone (FT) */
  ret = connect( sockfd, (struct sockaddr *) &Host, sizeof(Host) );
  if( 0 != ret )
    {
      close( sockfd );
      return -1;
    }

  if( 0 != gaspi_sn_set_default_opts(sockfd) )
    {
      gaspi_print_error("Failed to set options on socket");
      close(sockfd);
      return -1;
    }

  return sockfd;
}
/* Apply the default SN socket options: allow quick address reuse
   (SO_REUSEADDR) and disable Nagle's algorithm (TCP_NODELAY).
   Returns 0 on success, -1 on failure. */
int
gaspi_sn_set_default_opts(int sockfd)
{
  const int enable = 1;

  if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)) < 0)
    {
      gaspi_print_error("Failed to set options on socket");
      return -1;
    }

  if(setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable)) < 0)
    {
      gaspi_print_error("Failed to set options on socket");
      return -1;
    }

  return 0;
}
/* Wait for *counter outstanding work requests on the send completion
   queue of the given queue, decrementing *counter as completions drain.
   Returns GASPI_SUCCESS, GASPI_TIMEOUT if timeout_ms elapses while
   polling, or GASPI_ERROR on a failed completion (the queue entry is
   then marked GASPI_STATE_CORRUPT). */
gaspi_return_t
pgaspi_dev_wait (const gaspi_queue_id_t queue, int * counter, const gaspi_timeout_t timeout_ms)
{
  int ne = 0, i;
  struct ibv_wc wc;

  const int nr = *counter;
  const gaspi_cycles_t s0 = gaspi_get_cycles ();

  for (i = 0; i < nr; i++)
    {
      do
	{
	  /* Poll one completion at a time from the send CQ. */
	  ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqC[queue], 1, &wc);
	  *counter -= ne;

	  if (ne == 0)
	    {
	      /* Nothing completed yet: check elapsed time (cycle counter
		 converted to ms) against the timeout. */
	      const gaspi_cycles_t s1 = gaspi_get_cycles ();
	      const gaspi_cycles_t tdelta = s1 - s0;

	      const float ms = (float) tdelta * glb_gaspi_ctx.cycles_to_msecs;
	      if (ms > timeout_ms)
		{
		  return GASPI_TIMEOUT;
		}
	    }
	}
      while (ne == 0);

      if ((ne < 0) || (wc.status != IBV_WC_SUCCESS))
	{
	  gaspi_print_error("Failed request to %lu. Queue %d might be broken %s",
			    wc.wr_id, queue, ibv_wc_status_str(wc.status) );
	  /* wr_id carries the target rank; mark its QP state corrupt. */
	  glb_gaspi_ctx.qp_state_vec[queue][wc.wr_id] = GASPI_STATE_CORRUPT;
	  return GASPI_ERROR;
	}
    }

#ifdef GPI2_CUDA
  /* All requests done: release the CUDA events used on this queue. */
  int j,k;
  for(k = 0;k < glb_gaspi_ctx.gpu_count; k++)
    {
      for(j = 0; j < GASPI_CUDA_EVENTS; j++)
	gpus[k].events[queue][j].ib_use = 0;
    }
#endif

  return GASPI_SUCCESS;
}
static inline int _gaspi_sn_group_connect(const gaspi_rank_t rank, void *arg) { int i = (int) rank; gaspi_group_t group = *(gaspi_group_t *) arg; gaspi_group_ctx *group_to_commit = &(glb_gaspi_group_ctx[group]); gaspi_cd_header cdh; memset(&cdh, 0, sizeof(gaspi_cd_header)); cdh.op_len = sizeof(gaspi_rc_mseg); cdh.op = GASPI_SN_GRP_CONNECT; cdh.rank = glb_gaspi_ctx.rank; cdh.ret = group; ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header)); if( ret != sizeof(gaspi_cd_header) ) { gaspi_print_error("Failed to write to %u (%ld %d %p %lu)", i, ret, glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header)); return -1; } ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], &group_to_commit->rrcd[i], sizeof(gaspi_rc_mseg)); if( rret != sizeof(gaspi_rc_mseg) ) { gaspi_print_error("Failed to read from %d (%ld %d %p %lu)", i, ret, glb_gaspi_ctx.sockfd[i], &group_to_commit->rrcd[i], sizeof(gaspi_rc_mseg)); return -1; } return 0; }
/* Exchange device-level connection data with rank: send a
   GASPI_SN_CONNECT header followed by our local connection record, then
   read back the remote record. A no-op when the device has no
   connection data to exchange (rc_size == 0).
   Returns 0 on success, -1 on failure. */
static inline int
_gaspi_sn_connect_command(const gaspi_rank_t rank)
{
  const int i = (int) rank;

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  const size_t rc_size = pgaspi_dev_get_sizeof_rc();

  cdh.op_len = (int) rc_size;
  cdh.op = GASPI_SN_CONNECT;
  cdh.rank = glb_gaspi_ctx.rank;

  /* if we have something to exchange */
  if(rc_size > 0 )
    {
      ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
      if(ret != sizeof(gaspi_cd_header))
	{
	  gaspi_print_error("Failed to write to %d", i);
	  return -1;
	}

      /* Our local connection record follows the header. */
      ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], pgaspi_dev_get_lrcd(i), rc_size);
      if(ret != (ssize_t) rc_size)
	{
	  gaspi_print_error("Failed to write to %d", i);
	  return -1;
	}

      /* Read the remote connection record into our table. */
      ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], pgaspi_dev_get_rrcd(i), rc_size);
      if( rret != (ssize_t) rc_size )
	{
	  gaspi_print_error("Failed to read from %d", i);
	  return -1;
	}
    }

  return 0;
}
/* Report the number of GPUs usable by GPI-2.
   Fails with GASPI_ERROR when GPU support was never initialized. */
gaspi_return_t
gaspi_number_of_GPUs(gaspi_gpu_num *gpus)
{
  gaspi_verify_init("gaspi_number_of_GPUs");
  gaspi_verify_null_ptr(gpus);

  if( !glb_gaspi_ctx.use_gpus )
    {
      gaspi_print_error("GPUs are not initialized.");
      return GASPI_ERROR;
    }

  *gpus = glb_gaspi_ctx.gpu_count;

  return GASPI_SUCCESS;
}
/* Report the number of GPUs usable by GPI-2.
   Fails with GASPI_ERROR when GPU support was never initialized. */
gaspi_return_t
gaspi_gpu_number(gaspi_number_t* num_gpus)
{
  gaspi_verify_init("gaspi_gpu_number");
  gaspi_verify_null_ptr(num_gpus);

  if( 0 == glb_gaspi_ctx.use_gpus )
    {
      gaspi_print_error("GPUs are not initialized.");
      return GASPI_ERROR;
    }

  *num_gpus = glb_gaspi_ctx.gpu_count;

  return GASPI_SUCCESS;
}
static int _gaspi_find_GPU_numa_node(int cudevice) { CUresult cres; int domain, bus, dev; char path[128]; FILE *sysfile = NULL; domain = 0; #ifdef CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID cres = cuDeviceGetAttribute(&domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice); if( CUDA_SUCCESS != cres ) { errno = ENOSYS; return -1; } #endif cres = cuDeviceGetAttribute(&bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice); if( CUDA_SUCCESS != cres ) { return -1; } cres = cuDeviceGetAttribute(&dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice); if( CUDA_SUCCESS != cres ) { return -1; } sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/numa_node", domain, bus, dev); sysfile = fopen(path, "r"); if( !sysfile ) { gaspi_print_error("Failed to open %s.", path); return -1; } int numa_node = -1; fscanf (sysfile, "%1d", &numa_node); fclose(sysfile); return numa_node; }
/* TODO: Not clear to me why we need this function */
/* Fill gpu_ids with the CUDA device ids of all GPUs registered with
   GPI-2. The caller must provide room for gpu_count entries. */
gaspi_return_t
gaspi_GPU_ids(gaspi_gpu_t *gpu_ids)
{
  gaspi_verify_init("gaspi_GPU_ids");
  gaspi_verify_null_ptr(gpu_ids);

  if( !glb_gaspi_ctx.use_gpus )
    {
      gaspi_print_error("GPUs are not initialized.");
      return GASPI_ERROR;
    }

  int n;
  for(n = 0; n < glb_gaspi_ctx.gpu_count; n++)
    {
      gpu_ids[n] = gpus[n].device_id;
    }

  return GASPI_SUCCESS;
}
/* Report the NUMA socket this process is pinned to.
   Only valid when NUMA pinning was requested via the launcher
   (GASPI_SET_NUMA_SOCKET=1, i.e. gaspi_run -N); otherwise returns
   GASPI_ERR_ENV. */
gaspi_return_t
pgaspi_numa_socket(gaspi_uchar * const socket)
{
  const char * const env = getenv ("GASPI_SET_NUMA_SOCKET");

  if( env != NULL && atoi(env) == 1 )
    {
      *socket = (gaspi_uchar) glb_gaspi_ctx.localSocket;
      return GASPI_SUCCESS;
    }

  gaspi_print_error("NUMA was not enabled (-N option of gaspi_run)");
  return GASPI_ERR_ENV;
}
gaspi_return_t pgaspi_proc_term (const gaspi_timeout_t timeout) { int i; gaspi_verify_init("gaspi_proc_term"); if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout)) return GASPI_TIMEOUT; pthread_kill(glb_gaspi_ctx.snt, SIGSTKFLT); if(glb_gaspi_ctx.sockfd != NULL) { for(i = 0;i < glb_gaspi_ctx.tnc; i++) { shutdown(glb_gaspi_ctx.sockfd[i],2); if(glb_gaspi_ctx.sockfd[i] > 0) close(glb_gaspi_ctx.sockfd[i]); } free(glb_gaspi_ctx.sockfd); } #ifdef GPI2_WITH_MPI if(glb_gaspi_ctx.rank == 0) { if(remove(glb_gaspi_ctx.mfile) < 0) { gaspi_print_error("Failed to remove tmp file (%s)", glb_gaspi_ctx.mfile); } } #endif if(pgaspi_cleanup_core() != GASPI_SUCCESS) goto errL; unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_SUCCESS; errL: unlock_gaspi (&glb_gaspi_ctx_lock); return GASPI_ERROR; }
/* Report the CPU frequency in MHz.
   Uses the cached value after initialization, measures otherwise.
   Returns GASPI_ERROR when the frequency cannot be determined. */
gaspi_return_t
pgaspi_cpu_frequency (gaspi_float * const cpu_mhz)
{
  gaspi_verify_null_ptr(cpu_mhz);

  *cpu_mhz = glb_gaspi_init ? glb_gaspi_ctx.mhz : gaspi_get_cpufreq ();

  if (0.0f == *cpu_mhz)
    {
      gaspi_print_error ("Failed to get CPU frequency");
      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
/* Determine the NUMA node of the InfiniBand device by reading its
   numa_node entry in sysfs.
   Returns the node number (-1 may also mean "no NUMA affinity") or -1
   on failure. */
static int
_gaspi_find_GPU_ib_numa_node()
{
  char path[128];
  int numa_node = -1; /* BUGFIX: was returned uninitialized if fscanf failed */
  FILE *sysfile = NULL;

  snprintf(path, sizeof(path), "/sys/class/infiniband/%s/device/numa_node",
	   ibv_get_device_name(glb_gaspi_ctx_ib.ib_dev));

  sysfile = fopen(path, "r");
  if (!sysfile)
    {
      gaspi_print_error("Failed to open %s.", path);
      return -1;
    }

  /* BUGFIX: the original used "%1d", which reads a single digit and
     cannot parse "-1" or nodes >= 10; also check the fscanf result. */
  if( fscanf (sysfile, "%d", &numa_node) != 1 )
    {
      numa_node = -1;
    }
  fclose(sysfile);

  return numa_node;
}
gaspi_return_t pgaspi_proc_kill (const gaspi_rank_t rank,const gaspi_timeout_t timeout_ms) { gaspi_return_t eret = GASPI_ERROR; gaspi_verify_init("gaspi_proc_kill"); gaspi_verify_rank(rank); if( rank == glb_gaspi_ctx.rank ) { gaspi_print_error("Invalid rank to kill"); return GASPI_ERR_INV_RANK; } if(lock_gaspi_tout(&glb_gaspi_ctx_lock, timeout_ms)) return GASPI_TIMEOUT; eret = gaspi_sn_command(GASPI_SN_PROC_KILL, rank, timeout_ms, NULL); unlock_gaspi(&glb_gaspi_ctx_lock); return eret; }
/* TODO: Do we really need this function or at least make it part of the
   GPU interface and allow clients to use it? */
/* Fill gpu_ids with the CUDA device ids of all GPUs registered with
   GPI-2. The caller must provide room for gpu_count entries. */
gaspi_return_t
gaspi_gpu_ids(gaspi_gpu_id_t* gpu_ids)
{
  gaspi_verify_init("gaspi_gpu_ids");
  gaspi_verify_null_ptr(gpu_ids);

  gaspi_context_t const * const gctx = &glb_gaspi_ctx;

  if( !gctx->use_gpus )
    {
      gaspi_print_error("GPUs are not found/initialized.");
      return GASPI_ERROR;
    }

  int n;
  for(n = 0; n < gctx->gpu_count; n++)
    {
      gpu_ids[n] = gpus[n].device_id;
    }

  return GASPI_SUCCESS;
}
/* Send a single header-only SN command (no payload) to rank.
   Returns 0 on success, -1 on failure. */
static inline int
_gaspi_sn_single_command(const gaspi_rank_t rank, const enum gaspi_sn_ops op)
{
  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = 1;
  cdh.op = op;
  cdh.rank = rank;
  cdh.tnc = glb_gaspi_ctx.tnc;

  const ssize_t written =
    gaspi_sn_writen(glb_gaspi_ctx.sockfd[rank], &cdh, sizeof(gaspi_cd_header));

  if( written != sizeof(gaspi_cd_header) )
    {
      gaspi_print_error("Failed to write to %u (%d %p %lu)",
			rank, glb_gaspi_ctx.sockfd[rank], &cdh, sizeof(gaspi_cd_header));
      return -1;
    }

  return 0;
}
/* Establish the SN socket connection to rank, retrying until either
   the connection succeeds or timeout_ms elapses.
   On success glb_gaspi_ctx.sockfd[rank] holds the connected fd.
   Returns GASPI_SUCCESS, GASPI_TIMEOUT, or GASPI_ERR_EMFILE when out
   of file descriptors. */
gaspi_return_t
gaspi_sn_connect_to_rank(const gaspi_rank_t rank, gaspi_timeout_t timeout_ms)
{
  /* NOTE(review): ftime() is obsolete (POSIX removed it); clock_gettime
     would be the modern replacement. */
  struct timeb t0, t1;
  ftime(&t0);

#ifdef DEBUG
  if( strcmp(gaspi_get_hn(rank), "") == 0 )
    {
      gaspi_print_error("Failed to obtain hostname for rank %u", rank);
      return GASPI_ERROR;
    }
#endif

  /* TODO: introduce backoff delay? */
  /* Busy-retry until connected or the timeout expires. */
  while(glb_gaspi_ctx.sockfd[rank] == -1)
    {
      glb_gaspi_ctx.sockfd[rank] =
	gaspi_sn_connect2port(gaspi_get_hn(rank),
			      glb_gaspi_cfg.sn_port + glb_gaspi_ctx.poff[rank],
			      timeout_ms);

      /* -2 signals fd exhaustion (EMFILE) — not worth retrying. */
      if( -2 == glb_gaspi_ctx.sockfd[rank] )
	return GASPI_ERR_EMFILE;

      if( -1 == glb_gaspi_ctx.sockfd[rank] )
	{
	  ftime(&t1);
	  const unsigned int delta_ms = (t1.time - t0.time) * 1000 + (t1.millitm - t0.millitm);
	  if(delta_ms > timeout_ms)
	    return GASPI_TIMEOUT;
	}
    }

  return GASPI_SUCCESS;
}
/* Initialize the GASPI process: set up the SN backend thread, read the
   machinefile (master only), broadcast the topology, initialize the
   core/device layer and (depending on configuration) pre-connect and
   create GASPI_GROUP_ALL.
   Returns GASPI_SUCCESS, GASPI_TIMEOUT, or a specific error code. */
gaspi_return_t
pgaspi_proc_init (const gaspi_timeout_t timeout_ms)
{
  gaspi_return_t eret = GASPI_ERROR;
  int i;
  const int num_queues = (int) glb_gaspi_cfg.queue_num;

  if(lock_gaspi_tout (&glb_gaspi_ctx_lock, timeout_ms))
    return GASPI_TIMEOUT;

  /* One-time SN setup. */
  if(glb_gaspi_sn_init == 0)
    {
      /* NOTE(review): these lock initializations are immediately wiped
	 by the memset below (which zeroes them anyway) — confirm
	 intent. */
      glb_gaspi_ctx.lockPS.lock = 0;
      glb_gaspi_ctx.lockPR.lock = 0;

      for (i = 0; i < num_queues; i++)
	glb_gaspi_ctx.lockC[i].lock = 0;

      memset (&glb_gaspi_ctx, 0, sizeof (gaspi_context));

      /* Record machine type (e.g. "x86_64"). */
      struct utsname mbuf;
      if (uname (&mbuf) == 0)
	{
	  snprintf (glb_gaspi_ctx.mtyp, 64, "%s", mbuf.machine);
	}

      //timing
      glb_gaspi_ctx.mhz = gaspi_get_cpufreq ();
      if (glb_gaspi_ctx.mhz == 0.0f)
	{
	  gaspi_print_error ("Failed to get CPU frequency");
	  goto errL;
	}

      glb_gaspi_ctx.cycles_to_msecs = 1.0f / (glb_gaspi_ctx.mhz * 1000.0f);

      //handle environment
      if(gaspi_handle_env(&glb_gaspi_ctx))
	{
	  gaspi_print_error("Failed to handle environment");
	  eret = GASPI_ERR_ENV;
	  goto errL;
	}

      //start sn_backend
      if(pthread_create(&glb_gaspi_ctx.snt, NULL, gaspi_sn_backend, NULL) != 0)
	{
	  gaspi_print_error("Failed to create SN thread");
	  goto errL;
	}

      glb_gaspi_sn_init = 1;
    }//glb_gaspi_sn_init

  if(glb_gaspi_ctx.procType == MASTER_PROC)
    {
      if(glb_gaspi_dev_init == 0)
	{
	  if(access (glb_gaspi_ctx.mfile, R_OK) == -1)
	    {
	      gaspi_print_error ("Incorrect permissions of machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }

	  //read hostnames
	  /* NOTE(review): getline returns ssize_t; storing it in an
	     int truncates on (unrealistically) long lines. */
	  char *line = NULL;
	  size_t len = 0;
	  int read;

	  FILE *fp = fopen (glb_gaspi_ctx.mfile, "r");
	  if (fp == NULL)
	    {
	      gaspi_print_error("Failed to open machinefile");
	      eret = GASPI_ERR_ENV;
	      goto errL;
	    }

	  /* First pass: count the nodes. */
	  glb_gaspi_ctx.tnc = 0;
	  while ((read = getline (&line, &len, fp)) != -1)
	    {
	      //we assume a single hostname per line
	      /* NOTE(review): this pass uses (read > 64) while the
		 second pass below uses (read >= 64) — confirm which
		 bound is intended. */
	      if ((read < 2) || (read > 64))
		continue;
	      glb_gaspi_ctx.tnc++;

	      if (glb_gaspi_ctx.tnc >= GASPI_MAX_NODES)
		break;
	    }

	  rewind (fp);

	  /* Hostname table: 65 bytes per node (64 chars + offset byte
	     region appended after tnc*64). */
	  free (glb_gaspi_ctx.hn_poff);

	  glb_gaspi_ctx.hn_poff = (char *) calloc (glb_gaspi_ctx.tnc, 65);
	  if(glb_gaspi_ctx.hn_poff == NULL)
	    {
	      gaspi_print_error("Debug: Failed to allocate memory");
	      goto errL;
	    }

	  /* poff (port offsets) lives directly behind the hostnames. */
	  glb_gaspi_ctx.poff = glb_gaspi_ctx.hn_poff + glb_gaspi_ctx.tnc * 64;

	  /* Second pass: store hostnames; poff[id] counts how many
	     earlier ranks share the same host (per-host port offset). */
	  int id = 0;
	  while((read = getline (&line, &len, fp)) != -1)
	    {
	      //we assume a single hostname per line
	      if((read < 2) || (read >= 64))
		continue;

	      int inList = 0;
	      for(i = 0; i < id; i++)
		{
		  //already in list ?
		  //TODO: 64? 63? Magic numbers -> just get cacheline from system or define as such
		  const int hnlen = MAX (strlen (glb_gaspi_ctx.hn_poff + i * 64), MIN (strlen (line) - 1, 63));
		  if(strncmp (glb_gaspi_ctx.hn_poff + i * 64, line, hnlen) == 0)
		    {
		      inList++;
		    }
		}

	      glb_gaspi_ctx.poff[id] = inList;

	      strncpy (glb_gaspi_ctx.hn_poff + id * 64, line, MIN (read - 1, 63));
	      id++;

	      if(id >= GASPI_MAX_NODES)
		break;
	    }

	  fclose (fp);
	  free (line);

	  //master
	  glb_gaspi_ctx.rank = 0;

	  free(glb_gaspi_ctx.sockfd);

	  glb_gaspi_ctx.sockfd = (int *) malloc (glb_gaspi_ctx.tnc * sizeof (int));
	  if(glb_gaspi_ctx.sockfd == NULL)
	    {
	      gaspi_print_error("Failed to allocate memory");
	      eret = GASPI_ERR_MEMALLOC;
	      goto errL;
	    }

	  for(i = 0; i < glb_gaspi_ctx.tnc; i++)
	    glb_gaspi_ctx.sockfd[i] = -1;
	}//glb_gaspi_dev_init
    }//MASTER_PROC
  else if(glb_gaspi_ctx.procType != WORKER_PROC)
    {
      gaspi_print_error ("Invalid node type (GASPI_TYPE)");
      eret = GASPI_ERR_ENV;
      goto errL;
    }

  if( 0 != gaspi_sn_broadcast_topology(&glb_gaspi_ctx, GASPI_BLOCK) )
    {
      gaspi_print_error("Failed topology broadcast");
      eret = GASPI_ERROR;
      goto errL;
    }

  if( (eret = pgaspi_init_core()) != GASPI_SUCCESS )
    {
      goto errL;
    }

  /* Unleash SN thread */
  __sync_fetch_and_add( &gaspi_master_topo_data, 1);

  gaspi_init_collectives();

  glb_gaspi_init = 1;

  unlock_gaspi (&glb_gaspi_ctx_lock);

  /* NOTE(review): from here on a goto errL unlocks an already-unlocked
     glb_gaspi_ctx_lock — confirm whether that is benign for this lock
     implementation. */
  if(glb_gaspi_cfg.build_infrastructure)
    {
      /* configuration tells us to pre-connect */
      if( GASPI_TOPOLOGY_STATIC == glb_gaspi_cfg.build_infrastructure )
	{
	  for(i = glb_gaspi_ctx.rank; i >= 0; i--)
	    {
	      if( (eret = pgaspi_connect((gaspi_rank_t) i, timeout_ms)) != GASPI_SUCCESS )
		{
		  goto errL;
		}
	    }
	}

      eret = pgaspi_group_all_local_create(timeout_ms);
      if(eret == GASPI_SUCCESS)
	{
	  eret = gaspi_barrier(GASPI_GROUP_ALL, timeout_ms);
	}
      else
	{
	  gaspi_print_error("Failed to create GASPI_GROUP_ALL.");
	}
    }
  else /* dont build_infrastructure */
    {
      /* just reserve GASPI_GROUP_ALL */
      glb_gaspi_ctx.group_cnt = 1;
      glb_gaspi_group_ctx[GASPI_GROUP_ALL].id = -2;//disable
      eret = GASPI_SUCCESS;
    }

#ifdef GPI2_CUDA
  /* init GPU counts */
  glb_gaspi_ctx.use_gpus = 0;
  glb_gaspi_ctx.gpu_count = 0;
#endif

  return eret;

 errL:
  unlock_gaspi (&glb_gaspi_ctx_lock);
  return eret;
}
/* Initialize the GASPI core: reset all group contexts, allocate the
   internal notification/atomic memory segment, per-rank endpoint and
   QP state tables, and initialize the device layer.
   Returns GASPI_SUCCESS or a specific error code. */
static gaspi_return_t
pgaspi_init_core()
{
  int i;

  /* NOTE(review): returns -1 (not a gaspi_return_t enumerator) when
     already initialized — confirm callers only compare against
     GASPI_SUCCESS. */
  if (glb_gaspi_dev_init)
    return -1;

  memset (&glb_gaspi_group_ctx, 0, GASPI_MAX_GROUPS * sizeof (gaspi_group_ctx));

  for (i = 0; i < GASPI_MAX_GROUPS; i++)
    {
      GASPI_RESET_GROUP(glb_gaspi_group_ctx, i);
      glb_gaspi_group_ctx[i].gl.lock = 0;
      glb_gaspi_group_ctx[i].del.lock = 0;
    }

  /* change/override num of queues at large scale */
  if (glb_gaspi_ctx.tnc > 1000 && glb_gaspi_cfg.queue_num > 1)
    {
      if(glb_gaspi_ctx.rank == 0)
	gaspi_printf("Warning: setting number of queues to 1\n");

      glb_gaspi_cfg.queue_num = 1;
    }

  /* Create internal memory space */
  /* Page-aligned buffer holding the notification area plus one atomic
     value. */
  const unsigned int size = NOTIFY_OFFSET + sizeof(gaspi_atomic_value_t);
  const long page_size = sysconf (_SC_PAGESIZE);

  if(page_size < 0)
    {
      gaspi_print_error ("Failed to get system's page size.");
      return GASPI_ERROR;
    }

  glb_gaspi_ctx.nsrc.size = size;

  if(posix_memalign ((void **) &glb_gaspi_ctx.nsrc.ptr, page_size, size)!= 0)
    {
      gaspi_print_error ("Memory allocation (posix_memalign) failed");
      return GASPI_ERR_MEMALLOC;
    }

  memset(glb_gaspi_ctx.nsrc.buf, 0, size);

  for(i = 0; i < GASPI_MAX_MSEGS; i++)
    {
      glb_gaspi_ctx.rrmd[i] = NULL;
    }

  glb_gaspi_ctx.ep_conn = (gaspi_endpoint_conn_t *) calloc(glb_gaspi_ctx.tnc, sizeof(gaspi_endpoint_conn_t));
  if (glb_gaspi_ctx.ep_conn == NULL)
    return GASPI_ERR_MEMALLOC;

  if(pgaspi_dev_init_core(&glb_gaspi_cfg) != 0)
    return GASPI_ERR_DEVICE;

  /* One QP state vector per queue plus the 3 internal QPs. */
  for(i = 0; i < GASPI_MAX_QP + 3; i++)
    {
      glb_gaspi_ctx.qp_state_vec[i] = (unsigned char *) calloc (glb_gaspi_ctx.tnc, sizeof(unsigned char));
      if(!glb_gaspi_ctx.qp_state_vec[i])
	{
	  return GASPI_ERR_MEMALLOC;
	}
    }

  glb_gaspi_dev_init = 1;

  return GASPI_SUCCESS;
}
/* Atomically add val_add to the 8-byte value at (segment_id, offset) on
   rank and return the previous value in *val_old, using an IB
   fetch-and-add work request on the groups QP.
   The offset must be 8-byte aligned.
   Returns GASPI_SUCCESS or GASPI_ERROR (the QP is then marked broken). */
gaspi_return_t
pgaspi_atomic_fetch_add (const gaspi_segment_id_t segment_id,
			 const gaspi_offset_t offset,
			 const gaspi_rank_t rank,
			 const gaspi_atomic_value_t val_add,
			 gaspi_atomic_value_t * const val_old,
			 const gaspi_timeout_t timeout_ms)
{
#ifdef DEBUG
  /* Parameter sanity checks (debug builds only). */
  if (glb_gaspi_ctx_ib.rrmd[segment_id] == NULL)
    {
      gaspi_printf("Debug: Invalid segment (gaspi_atomic_fetch_add)\n");
      return GASPI_ERROR;
    }

  if( rank >= glb_gaspi_ctx.tnc)
    {
      gaspi_printf("Debug: Invalid rank (gaspi_atomic_fetch_add)\n");
      return GASPI_ERROR;
    }

  if( offset > glb_gaspi_ctx_ib.rrmd[segment_id][rank].size)
    {
      gaspi_printf("Debug: Invalid offsets (gaspi_atomic_fetch_add)\n");
      return GASPI_ERROR;
    }

  if( val_old == NULL)
    {
      gaspi_printf("Debug: Invalid pointer in parameter val_old (gaspi_atomic_fetch_add)\n");
      return GASPI_ERROR;
    }
#endif

  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  struct ibv_send_wr swr;
  int i;

  /* IB atomics require 8-byte alignment. */
  if (offset & 0x7)
    {
      gaspi_print_error("Unaligned offset");
      return GASPI_ERROR;
    }

  lock_gaspi_tout (&glb_gaspi_group_ib[0].gl, timeout_ms);

  /* Local scatter entry: the fetched old value lands in the group
     buffer at NEXT_OFFSET. */
  slist.addr = (uintptr_t) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET);
  slist.length = 8;
  slist.lkey = glb_gaspi_group_ib[0].mr->lkey;

  swr.wr.atomic.remote_addr = glb_gaspi_ctx_ib.rrmd[segment_id][rank].addr + NOTIFY_OFFSET + offset;
  swr.wr.atomic.rkey = glb_gaspi_ctx_ib.rrmd[segment_id][rank].rkey;
  swr.wr.atomic.compare_add = val_add;
  swr.wr_id = rank;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.opcode = IBV_WR_ATOMIC_FETCH_AND_ADD;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpGroups[rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = 1;
      unlock_gaspi (&glb_gaspi_group_ib[0].gl);
      return GASPI_ERROR;
    }

  glb_gaspi_ctx_ib.ne_count_grp++;

  /* Drain all outstanding group-QP completions (including ours). */
  int ne = 0;
  for (i = 0; i < glb_gaspi_ctx_ib.ne_count_grp; i++)
    {
      do
	{
	  ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqGroups, 1, glb_gaspi_ctx_ib.wc_grp_send);
	}
      while (ne == 0);

      if ((ne < 0) || (glb_gaspi_ctx_ib.wc_grp_send[i].status != IBV_WC_SUCCESS))
	{
	  glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][glb_gaspi_ctx_ib.wc_grp_send[i].wr_id] = 1;
	  unlock_gaspi (&glb_gaspi_group_ib[0].gl);
	  return GASPI_ERROR;
	}
    }

  glb_gaspi_ctx_ib.ne_count_grp = 0;

  /* The fetched (old) remote value is now in the group buffer. */
  *val_old = *((gaspi_atomic_value_t *) (glb_gaspi_group_ib[0].buf + NEXT_OFFSET));

  unlock_gaspi (&glb_gaspi_group_ib[0].gl);

  return GASPI_SUCCESS;
}
/* Execute a single SN operation against rank: connect the SN socket,
   dispatch op to the matching command helper, then close the socket.
   arg carries op-specific data (e.g. segment id, group id).
   Returns GASPI_SUCCESS, GASPI_TIMEOUT (helper returned 1), or an
   error code. */
gaspi_return_t
gaspi_sn_command(const enum gaspi_sn_ops op, const gaspi_rank_t rank, const gaspi_timeout_t timeout_ms, void * arg)
{
  int ret = -1;
  gaspi_return_t eret = GASPI_ERROR;
  const int i = (int) rank;

  eret = gaspi_sn_connect_to_rank(rank, timeout_ms);
  if(eret != GASPI_SUCCESS)
    {
      return eret;
    }

  eret = GASPI_ERROR;

  switch(op)
    {
    case GASPI_SN_CONNECT:
      {
	ret = _gaspi_sn_connect_command(rank);
	break;
      }
    case GASPI_SN_PROC_PING:
    case GASPI_SN_PROC_KILL:
      {
	ret = _gaspi_sn_single_command(rank, op);
	break;
      }
    case GASPI_SN_SEG_REGISTER:
      {
	ret = _gaspi_sn_segment_register_command(rank, arg);
	break;
      }
    case GASPI_SN_GRP_CHECK:
      {
	ret = _gaspi_sn_group_check(rank, timeout_ms, arg);
	break;
      }
    case GASPI_SN_GRP_CONNECT:
      {
	ret = _gaspi_sn_group_connect(rank, arg);
	break;
      }
    default:
      {
	gaspi_print_error("Unknown SN op");
	eret = GASPI_ERROR;
      }
    };

  /* Helper convention: 0 => success, 1 => timeout, otherwise error. */
  if( 0 == ret )
    eret = GASPI_SUCCESS;

  if( 1 == ret )
    eret = GASPI_TIMEOUT;

  /* SN connections are per-command: always close afterwards. */
  if(gaspi_sn_close(glb_gaspi_ctx.sockfd[i]) != 0)
    {
      gaspi_print_error("Failed to close socket to %d", i);
    }

  glb_gaspi_ctx.sockfd[i] = -1;

  return eret;
}
/* Legacy SN thread: serves datagram-based SN requests on a UDP socket.
   Validates that the sender's hostname appears in the machinefile, then
   handles the command in the packet:
     1 => acknowledge and exit the thread,
     2 => plain acknowledge (ping),
     3 => acknowledge and terminate the whole process,
     4 => register a segment (gaspi_seg_reg_sn) and reply with status.
   Returns NULL on exit. */
void *
gaspi_sn_thread (void *arg)
{
  gaspi_sn_packet snp;
  struct sockaddr_in cliAddr;
  fd_set rfds;
  int i, ret;

  const int dsock = gaspi_setup_dg_socket ();
  if (dsock == -1)
    {
      gaspi_print_error ("Failed to setup create SN thread socket");
      return NULL;
    }

  /* Flag SN as initialized; a second increment means double init. */
  if (__sync_fetch_and_add (&glb_gaspi_sn_init, 1) != 0)
    gaspi_print_error ("Failed SN init");

  int local_fd = gaspi_listen2port (GASPI_INT_PORT + glb_gaspi_ctx.localSocket, GASPI_BLOCK);
  if (local_fd < 0)
    {
      gaspi_print_error ("Failed to initialize SN thread");
      return NULL;
    }

  while (1)
    {
      FD_ZERO (&rfds);
      FD_SET (dsock, &rfds);

      /* Block until a datagram arrives. */
      const int selret = select (FD_SETSIZE, &rfds, NULL, NULL, NULL);
      if (selret <= 0)
	{
	  continue;
	}

      if (FD_ISSET (dsock, &rfds))
	{
	  const int cliLen = sizeof (cliAddr);
	  const int rlen = recvfrom (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				     (struct sockaddr *) &cliAddr, (socklen_t *) & cliLen);

	  /* Drop short or non-GASPI packets. */
	  if ((rlen != sizeof (gaspi_sn_packet)) || (snp.magic != GASPI_SNP_MAGIC))
	    goto checkL;

	  /* Check that the sender is part of our machinefile. */
	  char hn[128];
	  int hn_found = 0;
	  getnameinfo ((struct sockaddr *) &cliAddr, cliLen, hn, 128, NULL, 0, NI_NOFQDN);

	  /* Compare against the short (non-FQDN) hostname. */
	  const char *fhn = strtok (hn, ".");
	  for (i = 0; i < glb_gaspi_ctx.tnc; i++)
	    {
	      if (strncmp ((glb_gaspi_ctx.hn + i * 64), fhn, 64) == 0)
		{
		  hn_found = 1;
		  break;
		}
	      if (strncmp ("localhost", fhn, 64) == 0)
		{
		  hn_found = 1;
		  break;
		}
	    }

	  if (!hn_found)
	    {
	      /* Reject the request. */
	      snp.ret = -1;
	      int ret = sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				(struct sockaddr *) &cliAddr, sizeof (cliAddr));
	      if (ret != sizeof (gaspi_sn_packet))
		{
		  gaspi_print_error ("Hostname not part of machinefile");
		}
	      goto checkL;
	    }

	  if (snp.magic == GASPI_SNP_MAGIC)
	    {
	      switch (snp.cmd)
		{
		case 1:
		  /* Acknowledge and terminate this thread. */
		  snp.ret = 0;
		  ret = sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				(struct sockaddr *) &cliAddr, sizeof (cliAddr));
		  if (ret != sizeof (gaspi_sn_packet))
		    {
		      gaspi_print_error ("SN thread failed to send cmd 1");
		    }
		  return NULL;
		  break;
		case 2:
		  /* Ping: just acknowledge. */
		  snp.ret = 0;
		  ret = sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				(struct sockaddr *) &cliAddr, sizeof (cliAddr));
		  if (ret != sizeof (gaspi_sn_packet))
		    {
		      gaspi_print_error ("SN thread failed to send cmd 2");
		    }
		  break;
		case 3:
		  /* Remote kill: acknowledge, then exit the process. */
		  snp.ret = 0;
		  ret = sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				(struct sockaddr *) &cliAddr, sizeof (cliAddr));
		  if (ret != sizeof (gaspi_sn_packet))
		    {
		      gaspi_print_error ("SN thread failed to send cmd 3");
		    }
		  exit (-1);
		  break;
		case 4:
		  /* Segment registration: reply with the result. */
		  snp.ret = gaspi_seg_reg_sn (snp);
		  ret = sendto (dsock, &snp, sizeof (gaspi_sn_packet), MSG_WAITALL,
				(struct sockaddr *) &cliAddr, sizeof (cliAddr));
		  if (ret != sizeof (gaspi_sn_packet))
		    {
		      gaspi_print_error ("SN thread failed to send cmd 4");
		    }
		  break;
		default:
		  break;
		}; //switch
	    } //if
	} //if(dsock...

    checkL:
      continue;
    } //while(1)

  return NULL;
}
gaspi_return_t gaspi_gpu_init(void) { gaspi_context_t * const gctx = &glb_gaspi_ctx; int deviceCount; cudaError_t cuda_error_id = cudaGetDeviceCount(&deviceCount); if( cuda_error_id != cudaSuccess ) { gaspi_print_error("Failed cudaGetDeviceCount." ); return GASPI_ERR_DEVICE; } if( deviceCount <= 0 ) { gaspi_print_error("No CUDA capable devices found."); return GASPI_ERR_DEVICE; } const int ib_numa_node = _gaspi_find_dev_numa_node(); int device_id = 0; int gaspi_devices = 0; int direct_devices[GPI2_GPU_MAX_DIRECT_DEVS]; struct cudaDeviceProp deviceProp; for(device_id = 0; device_id < deviceCount; device_id++) { //TODO: possibly add functionality to show properties structure cuda_error_id = cudaGetDeviceProperties(&deviceProp, device_id); if( cuda_error_id != cudaSuccess) { return GASPI_ERR_DEVICE; } if( deviceProp.major >= 3 ) /* TODO: magic number */ { cuda_error_id = cudaSetDevice(device_id); if( cuda_error_id != cudaSuccess ) { return GASPI_ERR_DEVICE; } if( ib_numa_node == _gaspi_find_GPU_numa_node(device_id) ) { if( gaspi_devices < GPI2_GPU_MAX_DIRECT_DEVS - 1 ) { direct_devices[gaspi_devices] = device_id; gaspi_devices++; } } } } if( 0 == gaspi_devices ) { gaspi_print_error("No GPU Direct RDMA capable devices on the correct NUMA-socket were found."); return GASPI_ERROR; } gpus = (gaspi_gpu_t*) malloc(sizeof(gaspi_gpu_t) * gaspi_devices); if( gpus == NULL ) { gaspi_print_error("Failed to allocate memory."); return GASPI_ERR_MEMALLOC; } int i, j, k; for(k = 0 ; k < gaspi_devices; k++) { cuda_error_id = cudaSetDevice(direct_devices[k]); if( cuda_error_id != cudaSuccess ) { return GASPI_ERR_DEVICE; } for(i = 0; i < GASPI_MAX_QP; i++) { cuda_error_id = cudaStreamCreate(&gpus[k].streams[i]); if( cuda_error_id != cudaSuccess ) { return GASPI_ERR_DEVICE; } for(j = 0; j < GASPI_CUDA_EVENTS; j++) { cuda_error_id = cudaEventCreateWithFlags(&gpus[k].events[i][j].event, cudaEventDisableTiming); if( cuda_error_id != cudaSuccess ) { return GASPI_ERR_DEVICE; } } cuda_error_id 
= cudaStreamCreateWithFlags(&gpus[k].streams[i], cudaStreamNonBlocking); if( cuda_error_id != cudaSuccess ) { return GASPI_ERR_DEVICE; } } gpus[k].device_id = direct_devices[k]; } gctx->gpu_count = gaspi_devices; gctx->use_gpus = 1; return GASPI_SUCCESS; }
/* Send the topology (hostname/offset table) to rank i: open a
   dedicated connection on the topology port range, send a
   GASPI_SN_TOPOLOGY header followed by the tnc*65-byte table, then
   close the connection.
   Returns 0 on success, -1 on failure. */
static int
gaspi_sn_send_topology(gaspi_context *ctx, const int i, const gaspi_timeout_t timeout_ms)
{
  /* Topology exchange uses sn_port + 64 (+ per-host offset). */
  if( (ctx->sockfd[i] = gaspi_sn_connect2port(gaspi_get_hn(i),
					      (glb_gaspi_cfg.sn_port + 64 + ctx->poff[i]),
					      timeout_ms)) < 0)
    {
      gaspi_print_error("Failed to connect to %d", i);
      return -1;
    }

  if( 0 != gaspi_sn_set_default_opts(ctx->sockfd[i]) )
    {
      gaspi_print_error("Failed to opts");
      close(ctx->sockfd[i]);
      return -1;
    }

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = ctx->tnc * 65; //TODO: 65 is magic
  cdh.op = GASPI_SN_TOPOLOGY;
  cdh.rank = i;
  cdh.tnc = ctx->tnc;

  int retval = 0;
  size_t len = sizeof(gaspi_cd_header);
  void * ptr = &cdh;
  int sockfd = ctx->sockfd[i];

  if (sockfd <= 0 )
    {
      gaspi_print_error("Wrong fd %d %d", i, ctx->sockfd[i] );
      retval = -1;
      goto endL;
    }

  if ( gaspi_sn_writen( sockfd, ptr, len)  != len )
    {
      gaspi_print_error("Failed to send topology header to %d.", i);
      retval = -1;
      goto endL;
    }

  /* the de facto topology */
  ptr = ctx->hn_poff;
  len = ctx->tnc * 65;

  if ( gaspi_sn_writen( sockfd, ptr, len)  != len )
    {
      gaspi_print_error("Failed to send topology command to %d.", i);
      retval = -1;
      goto endL;
    }

 endL:
  /* This connection is one-shot: invalidate the cached fd and close. */
  ctx->sockfd[i] = -1;
  if(gaspi_sn_close( sockfd ) != 0)
    retval = -1;

  return retval;
}
/* Atomically compare-and-swap the 8-byte value at (segment_id, offset)
   on rank: if it equals comparator it is replaced by val_new. The old
   value lands in the internal nsrc buffer (read by the caller).
   Uses an IB compare-and-swap work request on the groups QP.
   Returns GASPI_SUCCESS or GASPI_ERROR (QP then marked corrupt). */
gaspi_return_t
pgaspi_dev_atomic_compare_swap (const gaspi_segment_id_t segment_id,
				const gaspi_offset_t offset,
				const gaspi_rank_t rank,
				const gaspi_atomic_value_t comparator,
				const gaspi_atomic_value_t val_new)
{
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  struct ibv_send_wr swr;
  int i;

  /* Local scatter entry: the fetched old value is written to the
     internal notification segment. */
  slist.addr = (uintptr_t) (glb_gaspi_ctx.nsrc.buf + NOTIFY_OFFSET);
  slist.length = sizeof(gaspi_atomic_value_t);
  slist.lkey = ((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey;

  swr.wr.atomic.remote_addr = glb_gaspi_ctx.rrmd[segment_id][rank].addr + NOTIFY_OFFSET + offset;
  swr.wr.atomic.rkey = glb_gaspi_ctx.rrmd[segment_id][rank].rkey;
  swr.wr.atomic.compare_add = comparator;
  swr.wr.atomic.swap = val_new;
  swr.wr_id = rank;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.opcode = IBV_WR_ATOMIC_CMP_AND_SWP;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpGroups[rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][rank] = GASPI_STATE_CORRUPT;
      return GASPI_ERROR;
    }

  glb_gaspi_ctx.ne_count_grp++;

  /* Drain all outstanding group-QP completions (including ours). */
  int ne = 0;
  for (i = 0; i < glb_gaspi_ctx.ne_count_grp; i++)
    {
      do
	{
	  ne = ibv_poll_cq (glb_gaspi_ctx_ib.scqGroups, 1, glb_gaspi_ctx_ib.wc_grp_send);
	}
      while (ne == 0);

      if ((ne < 0) || (glb_gaspi_ctx_ib.wc_grp_send[i].status != IBV_WC_SUCCESS))
	{
	  glb_gaspi_ctx.qp_state_vec[GASPI_COLL_QP][glb_gaspi_ctx_ib.wc_grp_send[i].wr_id] = GASPI_STATE_CORRUPT;
	  gaspi_print_error("Failed request to %lu : %s",
			    glb_gaspi_ctx_ib.wc_grp_send[i].wr_id,
			    ibv_wc_status_str(glb_gaspi_ctx_ib.wc_grp_send[i].status));
	  return GASPI_ERROR;
	}
    }

  glb_gaspi_ctx.ne_count_grp = 0;

  return GASPI_SUCCESS;
}
void *gaspi_sn_backend(void *arg) { int esock, lsock, n, i; struct epoll_event ev; struct epoll_event *ret_ev; gaspi_mgmt_header *ev_mgmt, *mgmt; signal(SIGSTKFLT, gaspi_sn_cleanup); signal(SIGPIPE, SIG_IGN); while(gaspi_master_topo_data == 0) gaspi_delay(); lsock = socket(AF_INET, SOCK_STREAM, 0); if(lsock < 0) { gaspi_print_error("Failed to create socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; return NULL; } if( 0 != gaspi_sn_set_default_opts(lsock) ) { gaspi_print_error("Failed to modify socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } signal(SIGPIPE, SIG_IGN); struct sockaddr_in listeningAddress; listeningAddress.sin_family = AF_INET; listeningAddress.sin_port = htons((glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket)); listeningAddress.sin_addr.s_addr = htonl(INADDR_ANY); if(bind(lsock, (struct sockaddr*)(&listeningAddress), sizeof(listeningAddress)) < 0) { gaspi_print_error("Failed to bind socket (port %d)", glb_gaspi_cfg.sn_port + glb_gaspi_ctx.localSocket); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERR_SN_PORT; close(lsock); return NULL; } if ( 0 != gaspi_sn_set_non_blocking(lsock) ) { gaspi_print_error("Failed to set socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } if(listen(lsock, SOMAXCONN) < 0) { gaspi_print_error("Failed to listen on socket"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } esock = epoll_create(GASPI_EPOLL_CREATE); if(esock < 0) { gaspi_print_error("Failed to create IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } /* add lsock to epoll instance */ ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) ); if(ev.data.ptr == NULL) { gaspi_print_error("Failed to allocate memory"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); 
return NULL; } ev_mgmt = ev.data.ptr; ev_mgmt->fd = lsock; ev.events = EPOLLIN; if(epoll_ctl(esock, EPOLL_CTL_ADD, lsock, &ev) < 0) { gaspi_print_error("Failed to modify IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } ret_ev = calloc(GASPI_EPOLL_MAX_EVENTS, sizeof(ev)); if(ret_ev == NULL) { gaspi_print_error("Failed to allocate memory"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } /* main events loop */ while(1) { n = epoll_wait(esock,ret_ev, GASPI_EPOLL_MAX_EVENTS, -1); /* loop over all triggered events */ for( i = 0; i < n; i++ ) { mgmt = ret_ev[i].data.ptr; if( (ret_ev[i].events & EPOLLERR) || (ret_ev[i].events & EPOLLHUP) || !((ret_ev[i].events & EPOLLIN) || (ret_ev[i].events & EPOLLOUT )) ) { /* an error has occured on this fd. close it => removed from event list. */ gaspi_print_error( "Erroneous event." ); shutdown(mgmt->fd, SHUT_RDWR); close(mgmt->fd); free(mgmt); continue; } else if(mgmt->fd == lsock) { /* process all new connections */ struct sockaddr in_addr; socklen_t in_len = sizeof(in_addr); int nsock = accept( lsock, &in_addr, &in_len ); if(nsock < 0) { if( (errno == EAGAIN) || (errno == EWOULDBLOCK) ) { /* we have processed incoming connection */ break; } else { /* at least check/fix open files limit */ int errsv = errno; if(errsv == EMFILE) { if( 0 == _gaspi_check_ofile_limit() ) { nsock = accept( lsock, &in_addr, &in_len ); } } /* still erroneous? => makes no sense to continue */ if(nsock < 0) { gaspi_print_error( "Failed to accept connection." ); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(lsock); return NULL; } } } /* new socket */ if( 0 != gaspi_sn_set_non_blocking( nsock ) ) { gaspi_print_error( "Failed to set socket options." 
); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } /* add nsock */ ev.data.ptr = malloc( sizeof(gaspi_mgmt_header) ); if(ev.data.ptr == NULL) { gaspi_print_error("Failed to allocate memory."); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } ev_mgmt = ev.data.ptr; ev_mgmt->fd = nsock; ev_mgmt->blen = sizeof(gaspi_cd_header); ev_mgmt->bdone = 0; ev_mgmt->op = GASPI_SN_HEADER; ev.events = EPOLLIN ; /* read only */ if(epoll_ctl( esock, EPOLL_CTL_ADD, nsock, &ev ) < 0) { gaspi_print_error("Failed to modify IO event facility"); gaspi_sn_status = GASPI_SN_STATE_ERROR; gaspi_sn_err = GASPI_ERROR; close(nsock); return NULL; } continue; }/* if new connection(s) */ else { /* read or write ops */ int io_err = 0; if( ret_ev[i].events & EPOLLIN ) { while( 1 ) { int rcount = 0; int rsize = mgmt->blen - mgmt->bdone; char *ptr = NULL; if( mgmt->op == GASPI_SN_HEADER ) { /* TODO: is it valid? */ ptr = (char *) &mgmt->cdh; rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize ); } else if( mgmt->op == GASPI_SN_CONNECT ) { while( !glb_gaspi_dev_init ) gaspi_delay(); ptr = pgaspi_dev_get_rrcd(mgmt->cdh.rank); rcount = read( mgmt->fd, ptr + mgmt->bdone, rsize ); } /* errno==EAGAIN => we have read all data */ int errsv = errno; if(rcount < 0) { if (errsv == ECONNRESET || errsv == ENOTCONN) { gaspi_print_error(" Failed to read (op %d)", mgmt->op); } if(errsv != EAGAIN || errsv != EWOULDBLOCK) { gaspi_print_error(" Failed to read (op %d).", mgmt->op); io_err = 1; } break; } else if(rcount == 0) /* the remote side has closed the connection */ { io_err = 1; break; } else { mgmt->bdone += rcount; /* read all data? */ if(mgmt->bdone == mgmt->blen) { /* we got header, what do we have to do ? 
*/ if(mgmt->op == GASPI_SN_HEADER) { if(mgmt->cdh.op == GASPI_SN_PROC_KILL) { _exit(-1); } else if(mgmt->cdh.op == GASPI_SN_CONNECT) { GASPI_SN_RESET_EVENT( mgmt, mgmt->cdh.op_len, mgmt->cdh.op ); } else if(mgmt->cdh.op == GASPI_SN_PROC_PING) { GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_GRP_CHECK) { struct{gaspi_group_t group;int tnc, cs, ret;} gb; memset(&gb, 0, sizeof(gb)); gb.ret = -1; gb.cs = 0; const int group = mgmt->cdh.rank; const int tnc = mgmt->cdh.tnc; lock_gaspi_tout (&glb_gaspi_group_ctx[group].del, GASPI_BLOCK); if(glb_gaspi_group_ctx[group].id >= 0) { if(glb_gaspi_group_ctx[group].tnc == tnc) { int i; gb.ret = 0; gb.tnc = tnc; for(i = 0; i < tnc; i++) { if( NULL != glb_gaspi_group_ctx[group].rank_grp ) gb.cs ^= glb_gaspi_group_ctx[group].rank_grp[i]; } } } unlock_gaspi (&glb_gaspi_group_ctx[group].del); if(gaspi_sn_writen( mgmt->fd, &gb, sizeof(gb) ) < sizeof(gb) ) { gaspi_print_error("Failed response to group check."); io_err = 1; break; } GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_GRP_CONNECT) { while( !glb_gaspi_dev_init || ( glb_gaspi_group_ctx[mgmt->cdh.ret].id == -1) ) gaspi_delay(); /* TODO: check the pointer */ if(gaspi_sn_writen( mgmt->fd, &glb_gaspi_group_ctx[mgmt->cdh.ret].rrcd[glb_gaspi_ctx.rank], sizeof(gaspi_rc_mseg) ) < sizeof(gaspi_rc_mseg) ) { gaspi_print_error("Failed to connect group."); io_err = 1; break; } GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else if(mgmt->cdh.op == GASPI_SN_SEG_REGISTER) { int rret = gaspi_sn_segment_register(mgmt->cdh); /* write back result of registration */ if(gaspi_sn_writen( mgmt->fd, &rret, sizeof(int) ) < sizeof(int) ) { gaspi_print_error("Failed response to segment register."); io_err = 1; break; } GASPI_SN_RESET_EVENT(mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } }/* !header */ else if(mgmt->op == GASPI_SN_CONNECT) { /* TODO: to remove 
*/ while( !glb_gaspi_dev_init ) gaspi_delay(); const size_t len = pgaspi_dev_get_sizeof_rc(); char *ptr = NULL; gaspi_return_t eret = pgaspi_create_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK); if( eret == GASPI_SUCCESS ) { eret = pgaspi_connect_endpoint_to(mgmt->cdh.rank, GASPI_BLOCK); if( eret == GASPI_SUCCESS) { ptr = pgaspi_dev_get_lrcd(mgmt->cdh.rank); } } if( eret != GASPI_SUCCESS ) { /* We set io_err, connection is closed and remote peer reads EOF */ io_err = 1; } else { if( NULL != ptr ) { if( gaspi_sn_writen( mgmt->fd, ptr, len ) < sizeof(len) ) { gaspi_print_error("Failed response to connection request from %u.", mgmt->cdh.rank); io_err = 1; } } } GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } else { gaspi_print_error("Received unknown SN operation"); GASPI_SN_RESET_EVENT( mgmt, sizeof(gaspi_cd_header), GASPI_SN_HEADER ); } break; } /* if all data */ }/* else */ }/* while(1) read */ }/* read in */ if( io_err ) { shutdown(mgmt->fd, SHUT_RDWR); close(mgmt->fd); free(mgmt); } } } /* for each event */ }/* event loop while(1) */ return NULL; }
/* Query rank `rank` whether its view of a group matches ours.
 *
 * Sends a GASPI_SN_GRP_CHECK header carrying the group id, number of
 * ranks (tnc) and rank-list checksum (cs) from *arg (a struct
 * group_desc) and reads back the remote group descriptor. The exchange
 * is retried every 250 ms until the remote side agrees or timeout_ms
 * elapses.
 *
 * Returns 0 when the groups match, 1 on timeout, -1 on I/O failure.
 */
static inline int
_gaspi_sn_group_check(const gaspi_rank_t rank, gaspi_timeout_t timeout_ms, void *arg)
{
  struct group_desc *gb = (struct group_desc *) arg;
  struct group_desc rem_gb;

  int i = (int) rank;
  struct timeb t0, t1;
  ftime(&t0);

  gaspi_cd_header cdh;
  memset(&cdh, 0, sizeof(gaspi_cd_header));

  cdh.op_len = sizeof (*gb);
  cdh.op = GASPI_SN_GRP_CHECK;
  cdh.rank = gb->group;
  cdh.tnc = gb->tnc;
  cdh.ret = gb->cs;

  do
    {
      memset(&rem_gb, 0, sizeof(rem_gb));

      ssize_t ret = gaspi_sn_writen(glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
      if( ret != (ssize_t) sizeof(gaspi_cd_header) )
	{
	  gaspi_print_error("Failed to write (%d %p %lu)",
			    glb_gaspi_ctx.sockfd[i], &cdh, sizeof(gaspi_cd_header));
	  return -1;
	}

      ssize_t rret = gaspi_sn_readn(glb_gaspi_ctx.sockfd[i], &rem_gb, sizeof(rem_gb));
      if( rret != (ssize_t) sizeof(rem_gb) )
	{
	  gaspi_print_error("Failed to read (%d %p %lu)",
			    glb_gaspi_ctx.sockfd[i], &rem_gb, sizeof(rem_gb));
	  return -1;
	}

      if( (rem_gb.ret < 0) || (gb->cs != rem_gb.cs) )
	{
	  /* Remote side not ready or checksum mismatch: back off and
	     retry until the timeout expires. */
	  ftime(&t1);
	  const unsigned int delta_ms = (t1.time - t0.time) * 1000 + (t1.millitm - t0.millitm);
	  if( delta_ms > timeout_ms )
	    {
	      return 1;
	    }

	  if( gaspi_thread_sleep(250) < 0 )
	    {
	      /* BUGFIX: previously printed the unrelated write result
		 `ret` (an ssize_t) with %d. */
	      gaspi_printf("gaspi_thread_sleep Error: (%s)\n", (char*)strerror(errno));
	    }
	}
      else
	{
	  break;
	}
    }
  while(1);

  return 0;
}