int main() { int ib_port = 1; int gid_idx = 1; int rc; int rank, nprocs; struct ibv_sge sge_list; struct ibv_wc wc; struct ibv_send_wr *sr; unsigned long long start, end; float time; mypmiInit(&rank, &nprocs); fprintf(stderr, "[%d] nprocs(%d)\n", rank, nprocs); rc = resource_create(&res, ib_port, rank); gid_idx = rank; rc = connect_qp(&res, ib_port, gid_idx, rank); create_sge(&res, buf, SIZE, &sge_list); memset(&wc, 0, sizeof(struct ibv_wc)); sr = malloc(sizeof(*sr)); memset(sr, 0, sizeof(*sr)); mypmiBarrier(); fprintf(stderr, "[%d] START\n", rank); memset(buf, 0, SIZE); mypmiBarrier(); if (rank == 0) { struct ibv_mr *mr; for (int size = RDMA_MIN_SIZE; size < RDMA_MAX_SIZE; size += STEP) { char *received = calloc(size, sizeof(char)); mr = ibv_reg_mr(res.pd, received, size, IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); INT_TO_BE(buf, mr->rkey); INT_TO_BE(buf + 4, (((intptr_t)mr->addr) >> 32)); INT_TO_BE(buf + 8, (((intptr_t)mr->addr) & 0xffffffff)); if (post_ibsend(&res, IBV_WR_SEND, &sge_list, sr, 1)) { fprintf(stderr, "[%d] failed to post SR\n", rank); goto end; } while ((rc = poll_cq(&res, &wc, 1, SCQ_FLG)) == 0) { } /* printf("[%d] memory region is sent. key(%x) addr(%lx) rc(%d)\n", rank, mr->rkey, (intptr_t)mr->addr, rc); */ /* wait for done */ post_ibreceive(&res, &sge_list, 1); while (poll_cq(&res, &wc, 1, RCQ_FLG) == 0) { } /* printf("[%d] %d byte has received (opcode=%d)\n", rank, wc.byte_len, wc.opcode); */ /* printf("[%d] Received message: %s\n", rank, buf); */ /* display_received(received, size); */ ibv_dereg_mr(mr); free(received); } } else {
int openib_initialize() { // Use the previously cached info me = l_state.rank; nprocs = l_state.size; assert(l_state.world_comm); // initialize the envs openib_init_envs(); //Initialize the registration cache reg_cache_init(nprocs, 0); init_params(); if(open_hca()) { release_resources(); exit(1); } if(create_cq()) { release_resources(); exit(1); } if(get_lid()) { release_resources(); exit(1); } if(create_qp()) { release_resources(); exit(1); } if(exch_addr()) { release_resources(); exit(1); } if(connect_qp()) { release_resources(); exit(1); } // Create network locks openib_create_locks(); // Allocate buffers for one sided operations openib_alloc_buf(); MPI_Barrier(l_state.world_comm); return 0; }
static void* rdma_thread(void *ptr) { int i, j, rc; struct rdma_resource_t *rdma_resource; struct user_param_t *user_param; struct thread_context_t *t_ctx; struct rdma_req_t rdma_req; double lat; t_ctx = (struct thread_context_t*)ptr; rdma_resource = t_ctx->rdma_resource; user_param = &(rdma_resource->user_param); t_ctx->thread_id = pthread_self(); t_ctx->num_of_iter = user_param->num_of_iter; if (create_rdma_buf_pool(t_ctx)) { ERROR("Failed to create MR pool.\n"); return NULL; } { uint32_t qp_type; if (user_param->server_ip != NULL) { qp_type = htonl(user_param->qp_type); } sock_c2d(&(t_ctx->sock), sizeof(qp_type), &qp_type); if (user_param->server_ip == NULL) { user_param->qp_type = ntohl(qp_type); } t_ctx->qp_type = user_param->qp_type; /// redesign } if (create_qp(t_ctx)) { ERROR("Failed to create QP.\n"); return NULL; } { struct thread_sync_info_t { uint32_t qp_num; uint32_t direction; uint32_t opcode; uint32_t qkey; uint32_t psn; uint32_t num_of_iter; uint16_t lid; } ATTR_PACKED; struct thread_sync_info_t local_info; struct thread_sync_info_t remote_info; local_info.lid = htons(rdma_resource->port_attr.lid); local_info.qp_num = htonl(t_ctx->qp->qp_num); local_info.direction = htonl(user_param->direction); local_info.opcode = htonl(user_param->opcode); /// enum ibv_wr_opcode local_info.qkey = htonl(0); local_info.psn = htonl(0); local_info.num_of_iter = htonl(t_ctx->num_of_iter); rc = sock_sync_data(&(t_ctx->sock), sizeof(local_info), &local_info, &remote_info); if (rc) { ERROR("failed to sync data.\n"); return NULL; } t_ctx->remote_lid = ntohs(remote_info.lid); t_ctx->remote_qpn = ntohl(remote_info.qp_num); t_ctx->remote_qkey = ntohl(remote_info.qkey); t_ctx->remote_psn = ntohl(remote_info.psn); if (user_param->server_ip == NULL) { user_param->direction = ntohl(remote_info.direction); user_param->opcode = ntohl(remote_info.opcode); t_ctx->num_of_iter = ntohl(remote_info.num_of_iter); if (user_param->direction == 0 || user_param->direction == 1) { t_ctx->is_requestor = 0; } else if (user_param->direction == 2) { t_ctx->is_requestor = 1; } } else { if (user_param->direction == 0 || user_param->direction == 1) { t_ctx->is_requestor = 1; } else if (user_param->direction == 2) { t_ctx->is_requestor = 0; } } } t_ctx->t_a = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_a == NULL) { ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_b = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_b == NULL) { free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_c = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_c == NULL) { free(t_ctx->t_b); free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } for (i = 0; i < LAT_LEVEL; i++) { t_ctx->lat[i] = 0; } if (connect_qp(t_ctx)) { ERROR("Failed to connect QP.\n"); return NULL; } for(i = 0; i < user_param->num_of_oust; i++) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); rdma_req.num_of_oust = 1; rdma_req.data_size = DEF_BUF_SIZE; rc = post_receive(t_ctx, &rdma_req); if (rc) { ERROR("Failed to post_receive, i:%d.\n", i); return NULL; } } sock_sync_ready(&t_ctx->sock); for (i = 0; i < t_ctx->num_of_iter; i++) { t_ctx->t_a[i] = get_cycles(); DEBUG("do_rdma_transaction, t_ctx->num_of_iter=%d, i=%d.\n", t_ctx->num_of_iter, i); rc = do_rdma_transaction(t_ctx, i); if (rc) { ERROR("Failed to do_rdma_transaction, i:%d.\n", i); return NULL; } t_ctx->t_c[i] = get_cycles(); if (user_param->direction == 0 || (!t_ctx->is_requestor)) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); if (rdma_req.rdma_buf == NULL) { ERROR("Failed to get RDMA buffer.\n"); return NULL; /// Memory Leak and remove hung RX buffers } rdma_req.num_of_oust = 1; post_receive(t_ctx, &rdma_req); } if (user_param->interval) { usleep(user_param->interval); } } /// Memory leak, release the hung RX rdma_buf; destroy_qp(t_ctx); t_ctx->min_lat = 0x7fffffff; t_ctx->max_lat = 0; for (i = 0; i < t_ctx->num_of_iter; i++) { lat = (t_ctx->t_c[i] - t_ctx->t_a[i]) / rdma_resource->freq_mhz; if (lat < t_ctx->min_lat) { t_ctx->min_lat = lat; t_ctx->min_lat_iter_num = i; } if (lat > t_ctx->max_lat) { t_ctx->max_lat = lat; t_ctx->max_lat_iter_num = i; } for (j = 0; j < LAT_LEVEL; j++) { if (j < 7) { if (lat < (1 + j)) { t_ctx->lat[j]++; break; } } else { if (lat < (1 << (j - 4))) { t_ctx->lat[j]++; break; } } } if (j == LAT_LEVEL) { t_ctx->lat[LAT_LEVEL - 1]++; } } free(t_ctx->t_a); free(t_ctx->t_b); free(t_ctx->t_c); if (!user_param->server_ip) { /// sock_close_multi(&(t_ctx->sock), sock_bind); // how to close sock_fd. free(t_ctx); /// Need to improve. } INFO("RDMA testing thread successfully exited.\n"); return NULL; }