void MV_Generate_Address_Handles() {
    int i;

    /* Create address handles */
    for (i = 0; i < mvdev.np; i++) {
        mvdev_connection_t *c = &(mvdev.connections[i]);
        int j;
        int total_count = 0;
        int k;

        for (k = 0; k < mvparams.max_sl; k++) {
            for (j = 0; j < mvparams.max_lmc_total; j++) {
                struct ibv_ah_attr ah_attr;

                memset(&ah_attr, 0, sizeof(ah_attr));
                ah_attr.is_global     = 0;
                ah_attr.dlid          = mvdev.lids[i] + j;
                ah_attr.sl            = k;
                ah_attr.src_path_bits = 0;
                ah_attr.port_num      = mvparams.default_port;

                c->data_ud_ah[total_count] = ibv_create_ah(mvdev.default_hca->pd, &ah_attr);
                if (!c->data_ud_ah[total_count]) {
                    error_abort_all(IBV_RETURN_ERR, "Failed to create AH");
                }
                total_count++;
            }
        }
    }

    mvparams.max_ah_total = mvparams.max_lmc_total * mvparams.max_sl;
}
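Every AH created above eventually has to be released with ibv_destroy_ah(). A minimal teardown sketch under the same mvdev/mvparams structures; the function name MV_Destroy_Address_Handles is hypothetical and assumes mvparams.max_ah_total was already set by the generator above:

/* Hypothetical counterpart: release every AH created above. */
void MV_Destroy_Address_Handles() {
    int i, a;

    for (i = 0; i < mvdev.np; i++) {
        mvdev_connection_t *c = &(mvdev.connections[i]);

        for (a = 0; a < mvparams.max_ah_total; a++) {
            /* ibv_destroy_ah() returns nonzero on failure */
            if (c->data_ud_ah[a] && ibv_destroy_ah(c->data_ud_ah[a])) {
                error_abort_all(IBV_RETURN_ERR, "Failed to destroy AH");
            }
            c->data_ud_ah[a] = NULL;
        }
    }
}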
int mca_oob_ud_peer_update_with_uri (mca_oob_ud_peer_t *peer, const char *uri)
{
    opal_list_item_t *item;
    struct ibv_ah_attr ah_attr;
    mca_oob_ud_device_t *device;
    uint32_t qp_num;
    /* NTH: port is 16-bit here because C90 does not support hh in sscanf */
    uint16_t lid, port_num;
    int rc;

    rc = mca_oob_ud_parse_uri (uri, &qp_num, &lid, &port_num);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    if (peer->peer_lid != lid || peer->peer_port != port_num) {
        if (NULL != peer->peer_ah) {
            (void) ibv_destroy_ah (peer->peer_ah);
            peer->peer_ah = NULL;
        }
    }

    peer->peer_qpn  = qp_num;
    peer->peer_qkey = 0; /* NTH: todo -- add qkey support if needed */
    peer->peer_lid  = lid;
    peer->peer_port = port_num;

    if (NULL == peer->peer_ah) {
        memset (&ah_attr, 0, sizeof (ah_attr));
        ah_attr.dlid     = lid;
        ah_attr.port_num = port_num;

        for (item = opal_list_get_first (&mca_oob_ud_component.ud_devices) ;
             item != opal_list_get_end (&mca_oob_ud_component.ud_devices) ;
             item = opal_list_get_next (item)) {
            device = (mca_oob_ud_device_t *) item;

            /* try to create an address handle using this device */
            peer->peer_ah = ibv_create_ah (device->ib_pd, &ah_attr);
            if (NULL != peer->peer_ah) {
                peer->peer_context = (void *) item;
                break;
            }
        }

        if (NULL == peer->peer_ah) {
            free (peer);
            return ORTE_ERROR;
        }
    }

    return ORTE_SUCCESS;
}
struct ibv_ah *uct_ib_create_ah(uct_ib_iface_t *iface, uint16_t dlid)
{
    uct_ib_pd_t *ib_pd = ucs_derived_of(iface->super.pd, uct_ib_pd_t);
    struct ibv_ah_attr ah_attr;

    memset(&ah_attr, 0, sizeof(ah_attr));
    ah_attr.port_num  = iface->port_num;
    ah_attr.sl        = iface->sl;
    ah_attr.is_global = 0;
    ah_attr.dlid      = dlid;

    return ibv_create_ah(ib_pd->pd, &ah_attr);
}
struct ibv_ah *uct_ib_create_ah(uct_ib_iface_t *iface, uint16_t dlid)
{
    struct ibv_ah_attr ah_attr;
    uct_ib_device_t *dev = uct_ib_iface_device(iface);

    memset(&ah_attr, 0, sizeof(ah_attr));
    ah_attr.port_num  = iface->port_num;
    ah_attr.sl        = iface->sl;
    ah_attr.is_global = 0;
    ah_attr.dlid      = dlid;

    return ibv_create_ah(dev->pd, &ah_attr);
}
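Both variants above build a LID-routed address handle (is_global = 0), which only reaches destinations within the local InfiniBand subnet. Crossing subnets, or running over RoCE, requires a GRH. A minimal sketch of the global variant; the helper name create_global_ah and its parameter list are illustrative, not part of any of the codebases quoted here:

#include <string.h>
#include <infiniband/verbs.h>

/* Hypothetical helper: GRH-based AH for cross-subnet / RoCE destinations. */
static struct ibv_ah *create_global_ah(struct ibv_pd *pd, uint8_t port_num,
                                       const union ibv_gid *dgid,
                                       uint8_t sgid_index, uint8_t sl)
{
    struct ibv_ah_attr ah_attr;

    memset(&ah_attr, 0, sizeof(ah_attr));
    ah_attr.is_global      = 1;          /* route by GID, not LID */
    ah_attr.grh.dgid       = *dgid;      /* destination GID */
    ah_attr.grh.sgid_index = sgid_index; /* index into the local GID table */
    ah_attr.grh.hop_limit  = 1;          /* raise for routed fabrics */
    ah_attr.sl             = sl;
    ah_attr.port_num       = port_num;

    return ibv_create_ah(pd, &ah_attr);
}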
mca_oob_ud_peer_t *mca_oob_ud_get_peer (struct mca_oob_ud_port_t *port,
                                        orte_process_name_t *name,
                                        uint32_t qpn, uint32_t qkey,
                                        uint16_t lid, uint8_t port_num)
{
    struct ibv_ah_attr ah_attr;
    mca_oob_ud_peer_t *peer;
    int rc;

    rc = mca_oob_ud_peer_lookup (name, &peer);
    if (ORTE_SUCCESS == rc) {
        OPAL_OUTPUT_VERBOSE((20, mca_oob_base_output, "%s oob:ud:peer_from_msg_hdr using "
                             "cached peer", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return peer;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:peer_from_msg_hdr creating "
                         "peer from return address", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    peer = OBJ_NEW(mca_oob_ud_peer_t);
    if (NULL == peer) {
        return NULL;
    }

    peer->peer_qpn  = qpn;
    peer->peer_qkey = qkey;
    peer->peer_name = *name;
    peer->peer_lid  = lid;
    peer->peer_port = port_num;

    memset (&ah_attr, 0, sizeof (ah_attr));
    ah_attr.dlid     = peer->peer_lid;
    ah_attr.port_num = peer->peer_port;

    peer->peer_ah = ibv_create_ah (port->device->ib_pd, &ah_attr);
    if (NULL == peer->peer_ah) {
        /* peer came from OBJ_NEW, so release it rather than free() it */
        OBJ_RELEASE(peer);
        return NULL;
    }

    peer->peer_context = port->device;

    OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_lock);
    opal_hash_table_set_value_uint64(&mca_oob_ud_component.ud_peers,
                                     orte_util_hash_name(name), (void *) peer);
    OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_lock);

    return peer;
}
static int resolved_handler(struct cmatest_node *node,
                            struct rdma_cm_event *event)
{
    node->remote_qpn  = event->param.ud.qp_num;
    node->remote_qkey = event->param.ud.qkey;

    node->ah = ibv_create_ah(node->pd, &event->param.ud.ah_attr);
    if (!node->ah) {
        printf("udaddy: failure creating address handle\n");
        goto err;
    }

    node->connected = 1;
    test.connects_left--;
    return 0;
err:
    connect_error();
    return -1;
}
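The AH, remote QPN, and QKey captured here are exactly the three per-destination fields a UD send carries in its work request. A minimal sketch of how a later send would consume them; the helper name send_to_node and the qp/buf/mr parameters are assumptions, standing in for whatever the test allocates elsewhere:

/* Sketch: post one UD SEND to the peer resolved above. */
static int send_to_node(struct cmatest_node *node, struct ibv_qp *qp,
                        void *buf, uint32_t len, struct ibv_mr *mr)
{
    struct ibv_sge sge = {
        .addr   = (uintptr_t) buf,
        .length = len,
        .lkey   = mr->lkey,
    };
    struct ibv_send_wr wr = {
        .num_sge    = 1,
        .sg_list    = &sge,
        .opcode     = IBV_WR_SEND,
        .send_flags = IBV_SEND_SIGNALED,
    }, *bad_wr;

    /* UD sends carry the destination in the work request itself */
    wr.wr.ud.ah          = node->ah;
    wr.wr.ud.remote_qpn  = node->remote_qpn;
    wr.wr.ud.remote_qkey = node->remote_qkey;

    return ibv_post_send(qp, &wr, &bad_wr);
}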
static struct ibv_ah *create_ah(RdmaBackendDev *backend_dev, struct ibv_pd *pd,
                                uint8_t sgid_idx, union ibv_gid *dgid)
{
    GBytes *ah_key = g_bytes_new(dgid, sizeof(*dgid));
    struct ibv_ah *ah = g_hash_table_lookup(ah_hash, ah_key);

    if (ah) {
        trace_rdma_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix),
                                       be64_to_cpu(dgid->global.interface_id));
        g_bytes_unref(ah_key);
    } else {
        struct ibv_ah_attr ah_attr = {
            .is_global     = 1,
            .port_num      = backend_dev->port_num,
            .grh.hop_limit = 1,
        };

        ah_attr.grh.dgid       = *dgid;
        ah_attr.grh.sgid_index = sgid_idx;

        ah = ibv_create_ah(pd, &ah_attr);
        if (ah) {
            g_hash_table_insert(ah_hash, ah_key, ah);
        } else {
            g_bytes_unref(ah_key);
            rdma_error_report("Failed to create AH for gid <0x%" PRIx64 ", 0x%" PRIx64 ">",
                              be64_to_cpu(dgid->global.subnet_prefix),
                              be64_to_cpu(dgid->global.interface_id));
        }

        trace_rdma_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix),
                                        be64_to_cpu(dgid->global.interface_id));
    }

    return ah;
}

static void destroy_ah_hash_key(gpointer data)
{
    g_bytes_unref(data);
}
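For this cache to own its entries, ah_hash must hash GBytes keys and destroy both the key and the cached AH when an entry is dropped. A plausible initialization sketch; destroy_ah_hash_data() and ah_cache_init() are assumed names, mirroring the destroy_ah_hash_key() shown above:

/* Sketch: create ah_hash so that evicting an entry releases
 * both the GBytes key and the cached ibv_ah. */
static void destroy_ah_hash_data(gpointer data)
{
    ibv_destroy_ah((struct ibv_ah *) data);
}

static void ah_cache_init(void)
{
    /* ah_hash is assumed to be the file-scope table used by create_ah() */
    ah_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                    destroy_ah_hash_key, destroy_ah_hash_data);
}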
struct ibv_ah_1_0 *__ibv_create_ah_1_0(struct ibv_pd_1_0 *pd,
                                       struct ibv_ah_attr *attr)
{
    struct ibv_ah *real_ah;
    struct ibv_ah_1_0 *ah;

    ah = malloc(sizeof *ah);
    if (!ah)
        return NULL;

    real_ah = ibv_create_ah(pd->real_pd, attr);
    if (!real_ah) {
        free(ah);
        return NULL;
    }

    ah->context = pd->context;
    ah->pd      = pd;
    ah->real_ah = real_ah;

    return ah;
}
/* create ud vc */
int mv2_ud_set_vc_info (mv2_ud_vc_info_t *ud_vc_info, mv2_ud_exch_info_t *rem_info,
                        struct ibv_pd *pd, int rdma_default_port)
{
    struct ibv_ah_attr ah_attr;

    PRINT_DEBUG(DEBUG_UD_verbose > 0, "lid:%d\n", rem_info->lid);

    memset(&ah_attr, 0, sizeof(ah_attr));
    ah_attr.is_global     = 0;
    ah_attr.dlid          = rem_info->lid;
    ah_attr.sl            = rdma_default_service_level;
    ah_attr.src_path_bits = 0;
    ah_attr.port_num      = rdma_default_port;

    ud_vc_info->ah = ibv_create_ah(pd, &ah_attr);
    if (!ud_vc_info->ah) {
        fprintf(stderr, "Error in creating address handle\n");
        return -1;
    }

    ud_vc_info->lid = rem_info->lid;
    ud_vc_info->qpn = rem_info->qpn;

    return 0;
}
struct ibv_ah_1_0 *__ibv_create_ah_1_0(struct ibv_pd_1_0 *pd,
                                       struct ibv_ah_attr *attr)
{
    struct ibv_ah *real_ah;
    struct ibv_ah_1_0 *ah;

    fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__);

    ah = malloc(sizeof *ah);
    if (!ah)
        return NULL;

    real_ah = ibv_create_ah(pd->real_pd, attr);
    if (!real_ah) {
        free(ah);
        return NULL;
    }

    ah->context = pd->context;
    ah->pd      = pd;
    ah->real_ah = real_ah;

    return ah;
}
ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface,
                                    const uct_ib_address_t *ib_addr,
                                    uint8_t path_bits,
                                    struct ibv_ah **ah_p,
                                    int *is_global_p)
{
    struct ibv_ah_attr ah_attr;
    struct ibv_ah *ah;
    char buf[128];
    char *p, *endp;

    uct_ib_iface_fill_ah_attr(iface, ib_addr, path_bits, &ah_attr);
    ah = ibv_create_ah(uct_ib_iface_md(iface)->pd, &ah_attr);

    if (ah == NULL) {
        p    = buf;
        endp = buf + sizeof(buf);
        snprintf(p, endp - p, "dlid=%d sl=%d port=%d src_path_bits=%d",
                 ah_attr.dlid, ah_attr.sl, ah_attr.port_num,
                 ah_attr.src_path_bits);
        p += strlen(p);

        if (ah_attr.is_global) {
            snprintf(p, endp - p, " dgid=");
            p += strlen(p);
            inet_ntop(AF_INET6, &ah_attr.grh.dgid, p, endp - p);
            p += strlen(p);
            snprintf(p, endp - p, " sgid_index=%d", ah_attr.grh.sgid_index);
        }

        ucs_error("ibv_create_ah(%s) on "UCT_IB_IFACE_FMT" failed: %m",
                  buf, UCT_IB_IFACE_ARG(iface));
        return UCS_ERR_INVALID_ADDR;
    }

    *ah_p        = ah;
    *is_global_p = ah_attr.is_global;
    return UCS_OK;
}
static int join_handler(struct cmatest_node *node,
                        struct rdma_ud_param *param)
{
    char buf[40];

    inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
    printf("mckey: joined dgid: %s mlid 0x%x sl %d\n", buf,
           param->ah_attr.dlid, param->ah_attr.sl);

    node->remote_qpn  = param->qp_num;
    node->remote_qkey = param->qkey;

    node->ah = ibv_create_ah(node->pd, &param->ah_attr);
    if (!node->ah) {
        printf("mckey: failure creating address handle\n");
        goto err;
    }

    node->connected = 1;
    test.connects_left--;
    return 0;
err:
    connect_error();
    return -1;
}
void *run_client(void *arg)
{
    int ud_qp_i = 0;
    struct thread_params params = *(struct thread_params *) arg;

    /*
     * The local HID of a control block should be <= 64 to keep the SHM key low.
     * But the number of clients over all machines can be larger.
     */
    int clt_gid = params.id; /* Global ID of this client thread */
    int clt_local_hid = clt_gid % params.num_threads;
    int srv_gid = clt_gid % NUM_SERVER_THREADS;
    int ib_port_index = params.dual_port == 0 ? 0 : srv_gid % 2;

    struct hrd_ctrl_blk *cb = hrd_ctrl_blk_init(clt_local_hid,
        ib_port_index, -1,     /* port_index, numa_node_id */
        0, 0,                  /* conn qps, use uc */
        NULL, 0, -1,           /* prealloc conn buf, conn buf size, key */
        1, BUF_SIZE, -1);      /* num_dgram_qps, dgram_buf_size, key */

    /* Buffer to receive responses into */
    memset((void *) cb->dgram_buf, 0, BUF_SIZE);

    /* Buffer to send requests from */
    uint8_t *req_buf = malloc(params.size);
    assert(req_buf != 0);
    memset(req_buf, clt_gid, params.size);

    printf("main: Client %d waiting for server %d\n", clt_gid, srv_gid);

    struct hrd_qp_attr *srv_qp[NUM_UD_QPS] = {NULL};
    for (ud_qp_i = 0; ud_qp_i < NUM_UD_QPS; ud_qp_i++) {
        char srv_name[HRD_QP_NAME_SIZE];
        sprintf(srv_name, "server-%d-%d", srv_gid, ud_qp_i);

        while (srv_qp[ud_qp_i] == NULL) {
            srv_qp[ud_qp_i] = hrd_get_published_qp(srv_name);
            if (srv_qp[ud_qp_i] == NULL) {
                usleep(200000);
            }
        }
    }
    ud_qp_i = 0;

    printf("main: Client %d found server! Now posting SENDs.\n", clt_gid);

    /* We need only 1 ah because a client contacts only 1 server */
    struct ibv_ah_attr ah_attr = {
        .is_global = 0,
        .dlid = srv_qp[0]->lid, /* All srv_qp have same LID */
        .sl = 0,
        .src_path_bits = 0,
        .port_num = cb->dev_port_id,
    };

    struct ibv_ah *ah = ibv_create_ah(cb->pd, &ah_attr);
    assert(ah != NULL);

    struct ibv_send_wr wr[MAX_POSTLIST], *bad_send_wr;
    struct ibv_wc wc[MAX_POSTLIST];
    struct ibv_sge sgl[MAX_POSTLIST];
    long long rolling_iter = 0; /* For throughput measurement */
    long long nb_tx = 0;
    int w_i = 0;                /* Window index */
    int ret;

    struct timespec start, end;
    clock_gettime(CLOCK_REALTIME, &start);

    while (1) {
        if (rolling_iter >= M_2) {
            clock_gettime(CLOCK_REALTIME, &end);
            double seconds = (end.tv_sec - start.tv_sec) +
                (double) (end.tv_nsec - start.tv_nsec) / 1000000000;
            printf("main: Client %d: %.2f Mops\n", clt_gid,
                   rolling_iter / seconds);

            rolling_iter = 0;
            clock_gettime(CLOCK_REALTIME, &start);
        }

        for (w_i = 0; w_i < params.postlist; w_i++) {
            wr[w_i].wr.ud.ah = ah;
            wr[w_i].wr.ud.remote_qpn = srv_qp[ud_qp_i]->qpn;
            wr[w_i].wr.ud.remote_qkey = HRD_DEFAULT_QKEY;

            wr[w_i].opcode = IBV_WR_SEND_WITH_IMM;
            wr[w_i].num_sge = 1;
            wr[w_i].next = (w_i == params.postlist - 1) ? NULL : &wr[w_i + 1];
            wr[w_i].imm_data = 3185;
            wr[w_i].sg_list = &sgl[w_i];

            /*
             * UNSIG_BATCH >= 2 * postlist ensures that we poll for a
             * completed send() only after we have performed a signaled send().
             */
            wr[w_i].send_flags =
                (nb_tx & UNSIG_BATCH_) == 0 ? IBV_SEND_SIGNALED : 0;
            if ((nb_tx & UNSIG_BATCH_) == UNSIG_BATCH_) {
                hrd_poll_cq(cb->dgram_send_cq[0], 1, wc);
            }
            wr[w_i].send_flags |= IBV_SEND_INLINE;

            sgl[w_i].addr = (uint64_t) (uintptr_t) req_buf;
            sgl[w_i].length = params.size;

            rolling_iter++;
            nb_tx++;
        }

        ret = ibv_post_send(cb->dgram_qp[0], &wr[0], &bad_send_wr);
        CPE(ret, "ibv_post_send error", ret);

        HRD_MOD_ADD(ud_qp_i, NUM_UD_QPS);
    }

    return NULL;
}
static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
                          struct pingpong_dest *dest,
                          struct user_parameters *user_parm)
{
    struct ibv_qp_attr attr;
    memset(&attr, 0, sizeof(struct ibv_qp_attr));

    attr.qp_state = IBV_QPS_RTR;
    if (user_parm->connection_type != UD) {
        switch (user_parm->mtu) {
        case 256:
            attr.path_mtu = IBV_MTU_256;
            break;
        case 512:
            attr.path_mtu = IBV_MTU_512;
            break;
        case 1024:
            attr.path_mtu = IBV_MTU_1024;
            break;
        case 2048:
            attr.path_mtu = IBV_MTU_2048;
            break;
        case 4096:
            attr.path_mtu = IBV_MTU_4096;
            break;
        }
        printf("Mtu : %d\n", user_parm->mtu);
        attr.dest_qp_num = dest->qpn;
        attr.rq_psn = dest->psn;
    }
    if (user_parm->connection_type == RC) {
        attr.max_dest_rd_atomic = 1;
        attr.min_rnr_timer = 12;
    }
    if (user_parm->gid_index < 0) {
        attr.ah_attr.is_global = 0;
        attr.ah_attr.dlid = dest->lid;
        attr.ah_attr.sl = sl;
    } else {
        attr.ah_attr.is_global = 1;
        attr.ah_attr.grh.dgid = dest->dgid;
        attr.ah_attr.grh.hop_limit = 1;
        attr.ah_attr.sl = 0;
    }
    attr.ah_attr.src_path_bits = 0;
    attr.ah_attr.port_num = port;

    if ((user_parm->connection_type == UD) && (user_parm->use_mcg)) {
        uint8_t mcg_gid[16] = MCG_GID;

        /* send the message to the mcg of the other side */
        mcg_gid[11] = (user_parm->servername) ? 1 : 0;
        *(uint32_t *)(&mcg_gid[12]) = dest->qpn;

        attr.ah_attr.dlid = MCG_LID;
        attr.ah_attr.is_global = 1;
        attr.ah_attr.grh.sgid_index = 0;
        memcpy(attr.ah_attr.grh.dgid.raw, mcg_gid, 16);
    }

    if (user_parm->connection_type == RC) {
        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE         |
                          IBV_QP_AV            |
                          IBV_QP_PATH_MTU      |
                          IBV_QP_DEST_QPN      |
                          IBV_QP_RQ_PSN        |
                          IBV_QP_MIN_RNR_TIMER |
                          IBV_QP_MAX_DEST_RD_ATOMIC)) {
            fprintf(stderr, "Failed to modify RC QP to RTR\n");
            return 1;
        }
        attr.timeout = user_parm->qp_timeout;
        attr.retry_cnt = 7;
        attr.rnr_retry = 7;
    } else if (user_parm->connection_type == UC) {
        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE    |
                          IBV_QP_AV       |
                          IBV_QP_PATH_MTU |
                          IBV_QP_DEST_QPN |
                          IBV_QP_RQ_PSN)) {
            fprintf(stderr, "Failed to modify UC QP to RTR\n");
            return 1;
        }
    } else {
        if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) {
            fprintf(stderr, "Failed to modify UD QP to RTR\n");
            return 1;
        }
    }

    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn = my_psn;
    if (user_parm->connection_type == RC) {
        attr.max_rd_atomic = 1;
        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE     |
                          IBV_QP_SQ_PSN    |
                          IBV_QP_TIMEOUT   |
                          IBV_QP_RETRY_CNT |
                          IBV_QP_RNR_RETRY |
                          IBV_QP_MAX_QP_RD_ATOMIC)) {
            fprintf(stderr, "Failed to modify RC QP to RTS\n");
            return 1;
        }
    } else { /* both UC and UD */
        if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) {
            fprintf(stderr, "Failed to modify UC/UD QP to RTS\n");
            return 1;
        }
    }

    if (user_parm->connection_type == UD) {
        ctx->ah = ibv_create_ah(ctx->pd, &attr.ah_attr);
        if (!ctx->ah) {
            fprintf(stderr, "Failed to create AH for UD\n");
            return 1;
        }
    }

    /* post receive max msg size */
    {
        int i;
        struct ibv_recv_wr *bad_wr_recv;

        ctx->recv_list.addr = (uintptr_t) ctx->buf;
        if (user_parm->connection_type == UD) {
            ctx->recv_list.length = ctx->size + 40;
        } else {
            ctx->recv_list.length = ctx->size;
        }
        ctx->recv_list.lkey = ctx->mr->lkey;
        for (i = 0; i < user_parm->tx_depth / 2; ++i) {
            if (ibv_post_recv(ctx->qp, &ctx->rwr, &bad_wr_recv)) {
                fprintf(stderr, "Couldn't post recv: counter=%d\n", i);
                return 14;
            }
        }
    }

    return 0;
}
int rdma_client_connect(struct pingpong_context *ctx,
                        struct perftest_parameters *user_param)
{
    char *service;
    int temp, num_of_retry = NUM_OF_RETRIES;
    struct sockaddr_in sin;
    struct addrinfo *res;
    struct rdma_cm_event *event;
    struct rdma_conn_param conn_param;
    struct addrinfo hints;

    memset(&hints, 0, sizeof hints);
    hints.ai_family   = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    if (check_add_port(&service, user_param->port, user_param->servername,
                       &hints, &res)) {
        fprintf(stderr, "Problem in resolving basic address and port\n");
        return FAILURE;
    }

    sin.sin_addr.s_addr = ((struct sockaddr_in *) res->ai_addr)->sin_addr.s_addr;
    sin.sin_family = PF_INET;
    sin.sin_port   = htons((unsigned short) user_param->port);

    while (1) {
        if (num_of_retry == 0) {
            fprintf(stderr, "Received %d times ADDR_ERROR\n", NUM_OF_RETRIES);
            return FAILURE;
        }

        if (rdma_resolve_addr(ctx->cm_id, NULL, (struct sockaddr *) &sin, 2000)) {
            fprintf(stderr, "rdma_resolve_addr failed\n");
            return FAILURE;
        }

        if (rdma_get_cm_event(ctx->cm_channel, &event)) {
            fprintf(stderr, "rdma_get_cm_events failed\n");
            return FAILURE;
        }

        if (event->event == RDMA_CM_EVENT_ADDR_ERROR) {
            num_of_retry--;
            rdma_ack_cm_event(event);
            continue;
        }

        if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
            fprintf(stderr, "unexpected CM event %d\n", event->event);
            rdma_ack_cm_event(event);
            return FAILURE;
        }

        rdma_ack_cm_event(event);
        break;
    }

    if (user_param->tos != DEF_TOS) {
        if (rdma_set_option(ctx->cm_id, RDMA_OPTION_ID, RDMA_OPTION_ID_TOS,
                            &user_param->tos, sizeof(uint8_t))) {
            fprintf(stderr, " Set TOS option failed: %d\n", event->event);
            return FAILURE;
        }
    }

    while (1) {
        if (num_of_retry <= 0) {
            fprintf(stderr, "Received %d times ROUTE_ERROR - aborting\n", NUM_OF_RETRIES);
            return FAILURE;
        }

        if (rdma_resolve_route(ctx->cm_id, 2000)) {
            fprintf(stderr, "rdma_resolve_route failed\n");
            return FAILURE;
        }

        if (rdma_get_cm_event(ctx->cm_channel, &event)) {
            fprintf(stderr, "rdma_get_cm_events failed\n");
            return FAILURE;
        }

        if (event->event == RDMA_CM_EVENT_ROUTE_ERROR) {
            num_of_retry--;
            rdma_ack_cm_event(event);
            continue;
        }

        if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
            fprintf(stderr, "unexpected CM event %d\n", event->event);
            rdma_ack_cm_event(event);
            return FAILURE;
        }

        rdma_ack_cm_event(event);
        break;
    }

    ctx->context = ctx->cm_id->verbs;
    temp = user_param->work_rdma_cm;
    user_param->work_rdma_cm = ON;

    if (ctx_init(ctx, user_param)) {
        fprintf(stderr, " Unable to create the resources needed by comm struct\n");
        return FAILURE;
    }

    memset(&conn_param, 0, sizeof conn_param);
    if (user_param->verb == READ || user_param->verb == ATOMIC) {
        conn_param.responder_resources = user_param->out_reads;
        conn_param.initiator_depth     = user_param->out_reads;
    }
    user_param->work_rdma_cm   = temp;
    conn_param.retry_count     = user_param->retry_count;
    conn_param.rnr_retry_count = 7;

    if (user_param->work_rdma_cm == OFF) {
        if (post_one_recv_wqe(ctx)) {
            fprintf(stderr, "Couldn't post receive\n");
            return 1;
        }
    }

    if (rdma_connect(ctx->cm_id, &conn_param)) {
        fprintf(stderr, "Function rdma_connect failed\n");
        return FAILURE;
    }

    if (rdma_get_cm_event(ctx->cm_channel, &event)) {
        fprintf(stderr, "rdma_get_cm_events failed\n");
        return FAILURE;
    }

    if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
        rdma_ack_cm_event(event);
        fprintf(stderr, "Unexpected CM event %d\n", event->event);
        return FAILURE;
    }

    if (user_param->connection_type == UD) {
        user_param->rem_ud_qpn  = event->param.ud.qp_num;
        user_param->rem_ud_qkey = event->param.ud.qkey;

        ctx->ah[0] = ibv_create_ah(ctx->pd, &event->param.ud.ah_attr);
        if (!ctx->ah[0]) {
            printf(" Unable to create address handle for UD QP\n");
            return FAILURE;
        }

        if (user_param->tst == LAT || (user_param->tst == BW && user_param->duplex)) {
            if (send_qp_num_for_ah(ctx, user_param)) {
                printf(" Unable to send my QP number\n");
                return FAILURE;
            }
        }
    }

    rdma_ack_cm_event(event);
    return SUCCESS;
}
void *run_server(void *arg)
{
    int i;
    struct thread_params params = *(struct thread_params *) arg;
    int srv_gid = params.id; /* Global ID of this server thread */
    int ib_port_index = params.dual_port == 0 ? 0 : srv_gid % 2;

    struct hrd_ctrl_blk *cb = hrd_ctrl_blk_init(srv_gid, /* local_hid */
        ib_port_index, -1,         /* port_index, numa_node_id */
        0, 0,                      /* conn qps, use uc */
        NULL, 0, -1,               /* prealloc conn buf, conn buf size, key */
        NUM_UD_QPS, BUF_SIZE, -1); /* num_dgram_qps, dgram_buf_size, key */

    /* Buffer to receive requests into */
    memset((void *) cb->dgram_buf, 0, BUF_SIZE);

    /* Buffer to send responses from */
    uint8_t *resp_buf = malloc(params.size);
    assert(resp_buf != 0);
    memset(resp_buf, 1, params.size);

    /* Create an address handle for each client */
    struct ibv_ah *ah[NUM_CLIENTS];
    memset(ah, 0, NUM_CLIENTS * sizeof(uintptr_t));
    struct hrd_qp_attr *clt_qp[NUM_CLIENTS];

    /*
     * Connect this server to NUM_CLIENTS clients whose global IDs are the
     * same as this server's modulo 2. This ensures that the connected
     * clients are on the same port as the server.
     */
    for (i = 0; i < NUM_CLIENTS; i++) {
        char clt_name[HRD_QP_NAME_SIZE];

        /* ah[i] maps to client clt_id */
        int clt_id = params.dual_port == 0 ? i : 2 * i + (srv_gid % 2);
        sprintf(clt_name, "client-%d", clt_id);

        /* Get the UD queue pair for the ith client */
        clt_qp[i] = NULL;
        while (clt_qp[i] == NULL) {
            clt_qp[i] = hrd_get_published_qp(clt_name);
            if (clt_qp[i] == NULL) {
                usleep(200000);
            }
        }

        printf("main: Server %d got client %d (clt_id = %d) of %d clients.\n",
               srv_gid, i, clt_id, NUM_CLIENTS);

        struct ibv_ah_attr ah_attr = {
            .is_global = 0,
            .dlid = clt_qp[i]->lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = cb->dev_port_id,
        };

        ah[i] = ibv_create_ah(cb->pd, &ah_attr);
        assert(ah[i] != NULL);
    }

    struct ibv_send_wr wr[MAX_POSTLIST], *bad_send_wr;
    struct ibv_wc wc[MAX_POSTLIST];
    struct ibv_sge sgl[MAX_POSTLIST];
    long long rolling_iter = 0;        /* For throughput measurement */
    long long nb_tx[NUM_UD_QPS] = {0}; /* For selective signaling */
    int ud_qp_i = 0; /* Round-robin between QPs across postlists */
    int w_i = 0;     /* Window index */
    int ret;

    struct timespec start, end;
    clock_gettime(CLOCK_REALTIME, &start);

    while (1) {
        if (rolling_iter >= M_4) {
            clock_gettime(CLOCK_REALTIME, &end);
            double seconds = (end.tv_sec - start.tv_sec) +
                (double) (end.tv_nsec - start.tv_nsec) / 1000000000;
            double my_tput = M_4 / seconds;
            printf("main: Server %d: %.2f Mops.\n", srv_gid, my_tput);
            params.tput[srv_gid] = my_tput;

            if (srv_gid == 0) {
                double total_tput = 0;
                for (i = 0; i < params.num_threads; i++) {
                    total_tput += params.tput[i];
                }
                hrd_red_printf("main: Total tput = %.2f Mops.\n", total_tput);
            }

            rolling_iter = 0;
            clock_gettime(CLOCK_REALTIME, &start);
        }

        for (w_i = 0; w_i < params.postlist; w_i++) {
            int cn = nb_tx[ud_qp_i] & NUM_CLIENTS_;

            wr[w_i].wr.ud.ah = ah[cn];
            wr[w_i].wr.ud.remote_qpn = clt_qp[cn]->qpn;
            wr[w_i].wr.ud.remote_qkey = HRD_DEFAULT_QKEY;

            wr[w_i].opcode = IBV_WR_SEND;
            wr[w_i].num_sge = 1;
            wr[w_i].next = (w_i == params.postlist - 1) ? NULL : &wr[w_i + 1];
            wr[w_i].sg_list = &sgl[w_i];

            wr[w_i].send_flags =
                ((nb_tx[ud_qp_i] & UNSIG_BATCH_) == 0) ? IBV_SEND_SIGNALED : 0;
            if ((nb_tx[ud_qp_i] & UNSIG_BATCH_) == 0 && nb_tx[ud_qp_i] > 0) {
                hrd_poll_cq(cb->dgram_send_cq[ud_qp_i], 1, wc);
            }
            wr[w_i].send_flags |= IBV_SEND_INLINE;

            sgl[w_i].addr = (uint64_t) (uintptr_t) resp_buf;
            sgl[w_i].length = params.size;

            nb_tx[ud_qp_i]++;
            rolling_iter++;
        }

        ret = ibv_post_send(cb->dgram_qp[ud_qp_i], &wr[0], &bad_send_wr);
        CPE(ret, "ibv_post_send error", ret);

        /* Use a different QP for the next postlist */
        ud_qp_i++;
        if (ud_qp_i == NUM_UD_QPS) {
            ud_qp_i = 0;
        }
    }

    return NULL;
}
static int init_device(struct ibv_context *context_arg,
                       struct mca_btl_openib_sa_qp_cache *cache,
                       uint32_t port_num)
{
    struct ibv_ah_attr aattr;
    struct ibv_port_attr pattr;
    int rc;

    cache->context = ibv_open_device(context_arg->device);
    if (NULL == cache->context) {
        BTL_ERROR(("error obtaining device context for %s errno says %s",
                   ibv_get_device_name(context_arg->device), strerror(errno)));
        return OPAL_ERROR;
    }
    cache->device_name = strdup(ibv_get_device_name(cache->context->device));
    cache->port_num = port_num;

    /* init all sl_values to be SL_NOT_PRESENT */
    memset(&cache->sl_values, SL_NOT_PRESENT, sizeof(cache->sl_values));

    cache->next = sa_qp_cache;
    sa_qp_cache = cache;

    /* allocate the protection domain for the device */
    cache->pd = ibv_alloc_pd(cache->context);
    if (NULL == cache->pd) {
        BTL_ERROR(("error allocating protection domain for %s errno says %s",
                   ibv_get_device_name(context_arg->device), strerror(errno)));
        return OPAL_ERROR;
    }

    /* register memory region */
    cache->mr = ibv_reg_mr(cache->pd, cache->send_recv_buffer,
                           sizeof(cache->send_recv_buffer),
                           IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
    if (NULL == cache->mr) {
        BTL_ERROR(("error registering memory region, errno says %s",
                   strerror(errno)));
        return OPAL_ERROR;
    }

    /* init the ud qp */
    rc = init_ud_qp(context_arg, cache);
    if (OPAL_ERROR == rc) {
        return OPAL_ERROR;
    }

    rc = ibv_query_port(cache->context, cache->port_num, &pattr);
    if (rc) {
        BTL_ERROR(("error getting port attributes for device %s "
                   "port number %d errno says %s",
                   ibv_get_device_name(context_arg->device),
                   cache->port_num, strerror(errno)));
        return OPAL_ERROR;
    }

    /* create address handle; SA queries are addressed to the subnet
     * manager, so the destination is the SM LID and SL from the port */
    memset(&aattr, 0, sizeof(aattr));
    aattr.dlid = pattr.sm_lid;
    aattr.sl = pattr.sm_sl;
    aattr.port_num = cache->port_num;
    cache->ah = ibv_create_ah(cache->pd, &aattr);
    if (NULL == cache->ah) {
        BTL_ERROR(("error creating address handle: %s", strerror(errno)));
        return OPAL_ERROR;
    }

    memset(&(cache->rwr), 0, sizeof(cache->rwr));
    cache->rwr.num_sge = 1;
    cache->rwr.sg_list = &(cache->rsge);

    memset(&(cache->rsge), 0, sizeof(cache->rsge));
    cache->rsge.addr = (uint64_t)(void *)(cache->send_recv_buffer + MAD_BLOCK_SIZE);
    /* UD receives are prefixed by a 40-byte GRH, so the SGE covers it */
    cache->rsge.length = MAD_BLOCK_SIZE + 40;
    cache->rsge.lkey = cache->mr->lkey;

    return 0;
}
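The rwr/rsge pair prepared above only takes effect once it is posted to the UD QP's receive queue. A minimal sketch of that step; post_sa_recv is a hypothetical helper, and cache->qp is assumed to be the QP created by init_ud_qp():

/* Sketch: arm the receive queue with the WR prepared in init_device().
 * cache->qp is an assumed field name for the UD QP. */
static int post_sa_recv(struct mca_btl_openib_sa_qp_cache *cache)
{
    struct ibv_recv_wr *bad_wr;

    if (ibv_post_recv(cache->qp, &cache->rwr, &bad_wr)) {
        BTL_ERROR(("error posting receive, errno says %s", strerror(errno)));
        return OPAL_ERROR;
    }
    return 0;
}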