/**
 * Allocates a zero-filled payload and an NNTI_buffer_t descriptor, then
 * registers the payload with the transport as a single element.
 *
 * @param trans_hdl    transport handle forwarded to NNTI_register_memory().
 * @param op           buffer operation forwarded to NNTI_register_memory().
 * @param buffer_size  size of the payload in bytes.
 * @param buffer       out: receives the newly allocated descriptor when
 *                     registration succeeds; set to NULL when it fails.
 *
 * @return the result of NNTI_register_memory().
 */
static NNTI_result_t create_buffer(
        const NNTI_transport_t *trans_hdl,
        const NNTI_buf_ops_t    op,
        const uint32_t          buffer_size,
        NNTI_buffer_t         **buffer)
{
    NNTI_result_t nnti_rc;
    char *b;

    log_debug(bq_debug_level, "enter");

    // The payload is explicitly zeroed with memset() so that it is written
    // before it is handed to the transport.
    b=(char *)malloc(buffer_size);
    assert(b);
    memset(b, 0, buffer_size);

    *buffer=(NNTI_buffer_t *)malloc(sizeof(NNTI_buffer_t));
    assert(*buffer);

    nnti_rc=NNTI_register_memory(
            trans_hdl,
            b,
            buffer_size,
            1,
            op,
            NULL,
            *buffer);
    if (nnti_rc != NNTI_OK) {
        log_error(bq_debug_level, "failed registering queue buffer: %d", nnti_rc);

        // Registration failed, so nothing else owns these allocations.
        // Release them here instead of leaking them, and do not hand the
        // caller a descriptor that was never registered.
        free(*buffer);
        *buffer=NULL;
        free(b);
    }

    log_debug(bq_debug_level, "exit");

    return(nnti_rc);
}
void server(void) { NNTI_result_t rc=NNTI_OK; NNTI_status_t queue_status; NNTI_status_t send_status; char *c_ptr; void *packed=NULL; int32_t packed_size=0; int num_elements=nclients+(4*nclients*num_sends); char *queue_buf=(char *)malloc(num_elements*NNTI_REQUEST_BUFFER_SIZE); memset(queue_buf, 0, num_elements*NNTI_REQUEST_BUFFER_SIZE); NNTI_register_memory(&trans_hdl, queue_buf, NNTI_REQUEST_BUFFER_SIZE, num_elements, NNTI_RECV_QUEUE, NULL, &queue_mr); char *send_buf=(char *)malloc(NNTI_REQUEST_BUFFER_SIZE); memset(send_buf, 0, NNTI_REQUEST_BUFFER_SIZE); NNTI_register_memory(&trans_hdl, send_buf, NNTI_REQUEST_BUFFER_SIZE, 1, NNTI_SEND_SRC, NULL, &send_mr); char *server_ack_buf=(char *)malloc(NNTI_REQUEST_BUFFER_SIZE); memset(server_ack_buf, 0, NNTI_REQUEST_BUFFER_SIZE); NNTI_register_memory(&trans_hdl, server_ack_buf, NNTI_REQUEST_BUFFER_SIZE, 1, NNTI_RECV_DST, NULL, &server_ack_mr); char *get_src_buf=(char *)malloc(nclients*get_size); memset(get_src_buf, 0, nclients*get_size); NNTI_register_memory(&trans_hdl, get_src_buf, nclients*get_size, 1, NNTI_GET_SRC, NULL, &get_src_mr); char *put_dst_buf=(char *)malloc(nclients*put_size); memset(put_dst_buf, 0, nclients*put_size); NNTI_register_memory(&trans_hdl, put_dst_buf, nclients*put_size, 1, NNTI_PUT_DST, NULL, &put_dst_mr); /* * Phase 1 - exchange buffers handles */ // wait for the client to send it's recv_mr NNTI_wait(&queue_mr, NNTI_RECV_QUEUE, -1, &queue_status); c_ptr=(char*)queue_status.start+queue_status.offset; buffer_unpack(c_ptr, queue_status.length, &client_ack_mr, (xdrproc_t)&xdr_NNTI_buffer_t); // fprint_NNTI_buffer(logger_get_file(), "client_ack_mr", // "received client ack hdl", &client_ack_mr); // send our server_ack_mr, get_src_mr and put_dst_mr back to the client buffer_pack(&server_ack_mr, &packed, &packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); if (packed_size > NNTI_REQUEST_BUFFER_SIZE) { log_error(nntiperf_debug_level, "buffer_pack() says encoded NNTI_buffer_t is larger than 
NNTI_REQUEST_BUFFER_SIZE"); MPI_Abort(MPI_COMM_WORLD, -10); } char *ptr=send_buf; memcpy(ptr, &packed_size, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(ptr, packed, packed_size); ptr += packed_size; buffer_pack_free(packed, packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); buffer_pack(&get_src_mr, &packed, &packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); if (packed_size > NNTI_REQUEST_BUFFER_SIZE) { log_error(nntiperf_debug_level, "buffer_pack() says encoded NNTI_buffer_t is larger than NNTI_REQUEST_BUFFER_SIZE"); MPI_Abort(MPI_COMM_WORLD, -10); } memcpy(ptr, &packed_size, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(ptr, packed, packed_size); ptr += packed_size; buffer_pack_free(packed, packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); buffer_pack(&put_dst_mr, &packed, &packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); if (packed_size > NNTI_REQUEST_BUFFER_SIZE) { log_error(nntiperf_debug_level, "buffer_pack() says encoded NNTI_buffer_t is larger than NNTI_REQUEST_BUFFER_SIZE"); MPI_Abort(MPI_COMM_WORLD, -10); } memcpy(ptr, &packed_size, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(ptr, packed, packed_size); ptr += packed_size; buffer_pack_free(packed, packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); rc=NNTI_send(&queue_status.src, &send_mr, &client_ack_mr); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_send() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&send_mr, NNTI_SEND_SRC, 5000, &send_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 2 - client sends sync requests */ for (int i=0;i<nclients*num_sends;i++) { rc=NNTI_wait(&queue_mr, NNTI_RECV_QUEUE, 1000, &queue_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 3 - client sends async requests */ for 
(int i=0;i<nclients*num_sends;i++) { rc=NNTI_wait(&queue_mr, NNTI_RECV_QUEUE, 1000, &queue_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 4 - client does sync gets */ MPI_Barrier(MPI_COMM_WORLD); /* * Phase 5 - client does async gets */ MPI_Barrier(MPI_COMM_WORLD); /* * Phase 6 - client does sync puts */ MPI_Barrier(MPI_COMM_WORLD); /* * Phase 7 - client does async puts */ MPI_Barrier(MPI_COMM_WORLD); NNTI_unregister_memory(&queue_mr); free(queue_buf); NNTI_unregister_memory(&send_mr); free(send_buf); NNTI_unregister_memory(&server_ack_mr); free(server_ack_buf); NNTI_unregister_memory(&get_src_mr); free(get_src_buf); NNTI_unregister_memory(&put_dst_mr); free(put_dst_buf); return; }
void client(void) { NNTI_result_t rc=NNTI_OK; NNTI_status_t rdma_status; NNTI_status_t send_status; NNTI_status_t client_ack_status; void *packed=NULL; int32_t packed_size=0; double op_timer; // Teuchos::oblackholestream blackhole; // std::ostream &out = ( rank == 1 ? std::cout : blackhole ); std::ostream &out = std::cout; NNTI_connect(&trans_hdl, url, 5000, &server_hdl); char *send_buf=(char *)malloc(NNTI_REQUEST_BUFFER_SIZE); memset(send_buf, 0, NNTI_REQUEST_BUFFER_SIZE); NNTI_register_memory(&trans_hdl, send_buf, NNTI_REQUEST_BUFFER_SIZE, 1, NNTI_SEND_SRC, NULL, &send_mr); char *client_ack_buf=(char *)malloc(NNTI_REQUEST_BUFFER_SIZE); memset(client_ack_buf, 0, NNTI_REQUEST_BUFFER_SIZE); NNTI_register_memory(&trans_hdl, client_ack_buf, NNTI_REQUEST_BUFFER_SIZE, 1, NNTI_RECV_DST, NULL, &client_ack_mr); char *get_dst_buf=(char *)malloc(get_size); memset(get_dst_buf, 0, get_size); NNTI_register_memory(&trans_hdl, get_dst_buf, get_size, 1, NNTI_GET_DST, NULL, &get_dst_mr); char *put_src_buf=(char *)malloc(put_size); memset(put_src_buf, 0, put_size); NNTI_register_memory(&trans_hdl, put_src_buf, put_size, 1, NNTI_PUT_SRC, NULL, &put_src_mr); /* * Phase 1 - exchange buffer handles */ buffer_pack(&client_ack_mr, &packed, &packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); if (packed_size > NNTI_REQUEST_BUFFER_SIZE) { log_error(nntiperf_debug_level, "buffer_pack() says encoded NNTI_buffer_t is larger than NNTI_REQUEST_BUFFER_SIZE"); MPI_Abort(MPI_COMM_WORLD, -10); } // send the server the recv_mr so it can send back it's ack_mr memcpy(send_buf, packed, packed_size); buffer_pack_free(packed, packed_size, (xdrproc_t)&xdr_NNTI_buffer_t); rc=NNTI_send(&server_hdl, &send_mr, NULL); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_send() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&send_mr, NNTI_SEND_SRC, 5000, &send_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); 
MPI_Abort(MPI_COMM_WORLD, rc); } // wait for the server to send back it's recv_mr rc=NNTI_wait(&client_ack_mr, NNTI_RECV_DST, -1, &client_ack_status); char *ptr=(char*)client_ack_status.start+client_ack_status.offset; memcpy(&packed_size, ptr, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(packed, ptr, packed_size); ptr += packed_size; buffer_unpack(packed, packed_size, &server_ack_mr, (xdrproc_t)&xdr_NNTI_buffer_t); memcpy(&packed_size, ptr, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(packed, ptr, packed_size); ptr += packed_size; buffer_unpack(packed, packed_size, &get_src_mr, (xdrproc_t)&xdr_NNTI_buffer_t); memcpy(&packed_size, ptr, sizeof(packed_size)); ptr += sizeof(packed_size); memcpy(packed, ptr, packed_size); ptr += packed_size; buffer_unpack(packed, packed_size, &put_dst_mr, (xdrproc_t)&xdr_NNTI_buffer_t); // fprint_NNTI_buffer(logger_get_file(), "server_ack_mr", // "received server ack hdl", &server_ack_mr); // fprint_NNTI_buffer(logger_get_file(), "get_src_mr", // "received get src hdl", &get_src_mr); // fprint_NNTI_buffer(logger_get_file(), "put_dst_mr", // "received put dst hdl", &put_dst_mr); MPI_Barrier(MPI_COMM_WORLD); /* * Phase 2 - test sync request performance */ op_timer=trios_get_time(); for (int i=0;i<num_sends;i++) { rc=NNTI_send(&server_hdl, &send_mr, NULL); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_send() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&send_mr, NNTI_SEND_SRC, 1000, &send_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_sends > 0) { out << " sync requests per second == " << num_sends/op_timer << std::endl; } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 3 - test async request performance */ op_timer=trios_get_time(); for (int i=0;i<num_sends;i++) { rc=NNTI_send(&server_hdl, &send_mr, NULL); if (rc != NNTI_OK) { 
log_error(nntiperf_debug_level, "NNTI_send() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } for (int i=0;i<num_sends;i++) { rc=NNTI_wait(&send_mr, NNTI_SEND_SRC, 1000, &send_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_sends > 0) { out << "async requests per second == " << num_sends/op_timer << std::endl; } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 4 - test sync get performance */ // warm up the pipes for (int i=0;i<num_gets;i++) { rc=NNTI_get(&get_src_mr, client_rank*get_size, get_size, &get_dst_mr, 0); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_get() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&get_dst_mr, NNTI_GET_DST, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time(); for (int i=0;i<num_gets;i++) { rc=NNTI_get(&get_src_mr, client_rank*get_size, get_size, &get_dst_mr, 0); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_get() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&get_dst_mr, NNTI_GET_DST, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_gets > 0) { out << " sync get (" << get_size << " byte transfer) == " << (double)(num_gets*get_size)/one_mb/op_timer << " MBps" << std::endl; } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 5 - test async get performance */ // warm up the pipes for (int i=0;i<num_gets;i++) { rc=NNTI_get(&get_src_mr, client_rank*get_size, get_size, &get_dst_mr, 0); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_get() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } for (int i=0;i<num_gets;i++) { 
rc=NNTI_wait(&get_dst_mr, NNTI_GET_DST, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time(); for (int i=0;i<num_gets;i++) { rc=NNTI_get(&get_src_mr, client_rank*get_size, get_size, &get_dst_mr, 0); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_get() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } for (int i=0;i<num_gets;i++) { rc=NNTI_wait(&get_dst_mr, NNTI_GET_DST, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_gets > 0) { out << "async get (" << get_size << " byte transfer) == " << (double)(num_gets*get_size)/one_mb/op_timer << " MBps" << std::endl; } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 6 - test sync put performance */ // warm up the pipes for (int i=0;i<num_puts;i++) { rc=NNTI_put(&put_src_mr, 0, put_size, &put_dst_mr, client_rank*put_size); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_put() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&put_src_mr, NNTI_PUT_SRC, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time(); for (int i=0;i<num_puts;i++) { rc=NNTI_put(&put_src_mr, 0, put_size, &put_dst_mr, client_rank*put_size); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_put() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } rc=NNTI_wait(&put_src_mr, NNTI_PUT_SRC, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_puts > 0) { out << " sync put (" << put_size << " byte transfer) == " << 
(double)(num_puts*put_size)/one_mb/op_timer << " MBps" << std::endl; } MPI_Barrier(MPI_COMM_WORLD); /* * Phase 7 - test async put performance */ // warm up the pipes for (int i=0;i<num_puts;i++) { rc=NNTI_put(&put_src_mr, 0, put_size, &put_dst_mr, client_rank*put_size); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_put() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } for (int i=0;i<num_puts;i++) { rc=NNTI_wait(&put_src_mr, NNTI_PUT_SRC, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time(); for (int i=0;i<num_puts;i++) { rc=NNTI_put(&put_src_mr, 0, put_size, &put_dst_mr, client_rank*put_size); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_put() returned an error: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } for (int i=0;i<num_puts;i++) { rc=NNTI_wait(&put_src_mr, NNTI_PUT_SRC, 1000, &rdma_status); if (rc != NNTI_OK) { log_error(nntiperf_debug_level, "NNTI_wait() did not return NNTI_OK: %d", rc); MPI_Abort(MPI_COMM_WORLD, rc); } } op_timer=trios_get_time()-op_timer; if (num_puts > 0) { out << "async put (" << put_size << " byte transfer) == " << (double)(num_puts*put_size)/one_mb/op_timer << " MBps" << std::endl; } MPI_Barrier(MPI_COMM_WORLD); NNTI_unregister_memory(&send_mr); free(send_buf); NNTI_unregister_memory(&client_ack_mr); free(client_ack_buf); NNTI_unregister_memory(&get_dst_mr); free(get_dst_buf); NNTI_unregister_memory(&put_src_mr); free(put_src_buf); return; }