static void on_completion(struct ibv_wc *wc) { struct rdma_cm_id *id = (struct rdma_cm_id *)(uintptr_t)wc->wr_id; struct conn_context *ctx = (struct conn_context *)id->context; if (wc->opcode == IBV_WC_RECV_RDMA_WITH_IMM) { uint32_t size = ntohl(wc->imm_data); if (size == 0) { ctx->msg->id = MSG_DONE; send_message(id); // don't need post_receive() since we're done with this connection } else if (ctx->file_name[0]) { ssize_t ret; printf("received %i bytes.\n", size); ret = write(ctx->fd, ctx->buffer, size); if (ret != size) rc_die("write() failed"); post_receive(id); ctx->msg->id = MSG_READY; send_message(id); } else { memcpy(ctx->file_name, ctx->buffer, (size > MAX_FILE_NAME) ? MAX_FILE_NAME : size); ctx->file_name[size - 1] = '\0'; printf("opening file %s\n", ctx->file_name); ctx->fd = open(ctx->file_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (ctx->fd == -1) rc_die("open() failed"); post_receive(id); ctx->msg->id = MSG_READY; send_message(id); } } }
/*
 * Perform one timed remote write and print how long it took.
 *
 * A receive is posted on the connection stored in s_ctx->id->context, then
 * write_remote() is called with `len` with bit 31 set.  Two completions are
 * polled (write, then receive) before the elapsed wall-clock time is printed
 * in microseconds.
 */
void client_write_once(uint32_t len)
{
    struct timeval t_begin;
    struct timeval t_end;
    struct timeval elapsed;
    struct connection *conn;
    uint32_t tagged_len;
    long usec;

    gettimeofday(&t_begin, NULL);

    conn = (struct connection *)s_ctx->id->context;
    post_receive(conn);

    /* bit 31 is OR-ed into the length before it is handed to write_remote() */
    tagged_len = len | (1UL << 31);
    write_remote(conn, tagged_len);

    poll_cq(NULL); /* wait for write completion */
    poll_cq(NULL); /* wait for recv completion */

    gettimeofday(&t_end, NULL);
    timersub(&t_end, &t_begin, &elapsed);

    usec = elapsed.tv_usec + 1000000 * elapsed.tv_sec;
    printf("[Wriet] takes %ld micro_secs.\n", usec);
}
static void on_pre_conn(struct rdma_cm_id *id) { struct conn_context *ctx = (struct conn_context *)malloc(sizeof(struct conn_context)); id->context = ctx; ctx->file_name[0] = '\0'; // take this to mean we don't have the file name posix_memalign((void **)&ctx->buffer, sysconf(_SC_PAGESIZE), BUFFER_SIZE); TEST_Z(ctx->buffer_mr = ibv_reg_mr(rc_get_pd(), ctx->buffer, BUFFER_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); posix_memalign((void **)&ctx->msg, sysconf(_SC_PAGESIZE), sizeof(*ctx->msg)); TEST_Z(ctx->msg_mr = ibv_reg_mr(rc_get_pd(), ctx->msg, sizeof(*ctx->msg), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); post_receive(id); }
void maca_isr(void) { // print_packets("maca_isr"); maca_entry++; if (bit_is_set(*MACA_STATUS, maca_status_ovr)) { PRINTF("maca overrun\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_busy)) { PRINTF("maca busy\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_crc)) { PRINTF("maca crc error\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_to)) { PRINTF("maca timeout\n\r"); } if (data_indication_irq()) { *MACA_CLRIRQ = (1 << maca_irq_di); if (dma_rx != &dummy_ack && dma_rx != &dummy_rx) { dma_rx->length = *MACA_GETRXLVL - 2; /* packet length does not include FCS */ dma_rx->lqi = get_lqi(); dma_rx->rx_time = *MACA_TIMESTAMP; /* check if received packet needs an ack */ if(prm_mode == AUTOACK && (dma_rx->data[1] & 0x20)) { /* this wait is necessary to auto-ack */ volatile uint32_t wait_clk; wait_clk = *MACA_CLK + 200; while(*MACA_CLK < wait_clk) { continue; } } if(maca_rx_callback != 0) { maca_rx_callback(dma_rx); } add_to_rx(dma_rx); } dma_rx = 0; } if (filter_failed_irq()) { PRINTF("maca filter failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_flt); } if (checksum_failed_irq()) { PRINTF("maca checksum failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_crc); } if (softclock_irq()) { *MACA_CLRIRQ = (1 << maca_irq_sftclk); } if (poll_irq()) { *MACA_CLRIRQ = (1 << maca_irq_poll); } if(action_complete_irq()) { /* PRINTF("maca action complete %d\n\r", get_field(*MACA_CONTROL,SEQUENCE)); */ if(last_post == TX_POST) { tx_head->status = get_field(*MACA_STATUS,CODE); #if MACA_INSERT_ACK /* Having sent a message with the acknowledge request flag set the * MACA hardware will only give a tx success indication if the message * was acknowledged by the remote node. 
We need to detect this * condition and inject an ACK packet into the internal receive stream * as the higher layers are expecting to see an ACK packet.*/ if(tx_head->status == SUCCESS && (tx_head->data[0] & MAC_ACK_REQUEST_FLAG)) { /* Create the dummy ack packet */ static volatile packet_t *ack_p; if(ack_p = get_free_packet()) { ack_p->length = 3; ack_p->offset = 1; ack_p->data[0] = 3; ack_p->data[1] = 0x02; ack_p->data[2] = 0; ack_p->data[3] = *MACA_TXSEQNR; insert_at_rx_head(ack_p); } } #endif if(maca_tx_callback != 0) { maca_tx_callback(tx_head); } dma_tx = 0; free_tx_head(); last_post = NO_POST; } ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_acpl); } decode_status(); if (*MACA_IRQ != 0) { PRINTF("*MACA_IRQ %x\n\r", (unsigned int)*MACA_IRQ); } if(tx_head != 0) { post_tx(); } else { post_receive(); } }
void maca_isr(void) { // print_packets("maca_isr"); maca_entry++; if (bit_is_set(*MACA_STATUS, maca_status_ovr)) { PRINTF("maca overrun\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_busy)) { PRINTF("maca busy\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_crc)) { PRINTF("maca crc error\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_to)) { PRINTF("maca timeout\n\r"); } if (data_indication_irq()) { *MACA_CLRIRQ = (1 << maca_irq_di); dma_rx->length = *MACA_GETRXLVL - 2; /* packet length does not include FCS */ // PRINTF("maca data ind %x %d\n\r", dma_rx, dma_rx->length); if(maca_rx_callback != 0) { maca_rx_callback(dma_rx); } add_to_rx(dma_rx); dma_rx = 0; } if (filter_failed_irq()) { PRINTF("maca filter failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_flt); } if (checksum_failed_irq()) { PRINTF("maca checksum failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_crc); } if (softclock_irq()) { *MACA_CLRIRQ = (1 << maca_irq_sftclk); } if (poll_irq()) { *MACA_CLRIRQ = (1 << maca_irq_poll); } if(action_complete_irq()) { /* PRINTF("maca action complete %d\n\r", get_field(*MACA_CONTROL,SEQUENCE)); */ if(last_post == TX_POST) { if(maca_tx_callback != 0) { maca_tx_callback(tx_head); } dma_tx = 0; free_tx_head(); last_post = NO_POST; } ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_acpl); } decode_status(); if (*MACA_IRQ != 0) { PRINTF("*MACA_IRQ %x\n\r", *MACA_IRQ); } if(tx_head != 0) { post_tx(); } else { post_receive(); } }
static int connect_qp(struct resources *res) { struct cm_con_data_t local_con_data; struct cm_con_data_t remote_con_data; struct cm_con_data_t tmp_con_data; int rc; /* modify the QP to init */ rc = modify_qp_to_init(res->qp); if (rc) { fprintf(stderr, "change QP state to INIT failed\n"); return rc; } /* let the client post RR to be prepared for incoming messages */ if (config.server_name) { rc = post_receive(res); if (rc) { fprintf(stderr, "failed to post RR\n"); return rc; } } /* exchange using TCP sockets info required to connect QPs */ local_con_data.qp_num = htonl(res->qp->qp_num); local_con_data.lid = htons(res->port_attr.lid); fprintf(stdout, "\nLocal LID = 0x%x\n", res->port_attr.lid); if (sock_sync_data(res->sock, !config.server_name, sizeof(struct cm_con_data_t), &local_con_data, &tmp_con_data) < 0) { fprintf(stderr, "failed to exchange connection data between sides\n"); return 1; } remote_con_data.qp_num = ntohl(tmp_con_data.qp_num); remote_con_data.lid = ntohs(tmp_con_data.lid); fprintf(stdout, "Remote QP number = 0x%x\n", remote_con_data.qp_num); fprintf(stdout, "Remote LID = 0x%x\n", remote_con_data.lid); /* modify the QP to RTR */ rc = modify_qp_to_rtr(res->qp, remote_con_data.qp_num, remote_con_data.lid); if (rc) { fprintf(stderr, "failed to modify QP state from RESET to RTS\n"); return rc; } /* only the daemon post SR, so only he should be in RTS (the client can be moved to RTS as well) */ if (config.server_name) fprintf(stdout, "QP state was change to RTR\n"); else { rc = modify_qp_to_rts(res->qp); if (rc) { fprintf(stderr, "failed to modify QP state from RESET to RTS\n"); return rc; } fprintf(stdout, "QP state was change to RTS\n"); } /* sync to make sure that both sides are in states that they can connect to prevent packet loose */ if (sock_sync_ready(res->sock, !config.server_name)) { fprintf(stderr, "sync after QPs are were moved to RTS\n"); return 1; } return 0; }
void maca_isr(void) { // print_packets("maca_isr"); maca_entry++; if (bit_is_set(*MACA_STATUS, maca_status_ovr)) { PRINTF("maca overrun\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_busy)) { PRINTF("maca busy\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_crc)) { PRINTF("maca crc error\n\r"); } if (bit_is_set(*MACA_STATUS, maca_status_to)) { PRINTF("maca timeout\n\r"); } if (data_indication_irq()) { *MACA_CLRIRQ = (1 << maca_irq_di); dma_rx->length = *MACA_GETRXLVL - 2; /* packet length does not include FCS */ dma_rx->lqi = get_lqi(); dma_rx->rx_time = *MACA_TIMESTAMP; /* check if received packet needs an ack */ if(dma_rx->data[1] & 0x20) { /* this wait is necessary to auto-ack */ volatile uint32_t wait_clk; wait_clk = *MACA_CLK + 200; while(*MACA_CLK < wait_clk) { continue; } } if(maca_rx_callback != 0) { maca_rx_callback(dma_rx); } add_to_rx(dma_rx); dma_rx = 0; } if (filter_failed_irq()) { PRINTF("maca filter failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_flt); } if (checksum_failed_irq()) { PRINTF("maca checksum failed\n\r"); ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_crc); } if (softclock_irq()) { *MACA_CLRIRQ = (1 << maca_irq_sftclk); } if (poll_irq()) { *MACA_CLRIRQ = (1 << maca_irq_poll); } if(action_complete_irq()) { /* PRINTF("maca action complete %d\n\r", get_field(*MACA_CONTROL,SEQUENCE)); */ if(last_post == TX_POST) { tx_head->status = get_field(*MACA_STATUS,CODE); if(maca_tx_callback != 0) { maca_tx_callback(tx_head); } dma_tx = 0; free_tx_head(); last_post = NO_POST; } ResumeMACASync(); *MACA_CLRIRQ = (1 << maca_irq_acpl); } decode_status(); if (*MACA_IRQ != 0) { PRINTF("*MACA_IRQ %x\n\r", (unsigned int)*MACA_IRQ); } if(tx_head != 0) { post_tx(); } else { post_receive(); } }
static void* rdma_thread(void *ptr) { int i, j, rc; struct rdma_resource_t *rdma_resource; struct user_param_t *user_param; struct thread_context_t *t_ctx; struct rdma_req_t rdma_req; double lat; t_ctx = (struct thread_context_t*)ptr; rdma_resource = t_ctx->rdma_resource; user_param = &(rdma_resource->user_param); t_ctx->thread_id = pthread_self(); t_ctx->num_of_iter = user_param->num_of_iter; if (create_rdma_buf_pool(t_ctx)) { ERROR("Failed to create MR pool.\n"); return NULL; } { uint32_t qp_type; if (user_param->server_ip != NULL) { qp_type = htonl(user_param->qp_type); } sock_c2d(&(t_ctx->sock), sizeof(qp_type), &qp_type); if (user_param->server_ip == NULL) { user_param->qp_type = ntohl(qp_type); } t_ctx->qp_type = user_param->qp_type; /// redesign } if (create_qp(t_ctx)) { ERROR("Failed to create QP.\n"); return NULL; } { struct thread_sync_info_t { uint32_t qp_num; uint32_t direction; uint32_t opcode; uint32_t qkey; uint32_t psn; uint32_t num_of_iter; uint16_t lid; } ATTR_PACKED; struct thread_sync_info_t local_info; struct thread_sync_info_t remote_info; local_info.lid = htons(rdma_resource->port_attr.lid); local_info.qp_num = htonl(t_ctx->qp->qp_num); local_info.direction = htonl(user_param->direction); local_info.opcode = htonl(user_param->opcode); /// enum ibv_wr_opcode local_info.qkey = htonl(0); local_info.psn = htonl(0); local_info.num_of_iter = htonl(t_ctx->num_of_iter); rc = sock_sync_data(&(t_ctx->sock), sizeof(local_info), &local_info, &remote_info); if (rc) { ERROR("failed to sync data.\n"); return NULL; } t_ctx->remote_lid = ntohs(remote_info.lid); t_ctx->remote_qpn = ntohl(remote_info.qp_num); t_ctx->remote_qkey = ntohl(remote_info.qkey); t_ctx->remote_psn = ntohl(remote_info.psn); if (user_param->server_ip == NULL) { user_param->direction = ntohl(remote_info.direction); user_param->opcode = ntohl(remote_info.opcode); t_ctx->num_of_iter = ntohl(remote_info.num_of_iter); if (user_param->direction == 0 || user_param->direction == 1) { 
t_ctx->is_requestor = 0; } else if (user_param->direction == 2) { t_ctx->is_requestor = 1; } } else { if (user_param->direction == 0 || user_param->direction == 1) { t_ctx->is_requestor = 1; } else if (user_param->direction == 2) { t_ctx->is_requestor = 0; } } } t_ctx->t_a = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_a == NULL) { ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_b = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_b == NULL) { free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_c = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_c == NULL) { free(t_ctx->t_b); free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } for (i = 0; i < LAT_LEVEL; i++) { t_ctx->lat[i] = 0; } if (connect_qp(t_ctx)) { ERROR("Failed to connect QP.\n"); return NULL; } for(i = 0; i < user_param->num_of_oust; i++) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); rdma_req.num_of_oust = 1; rdma_req.data_size = DEF_BUF_SIZE; rc = post_receive(t_ctx, &rdma_req); if (rc) { ERROR("Failed to post_receive, i:%d.\n", i); return NULL; } } sock_sync_ready(&t_ctx->sock); for (i = 0; i < t_ctx->num_of_iter; i++) { t_ctx->t_a[i] = get_cycles(); DEBUG("do_rdma_transaction, t_ctx->num_of_iter=%d, i=%d.\n", t_ctx->num_of_iter, i); rc = do_rdma_transaction(t_ctx, i); if (rc) { ERROR("Failed to do_rdma_transaction, i:%d.\n", i); return NULL; } t_ctx->t_c[i] = get_cycles(); if (user_param->direction == 0 || (!t_ctx->is_requestor)) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); if (rdma_req.rdma_buf == NULL) { ERROR("Failed to get RDMA buffer.\n"); return NULL; /// Memory Leak and remove hung RX buffers } rdma_req.num_of_oust = 1; post_receive(t_ctx, &rdma_req); } if (user_param->interval) { usleep(user_param->interval); } } /// Memory leak, release the hung RX rdma_buf; destroy_qp(t_ctx); t_ctx->min_lat = 0x7fffffff; t_ctx->max_lat = 0; for (i = 0; i < t_ctx->num_of_iter; i++) { 
lat = (t_ctx->t_c[i] - t_ctx->t_a[i]) / rdma_resource->freq_mhz; if (lat < t_ctx->min_lat) { t_ctx->min_lat = lat; t_ctx->min_lat_iter_num = i; } if (lat > t_ctx->max_lat) { t_ctx->max_lat = lat; t_ctx->max_lat_iter_num = i; } for (j = 0; j < LAT_LEVEL; j++) { if (j < 7) { if (lat < (1 + j)) { t_ctx->lat[j]++; break; } } else { if (lat < (1 << (j - 4))) { t_ctx->lat[j]++; break; } } } if (j == LAT_LEVEL) { t_ctx->lat[LAT_LEVEL - 1]++; } } free(t_ctx->t_a); free(t_ctx->t_b); free(t_ctx->t_c); if (!user_param->server_ip) { /// sock_close_multi(&(t_ctx->sock), sock_bind); // how to close sock_fd. free(t_ctx); /// Need to improve. } INFO("RDMA testing thread successfully exited.\n"); return NULL; }