/**
 * Receive-completion callback.
 *
 * @param trans  transport the message arrived on; its private_data is
 *               used as an array of struct locks
 * @param arg    the struct datamr that was registered with the receive
 *
 * The first payload byte selects the entry of the locks array to use.
 * A one-byte message is treated as an ack: the buffer is reposted and the
 * condition variable of that entry is signalled.  Any other size is
 * treated as data: it is written to file descriptor 1, the buffer is
 * reposted and that entry's ack buffer is sent back.
 */
void callback_recv(msk_trans_t *trans, void *arg) {
	struct datamr *recv_ctx = arg;
	struct locks *thread_locks = trans->private_data;

	if (!recv_ctx) {
		ERROR_LOG("no callback_arg?");
		return;
	}
	if (!thread_locks) {
		ERROR_LOG("no locks?");
		return;
	}

	msk_data_t *msg = recv_ctx->data;
	int sender = (int)msg->data[0];

	if (msg->size == 1) {
		/* ack: make the buffer available again, then wake the waiter */
		msk_post_recv(trans, msg, recv_ctx->mr, callback_recv, recv_ctx);

		pthread_mutex_lock(&thread_locks[sender].lock);
		pthread_cond_signal(&thread_locks[sender].cond);
		pthread_mutex_unlock(&thread_locks[sender].lock);
		return;
	}

	/* data: dump the payload before the buffer is handed back for reuse */
	write(1, (char *)msg->data, msg->size);
	fflush(stdout);

	msk_post_recv(trans, msg, recv_ctx->mr, callback_recv, recv_ctx);
	msk_post_send(trans, thread_locks[sender].ackdata, recv_ctx->mr, NULL, NULL);
}
/**
 * Receive-completion callback for the replay loop.
 *
 * @param trans  transport the message arrived on; private_data must point
 *               to the struct privatedata shared with the main loop
 * @param pdata  buffer that was just filled
 * @param arg    unused
 *
 * Under priv->lock: when checking is enabled the received bytes are
 * compared against the packet currently held in priv; a mismatch stores
 * EBADMSG in priv->rc.  Otherwise the buffer is reposted and priv->rc
 * receives the result of that repost.  In every case priv->cond is
 * signalled before the lock is released.
 */
static void callback_recv(msk_trans_t *trans, msk_data_t *pdata, void *arg) {
	struct privatedata *shared = trans->private_data;
	int mismatch = 0;

	if (!shared) {
		ERROR_LOG("no private data?");
		return;
	}

	INFO_LOG(trans->debug & MSK_DEBUG_RECV, "Received something");

	pthread_mutex_lock(&shared->lock);

	/* the payload is only looked at when checking is enabled */
	if (shared->docheck) {
		mismatch = memcmp(shared->packet->data, pdata->data,
				  shared->pcaphdr->caplen - PACKET_HDR_LEN) != 0;
	}

	if (mismatch) {
		ERROR_LOG("Received packet doesn't match what we expected! Aborting.");
		shared->rc = EBADMSG;
	} else {
		/* no error (or no check): hand the buffer back for the next receive */
		shared->rc = msk_post_recv(trans, pdata, callback_recv, callback_error, NULL);
		if (shared->rc) {
			ERROR_LOG("Couldn't repost recv buffer, rc %d (%s)", shared->rc, strerror(shared->rc));
		}
	}

	pthread_cond_signal(&shared->cond);
	pthread_mutex_unlock(&shared->lock);
}
/* Equivalent du _9p_socket_thread( */ void * _9p_rdma_thread( void * Arg ) { msk_trans_t * trans = Arg ; _9p_rdma_priv * priv = NULL ; _9p_conn_t * p_9p_conn = NULL ; uint8_t * rdmabuf = NULL ; struct ibv_mr * mr = NULL ; msk_data_t * rdata = NULL ; _9p_datalock_t * datalock = NULL ; unsigned int i = 0 ; int rc = 0 ; if( ( priv = gsh_malloc( sizeof(*priv) ) ) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not malloc private structure" ) ; goto error ; } memset(priv, 0, sizeof(*priv)); trans->private_data = priv; if( ( p_9p_conn = gsh_malloc( sizeof(*p_9p_conn) ) ) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not malloc _9p_conn" ) ; goto error ; } memset(p_9p_conn, 0, sizeof(*p_9p_conn)); priv->pconn = p_9p_conn; for (i = 0; i < FLUSH_BUCKETS; i++) { pthread_mutex_init(&p_9p_conn->flush_buckets[i].lock, NULL); glist_init(&p_9p_conn->flush_buckets[i].list); } p_9p_conn->sequence = 0 ; atomic_store_uint32_t(&p_9p_conn->refcount, 0) ; p_9p_conn->trans_type = _9P_RDMA ; p_9p_conn->trans_data.rdma_trans = trans ; memcpy(&p_9p_conn->addrpeer, msk_get_dst_addr(trans), sizeof(p_9p_conn->addrpeer)); /* Init the fids pointers array */ memset( &p_9p_conn->fids, 0, _9P_FID_PER_CONN* sizeof( _9p_fid_t * ) ) ; /* Set initial msize. 
Client may request a lower value during TVERSION */ p_9p_conn->msize = nfs_param._9p_param._9p_rdma_msize ; if( gettimeofday( &p_9p_conn->birth, NULL ) == -1 ) LogMajor( COMPONENT_9P, "Cannot get connection's time of birth" ) ; /* Alloc rdmabuf */ if( ( rdmabuf = gsh_malloc( (_9P_RDMA_BUFF_NUM)*_9P_RDMA_CHUNK_SIZE)) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not malloc rdmabuf" ) ; goto error ; } memset( rdmabuf, 0, (_9P_RDMA_BUFF_NUM)*_9P_RDMA_CHUNK_SIZE); priv->rdmabuf = rdmabuf; /* Register rdmabuf */ if( ( mr = msk_reg_mr( trans, rdmabuf, (_9P_RDMA_BUFF_NUM)*_9P_RDMA_CHUNK_SIZE, IBV_ACCESS_LOCAL_WRITE)) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not register rdmabuf" ) ; goto error ; } /* Get prepared to recv data */ if( ( rdata = gsh_malloc( _9P_RDMA_BUFF_NUM * sizeof(*rdata) ) ) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not malloc rdata" ) ; goto error ; } memset( rdata, 0, (_9P_RDMA_BUFF_NUM * sizeof(*rdata)) ) ; priv->rdata = rdata; if( (datalock = gsh_malloc(_9P_RDMA_BUFF_NUM*sizeof(*datalock))) == NULL ) { LogFatal( COMPONENT_9P, "9P/RDMA: trans handler could not malloc datalock" ) ; goto error ; } memset( datalock, 0, (_9P_RDMA_BUFF_NUM * sizeof(*datalock)) ) ; priv->datalock = datalock; for( i=0; i < _9P_RDMA_BUFF_NUM; i++) { rdata[i].data=rdmabuf+i*_9P_RDMA_CHUNK_SIZE ; rdata[i].max_size=_9P_RDMA_CHUNK_SIZE ; rdata[i].mr = mr; datalock[i].data = &rdata[i]; pthread_mutex_init(&datalock[i].lock, NULL); if( i < _9P_RDMA_OUT ) datalock[i].sender = &datalock[i+_9P_RDMA_OUT] ; else datalock[i].sender = NULL ; } /* for (unsigned int i=0; i < _9P_RDMA_BUFF_NUM; i++) */ for( i=0; i < _9P_RDMA_OUT; i++) { if( ( rc = msk_post_recv( trans, &rdata[i], _9p_rdma_callback_recv, _9p_rdma_callback_recv_err, &(datalock[i]) ) ) != 0 ) { LogEvent( COMPONENT_9P, "9P/RDMA: trans handler could recv first byte of datalock[%u], rc=%u", i, rc ) ; goto error ; } } /* Finalize accept */ if( ( rc = 
msk_finalize_accept( trans ) ) != 0 ) { LogMajor( COMPONENT_9P, "9P/RDMA: trans handler could not finalize accept, rc=%u", rc ) ; goto error ; } pthread_exit( NULL ) ; error: _9p_rdma_cleanup_conn_thread( trans ) ; pthread_exit( NULL ) ; } /* _9p_rdma_handle_trans */
/*
 * Test program for remote write / remote read over one mooshika transport.
 *
 * Runs either as server (-s / -S addr) or as client (-c addr); -p sets the
 * port, -v raises the debug level.  A single registered buffer of
 * (RECV_NUM+2) chunks is used: chunk 0 for receives (rdata), chunk RECV_NUM
 * for sends and local RDMA data (wdata), chunk RECV_NUM+1 as the region the
 * client exposes to the server (ackdata).
 *
 * Sequence, as visible below:
 *   client: sends an rloc describing ackdata, waits, prints ackdata, stores
 *           "violets are blue" in ackdata, sends a message, waits again.
 *   server: waits for the rloc, remote-writes "roses are red" to it, sends
 *           a message, waits, remote-reads 17 bytes back and prints them.
 *
 * Every step is synchronised on one mutex/condvar pair handed to the
 * callbacks through 'datalock'; the main thread holds 'lock' from before
 * the first receive is posted until the end.
 *
 * NOTE(review): TEST_Z / TEST_NZ are project macros not visible here;
 * presumably they abort when the expression is non-zero / zero -- confirm.
 * NOTE(review): each pthread_cond_wait() is called without a predicate
 * loop, so a spurious wakeup would advance the sequence early.
 *
 * Returns 0 when the whole sequence ran.
 */
int main(int argc, char **argv) {


	msk_trans_t *trans;
	uint8_t *rdmabuf;
	struct ibv_mr *mr;

	msk_data_t *wdata;

	msk_trans_attr_t attr;

	memset(&attr, 0, sizeof(msk_trans_attr_t));

	attr.server = -1; // put an incorrect value to check if we're either client or server
	// sane values for optional or non-configurable elements
	attr.rq_depth = RECV_NUM+2;
	attr.port = "1235";
//	attr.disconnect_callback = callback_disconnect;

	// argument handling
	// NOTE(review): the long option "server" requires an argument but maps to
	// 's', whose case below ignores optarg; the short form -s takes none.
	static struct option long_options[] = {
		{ "client",	required_argument,	0,		'c' },
		{ "server",	required_argument,	0,		's' },
		{ "port",	required_argument,	0,		'p' },
		{ "help",	no_argument,		0,		'h' },
		{ 0,		0,			0,		 0  }
	};

	int option_index = 0;
	int op;
	while ((op = getopt_long(argc, argv, "@hvsS:c:p:", long_options, &option_index)) != -1) {
		switch(op) {
			case '@':
				// build/version banner
				printf("%s compiled on %s at %s\n", argv[0], __DATE__, __TIME__);
				printf("Release = %s\n", VERSION);
				printf("Release comment = %s\n", VERSION_COMMENT);
				printf("Git HEAD = %s\n", _GIT_HEAD_COMMIT ) ;
				printf("Git Describe = %s\n", _GIT_DESCRIBE ) ;
				exit(0);
			case 'h':
				print_help(argv);
				exit(0);
			case 'v':
				// each -v sets one more low bit of the debug mask
				attr.debug = attr.debug * 2 + 1;
				break;
			case 'c':
				attr.server = 0;
				attr.node = optarg;
				break;
			case 's':
				// server on the wildcard address
				attr.server = 10;
				attr.node = "::";
				break;
			case 'S':
				// server on an explicit address
				attr.server = 10;
				attr.node = optarg;
				break;
			case 'p':
				attr.port = optarg;
				break;
			default:
				ERROR_LOG("Failed to parse arguments");
				print_help(argv);
				exit(EINVAL);
		}
	}

	if (attr.server == -1) {
		ERROR_LOG("must be either a client or a server!");
		print_help(argv);
		exit(EINVAL);
	}

	TEST_Z(msk_init(&trans, &attr));

	if (!trans)
		exit(-1);


	if (trans->server) {
		// from here on 'trans' is the accepted child, not the listener
		TEST_Z(msk_bind_server(trans));
		TEST_NZ(trans = msk_accept_one(trans));

	} else { // client
		TEST_Z(msk_connect(trans));
		TEST_NZ(trans);
	}

	// one buffer, one memory registration, shared by every msk_data_t below
	TEST_NZ(rdmabuf = malloc((RECV_NUM+2)*CHUNK_SIZE*sizeof(char)));
	memset(rdmabuf, 0, (RECV_NUM+2)*CHUNK_SIZE*sizeof(char));
	TEST_NZ(mr = msk_reg_mr(trans, rdmabuf, (RECV_NUM+2)*CHUNK_SIZE*sizeof(char), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ));



	// last chunk: handed to the callback, and exposed to the peer by the client
	// NOTE(review): ackdata->mr is never assigned in this function.
	msk_data_t *ackdata;
	TEST_NZ(ackdata = malloc(sizeof(msk_data_t)));
	ackdata->data = rdmabuf+(RECV_NUM+1)*CHUNK_SIZE*sizeof(char);
	ackdata->max_size = CHUNK_SIZE*sizeof(char);
	ackdata->size = 1;
	ackdata->data[0] = 0;

	pthread_mutex_t lock;
	pthread_cond_t cond;

	pthread_mutex_init(&lock, NULL);
	pthread_cond_init(&cond, NULL);

	// chunk 0: the only receive buffer
	msk_data_t *rdata;
	struct datalock datalock;

	TEST_NZ(rdata = malloc(sizeof(msk_data_t)));

	rdata->data=rdmabuf; //+i*CHUNK_SIZE*sizeof(char);
	rdata->max_size=CHUNK_SIZE*sizeof(char);
	rdata->mr = mr;
	datalock.ackdata = ackdata;
	datalock.lock = &lock;
	datalock.cond = &cond;

	// take the lock before anything can complete, so no signal is sent
	// while we are not yet waiting
	pthread_mutex_lock(&lock);
	TEST_Z(msk_post_recv(trans, rdata, callback_recv, NULL, &datalock));

	if (trans->server) {
		TEST_Z(msk_finalize_accept(trans));
	} else {
		TEST_Z(msk_finalize_connect(trans));
	}

	// chunk RECV_NUM: outgoing messages and local side of RDMA write/read
	TEST_NZ(wdata = malloc(sizeof(msk_data_t)));
	wdata->data = rdmabuf+RECV_NUM*CHUNK_SIZE*sizeof(char);
	wdata->mr = mr;
	wdata->max_size = CHUNK_SIZE*sizeof(char);

	msk_rloc_t *rloc;

	if (trans->server) {
		printf("wait for rloc\n");
		TEST_Z(pthread_cond_wait(&cond, &lock)); // receive rloc

		// copy the remote location out of the receive buffer before reposting it
		TEST_NZ(rloc = malloc(sizeof(msk_rloc_t)));
		memcpy(rloc, rdata->data, sizeof(msk_rloc_t));
		printf("got rloc! key: %u, addr: %"PRIu64", size: %d\n", rloc->rkey, rloc->raddr, rloc->size);

		// 14 = strlen("roses are red") + terminating NUL
		memcpy(wdata->data, "roses are red", 14);
		wdata->size = 14;

		TEST_Z(msk_post_write(trans, wdata, rloc, callback_recv, NULL, &datalock));

		printf("waiting for write to finish\n");
		TEST_Z(pthread_cond_wait(&cond, &lock)); // write done

		TEST_Z(msk_post_recv(trans, rdata, callback_recv, NULL, &datalock));
		TEST_Z(msk_post_send(trans, wdata, NULL, NULL, NULL)); // ack to say we're done

		printf("waiting for something to be ready to read\n");
		TEST_Z(pthread_cond_wait(&cond, &lock));

		// 17 = strlen("violets are blue") + terminating NUL, see the client branch
		wdata->size=17;
		TEST_Z(msk_post_read(trans, wdata, rloc, callback_recv, NULL, &datalock));

		printf("wait for read to finish\n");
		TEST_Z(pthread_cond_wait(&cond, &lock));

		printf("%s\n", wdata->data);

		TEST_Z(msk_wait_send(trans, wdata)); // ack - other can quit

	} else {
		// describe the ackdata chunk so the server can write to / read from it
		TEST_NZ(rloc = msk_make_rloc(mr, (uint64_t)(uintptr_t)ackdata->data, ackdata->max_size));

		memcpy(wdata->data, rloc, sizeof(msk_rloc_t));
		wdata->size = sizeof(msk_rloc_t);
		TEST_Z(msk_post_send(trans, wdata, NULL, NULL, NULL));

		printf("sent rloc, waiting for server to say they're done\n");
		TEST_Z(pthread_cond_wait(&cond, &lock)); // receive server ack (they wrote stuff)

		printf("%s\n", ackdata->data);

		TEST_Z(msk_post_recv(trans, rdata, callback_recv, NULL, &datalock));

		memcpy(ackdata->data, "violets are blue", 17);

		// wdata still holds the rloc-sized payload; only its arrival matters here
		TEST_Z(msk_post_send(trans, wdata, NULL, NULL, NULL)); // say we've got something to read

		printf("waiting for server to be done\n");
		TEST_Z(pthread_cond_wait(&cond, &lock));

	}
	pthread_mutex_unlock(&lock);

	msk_dereg_mr(mr);

	msk_destroy_trans(&trans);
	msk_destroy_trans(&trans); // check that double_destroy works

	// rloc comes from malloc() on the server and from msk_make_rloc() on the client
	free(rloc);
	free(ackdata);
	free(rdata);
	free(wdata);
	free(rdmabuf);

	return 0;
}
/*
 * Per-connection setup: registers one buffer for the transport, posts
 * RECV_NUM receive buffers, prepares one lock/condvar/ack triple per sender
 * thread and finalizes the connection.
 *
 * arg is the msk_trans_t to work on.
 *
 * Layout of rdmabuf, in CHUNK_SIZE chunks:
 *   [0, RECV_NUM)         receive buffers
 *   RECV_NUM              one ack byte per thread (byte i holds the value i)
 *   RECV_NUM+1+num        send buffer of sender thread 'num'
 *
 * NOTE(review): the closing brace of handle_trans and whatever follows the
 * nested sendstuff() definition are not visible in this excerpt.
 */
void* handle_trans(void *arg) {
	msk_trans_t *trans = arg;
	uint8_t *rdmabuf;
	struct ibv_mr *mr;
	msk_data_t *ackdata;
	msk_data_t **rdata;
	struct datamr *datamr;
	struct locks *locks;
	int i;

	// malloc memory zone that will contain all buffer data (for mr), and register it for our trans
	// NOTE(review): the macro body is not parenthesized; it is only used as a
	// whole argument below, where that makes no difference.
#define RDMABUF_SIZE (RECV_NUM+NUM_THREADS+2)*CHUNK_SIZE
	TEST_NZ(rdmabuf = malloc(RDMABUF_SIZE));
	memset(rdmabuf, 0, RDMABUF_SIZE);
	TEST_NZ(mr = msk_reg_mr(trans, rdmabuf, RDMABUF_SIZE, IBV_ACCESS_LOCAL_WRITE));

	// malloc mooshika's data structs (i.e. max_size+size+pointer to actual data), for ack buffer
	TEST_NZ(ackdata = malloc(NUM_THREADS*sizeof(msk_data_t)));

	// malloc receive structs as well as a custom callback argument, and post it for future receive
	// (datamr is sized NUM_THREADS*RECV_NUM but only its first RECV_NUM entries are filled here)
	TEST_NZ(rdata = malloc(RECV_NUM*sizeof(msk_data_t*)));
	TEST_NZ(datamr = malloc(NUM_THREADS*RECV_NUM*sizeof(struct datamr)));
	TEST_NZ(locks = malloc(NUM_THREADS*sizeof(struct locks)));

	for (i=0; i < RECV_NUM; i++) {
		TEST_NZ(rdata[i] = malloc(sizeof(msk_data_t)));
		rdata[i]->data=rdmabuf+i*CHUNK_SIZE;
		rdata[i]->max_size=CHUNK_SIZE;
		datamr[i].data = rdata[i];
		datamr[i].mr = mr;
		TEST_Z(msk_post_recv(trans, rdata[i], mr, callback_recv, &(datamr[i])));
	}

	// per-thread synchronisation plus a one-byte ack message carrying the thread index
	for (i=0; i < NUM_THREADS; i++) {
		pthread_mutex_init(&locks[i].lock, NULL);
		pthread_cond_init(&locks[i].cond, NULL);
		locks[i].ackdata = &ackdata[i];
		locks[i].ackdata->data = rdmabuf+RECV_NUM*CHUNK_SIZE+i;
		locks[i].ackdata->max_size = 1;
		locks[i].ackdata->size = 1;
		locks[i].ackdata->data[0] = i;
	}

	// callback_recv reads the locks array back from here
	trans->private_data = locks;

	// receive buffers are posted, we can finalize the connection
	if (trans->server) {
		TEST_Z(msk_finalize_accept(trans));
	} else {
		TEST_Z(msk_finalize_connect(trans));
	}

	/*
	 * Sender thread body, defined as a nested function (a GNU C extension);
	 * it uses rdmabuf, locks, trans and mr from the enclosing function.
	 * arg points to the int index of this thread.  While the transport is
	 * connected it sends one CHUNK_SIZE message whose first byte is the
	 * thread index, then sleeps on its own condition variable until it is
	 * signalled.
	 */
	void *sendstuff(void *arg) {
		int num = *(int*)arg;
		msk_data_t *wdata;

//		printf("starting thread %d\n", num);

		// malloc the send struct; its payload is this thread's own chunk of rdmabuf
		TEST_NZ(wdata = malloc(sizeof(msk_data_t)));
		wdata->data = rdmabuf+(RECV_NUM+1+num)*CHUNK_SIZE;
		wdata->max_size = CHUNK_SIZE;
		wdata->size = CHUNK_SIZE;
		wdata->data[0] = num;

		// hold the lock across post+wait so the signal cannot arrive in between
		pthread_mutex_lock(&locks[num].lock);
		while (trans->state == MSK_CONNECTED) {
			TEST_Z(msk_post_send(trans, wdata, mr, NULL, NULL));
			pthread_cond_wait(&locks[num].cond, &locks[num].lock);
//			printf("got cond on thread %d\n", num);
		}
		pthread_mutex_unlock(&locks[num].lock);

		free(wdata);

		pthread_exit(NULL);
	}
int main(int argc, char **argv) { msk_trans_t *trans; uint8_t *rdmabuf; struct ibv_mr *mr; msk_data_t *wdata; msk_trans_attr_t attr; memset(&attr, 0, sizeof(msk_trans_attr_t)); attr.server = -1; // put an incorrect value to check if we're either client or server // sane values for optional or non-configurable elements attr.rq_depth = 1; attr.sq_depth = RECV_NUM+2; // RECV_NUM for read requets, one for the final wait_send, one to have a free one (post in a callback) attr.addr.sa_in.sin_family = AF_INET; attr.addr.sa_in.sin_port = htons(1235); // attr.disconnect_callback = callback_disconnect; // argument handling static struct option long_options[] = { { "client", required_argument, 0, 'c' }, { "server", required_argument, 0, 's' }, { "port", required_argument, 0, 'p' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int op; while ((op = getopt_long(argc, argv, "@hvsS:c:p:", long_options, &option_index)) != -1) { switch(op) { case '@': printf("%s compiled on %s at %s\n", argv[0], __DATE__, __TIME__); printf("Release = %s\n", VERSION); printf("Release comment = %s\n", VERSION_COMMENT); printf("Git HEAD = %s\n", _GIT_HEAD_COMMIT ) ; printf("Git Describe = %s\n", _GIT_DESCRIBE ) ; exit(0); case 'h': print_help(argv); exit(0); case 'v': ERROR_LOG("verbose switch not ready just yet, come back later!\n"); break; case 'c': attr.server = 0; inet_pton(AF_INET, optarg, &attr.addr.sa_in.sin_addr); break; case 's': attr.server = 10; inet_pton(AF_INET, "0.0.0.0", &attr.addr.sa_in.sin_addr); break; case 'S': attr.server = 10; inet_pton(AF_INET, optarg, &attr.addr.sa_in.sin_addr); break; case 'p': ((struct sockaddr_in*) &attr.addr)->sin_port = htons(atoi(optarg)); break; default: ERROR_LOG("Failed to parse arguments"); print_help(argv); exit(EINVAL); } } if (attr.server == -1) { ERROR_LOG("must be either a client or a server!"); print_help(argv); exit(EINVAL); } TEST_Z(msk_init(&trans, &attr)); if (!trans) exit(-1); if (trans->server) { 
TEST_Z(msk_bind_server(trans)); trans = msk_accept_one(trans); } else { //client TEST_Z(msk_connect(trans)); } TEST_NZ(rdmabuf = malloc((RECV_NUM+2)*CHUNK_SIZE*sizeof(char))); memset(rdmabuf, 0, (RECV_NUM+2)*CHUNK_SIZE*sizeof(char)); TEST_NZ(mr = msk_reg_mr(trans, rdmabuf, (RECV_NUM+2)*CHUNK_SIZE*sizeof(char), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ)); msk_data_t *ackdata; TEST_NZ(ackdata = malloc(sizeof(msk_data_t))); ackdata->data = rdmabuf+(RECV_NUM+1)*CHUNK_SIZE*sizeof(char); ackdata->max_size = CHUNK_SIZE*sizeof(char); ackdata->size = 1; ackdata->data[0] = 0; pthread_mutex_t lock; pthread_cond_t cond; pthread_mutex_init(&lock, NULL); pthread_cond_init(&cond, NULL); msk_data_t **rdata; struct datamr *datamr; int i; TEST_NZ(rdata = malloc(RECV_NUM*sizeof(msk_data_t*))); TEST_NZ(datamr = malloc(RECV_NUM*sizeof(struct datamr))); for (i=0; i < RECV_NUM; i++) { TEST_NZ(rdata[i] = malloc(sizeof(msk_data_t))); rdata[i]->data=rdmabuf+i*CHUNK_SIZE*sizeof(char); rdata[i]->max_size=CHUNK_SIZE*sizeof(char); datamr[i].data = rdata[i]; datamr[i].mr = mr; datamr[i].lock = &lock; datamr[i].cond = &cond; } pthread_mutex_lock(&lock); TEST_Z(msk_post_recv(trans, rdata[0], mr, callback_recv, &(datamr[0]))); // post only one, others will be used for reads if (trans->server) { TEST_Z(msk_finalize_accept(trans)); } else { TEST_Z(msk_finalize_connect(trans)); } TEST_NZ(wdata = malloc(sizeof(msk_data_t))); wdata->data = rdmabuf+RECV_NUM*CHUNK_SIZE*sizeof(char); wdata->max_size = CHUNK_SIZE*sizeof(char); msk_rloc_t *rloc; if (trans->server) { printf("wait for rloc\n"); TEST_Z(pthread_cond_wait(&cond, &lock)); // receive rloc TEST_NZ(rloc = malloc(sizeof(msk_rloc_t))); memcpy(rloc, (rdata[0])->data, sizeof(msk_rloc_t)); printf("got rloc! 
key: %u, addr: %lu, size: %d\n", rloc->rkey, rloc->raddr, rloc->size); volatile int count = 0; for (i=0; i < RECV_NUM; i++) { rdata[i]->size=CHUNK_SIZE*sizeof(char); datamr[i].rloc = rloc; datamr[i].count = &count; TEST_Z(msk_post_RW(trans, rdata[i], mr, rloc, callback_read, &(datamr[i]))); } while (count < SEND_COUNT) { pthread_cond_wait(&cond, &lock); if (count%100 == 0) printf("count: %d\n", count); } printf("count: %d\n", count); wdata->size = 1; TEST_Z(msk_post_send(trans, wdata, mr, NULL, NULL)); // ack - other can quit usleep(10000); //FIXME: wait till last work request is done. cannot use wait_send because the other will get the send before we get our ack, so they might disconnect and our threads might fail before we get our WC that would unstuck us. } else { rloc = msk_make_rloc(mr, (uint64_t)ackdata->data, ackdata->max_size); memcpy(wdata->data, rloc, sizeof(msk_rloc_t)); wdata->size = sizeof(msk_rloc_t); msk_post_send(trans, wdata, mr, NULL, NULL); printf("sent rloc, waiting for server to say they're done\n"); TEST_Z(pthread_cond_wait(&cond, &lock)); // receive server ack (they wrote stuff) } pthread_mutex_unlock(&lock); msk_destroy_trans(&trans); return 0; }
int main(int argc, char **argv) { msk_trans_t *trans, *listen_trans; msk_trans_attr_t trans_attr; char errbuf[PCAP_ERRBUF_SIZE]; char *pcap_file; pcap_t *pcap; size_t block_size = 0; uint32_t recv_num = 0; int banner = 0; int i, rc; uint8_t *rdmabuf; struct ibv_mr *mr; msk_data_t *data, *wdata; struct privatedata priv; // argument handling int option_index = 0; int op, last_op; char *tmp_s; static struct option long_options[] = { { "client", required_argument, 0, 'c' }, { "server", required_argument, 0, 's' }, { "banner", no_argument, 0, 'B' }, { "help", no_argument, 0, 'h' }, { "verbose", no_argument, 0, 'v' }, { "quiet", no_argument, 0, 'q' }, { "block-size", required_argument, 0, 'b' }, { "file", required_argument, 0, 'f' }, { "recv-num", required_argument, 0, 'r' }, { "no-check", no_argument, 0, 'n' }, { 0, 0, 0, 0 } }; memset(&trans_attr, 0, sizeof(msk_trans_attr_t)); memset(&priv, 0, sizeof(struct privatedata)); priv.docheck = 1; trans_attr.server = -1; // put an incorrect value to check if we're either client or server // sane values for optional or non-configurable elements trans_attr.debug = 1; trans_attr.max_recv_sge = 1; trans_attr.disconnect_callback = callback_disconnect; trans_attr.worker_count = -1; pcap_file = "pcap.out"; last_op = 0; while ((op = getopt_long(argc, argv, "-@hvqc:s:S:r:b:r:t:f:Bn", long_options, &option_index)) != -1) { switch(op) { case 1: // this means double argument if (last_op == 'c') { trans_attr.port = optarg; } else if (last_op == 'S') { trans_attr.port = optarg; } else { ERROR_LOG("Failed to parse arguments"); print_help(argv); exit(EINVAL); } break; case '@': printf("%s compiled on %s at %s\n", argv[0], __DATE__, __TIME__); printf("Release = %s\n", VERSION); printf("Release comment = %s\n", VERSION_COMMENT); printf("Git HEAD = %s\n", _GIT_HEAD_COMMIT ) ; printf("Git Describe = %s\n", _GIT_DESCRIBE ) ; exit(0); case 'h': print_help(argv); exit(0); case 'v': trans_attr.debug = trans_attr.debug * 2 + 1; break; case 'c': 
trans_attr.server = 0; trans_attr.node = optarg; break; case 's': trans_attr.server = 10; trans_attr.node = "::"; trans_attr.port = optarg; break; case 'S': trans_attr.server = 10; trans_attr.node = optarg; break; case 'q': trans_attr.debug = 0; break; case 'f': pcap_file = optarg; break; case 'B': banner = 1; break; case 'n': priv.docheck = 0; break; case 'b': block_size = strtoul(optarg, &tmp_s, 0); if (errno || block_size == 0) { ERROR_LOG("Invalid block size, assuming default (%u)", DEFAULT_BLOCK_SIZE); break; } if (tmp_s[0] != 0) { set_size(block_size, tmp_s); } INFO_LOG(trans_attr.debug > 1, "block size: %zu", block_size); break; case 'r': recv_num = strtoul(optarg, NULL, 0); if (errno || recv_num == 0) ERROR_LOG("Invalid recv_num, assuming default (%u)", DEFAULT_RECV_NUM); break; default: ERROR_LOG("Failed to parse arguments"); print_help(argv); exit(EINVAL); } last_op = op; } if (trans_attr.server == -1) { ERROR_LOG("Must be either client or server!"); print_help(argv); exit(EINVAL); } if (block_size == 0) block_size = DEFAULT_BLOCK_SIZE; if (recv_num == 0) recv_num = DEFAULT_RECV_NUM; trans_attr.rq_depth = recv_num+1; trans_attr.sq_depth = recv_num+1; /* open pcap file */ pcap = pcap_open_offline( pcap_file, errbuf ); if (pcap == NULL) { ERROR_LOG("Couldn't open pcap file: %s", errbuf); return EINVAL; } /* msk init */ TEST_Z(msk_init(&trans, &trans_attr)); if (!trans) { ERROR_LOG("msk_init failed! 
panic!"); exit(-1); } /* finish msk init */ const size_t mr_size = (recv_num+1)*block_size; if (trans_attr.server == 0) TEST_Z(msk_connect(trans)); else { listen_trans = trans; TEST_Z(msk_bind_server(listen_trans)); TEST_NZ(trans = msk_accept_one(listen_trans)); } TEST_NZ(rdmabuf = malloc(mr_size)); memset(rdmabuf, 0, mr_size); TEST_NZ(mr = msk_reg_mr(trans, rdmabuf, mr_size, IBV_ACCESS_LOCAL_WRITE)); TEST_NZ(data = malloc((recv_num+1)*sizeof(msk_data_t))); for (i=0; i < recv_num + 1; i++) { data[i].data = rdmabuf+i*block_size; data[i].max_size = block_size; data[i].mr = mr; } wdata = &data[recv_num]; trans->private_data = &priv; pthread_mutex_init(&priv.lock, NULL); pthread_cond_init(&priv.cond, NULL); for (i=0; i<recv_num; i++) { TEST_Z(msk_post_recv(trans, &data[i], callback_recv, callback_error, NULL)); } pthread_mutex_lock(&priv.lock); if (trans->server == 0) TEST_Z(msk_finalize_connect(trans)); else TEST_Z(msk_finalize_accept(trans)); /* set on first packet */ uint32_t send_ip = 0; uint32_t recv_ip = 0; uint16_t send_port = 0; uint16_t recv_port = 0; i=0; while ((rc = pcap_next_ex(pcap, &priv.pcaphdr, (const u_char**)&priv.packet)) >= 0) { INFO_LOG(trans->debug & (MSK_DEBUG_SEND|MSK_DEBUG_RECV), "Iteration %d", i++); /* first packet: */ if (send_ip == 0) { /* who talks first? 
*/ if ((trans->server == 0 && banner == 0) || (trans->server && banner == 1)) { send_ip = priv.packet->ipv6.ip_src.s6_addr32[3]; send_port = priv.packet->tcp.th_sport; recv_ip = priv.packet->ipv6.ip_dst.s6_addr32[3]; recv_port = priv.packet->tcp.th_dport; } else { send_ip = priv.packet->ipv6.ip_dst.s6_addr32[3]; send_port = priv.packet->tcp.th_dport; recv_ip = priv.packet->ipv6.ip_src.s6_addr32[3]; recv_port = priv.packet->tcp.th_sport; } } /* all packets: decide if we send it or if we wait till we receive another */ if (priv.packet->ipv6.ip_src.s6_addr32[3] == send_ip && priv.packet->tcp.th_sport == send_port) { if (priv.pcaphdr->len != priv.pcaphdr->caplen) { ERROR_LOG("Can't send truncated data! make sure you've stored all the capture (-t in rmitm)"); rc = EINVAL; break; } memcpy(wdata->data, priv.packet->data, priv.pcaphdr->len); wdata->size = priv.pcaphdr->len; rc = msk_post_send(trans, wdata, callback_send, callback_error, NULL); if (rc) { ERROR_LOG("msk_post_send failed with rc %d (%s)", rc, strerror(rc)); break; } } else if (priv.packet->ipv6.ip_src.s6_addr32[3] == recv_ip && priv.packet->tcp.th_sport == recv_port) { INFO_LOG(trans->debug & (MSK_DEBUG_SEND|MSK_DEBUG_RECV), "Waiting"); pthread_cond_wait(&priv.cond, &priv.lock); if (priv.rc != 0) { /* got an error in recv thread */ ERROR_LOG("Stopping loop"); rc = priv.rc; break; } } else { ERROR_LOG("Multiple streams in pcap file? Stopping loop."); break; } } pthread_mutex_unlock(&priv.lock); /* mooshika doesn't use negative return values, so hopefully -1 can only mean pcap error */ if (rc == -1) { ERROR_LOG("Pcap error: %s", pcap_geterr(pcap)); } pcap_close(pcap); msk_destroy_trans(&trans); /* -2 is pcap way of saying end of file */ if (rc == -2) { rc = 0; printf("Replay ended succesfully!\n"); } return rc; }