cq_mgr::~cq_mgr()
{
	cq_logdbg("destroying CQ as %s", (m_b_is_rx ? "Rx" : "Tx"));

	int ret = 0;
	uint32_t ret_total = 0;
	uint64_t cq_poll_sn = 0;
	mem_buf_desc_t* buff = NULL;
	struct ibv_wc wce[MCE_MAX_CQ_POLL_BATCH];

	// Drain any remaining completions before tearing the CQ down
	while ((ret = poll(wce, MCE_MAX_CQ_POLL_BATCH, &cq_poll_sn)) > 0) {
		for (int i = 0; i < ret; i++) {
			if (m_b_is_rx) {
				buff = process_cq_element_rx(&wce[i]);
			} else {
				buff = process_cq_element_tx(&wce[i]);
			}
			if (buff)
				m_rx_queue.push_back(buff);
		}
		ret_total += ret;
	}
	m_b_was_drained = true;
	if (ret_total > 0) {
		cq_logdbg("Drained %u wce", ret_total);
	}

	if (m_rx_queue.size() + m_rx_pool.size()) {
		cq_logdbg("Returning %zu buffers to global Rx pool (ready queue %zu, free pool %zu)",
			  m_rx_queue.size() + m_rx_pool.size(), m_rx_queue.size(), m_rx_pool.size());

		g_buffer_pool_rx->put_buffers_thread_safe(&m_rx_queue, m_rx_queue.size());
		m_p_cq_stat->n_rx_sw_queue_len = m_rx_queue.size();

		g_buffer_pool_rx->put_buffers_thread_safe(&m_rx_pool, m_rx_pool.size());
		m_p_cq_stat->n_buffer_pool_len = m_rx_pool.size();
	}

	cq_logfunc("destroying ibv_cq");
	IF_VERBS_FAILURE(ibv_destroy_cq(m_p_ibv_cq)) {
		cq_logerr("destroy cq failed (errno=%d %m)", errno);
	} ENDIF_VERBS_FAILURE;

	statistics_print();
	if (m_b_is_rx)
		vma_stats_instance_remove_cq_block(m_p_cq_stat);

	cq_logdbg("done");
}
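/*
 * Illustrative sketch (not part of VMA): the drain-then-destroy pattern used
 * by the destructor above, shown standalone against the plain libibverbs API.
 * 'handle_completion' is a hypothetical handler name used only for this
 * example.
 */
#if 0	// example only, not compiled
static void drain_cq_before_destroy(struct ibv_cq *cq)
{
	struct ibv_wc wce[16];
	int n;

	// Keep polling until the CQ reports no more completions,
	// so no work request is silently lost at teardown.
	while ((n = ibv_poll_cq(cq, 16, wce)) > 0) {
		for (int i = 0; i < n; i++)
			handle_completion(&wce[i]);	// hypothetical handler
	}
	if (ibv_destroy_cq(cq)) {
		// errno describes the failure (e.g. QPs still attached)
	}
}
#endif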
cq_mgr::cq_mgr(ring* p_ring, ib_ctx_handler* p_ib_ctx_handler, int cq_size,
	       struct ibv_comp_channel* p_comp_event_channel, bool is_rx) :
	m_p_ring(p_ring),
	m_p_ib_ctx_handler(p_ib_ctx_handler),
	m_b_is_rx(is_rx),
	m_comp_event_channel(p_comp_event_channel),
	m_p_next_rx_desc_poll(NULL)
{
	cq_logfunc("");

	m_n_wce_counter = 0;
	m_b_was_drained = false;
	m_b_notification_armed = false;
	m_n_out_of_free_bufs_warning = 0;
	m_n_cq_poll_sn = 0;
	m_cq_id = atomic_fetch_and_inc(&m_n_cq_id_counter); // cq id is nonzero

	m_transport_type = m_p_ring->get_transport_type();

	m_p_ibv_cq = ibv_create_cq(m_p_ib_ctx_handler->get_ibv_context(), cq_size, (void*)this, m_comp_event_channel, 0);
	BULLSEYE_EXCLUDE_BLOCK_START
	if (!m_p_ibv_cq) {
		cq_logpanic("ibv_create_cq failed (errno=%d %m)", errno);
	}
	BULLSEYE_EXCLUDE_BLOCK_END

	// Use a local copy of the stats by default; an Rx CQ registers a
	// shared-memory stats block below.
	m_p_cq_stat = &m_cq_stat_static;
	memset(m_p_cq_stat, 0, sizeof(*m_p_cq_stat));

	m_buffer_miss_count = 0;
	m_buffer_total_count = 0;
	m_buffer_prev_id = 0;

	m_sz_transport_header = 0;
	switch (m_transport_type) {
	case VMA_TRANSPORT_IB:
		m_sz_transport_header = GRH_HDR_LEN;
		break;
	case VMA_TRANSPORT_ETH:
		m_sz_transport_header = ETH_HDR_LEN;
		break;
	BULLSEYE_EXCLUDE_BLOCK_START
	default:
		cq_logpanic("Unknown transport type: %d", m_transport_type);
		break;
	BULLSEYE_EXCLUDE_BLOCK_END
	}

	if (m_b_is_rx)
		vma_stats_instance_create_cq_block(m_p_cq_stat);

	cq_logdbg("Created CQ as %s with fd[%d] and of size %d elements (ibv_cq_hndl=%p)",
		  (m_b_is_rx ? "Rx" : "Tx"), get_channel_fd(), cq_size, m_p_ibv_cq);
}
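/*
 * Illustrative sketch (not VMA code): the minimal libibverbs sequence the
 * constructor above wraps -- create a completion event channel, then a CQ
 * bound to it. All names below are local to the example.
 */
#if 0	// example only, not compiled
static struct ibv_cq *create_example_cq(struct ibv_context *ctx, int cq_size, void *cq_context)
{
	struct ibv_comp_channel *channel = ibv_create_comp_channel(ctx);
	if (!channel)
		return NULL;	// errno is set by libibverbs

	// comp_vector 0: completion events are steered to the first event vector
	struct ibv_cq *cq = ibv_create_cq(ctx, cq_size, cq_context, channel, 0);
	if (!cq)
		ibv_destroy_comp_channel(channel);
	return cq;
}
#endif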
/**
 * This function polls the CQ and extracts the needed fields.
 * Upon a CQE error state it returns -1.
 * For a bad-checksum packet or a filler bit it reports VMA_MP_RQ_BAD_PACKET
 * through the flags output parameter.
 */
int cq_mgr_mp::poll_mp_cq(uint16_t &size, uint32_t &strides_used,
			  uint32_t &flags, struct mlx5_cqe64 *&out_cqe64)
{
	struct mlx5_cqe64 *cqe = check_cqe();
	if (likely(cqe)) {
		if (unlikely(MLX5_CQE_OPCODE(cqe->op_own) != MLX5_CQE_RESP_SEND)) {
			cq_logdbg("Warning op_own is %x", MLX5_CQE_OPCODE(cqe->op_own));
			// optimize checks in ring by setting size non zero
			if (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) {
				cq_logdbg("poll_length, CQE response error, "
					  "syndrome=0x%x, vendor syndrome error=0x%x, "
					  "HW syndrome 0x%x, HW syndrome type 0x%x\n",
					  ((struct mlx5_err_cqe *)cqe)->syndrome,
					  ((struct mlx5_err_cqe *)cqe)->vendor_err_synd,
					  ((struct mlx5_err_cqe *)cqe)->hw_err_synd,
					  ((struct mlx5_err_cqe *)cqe)->hw_synd_type);
			}
			size = 1;
			m_p_cq_stat->n_rx_pkt_drop++;
			return -1;
		}
		m_p_cq_stat->n_rx_pkt_drop += cqe->sop_qpn.sop;
		out_cqe64 = cqe;
		uint32_t stride_byte_cnt = ntohl(cqe->byte_cnt);
		strides_used = (stride_byte_cnt & MP_RQ_NUM_STRIDES_FIELD_MASK) >>
				MP_RQ_NUM_STRIDES_FIELD_SHIFT;
		flags = (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) * IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK) |
			(!!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) * IBV_EXP_CQ_RX_IP_CSUM_OK);
		if (likely(flags == UDP_OK_FLAGS)) {
			size = stride_byte_cnt & MP_RQ_BYTE_CNT_FIELD_MASK;
		} else {
			// if the checksum is bad the CQE can be either a filler or a bad packet
			flags = VMA_MP_RQ_BAD_PACKET;
			size = 1;
			if (stride_byte_cnt & MP_RQ_FILLER_FIELD_MASK) {
				m_p_cq_stat->n_rx_pkt_drop++;
			}
		}
		++m_mlx5_cq.cq_ci;
		prefetch((uint8_t*)m_mlx5_cq.cq_buf +
			 ((m_mlx5_cq.cq_ci & (m_mlx5_cq.cqe_count - 1)) << m_mlx5_cq.cqe_size_log));
	} else {
		// no new CQE; signal an empty poll to the caller
		size = 0;
		flags = 0;
	}
	return 0;
}
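/*
 * Illustrative sketch (not VMA code): how a ring-level caller might drive
 * poll_mp_cq(). 'consume_packet' is a hypothetical handler; the conventions
 * follow the function above: a negative return for a CQE error, size == 0
 * for an empty poll, and flags == VMA_MP_RQ_BAD_PACKET for a filler or
 * bad-checksum entry.
 */
#if 0	// example only, not compiled
void drain_ready_packets(cq_mgr_mp &mp_cq)
{
	uint16_t size;
	uint32_t strides_used, flags;
	struct mlx5_cqe64 *cqe64;

	while (true) {
		int ret = mp_cq.poll_mp_cq(size, strides_used, flags, cqe64);
		if (ret < 0 || size == 0)
			break;			// CQE error or nothing left to poll
		if (flags == VMA_MP_RQ_BAD_PACKET)
			continue;		// filler or bad-checksum entry, skip it
		consume_packet(cqe64, size, strides_used);	// hypothetical handler
	}
}
#endif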
void cq_mgr::add_qp_rx(qp_mgr* qp)
{
	cq_logdbg("qp_mgr=%p", qp);
	mem_buf_desc_t *p_temp_desc_list, *p_temp_desc_next;

	m_p_cq_stat->n_rx_drained_at_once_max = 0;

	// Initial fill of receiver work requests
	uint32_t qp_rx_wr_num = qp->get_rx_max_wr_num();
	cq_logdbg("Trying to push %u WRE to allocated qp (%p)", qp_rx_wr_num, qp);
	while (qp_rx_wr_num) {
		uint32_t n_num_mem_bufs = mce_sys.rx_num_wr_to_post_recv;
		if (n_num_mem_bufs > qp_rx_wr_num)
			n_num_mem_bufs = qp_rx_wr_num;
		p_temp_desc_list = g_buffer_pool_rx->get_buffers_thread_safe(n_num_mem_bufs, m_p_ib_ctx_handler);
		if (p_temp_desc_list == NULL) {
			cq_logwarn("Out of mem_buf_desc from Rx buffer pool for qp_mgr initialization (qp=%p)", qp);
			cq_logwarn("This might happen due to a wrong setting of VMA_RX_BUFS and VMA_RX_WRE. Please refer to README.txt for more info");
			break;
		}
		p_temp_desc_next = p_temp_desc_list;
		while (p_temp_desc_next) {
			p_temp_desc_next->p_desc_owner = m_p_ring;
			p_temp_desc_next = p_temp_desc_next->p_next_desc;
		}
		if (qp->post_recv(p_temp_desc_list) != 0) {
			cq_logdbg("qp post recv is already full (pushed=%u, planned=%u)",
				  qp->get_rx_max_wr_num() - qp_rx_wr_num, qp->get_rx_max_wr_num());
			g_buffer_pool_rx->put_buffers_thread_safe(p_temp_desc_list);
			break;
		}
		qp_rx_wr_num -= n_num_mem_bufs;
	}
	cq_logdbg("Successfully post_recv qp with %u new Rx buffers (planned=%u)",
		  qp->get_rx_max_wr_num() - qp_rx_wr_num, qp->get_rx_max_wr_num());

	// Add qp_mgr to map
	m_qp_rec.qp = qp;
	m_qp_rec.debth = 0;
}
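/*
 * Illustrative sketch (not VMA code): posting a batch of receive buffers in a
 * single ibv_post_recv() call by chaining work requests, which is roughly the
 * operation qp_mgr::post_recv() performs on the descriptor list built above.
 * The SGE setup and buffer registration are simplified assumptions.
 */
#if 0	// example only, not compiled
static int post_recv_batch(struct ibv_qp *qp, struct ibv_sge *sges,
			   struct ibv_recv_wr *wrs, int count)
{
	for (int i = 0; i < count; i++) {
		wrs[i].wr_id = (uintptr_t)&wrs[i];	// identify the WR on completion
		wrs[i].sg_list = &sges[i];
		wrs[i].num_sge = 1;
		wrs[i].next = (i + 1 < count) ? &wrs[i + 1] : NULL;
	}
	struct ibv_recv_wr *bad_wr = NULL;
	// On failure, bad_wr points at the first WR that could not be posted,
	// so the caller can reclaim the buffers of the rejected tail.
	return ibv_post_recv(qp, &wrs[0], &bad_wr);
}
#endif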
void cq_mgr_mp::add_qp_rx(qp_mgr *qp)
{
	cq_logdbg("qp_mp_mgr=%p", qp);
	qp_mgr_mp* mp_qp = dynamic_cast<qp_mgr_mp *>(qp);
	if (mp_qp == NULL) {
		cq_logdbg("this qp is not of type qp_mgr_mp %p", qp);
		throw_vma_exception("this qp is not of type qp_mgr_mp");
	}
	set_qp_rq(qp);
	m_qp_rec.qp = qp;
	if (m_external_mem) {
		cq_logdbg("this qp uses external memory %p", qp);
	} else {
		if (mp_qp->post_recv(0, mp_qp->get_wq_count()) != 0) {
			cq_logdbg("qp post recv failed");
		} else {
			cq_logdbg("Successfully post_recv qp with %d new Rx buffers",
				  mp_qp->get_wq_count());
		}
	}
}
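/*
 * Illustrative sketch (not VMA code): the checked-downcast pattern used above,
 * shown in isolation. dynamic_cast yields NULL when the runtime type does not
 * match, letting the caller fail loudly instead of operating on the wrong
 * qp_mgr subclass. All type and function names below are local to the example.
 */
#if 0	// example only, not compiled
struct base_qp { virtual ~base_qp() {} };
struct striding_rq_qp : base_qp { void arm_strided_rq() {} };

static void use_striding_qp(base_qp *qp)
{
	striding_rq_qp *sqp = dynamic_cast<striding_rq_qp *>(qp);
	if (sqp == NULL) {
		// wrong runtime type: bail out loudly, as add_qp_rx() does
		// via throw_vma_exception()
		return;
	}
	sqp->arm_strided_rq();
}
#endif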
cq_mgr_mlx5::~cq_mgr_mlx5()
{
	cq_logfunc("");
	cq_logdbg("destroying CQ as %s", (m_b_is_rx ? "Rx" : "Tx"));
	m_rq = NULL;
}