Example #1
void nmb_put(struct nmb *nmb,
             MSN msn,
             msgtype_t type,
             struct msg *key,
             struct msg *val,
             struct txnid_pair *xidpair)
{
	char *base;
	uint32_t size = (NMB_ENTRY_SIZE + key->size);

	if (type != MSG_DELETE)
		size += val->size;

	ness_rwlock_write_lock(&nmb->rwlock);
	base = mempool_alloc_aligned(nmb->mpool, size);
	ness_rwlock_write_unlock(&nmb->rwlock);
	_nmb_entry_pack(base, msn, type, key, val, xidpair);

	/* pma is thread-safe */
	pma_insert(nmb->pma,
	           (void*)base,
	           _nmb_entry_key_compare,
	           (void*)nmb);
	atomic_fetch_and_inc(&nmb->count);
}
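All four examples lean on the same atomic_fetch_and_inc() helper to bump a shared counter without taking a lock. Its definition is not part of this listing; purely as an illustration, such a helper is commonly built on GCC's __sync_fetch_and_add builtin, roughly as sketched below (the atomic_t wrapper and the choice of returning the pre-increment value are assumptions, not the examples' actual code):

typedef struct {
	volatile int counter;
} atomic_t;

/* Atomically add 1 to *v and return the value it held before the add.
 * __sync_fetch_and_add performs the whole read-modify-write as one
 * atomic step, so concurrent callers never observe the same value. */
static inline int atomic_fetch_and_inc(atomic_t *v)
{
	return __sync_fetch_and_add(&v->counter, 1);
}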
Example #2
cq_mgr::cq_mgr(ring* p_ring, ib_ctx_handler* p_ib_ctx_handler, int cq_size, struct ibv_comp_channel* p_comp_event_channel, bool is_rx) :
		m_p_ring(p_ring), m_p_ib_ctx_handler(p_ib_ctx_handler), m_b_is_rx(is_rx), m_comp_event_channel(p_comp_event_channel), m_p_next_rx_desc_poll(NULL)
{
	cq_logfunc("");

	m_n_wce_counter = 0;
	m_b_was_drained = false;

	m_b_notification_armed = false;
	m_n_out_of_free_bufs_warning = 0;

	m_n_cq_poll_sn = 0;
	m_cq_id = atomic_fetch_and_inc(&m_n_cq_id_counter); // cq id is nonzero

	m_transport_type = m_p_ring->get_transport_type();

	m_p_ibv_cq = ibv_create_cq(m_p_ib_ctx_handler->get_ibv_context(), cq_size, (void*)this, m_comp_event_channel, 0);
	BULLSEYE_EXCLUDE_BLOCK_START
	if (!m_p_ibv_cq) {
		cq_logpanic("ibv_create_cq failed (errno=%d %m)", errno);
	}
	BULLSEYE_EXCLUDE_BLOCK_END
	
	// use local copy of stats by default (on rx cq get shared memory stats)
	m_p_cq_stat = &m_cq_stat_static;
	memset(m_p_cq_stat, 0, sizeof(*m_p_cq_stat));
/*
	m_p_cq_stat->n_rx_sw_queue_len = 0;
	m_p_cq_stat->n_rx_pkt_drop = 0;
	m_p_cq_stat->n_rx_drained_at_once_max = 0;
	m_p_cq_stat->n_buffer_pool_len = 0;
	m_p_cq_stat->buffer_miss_rate = 0.0;
//*/
	m_buffer_miss_count = 0;
	m_buffer_total_count = 0;
	m_buffer_prev_id = 0;

	m_sz_transport_header = 0;
	switch (m_transport_type) {
	case VMA_TRANSPORT_IB:
		m_sz_transport_header = GRH_HDR_LEN;
		break;
	case VMA_TRANSPORT_ETH:
		m_sz_transport_header = ETH_HDR_LEN;
		break;
	BULLSEYE_EXCLUDE_BLOCK_START
	default:
		cq_logpanic("Unknown transport type: %d", m_transport_type);
		break;
	BULLSEYE_EXCLUDE_BLOCK_END
	}

	if (m_b_is_rx)
		vma_stats_instance_create_cq_block(m_p_cq_stat);

	cq_logdbg("Created CQ as %s with fd[%d] and of size %d elements (ibv_cq_hndl=%p)", (m_b_is_rx?"Rx":"Tx"), get_channel_fd(), cq_size, m_p_ibv_cq);
}
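For reference, the constructor's central call is plain libibverbs: ibv_create_cq() returns NULL on failure with errno set, which is what the panic branch above checks. A minimal stand-alone sketch follows; the helper name is made up here, and unlike Example 2 it passes no completion channel and no cq_context:

#include <infiniband/verbs.h>
#include <stdio.h>
#include <errno.h>

static struct ibv_cq *create_cq_or_report(struct ibv_context *ctx, int cq_size)
{
	/* NULL cq_context, no completion channel, completion vector 0. */
	struct ibv_cq *cq = ibv_create_cq(ctx, cq_size, NULL, NULL, 0);

	if (!cq)
		fprintf(stderr, "ibv_create_cq failed (errno=%d)\n", errno);
	return cq;
}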
Example #3
ssize_t dst_entry_udp::pass_buff_to_neigh(const iovec *p_iov, size_t & sz_iov, uint16_t packet_id)
{
	m_header_neigh.init();
	m_header_neigh.configure_udp_header(m_dst_port, m_src_port);

	packet_id = (safe_mce_sys().thread_mode > THREAD_MODE_SINGLE) ?
						atomic_fetch_and_inc(&m_a_tx_ip_id) :
						m_n_tx_ip_id++;
	packet_id = htons(packet_id);

	return(dst_entry::pass_buff_to_neigh(p_iov, sz_iov, packet_id));
}
Example #4
ssize_t dst_entry_udp::fast_send(const iovec* p_iov, const ssize_t sz_iov, bool b_blocked /*=true*/, bool is_rexmit /*=false*/, bool dont_inline /*=false*/)
{
	NOT_IN_USE(is_rexmit);

	tx_packet_template_t *p_pkt;
	mem_buf_desc_t* p_mem_buf_desc = NULL, *tmp;
	uint16_t packet_id = 0;
	bool b_need_sw_csum;

	// Calc user data payload size
	ssize_t sz_data_payload = 0;
	for (ssize_t i = 0; i < sz_iov; i++)
		sz_data_payload += p_iov[i].iov_len;

	if (unlikely(sz_data_payload > 65536)) {
		dst_udp_logfunc("sz_data_payload=%d, to_port=%d, local_port=%d, b_blocked=%s", sz_data_payload, ntohs(m_dst_port), ntohs(m_src_port), b_blocked?"true":"false");
		dst_udp_logfunc("sz_data_payload=%d exceeds max of 64KB", sz_data_payload);
		errno = EMSGSIZE;
		return -1;
	}

	// Calc udp payload size
	size_t sz_udp_payload = sz_data_payload + sizeof(struct udphdr);

	if (!dont_inline && (sz_iov == 1 && (sz_data_payload + m_header.m_total_hdr_len) < m_max_inline)) {
		m_p_send_wqe = &m_inline_send_wqe;

		//m_sge[0].addr  already points to the header
		//so we just need to update the payload addr + len
		m_sge[1].length = p_iov[0].iov_len;
		m_sge[1].addr = (uintptr_t)p_iov[0].iov_base;

		m_header.m_header.hdr.m_udp_hdr.len = htons((uint16_t)sz_udp_payload);
		m_header.m_header.hdr.m_ip_hdr.tot_len = htons(m_header.m_ip_header_len + sz_udp_payload);

#ifdef VMA_NO_HW_CSUM
		dst_udp_logfunc("using SW checksum calculation");
		m_header.m_header.hdr.m_ip_hdr.check = 0; // use 0 at csum calculation time
		m_header.m_header.hdr.m_ip_hdr.check = csum((unsigned short*)&m_header.m_header.hdr.m_ip_hdr, m_header.m_header.hdr.m_ip_hdr.ihl * 2);
#endif
		// Get a batch of tx buf descriptors and data buffers
		if (unlikely(m_p_tx_mem_buf_desc_list == NULL)) {
			m_p_tx_mem_buf_desc_list = m_p_ring->mem_buf_tx_get(m_id, b_blocked, safe_mce_sys().tx_bufs_batch_udp);
		}
		p_mem_buf_desc = m_p_tx_mem_buf_desc_list;

		if (unlikely(m_p_tx_mem_buf_desc_list == NULL)) {
			if (b_blocked) {
				dst_udp_logdbg("Error when blocking for next tx buffer (errno=%d %m)", errno);
			}
			else {
				dst_udp_logfunc("Packet dropped. NonBlocked call but not enough tx buffers. Returning OK");
				if (!safe_mce_sys().tx_nonblocked_eagains) return sz_data_payload;
			}
			errno = EAGAIN;
			return -1;
		}
		else {
			m_p_tx_mem_buf_desc_list = m_p_tx_mem_buf_desc_list->p_next_desc;
			set_tx_buff_list_pending(false);
		}
		p_mem_buf_desc->p_next_desc = NULL;
		m_inline_send_wqe.wr_id = (uintptr_t)p_mem_buf_desc;
		m_p_ring->send_ring_buffer(m_id, m_p_send_wqe, b_blocked);

		if (unlikely(m_p_tx_mem_buf_desc_list == NULL)) {
			m_p_tx_mem_buf_desc_list = m_p_ring->mem_buf_tx_get(m_id, b_blocked, safe_mce_sys().tx_bufs_batch_udp);
		}
	}
	else {
		// Find number of ip fragments (-> packets, buffers, buffer descs...)
		int n_num_frags = 1;
		b_need_sw_csum = false;
		m_p_send_wqe = &m_not_inline_send_wqe;

		// Usually max inline < MTU!
		if (sz_udp_payload > m_max_ip_payload_size) {
			b_need_sw_csum = true;
			n_num_frags = (sz_udp_payload + m_max_ip_payload_size - 1) / m_max_ip_payload_size;
			packet_id = (safe_mce_sys().thread_mode > THREAD_MODE_SINGLE) ?
					atomic_fetch_and_inc(&m_a_tx_ip_id) :
					m_n_tx_ip_id++;
			packet_id = htons(packet_id);
		}
#ifdef VMA_NO_HW_CSUM
		b_need_sw_csum = true;
#endif

		dst_udp_logfunc("udp info: payload_sz=%d, frags=%d, scr_port=%d, dst_port=%d, blocked=%s, ", sz_data_payload, n_num_frags, ntohs(m_header.m_header.hdr.m_udp_hdr.source), ntohs(m_dst_port), b_blocked?"true":"false");

		// Get all needed tx buf descriptors and data buffers
		p_mem_buf_desc = m_p_ring->mem_buf_tx_get(m_id, b_blocked, n_num_frags);

		if (unlikely(p_mem_buf_desc == NULL)) {
			if (b_blocked) {
				dst_udp_logdbg("Error when blocking for next tx buffer (errno=%d %m)", errno);
			}
			else {
				dst_udp_logfunc("Packet dropped. NonBlocked call but not enough tx buffers. Returning OK");
				if (!safe_mce_sys().tx_nonblocked_eagains) return sz_data_payload;
			}
			errno = EAGAIN;
			return -1;
		}

		// Int for counting offset inside the ip datagram payload
		uint32_t n_ip_frag_offset = 0;
		size_t sz_user_data_offset = 0;

		while (n_num_frags--) {
			// Calc this ip datagram fragment size (including any udp header)
			size_t sz_ip_frag = min(m_max_ip_payload_size, (sz_udp_payload - n_ip_frag_offset));
			size_t sz_user_data_to_copy = sz_ip_frag;
			size_t hdr_len = m_header.m_transport_header_len + m_header.m_ip_header_len; // Add count of L2 (ipoib or mac) header length

			if (safe_mce_sys().tx_prefetch_bytes) {
				prefetch_range(p_mem_buf_desc->p_buffer + m_header.m_transport_header_tx_offset,
						min(sz_ip_frag, (size_t)safe_mce_sys().tx_prefetch_bytes));
			}

			p_pkt = (tx_packet_template_t*)p_mem_buf_desc->p_buffer;

			uint16_t frag_off = 0;
			if (n_num_frags) {
				frag_off |= MORE_FRAGMENTS_FLAG;
			}

			if (n_ip_frag_offset == 0) {
				m_header.copy_l2_ip_udp_hdr(p_pkt);
				// Add count of udp header length
				hdr_len += sizeof(udphdr);

				// Copy less from user data
				sz_user_data_to_copy -= sizeof(udphdr);

				// Only for first fragment add the udp header
				p_pkt->hdr.m_udp_hdr.len = htons((uint16_t)sz_udp_payload);
			}
			else {
				m_header.copy_l2_ip_hdr(p_pkt);
				frag_off |= FRAGMENT_OFFSET & (n_ip_frag_offset / 8);
			}

			p_pkt->hdr.m_ip_hdr.frag_off = htons(frag_off);
			// Update ip header specific values
			p_pkt->hdr.m_ip_hdr.id = packet_id;
			p_pkt->hdr.m_ip_hdr.tot_len = htons(m_header.m_ip_header_len + sz_ip_frag);

			// Calc payload start point (after the udp header if present else just after ip header)
			uint8_t* p_payload = p_mem_buf_desc->p_buffer + m_header.m_transport_header_tx_offset + hdr_len;

			// Copy user data to our tx buffers
			int ret = memcpy_fromiovec(p_payload, p_iov, sz_iov, sz_user_data_offset, sz_user_data_to_copy);
			BULLSEYE_EXCLUDE_BLOCK_START
			if (ret != (int)sz_user_data_to_copy) {
				dst_udp_logerr("memcpy_fromiovec error (sz_user_data_to_copy=%d, ret=%d)", sz_user_data_to_copy, ret);
				m_p_ring->mem_buf_tx_release(p_mem_buf_desc, true);
				errno = EINVAL;
				return -1;
			}
			BULLSEYE_EXCLUDE_BLOCK_END

			if (b_need_sw_csum) {
				dst_udp_logfunc("ip fragmentation detected, using SW checksum calculation");
				p_pkt->hdr.m_ip_hdr.check = 0; // use 0 at csum calculation time
				p_pkt->hdr.m_ip_hdr.check = csum((unsigned short*)&p_pkt->hdr.m_ip_hdr, p_pkt->hdr.m_ip_hdr.ihl * 2);
				m_p_send_wqe_handler->disable_hw_csum(m_not_inline_send_wqe);
			} else {
				dst_udp_logfunc("using HW checksum calculation");
				m_p_send_wqe_handler->enable_hw_csum(m_not_inline_send_wqe);
			}


			m_sge[1].addr = (uintptr_t)(p_mem_buf_desc->p_buffer + (uint8_t)m_header.m_transport_header_tx_offset);
			m_sge[1].length = sz_user_data_to_copy + hdr_len;
			m_not_inline_send_wqe.wr_id = (uintptr_t)p_mem_buf_desc;

			dst_udp_logfunc("%s packet_sz=%d, payload_sz=%d, ip_offset=%d id=%d", m_header.to_str().c_str(),
					m_sge[1].length - m_header.m_transport_header_len, sz_user_data_to_copy,
					n_ip_frag_offset, ntohs(packet_id));

			tmp = p_mem_buf_desc->p_next_desc;
			p_mem_buf_desc->p_next_desc = NULL;

			// We don't check the return value of the post send; once the packet reaches the HW we consider our job done
			m_p_ring->send_ring_buffer(m_id, m_p_send_wqe, b_blocked);

			p_mem_buf_desc = tmp;

			// Update ip frag offset position
			n_ip_frag_offset += sz_ip_frag;

			// Update user data start offset copy location
			sz_user_data_offset += sz_user_data_to_copy;

		} // while(n_num_frags)
	}

	// If all went well :) then return the user data count transmitted
	return sz_data_payload;
}
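When Example 4 falls back to software checksumming it zeroes m_ip_hdr.check and calls csum() over ihl * 2 16-bit words. That helper is not part of the listing; the standard ones'-complement Internet checksum (RFC 1071) it presumably computes looks roughly like the sketch below (the function name is illustrative):

#include <stdint.h>
#include <stddef.h>

/* Ones'-complement sum over the IP header taken as 16-bit words
 * (ihl * 2 words cover the ihl * 4 header bytes). The check field
 * must already be zero, as Example 4 ensures before calling csum(). */
static uint16_t ip_hdr_checksum(const uint16_t *hdr_words, size_t n_words)
{
	uint32_t sum = 0;

	for (size_t i = 0; i < n_words; i++)
		sum += hdr_words[i];

	/* Fold carries out of the high 16 bits back into the low 16 bits. */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}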