Пример #1
0
bool sockinfo::detach_receiver(flow_tuple_with_local_if &flow_key)
{
	si_logdbg("Unregistering receiver: %s", flow_key.to_str());

	// TODO ALEXR: DO we need to return a 3 tuple instead of a 5 tuple being removed?
	// if (peer_ip != INADDR_ANY && peer_port != INPORT_ANY);

	// Find ring associated with this tuple
	rx_flow_map_t::iterator rx_flow_iter = m_rx_flow_map.find(flow_key);
	BULLSEYE_EXCLUDE_BLOCK_START
	if (rx_flow_iter == m_rx_flow_map.end()) {
		si_logdbg("Failed to find ring associated with: %s", flow_key.to_str());
		return false;
	}
	BULLSEYE_EXCLUDE_BLOCK_END
	ring* p_ring = rx_flow_iter->second;

	si_logdbg("Detaching %s from ring %p", flow_key.to_str(), p_ring);

	// Detach tuple
	unlock_rx_q();
	p_ring->detach_flow(flow_key, this);
	lock_rx_q();

	// Un-map flow from local map
#ifndef DEFINED_SOCKETXTREME // is not defined
	rx_del_ring_cb(flow_key, p_ring);
#endif // DEFINED_SOCKETXTREME
	m_rx_flow_map.erase(rx_flow_iter);

	return destroy_nd_resources((const ip_address)flow_key.get_local_if());
}
Пример #2
0
int sockinfo::ioctl(unsigned long int __request, unsigned long int __arg) throw (vma_error)
{

	int *p_arg = (int *)__arg;

	switch (__request) {
	case FIONBIO:
		{
			si_logdbg("request=FIONBIO, arg=%d", *p_arg);
			if (*p_arg)
				set_blocking(false);
			else
				set_blocking(true);
		}
		break;

	default:
		char buf[128];
		snprintf(buf, sizeof(buf), "unimplemented ioctl request=%#x, flags=%#x", (unsigned)__request, (unsigned)__arg);
		buf[ sizeof(buf)-1 ] = '\0';

		VLOG_PRINTF_INFO(safe_mce_sys().exception_handling.get_log_severity(), "%s", buf);
		int rc = handle_exception_flow();
		switch (rc) {
		case -1:
			return rc;
		case -2:
			vma_throw_object_with_msg(vma_unsupported_api, buf);
		}
		break;
	}

    si_logdbg("going to OS for ioctl request=%d, flags=%x", __request, __arg);
	return orig_os_api.ioctl(m_fd, __request, __arg);
}
Пример #3
0
net_device_resources_t* sockinfo::create_nd_resources(const ip_address ip_local)
{
	net_device_resources_t* p_nd_resources = NULL;

	// Check if we are already registered to net_device with the local ip as observers
	rx_net_device_map_t::iterator rx_nd_iter = m_rx_nd_map.find(ip_local.get_in_addr());
	if (rx_nd_iter == m_rx_nd_map.end()) {

		// Need to register as observer to net_device
		net_device_resources_t nd_resources;
		nd_resources.refcnt = 0;
		nd_resources.p_nde = NULL;
		nd_resources.p_ndv = NULL;
		nd_resources.p_ring = NULL;

		BULLSEYE_EXCLUDE_BLOCK_START
		cache_entry_subject<ip_address, net_device_val*>* p_ces = NULL;
		if (!g_p_net_device_table_mgr->register_observer(ip_local, &m_rx_nd_observer, &p_ces)) {
			si_logdbg("Failed registering as observer for local ip %s", ip_local.to_str().c_str());
			goto err;
		}
		nd_resources.p_nde = (net_device_entry*)p_ces;
		if (!nd_resources.p_nde) {
			si_logerr("Got NULL net_devide_entry for local ip %s", ip_local.to_str().c_str());
			goto err;
		}
		if (!nd_resources.p_nde->get_val(nd_resources.p_ndv)) {
			si_logerr("Got net_device_val=NULL (interface is not offloaded) for local ip %s", ip_local.to_str().c_str());
			goto err;
		}

		unlock_rx_q();
		m_rx_ring_map_lock.lock();
		resource_allocation_key *key;
		if (m_rx_ring_map.size() && m_ring_alloc_logic.is_logic_support_migration()) {
			key = m_ring_alloc_logic.get_key();
		} else {
			key = m_ring_alloc_logic.create_new_key(ip_local.get_in_addr());
		}
		nd_resources.p_ring = nd_resources.p_ndv->reserve_ring(key);
		m_rx_ring_map_lock.unlock();
		lock_rx_q();
		if (!nd_resources.p_ring) {
			si_logdbg("Failed to reserve ring for allocation key %s on ip %s",
				  m_ring_alloc_logic.get_key()->to_str(), ip_local.to_str().c_str());
			goto err;
		}

		// Add new net_device to rx_map
		m_rx_nd_map[ip_local.get_in_addr()] = nd_resources;

		rx_nd_iter = m_rx_nd_map.find(ip_local.get_in_addr());
		if (rx_nd_iter == m_rx_nd_map.end()) {
			si_logerr("Failed to find rx_nd_iter");
			goto err;
		}
		BULLSEYE_EXCLUDE_BLOCK_END

	}
Пример #4
0
void sockinfo::set_blocking(bool is_blocked)
{
	if (is_blocked) {
		si_logdbg("set socket to blocked mode");
		m_b_blocking = true;
	}
	else {
		si_logdbg("set socket to non-blocking mode");
		m_b_blocking = false;
	}

	// Update statistics info
	m_p_socket_stats->b_blocking = m_b_blocking;
}
Пример #5
0
int sockinfo::getsockopt(int __level, int __optname, void *__optval, socklen_t *__optlen)
{
	int ret = -1;

	switch (__level) {
	case SOL_SOCKET:
		switch(__optname) {
		case SO_VMA_USER_DATA:
			if (*__optlen == sizeof(m_fd_context)) {
				*(void **)__optval = m_fd_context;
				ret = 0;
			} else {
				errno = EINVAL;
			}
		break;
		case SO_VMA_FLOW_TAG:
			if (*__optlen >= sizeof(uint32_t)) {
				*(uint32_t*)__optval = m_flow_tag_id;
				ret = 0;
			} else {
				errno = EINVAL;
			}
		break;
		case SO_MAX_PACING_RATE:
			if (*__optlen == sizeof(struct vma_rate_limit_t)) {
				*(struct vma_rate_limit_t*)__optval = m_so_ratelimit;
				*__optlen = sizeof(struct vma_rate_limit_t);
				si_logdbg("(SO_MAX_PACING_RATE) value: %d, %d, %d",
					  (*(struct vma_rate_limit_t*)__optval).rate,
					  (*(struct vma_rate_limit_t*)__optval).max_burst_sz,
					  (*(struct vma_rate_limit_t*)__optval).typical_pkt_sz);
			} else if (*__optlen == sizeof(uint32_t)) {
				*(uint32_t*)__optval = KB_TO_BYTE(m_so_ratelimit.rate);
				*__optlen = sizeof(uint32_t);
				si_logdbg("(SO_MAX_PACING_RATE) value: %d",
					  *(int *)__optval);
				ret = 0;
			} else {
				errno = EINVAL;
			}
		break;
		}
	}

	return ret;
}
Пример #6
0
int sockinfo::fcntl(int __cmd, unsigned long int __arg)
{
	switch (__cmd) {
	case F_SETFL:
		{
			si_logdbg("cmd=F_SETFL, arg=%#x", __arg);
			if (__arg & O_NONBLOCK)
				set_blocking(false);
			else
				set_blocking(true);
		}
		break;
	case F_GETFL:		/* Get file status flags.  */
		si_logfunc("cmd=F_GETFL, arg=%#x", __arg);
		break;

	case F_GETFD:		/* Get file descriptor flags.  */
		si_logfunc("cmd=F_GETFD, arg=%#x", __arg);
		break;

	case F_SETFD:		/* Set file descriptor flags.  */
		si_logfunc("cmd=F_SETFD, arg=%#x", __arg);
		break;

	default:
		char buf[128];
		snprintf(buf, sizeof(buf), "unimplemented fcntl cmd=%#x, arg=%#x", (unsigned)__cmd, (unsigned)__arg);
		buf[ sizeof(buf)-1 ] = '\0';

		VLOG_PRINTF_INFO(safe_mce_sys().exception_handling.get_log_severity(), "%s", buf);
		int rc = handle_exception_flow();
		switch (rc) {
		case -1:
			return rc;
		case -2:
			vma_throw_object_with_msg(vma_unsupported_api, buf);
		}
		break;
	}
	si_logdbg("going to OS for fcntl cmd=%d, arg=%#x", __cmd, __arg);
	return orig_os_api.fcntl(m_fd, __cmd, __arg);
}
Пример #7
0
bool sockinfo::attach_receiver(flow_tuple_with_local_if &flow_key)
{
	// This function should be called from within mutex protected context of the sockinfo!!!

	si_logdbg("Attaching to %s", flow_key.to_str());

	// Protect against local loopback used as local_if & peer_ip
	// rdma_cm will accept it but we don't want to offload it
	if (flow_key.is_local_loopback()) {
		si_logdbg("VMA does not offload local loopback IP address");
		return false;
	}

	if (m_rx_flow_map.find(flow_key) != m_rx_flow_map.end()) {
		si_logdbg("already attached %s", flow_key.to_str());
		return false;
	}

	// Allocate resources on specific interface (create ring)
	net_device_resources_t* p_nd_resources = create_nd_resources((const ip_address)flow_key.get_local_if());
	if (NULL == p_nd_resources) {
		// any error which occurred inside create_nd_resources() was already printed. No need to reprint errors here
		return false;
	}

	// Map flow in local map
	m_rx_flow_map[flow_key] = p_nd_resources->p_ring;
#ifndef DEFINED_SOCKETXTREME // is not defined
		// Save the new CQ from ring
		rx_add_ring_cb(flow_key, p_nd_resources->p_ring);
#endif // DEFINED_SOCKETXTREME

	// Attach tuple
	BULLSEYE_EXCLUDE_BLOCK_START
	unlock_rx_q();
	if (!p_nd_resources->p_ring->attach_flow(flow_key, this)) {
		lock_rx_q();
		si_logdbg("Failed to attach %s to ring %p", flow_key.to_str(), p_nd_resources->p_ring);
		return false;
	}
	set_rx_packet_processor();
	lock_rx_q();
	BULLSEYE_EXCLUDE_BLOCK_END

	// Registered as receiver successfully
	si_logdbg("Attached %s to ring %p", flow_key.to_str(), p_nd_resources->p_ring);


        // Verify 5 tuple over 3 tuple
        if (flow_key.is_5_tuple())
        {
        	// Check and remove lesser 3 tuple
        	flow_tuple_with_local_if flow_key_3t(flow_key.get_dst_ip(), flow_key.get_dst_port(), INADDR_ANY, INPORT_ANY, flow_key.get_protocol(), flow_key.get_local_if());
        	rx_flow_map_t::iterator rx_flow_iter = m_rx_flow_map.find(flow_key_3t);
        	if (rx_flow_iter != m_rx_flow_map.end()) {
        		si_logdbg("Removing (and detaching) 3 tuple now that we added a stronger 5 tuple");
        		detach_receiver(flow_key_3t);
        	}
        }

	return true;
}
Пример #8
0
int sockinfo::setsockopt(int __level, int __optname, const void *__optval, socklen_t __optlen)
{
	int ret = SOCKOPT_PASS_TO_OS;

	if (__level == SOL_SOCKET) {
		switch(__optname) {
		case SO_VMA_USER_DATA:
			if (__optlen == sizeof(m_fd_context)) {
				m_fd_context = *(void **)__optval;
				ret = SOCKOPT_INTERNAL_VMA_SUPPORT;
			} else {
				ret = SOCKOPT_NO_VMA_SUPPORT;
				errno = EINVAL;
			}
			break;
		case SO_VMA_RING_USER_MEMORY:
			if (__optval) {
				if (__optlen == sizeof(iovec)) {
					iovec *attr = (iovec *)__optval;
					m_ring_alloc_log_rx.set_memory_descriptor(*attr);
					m_ring_alloc_logic = ring_allocation_logic_rx(get_fd(), m_ring_alloc_log_rx, this);
					if (m_p_rx_ring || m_rx_ring_map.size()) {
						si_logwarn("user asked to assign memory for "
							   "RX ring but ring already exists");
					}
					ret = SOCKOPT_INTERNAL_VMA_SUPPORT;
				} else {
					ret = SOCKOPT_NO_VMA_SUPPORT;
					errno = EINVAL;
					si_logdbg("SOL_SOCKET, SO_VMA_RING_USER_MEMORY - "
						  "bad length expected %d got %d",
						  sizeof(iovec), __optlen);
				}
			}
			else {
				ret = SOCKOPT_NO_VMA_SUPPORT;
				errno = EINVAL;
				si_logdbg("SOL_SOCKET, SO_VMA_RING_USER_MEMORY - NOT HANDLED, optval == NULL");
			}
			break;
		case SO_VMA_FLOW_TAG:
			if (__optval) {
				if (__optlen == sizeof(uint32_t)) {
					if (set_flow_tag(*(uint32_t*)__optval)) {
						si_logdbg("SO_VMA_FLOW_TAG, set "
							  "socket %s to flow id %d",
							  m_fd, m_flow_tag_id);
						// not supported in OS
						ret = SOCKOPT_INTERNAL_VMA_SUPPORT;
					} else {
						ret = SOCKOPT_NO_VMA_SUPPORT;
						errno = EINVAL;
					}
				} else {
					ret = SOCKOPT_NO_VMA_SUPPORT;
					errno = EINVAL;
					si_logdbg("SO_VMA_FLOW_TAG, bad length "
						  "expected %d got %d",
						  sizeof(uint32_t), __optlen);
					break;
				}
			} else {
				ret = SOCKOPT_NO_VMA_SUPPORT;
				errno = EINVAL;
				si_logdbg("SO_VMA_FLOW_TAG - NOT HANDLED, "
					  "optval == NULL");
			}
			break;
		case SO_TIMESTAMP:
		case SO_TIMESTAMPNS:
			if (__optval) {
				m_b_rcvtstamp = *(bool*)__optval;
				if (__optname == SO_TIMESTAMPNS)
					m_b_rcvtstampns = m_b_rcvtstamp;
				si_logdbg("SOL_SOCKET, %s=%s", setsockopt_so_opt_to_str(__optname), (m_b_rcvtstamp ? "true" : "false"));
			}
			else {
				si_logdbg("SOL_SOCKET, %s=\"???\" - NOT HANDLED, optval == NULL", setsockopt_so_opt_to_str(__optname));
			}
			break;

		case SO_TIMESTAMPING:
			if (__optval) {
				uint8_t val = *(uint8_t*)__optval;

				// SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_HARDWARE is NOT supported.
				if (val & (SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE)) {
					ret = SOCKOPT_NO_VMA_SUPPORT;
					errno = EOPNOTSUPP;
					si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_HARDWARE is not supported, errno set to EOPNOTSUPP");
				}

				if (val & (SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE)) {
					if (g_p_ib_ctx_handler_collection->get_ctx_time_conversion_mode() == TS_CONVERSION_MODE_DISABLE){
						if (safe_mce_sys().hw_ts_conversion_mode ==  TS_CONVERSION_MODE_DISABLE) {
							ret = SOCKOPT_NO_VMA_SUPPORT;
							errno = EPERM;
							si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_RAW_HARDWARE and SOF_TIMESTAMPING_RX_HARDWARE socket options were disabled (VMA_HW_TS_CONVERSION = %d) , errno set to EPERM", TS_CONVERSION_MODE_DISABLE);
						} else {
							ret = SOCKOPT_NO_VMA_SUPPORT;
							errno = ENODEV;
							si_logdbg("SOL_SOCKET, SOF_TIMESTAMPING_RAW_HARDWARE and SOF_TIMESTAMPING_RX_HARDWARE is not supported by device(s), errno set to ENODEV");
						}
					}
				}

				m_n_tsing_flags  = val;
				si_logdbg("SOL_SOCKET, SO_TIMESTAMPING=%u", m_n_tsing_flags);
			}
			else {
				si_logdbg("SOL_SOCKET, %s=\"???\" - NOT HANDLED, optval == NULL", setsockopt_so_opt_to_str(__optname));
			}
			break;
		default:
			break;
		}
	} else if (__level == IPPROTO_IP) {
		switch(__optname) {
		case IP_TTL:
			if (__optlen < sizeof(m_n_uc_ttl)) {
				ret = SOCKOPT_NO_VMA_SUPPORT;
				errno = EINVAL;
			} else {
				int val = __optlen < sizeof(val) ?  (uint8_t) *(uint8_t *)__optval : (int) *(int *)__optval;
				if (val != -1 && (val < 1 || val > 255)) {
					ret = SOCKOPT_NO_VMA_SUPPORT;
					errno = EINVAL;
				} else {
					m_n_uc_ttl = (val == -1) ? safe_mce_sys().sysctl_reader.get_net_ipv4_ttl() : (uint8_t) val;
					header_ttl_updater du(m_n_uc_ttl, false);
					update_header_field(&du);
					si_logdbg("IPPROTO_IP, optname=IP_TTL (%d)", m_n_uc_ttl);
				}
			}
			break;
		default:
			break;
		}
	}

	si_logdbg("ret (%d)", ret);
	return ret;
}
Пример #9
0
bool sockinfo::attach_receiver(flow_tuple_with_local_if &flow_key)
{
	// This function should be called from within mutex protected context of the sockinfo!!!

	si_logdbg("Attaching to %s", flow_key.to_str());

	// Protect against local loopback used as local_if & peer_ip
	// rdma_cm will accept it but we don't want to offload it
	if (flow_key.is_local_loopback()) {
		si_logdbg("VMA does not offload local loopback IP address");
		return false;
	}

	if (m_rx_flow_map.find(flow_key) != m_rx_flow_map.end()) {
		si_logdbg("already attached %s", flow_key.to_str());
		return false;
	}

	net_device_resources_t* p_nd_resources = NULL;

	// Check if we are already registered to net_device with the local ip as observers
	ip_address ip_local(flow_key.get_local_if());
	rx_net_device_map_t::iterator rx_nd_iter = m_rx_nd_map.find(ip_local.get_in_addr());
	if (rx_nd_iter == m_rx_nd_map.end()) {

		// Need to register as observer to net_device
		net_device_resources_t nd_resources;
		nd_resources.refcnt = 0;
		nd_resources.p_nde = NULL;
		nd_resources.p_ndv = NULL;
		nd_resources.p_ring = NULL;

		BULLSEYE_EXCLUDE_BLOCK_START
		cache_entry_subject<ip_address, net_device_val*>* p_ces = NULL;
		if (!g_p_net_device_table_mgr->register_observer(ip_local, &m_rx_nd_observer, &p_ces)) {
			si_logdbg("Failed registering as observer for local ip %s", ip_local.to_str().c_str());
			return false;
		}
		nd_resources.p_nde = (net_device_entry*)p_ces;
		if (!nd_resources.p_nde) {
			si_logerr("Got NULL net_devide_entry for local ip %s", ip_local.to_str().c_str());
			return false;
		}
		if (!nd_resources.p_nde->get_val(nd_resources.p_ndv)) {
			si_logerr("Got net_device_val=NULL (interface is not offloaded) for local ip %s", ip_local.to_str().c_str());
			return false;
		}

		unlock_rx_q();
		m_rx_ring_map_lock.lock();
		resource_allocation_key key = 0;
		if (m_rx_ring_map.size()) {
			key = m_ring_alloc_logic.get_key();
		} else {
			key = m_ring_alloc_logic.create_new_key();
		}
		nd_resources.p_ring = nd_resources.p_ndv->reserve_ring(key);
		m_rx_ring_map_lock.unlock();
		lock_rx_q();
		if (!nd_resources.p_ring) {
			si_logdbg("Failed to reserve ring for allocation key %d on lip %s", m_ring_alloc_logic.get_key(), ip_local.to_str().c_str());
			return false;
		}

		// Add new net_device to rx_map
		m_rx_nd_map[ip_local.get_in_addr()] = nd_resources;

		rx_nd_iter = m_rx_nd_map.find(ip_local.get_in_addr());
		if (rx_nd_iter == m_rx_nd_map.end()) {
			si_logerr("Failed to find rx_nd_iter");
			return false;
		}
		BULLSEYE_EXCLUDE_BLOCK_END

	}