Exemple #1
0
/* Record the local address and port of a UDP socket.
 *
 * The values are written through udp_laddr_be32()/udp_lport_be16() and
 * mirrored into the socket's control-plane state (us->s.cp).  A multicast
 * address is never stored as the control-plane local address: zero is
 * written there instead.
 *
 *   ep          endpoint whose UDP socket is updated
 *   laddr_be32  local IPv4 address
 *   lport_be16  local port; narrowed to ci_uint16 for udp_lport_be16()
 */
void ci_udp_set_laddr(citp_socket* ep, unsigned laddr_be32, int lport_be16)
{
  ci_udp_state* us = SOCK_TO_UDP(ep->s);

  udp_laddr_be32(us) = laddr_be32;
  udp_lport_be16(us) = (ci_uint16) lport_be16;

  /* The control plane keeps 0 in place of a multicast local address. */
  us->s.cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(laddr_be32) ? 0 : laddr_be32;
  us->s.cp.lport_be16 = lport_be16;
}
Exemple #2
0
/* Record the local address and port of a UDP socket.
 *
 * ci_sock_set_laddr() is always called with the supplied address and port.
 * For an IPv4 socket the control-plane state (us->s.cp) is then updated as
 * well; a multicast address is stored there as zero.
 *
 * When CI_CFG_IPV6 is enabled and the socket's packet template is IPv6,
 * the function returns right after ci_sock_set_laddr() and does not write
 * us->s.cp itself.
 * NOTE(review): confirm that leaving cp.lport_be16 untouched on the IPv6
 * path is intended (it may be handled inside ci_sock_set_laddr()).
 *
 *   ep          endpoint whose UDP socket is updated
 *   addr        local address; only addr.ip4 is read directly here
 *   lport_be16  local port
 */
void ci_udp_set_laddr(citp_socket* ep, ci_addr_t addr, int lport_be16)
{
  ci_udp_state* us = SOCK_TO_UDP(ep->s);

  ci_sock_set_laddr(&us->s, addr, lport_be16);

#if CI_CFG_IPV6
  if( ipcache_is_ipv6(us->s.pkt) )
    return;
#endif

  /* The control plane keeps 0 in place of a multicast local address. */
  us->s.cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(addr.ip4) ? 0 : addr.ip4;
  us->s.cp.lport_be16 = lport_be16;
}
Exemple #3
0
/* Decide whether a UDP socket should be handed over to the OS.
 *
 * Returns 1 (hand over) when any of the following holds:
 *   - CI_CFG_FAKE_IPV6 is enabled, the socket is AF_INET6 and [addr] is
 *     not an IPv4 address according to ci_tcp_ipv6_is_ipv4();
 *   - the byte-swapped [lport] lies inside one of the three configured
 *     udp_port_handover ranges;
 *   - the IPv4 address extracted from [addr] is not INADDR_ANY, is not
 *     reported as local by cicp_user_addr_is_local_efab(), and is not
 *     multicast.
 * Returns 0 otherwise.
 *
 *   ep     endpoint being bound
 *   addr   address the socket is being bound to
 *   lport  local port, byte-swapped with CI_BSWAP_BE16() before comparing
 */
int ci_udp_should_handover(citp_socket* ep, const struct sockaddr* addr,
                           ci_uint16 lport)
{
    const ci_uint16 port = CI_BSWAP_BE16(lport);
    ci_uint32 addr_be32;
    int in_handover_range;

#if CI_CFG_FAKE_IPV6
    if( ep->s->domain == AF_INET6 && ! ci_tcp_ipv6_is_ipv4(addr) )
        return 1;
#endif

    in_handover_range =
        (port >= NI_OPTS(ep->netif).udp_port_handover_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover_max) ||
        (port >= NI_OPTS(ep->netif).udp_port_handover2_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover2_max) ||
        (port >= NI_OPTS(ep->netif).udp_port_handover3_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover3_max);

    if( in_handover_range ) {
        /* Only the bounds of the first range are reported in the log. */
        LOG_UC(log(FNS_FMT "HANDOVER (%d <= %d <= %d)",
                   FNS_PRI_ARGS(ep->netif, ep->s),
                   NI_OPTS(ep->netif).udp_port_handover_min,
                   port,
                   NI_OPTS(ep->netif).udp_port_handover_max));
        return 1;
    }

    addr_be32 = ci_get_ip4_addr(ep->s->domain, addr);

    /* Wildcard, local and multicast addresses all stay with us. */
    if( addr_be32 == CI_BSWAPC_BE32(INADDR_ANY) )
        return 0;
    if( cicp_user_addr_is_local_efab(CICP_HANDLE(ep->netif), &addr_be32) )
        return 0;
    if( CI_IP_IS_MULTICAST(addr_be32) )
        return 0;

    /* Either the bind/getsockname indicated that we need to let the OS
     * take this or the local address is not one of ours - so we can safely
     * hand-over as bind to a non-ANY addr cannot be revoked.
     * The filters (if any) have already been removed, so we just get out. */
    return 1;
}
Exemple #4
0
/* Clear affinity settings for the flow described by the arguments.
 *
 * The IP list is refreshed first.  Then, depending on the local address:
 *   - zero (wildcard): clear_affinity() is called for every listed
 *     interface accepted by interface_driver_is(name, "sfc"), using that
 *     interface's own IP as the local address;
 *   - multicast: clear_affinity() is called for every listed interface
 *     accepted by interface_is(name, "sfc"), using the multicast address;
 *   - anything else: the address is mapped to an ifindex and
 *     clear_affinity() is called once for it.
 *
 * NOTE(review): the wildcard branch filters with interface_driver_is()
 * while the multicast branch filters with interface_is() - confirm that
 * the difference is intentional.
 *
 *   protos  protocol name, converted with str_to_proto()
 *   la      local address and port
 *   ra      remote address and port
 *
 * Returns 1 on success, or 0 if no interface can be found for a unicast
 * local address (an error message is printed in that case).
 */
static int do_clear_affinity(const char* protos, struct sockaddr_in la,
                             struct sockaddr_in ra)
{
  int proto = str_to_proto(protos);
  int idx, ifindex;

  refresh_ip_list();

  if( la.sin_addr.s_addr == 0 ) {
    /* Wildcard local address: cover each matching interface's own IP. */
    for( idx = 0; idx < ip_list_n; ++idx ) {
      if( interface_driver_is(ip_list_name(idx), "sfc") ) {
        clear_affinity(interface_to_ifindex(ip_list_name(idx)), proto,
                       ip_list_ip(idx), la.sin_port,
                       ra.sin_addr.s_addr, ra.sin_port);
      }
    }
    return 1;
  }

  if( CI_IP_IS_MULTICAST(la.sin_addr.s_addr) ) {
    /* Multicast local address: same address on each matching interface. */
    for( idx = 0; idx < ip_list_n; ++idx ) {
      if( interface_is(ip_list_name(idx), "sfc") ) {
        clear_affinity(interface_to_ifindex(ip_list_name(idx)), proto,
                       la.sin_addr.s_addr, la.sin_port,
                       ra.sin_addr.s_addr, ra.sin_port);
      }
    }
    return 1;
  }

  /* Unicast local address: it must belong to a known interface. */
  ifindex = ip_to_ifindex(la.sin_addr.s_addr);
  if( ifindex < 0 ) {
    err("%s: ERROR: Can't find interface for IP %s\n",
        me, inet_ntoa(la.sin_addr));
    return 0;
  }

  clear_affinity(ifindex, proto,
                 la.sin_addr.s_addr, la.sin_port,
                 ra.sin_addr.s_addr, ra.sin_port);
  return 1;
}
Exemple #5
0
/* Run the socket's receive-filter callback over packets on us->recv_q that
 * have not yet been given a verdict.
 *
 * Packets are processed one at a time, starting from us->recv_q.filter,
 * until either a packet passes the filter or every added packet has been
 * accounted for as passed or dropped.  The socket must be locked (this is
 * asserted).
 *
 * Returns 1 as soon as a packet passes the filter.  If the loop finishes
 * without a pass, returns non-zero iff pkts_filter_passed differs from
 * pkts_delivered.
 */
int ci_udp_filter_recved_pkts(ci_netif* ni, ci_udp_state* us)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  ci_ip_pkt_fmt* pkt;
  unsigned cb_flags;
  int dropped_bytes;

  ci_assert(ci_sock_is_locked(ni, &us->s.b));

  /* The callback sees the packet through zc_msg, backed by zc_iovec. */
  zc_msg.iov = zc_iovec;
  zc_msg.msghdr.msg_controllen = 0;
  zc_msg.msghdr.msg_flags = 0;

  /* Keep going while some added packet has neither passed nor been dropped. */
  while( us->recv_q.pkts_added != 
         us->recv_q.pkts_filter_passed + us->recv_q.pkts_filter_dropped ) {
    /* NOTE(review): presumably orders the counter read above before the
     * packet reads below - confirm against ci_rmb()'s definition. */
    ci_rmb();
    pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
    /* If the packet at the filter position already has a verdict, step to
     * the next one, which must not have one yet. */
    if( pkt->pf.udp.rx_flags & 
        (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
         CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED) ) {
      /* We know this can't go past tail because of the while loop condition */
      us->recv_q.filter = pkt->next;
      pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
      ci_assert( !(pkt->pf.udp.rx_flags & 
                   (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
                    CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED)) );
    }

    ci_udp_pkt_to_zc_msg(ni, pkt, &zc_msg);

    /* Packets with a multicast destination are flagged as shared. */
    cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ? 
      ONLOAD_ZC_MSG_SHARED : 0;
    /* Invoke the filter callback stored on the socket with its argument. */
    rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
      (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

    /* The filter callback is not allowed to keep the packet. */
    ci_assert(!(rc & ONLOAD_ZC_KEEP));

    if( rc & ONLOAD_ZC_TERMINATE ) {
      /* Dropped: account for the whole payload and mark the packet. */
      us->recv_q.bytes_filter_dropped += pkt->pf.udp.pay_len;
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED;
      ++us->recv_q.pkts_filter_dropped;
    }
    else {
      /* Passed: mark the packet and stop after this one. */
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED;
      ++us->recv_q.pkts_filter_passed;
      if( rc & ONLOAD_ZC_MODIFIED ) {
        /* Modification is not expected for shared packets.  The bytes
         * removed by the callback are counted as dropped; the assertions
         * require that some, but not all, of the payload was removed. */
        ci_assert(!(cb_flags & ONLOAD_ZC_MSG_SHARED));
        dropped_bytes = ci_zc_msg_to_udp_pkt(ni, &zc_msg, pkt);
        ci_assert_gt(dropped_bytes, 0);
        ci_assert_lt(dropped_bytes, pkt->pf.udp.pay_len);
        pkt->pf.udp.pay_len -= dropped_bytes;
        us->recv_q.bytes_filter_dropped += dropped_bytes;
      }
      us->recv_q.bytes_filter_passed += pkt->pf.udp.pay_len;
      return 1;
    }
  }

  /* Nothing passed in this call; report whether passed packets are still
   * waiting to be delivered. */
  return us->recv_q.pkts_filter_passed != us->recv_q.pkts_delivered;
}
Exemple #6
0
/* (Re)resolve the route for an IP cache entry and fill in its link-layer
 * details.
 *
 * A forwarding key is built from the destination address in [ipcache] and
 * the socket's control-plane settings in [sock_cp], and resolved via
 * cicp_user_resolve().  On return ipcache->status is one of:
 *   retrrc_localroute  - the route goes via the loopback interface;
 *   retrrc_alienroute  - the route cannot be used by this stack;
 *   retrrc_nomac       - the route is usable but has no valid ARP entry;
 *   retrrc_success     - the route is usable and the MAC headers are set.
 *
 * If the cached version info is still valid on entry, only an explicit ARP
 * resolution is requested; the function returns early if the version info
 * is still valid afterwards.
 *
 *   ni       network interface (stack) the cache entry belongs to
 *   ipcache  cache entry to update
 *   sock_cp  control-plane state of the owning socket
 *
 * NOTE(review): data.ifindex is read after cicp_user_resolve() regardless
 * of its return code; this relies on cicp_user_resolve() filling [data]
 * on failure - confirm.
 */
void
cicp_user_retrieve(ci_netif*                    ni,
                   ci_ip_cached_hdrs*           ipcache,
                   const struct oo_sock_cplane* sock_cp)
{
  struct cp_fwd_key key;
  struct cp_fwd_data data;
  int rc;
  uint32_t daddr_be32 = ipcache->ip.ip_daddr_be32;

  /* This function must be called when "the route is unusable".  I.e. when
   * the route is invalid or if there is no ARP.  In the second case, we
   * can expedite ARP resolution by explicit request just now. */
  if( oo_cp_verinfo_is_valid(ni->cplane, &ipcache->mac_integrity) ) {
    ci_assert_equal(ipcache->status, retrrc_nomac);
    oo_cp_arp_resolve(ni->cplane, &ipcache->mac_integrity);

    /* Re-check the version of the fwd entry after ARP resolution.
     * Return if nothing changed; otherwise handle the case when ARP has
     * already been resolved. */
    if( oo_cp_verinfo_is_valid(ni->cplane, &ipcache->mac_integrity) )
      return;
  }


  /* Build the forwarding key from the destination and socket settings. */
  key.dst = daddr_be32;
  key.tos = sock_cp->ip_tos;
  key.flag = 0;

  if( ipcache->ip.ip_protocol == IPPROTO_UDP )
    key.flag |= CP_FWD_KEY_UDP;

  key.ifindex = sock_cp->so_bindtodevice;
  if( CI_IP_IS_MULTICAST(daddr_be32) ) {
    if( sock_cp->sock_cp_flags & OO_SCP_NO_MULTICAST ) {
      ipcache->status = retrrc_alienroute;
      ipcache->hwport = CI_HWPORT_ID_BAD;
      ipcache->intf_i = -1;
      return;
    }

    /* In linux, SO_BINDTODEVICE has the priority over IP_MULTICAST_IF */
    if( key.ifindex == 0 )
      key.ifindex = sock_cp->ip_multicast_if;
    /* Prefer the multicast interface's address, falling back to the
     * socket's local address when none is set. */
    key.src = sock_cp->ip_multicast_if_laddr_be32;
    if( key.src == 0 && sock_cp->ip_laddr_be32 != 0 )
      key.src = sock_cp->ip_laddr_be32;
  }
  else {
    key.src = sock_cp->ip_laddr_be32;
    if( sock_cp->sock_cp_flags & OO_SCP_TPROXY )
      key.flag |= CP_FWD_KEY_TRANSPARENT;
  }

  if( key.src == 0 && (sock_cp->sock_cp_flags & OO_SCP_UDP_WILD) )
    key.flag |= CP_FWD_KEY_SOURCELESS;

#ifdef __KERNEL__
  if( ! (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) )
#endif
    key.flag |= CP_FWD_KEY_REQ_WAIT;

  rc = cicp_user_resolve(ni, &ipcache->mac_integrity, &key, &data);
  /* With no source address given (and not a wild UDP socket), resolve
   * again using the source address chosen by the first lookup. */
  if( rc == 0 && key.src == 0 &&
      ! (sock_cp->sock_cp_flags & OO_SCP_UDP_WILD) ) {
    key.src = data.src;
    rc = cicp_user_resolve(ni, &ipcache->mac_integrity, &key, &data);
  }

  switch( data.ifindex ) {
    case CI_IFID_LOOP:
      ipcache->status = retrrc_localroute;
      ipcache->encap.type = CICP_LLAP_TYPE_NONE;
      ipcache->ether_offset = 4;
      ipcache->intf_i = OO_INTF_I_LOOPBACK;
      return;
    case CI_IFID_BAD:
      ipcache->status = retrrc_alienroute;
      ipcache->intf_i = -1;
      return;
    default:
    {
      cicp_hwport_mask_t hwports = 0;
      /* Can we accelerate interface in this stack ? */
      if( (data.encap.type & CICP_LLAP_TYPE_BOND) == 0 &&
          (data.hwports & ~(ci_netif_get_hwport_mask(ni))) == 0 )
        break;
      /* Check bond */
      rc = oo_cp_find_llap(ni->cplane, data.ifindex, NULL/*mtu*/,
                           NULL /*tx_hwports*/, &hwports /*rx_hwports*/,
                           NULL/*mac*/, NULL /*encap*/);
      if( rc != 0 || (hwports & ~(ci_netif_get_hwport_mask(ni))) ) {
        ipcache->status = retrrc_alienroute;
        ipcache->intf_i = -1;
        /* Stop here, like every other alien-route path: falling through
         * would let the code below overwrite the status just set. */
        return;
      }
      break;
    }
  }

  ipcache->encap = data.encap;
#if CI_CFG_TEAMING
  if( ipcache->encap.type & CICP_LLAP_TYPE_USES_HASH ) {
     if( cicp_user_bond_hash_get_hwport(ni, ipcache, data.hwports,
                                    sock_cp->lport_be16, daddr_be32) != 0 ) {
      ipcache->status = retrrc_alienroute;
      ipcache->intf_i = -1;
      return;
    }
  }
  else
#endif
    ipcache->hwport = cp_hwport_mask_first(data.hwports);

  ipcache->mtu = data.mtu;
  ipcache->ip_saddr.ip4 = key.src == INADDR_ANY ? data.src : key.src;
  ipcache->ifindex = data.ifindex;
  ipcache->nexthop.ip4 = data.next_hop;
  if( ! ci_ip_cache_is_onloadable(ni, ipcache)) {
    ipcache->status = retrrc_alienroute;
    ipcache->intf_i = -1;
    return;
  }

  /* Layout the Ethernet header, and set the source mac.
   * Route resolution already issues ARP request, so there is no need to
   * call oo_cp_arp_resolve() explicitly in case of retrrc_nomac. */
  ipcache->status = data.arp_valid ? retrrc_success : retrrc_nomac;
  cicp_ipcache_vlan_set(ipcache);
  memcpy(ci_ip_cache_ether_shost(ipcache), &data.src_mac, ETH_ALEN);
  if( data.arp_valid )
    memcpy(ci_ip_cache_ether_dhost(ipcache), &data.dst_mac, ETH_ALEN);

  /* Multicast and unicast destinations use separate TTL settings. */
  if( CI_IP_IS_MULTICAST(daddr_be32) )
    ipcache->ip.ip_ttl = sock_cp->ip_mcast_ttl;
  else
    ipcache->ip.ip_ttl = sock_cp->ip_ttl;
}
Exemple #7
0
/* Bind handler for TCP sockets.
 *
 * In this bind handler we just check that the address to which we are
 * binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 *
 * Steps performed:
 *   1. reject a NULL address, a socket that is not CLOSED or that was
 *      previously established, a family mismatch, or a short addrlen;
 *   2. with CI_CFG_FAKE_IPV6, hand over PF_INET6 binds whose address is
 *      not IPv4 according to ci_tcp_ipv6_is_ipv4();
 *   3. if the port is listed in CITP_OPTS.tcp_reuseports, set SO_REUSEPORT
 *      on the OS socket and flag the socket accordingly;
 *   4. call __ci_bind() unless the legacy reuseport flag is set;
 *   5. record the bound local address/port and clear the remote ones.
 *
 * Returns 0 on success and CI_SOCKET_HANDOVER for step 2; failures return
 * through RET_WITH_ERRNO() or CI_LOGLEVEL_TRY_RET().
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  struct sockaddr_in* my_addr_in = (struct sockaddr_in*) my_addr;
  ci_uint32 addr_be32;
  ci_uint16 new_port;
  int rc;

  CHECK_TEP(ep);

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   *       What is better? */
  if( my_addr == NULL ||
      s->b.state != CI_TCP_CLOSED ||
      (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB) )
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas. */
  if( s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in) )
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if( s->domain == PF_INET6 ) {
    if( addrlen < SIN6_LEN_RFC2133 )
      RET_WITH_ERRNO( EINVAL );
    if( !ci_tcp_ipv6_is_ipv4(my_addr) )
      return CI_SOCKET_HANDOVER;
  }
#endif

  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* Force SO_REUSEPORT for ports listed in the tcp_reuseports option. */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *entry;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, entry, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( entry->port == new_port ) {
        int enable = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &enable,
                               sizeof(enable));
        ci_rel_os_sock_fd(os_sock);
        /* ENOPROTOOPT from the OS selects the legacy reuseport mode. */
        if( rc != 0 && errno == ENOPROTOOPT )
          s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* The actual bind is skipped in legacy reuseport mode. */
  if( !(s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) ) {
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, s, addr_be32, &new_port));
  }

  /* Record the binding; the control plane keeps 0 in place of a multicast
   * local address. */
  s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;
  s->cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(addr_be32) ? 0 : addr_be32;
  s->cp.lport_be16 = new_port;
  sock_raddr_be32(s) = 0;
  sock_rport_be16(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}