Example #1
0
/* Wrapper around sys_getsockname for UDP endpoints: asks the OS socket for
 * its local name, sanity-checks the answer against the endpoint's domain
 * and records the resulting local address/port on the endpoint.
 *
 * Returns 0 on success, the non-zero result of ci_sys_getsockname() when
 * that call fails, or -1 when the returned address has an unexpected
 * family or is shorter than the structure required for the domain. */
static int ci_udp_sys_getsockname( ci_fd_t sock, citp_socket* ep )
{
    union ci_sockaddr_u os_name;
    socklen_t os_name_len = sizeof(os_name);
    /* Smallest acceptable length for the address the OS hands back. */
    size_t min_len = sizeof(struct sockaddr_in);
    int rc;

    ci_assert(ep);
#if CI_CFG_FAKE_IPV6
    ci_assert(ep->s->domain == AF_INET || ep->s->domain == AF_INET6);
    /* An IPv6 endpoint needs a full sockaddr_in6 from the OS. */
    if( ep->s->domain == AF_INET6 )
        min_len = sizeof(struct sockaddr_in6);
#else
    ci_assert(ep->s->domain == AF_INET);
#endif

    rc = ci_sys_getsockname( sock, &os_name.sa, &os_name_len );
    if( rc != 0 )
        return rc;

    if( os_name.sa.sa_family != ep->s->domain || os_name_len < min_len ) {
        LOG_UV(log("%s: OS sock domain %d != expected domain %d or "
                   "sys_getsockname struct small (%d exp %d)",
                   __FUNCTION__, os_name.sa.sa_family, ep->s->domain,
                   os_name_len,
                   (int)(ep->s->domain == AF_INET ? sizeof(struct sockaddr_in) :
                         sizeof(struct sockaddr_in6))));
        return -1;
    }

#if CI_CFG_FAKE_IPV6
    if( ep->s->domain != AF_INET ) {
        /* IPv6 endpoint: take the port from the sockaddr_in6 view. */
        ci_udp_set_laddr( ep,
                          ci_get_ip4_addr(os_name.sa.sa_family, &os_name.sa),
                          os_name.sin6.sin6_port );
        return 0;
    }
#endif
    /* IPv4 endpoint: take the port from the sockaddr_in view. */
    ci_udp_set_laddr( ep, ci_get_ip4_addr(os_name.sa.sa_family, &os_name.sa),
                      os_name.sin.sin_port );
    return 0;
}
Example #2
0
/* Perform a reuseport bind for a socket that has CI_SOCK_FLAG_REUSEPORT set.
 *
 * [sa] supplies the local address and port to bind to; [sa_len] is not
 * examined here.  Returns 0 on success.  On failure of
 * ci_tcp_ep_reuseport_bind() errno is set to the negated result and -1 is
 * returned.  Binding to port 0 is rejected via RET_WITH_ERRNO(ENOSYS).
 */
int ci_udp_reuseport_bind(citp_socket* ep, ci_fd_t fd,
                          const struct sockaddr* sa, socklen_t sa_len)
{
    const struct sockaddr_in* sa_in = (const struct sockaddr_in*)sa;
    ci_uint32 bind_addr_be32 = ci_get_ip4_addr(ep->s->domain, sa);
    int bind_port_be16 = sa_in->sin_port;
    int rc;

    ci_assert_nequal(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT, 0);

    /* Port 0 cannot be supported: the kernel would pick the port for us,
     * but the socket has to be moved before the OS socket is bound, and
     * without a port number there is nothing to look clusters up by.
     */
    if( bind_port_be16 == 0 ) {
        LOG_UC(ci_log("%s: Binding to port 0 with reuseport set not supported",
                      __FUNCTION__));
        RET_WITH_ERRNO(ENOSYS);
    }

    rc = ci_tcp_ep_reuseport_bind(fd, CITP_OPTS.cluster_name,
                                  CITP_OPTS.cluster_size,
                                  CITP_OPTS.cluster_restart_opt,
                                  bind_addr_be32, bind_port_be16);
    if( rc == 0 )
        return 0;

    /* The helper reports failure as a negative errno value. */
    errno = -rc;
    return -1;
}
Example #3
0
/* Finish binding a UDP endpoint.  Kept apart from the main bind code so it
 * can also serve the implicit bind that happens when sendto() is called on
 * an OS socket.  [lport] and CI_SIN(addr)->sin_port need not be equal.
 *
 * Returns CI_SOCKET_HANDOVER when the socket should be given to the OS
 * (either ci_udp_should_handover() said so, or filter installation failed
 * with CI_SOCKET_ERROR while CITP_OPTS.no_fail is set); otherwise returns
 * the result of ci_udp_set_filters(). */
static int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr,
                                ci_uint16 lport )
{
    ci_udp_state* us;
    ci_uint32 laddr_be32;
    int filter_rc;

    CHECK_UEP(ep);
    ci_assert(addr != NULL);

    if( ! ci_udp_should_handover(ep, addr, lport) ) {
        laddr_be32 = ci_get_ip4_addr(ep->s->domain, addr);
        ci_udp_set_laddr(ep, laddr_be32, lport);

        us = SOCK_TO_UDP(ep->s);
        if( laddr_be32 != 0 ) {
            us->s.cp.sock_cp_flags |= OO_SCP_LADDR_BOUND;
        }
        /* Forget about any rx/tx that has already happened. */
        UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);

#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        /* Addresses owned by the OS were handed over above, so what is left
         * must be one of our own source addresses.
         */
        filter_rc = ci_udp_set_filters( ep, us);
        ci_assert( !UDP_GET_FLAG(us, CI_UDPF_EF_BIND) );
        /*! \todo FIXME isn't the port the thing to be testing here? */
        if( udp_laddr_be32(us) != INADDR_ANY_BE32 ) {
            UDP_SET_FLAG(us, CI_UDPF_EF_BIND);
        }
        CI_UDPSTATE_SHOW_EP( ep );

        if( filter_rc != CI_SOCKET_ERROR || ! CITP_OPTS.no_fail )
            return filter_rc;

        /* Filter install failed but we must not fail: count it and fall
         * through to the handover exit. */
        CITP_STATS_NETIF(++ep->netif->state->stats.udp_bind_no_filter);
    }

    LOG_UV(log("%s: "SK_FMT" HANDOVER", __FUNCTION__, SK_PRI_ARGS(ep)));
    return CI_SOCKET_HANDOVER;
}
Example #4
0
/* Decide whether a UDP endpoint being bound to [addr]/[lport] should be
 * handed over to the OS.
 *
 * [lport] is in network byte order.  Returns 1 when the socket should be
 * handed over, 0 otherwise.  Handover is chosen when:
 *   - (CI_CFG_FAKE_IPV6) the socket is AF_INET6 and [addr] is not an IPv4
 *     address according to ci_tcp_ipv6_is_ipv4();
 *   - the port lies in any of the three configured udp_port_handover
 *     ranges; or
 *   - the address is neither INADDR_ANY, nor local according to
 *     cicp_user_addr_is_local_efab(), nor multicast.
 */
int ci_udp_should_handover(citp_socket* ep, const struct sockaddr* addr,
                           ci_uint16 lport)
{
    ci_uint32 laddr_be32;
    /* Port converted once for comparison against the configured ranges. */
    ci_uint16 port = CI_BSWAP_BE16(lport);

#if CI_CFG_FAKE_IPV6
    if( ep->s->domain == AF_INET6 && ! ci_tcp_ipv6_is_ipv4(addr) )
        return 1;
#endif

    if( (port >= NI_OPTS(ep->netif).udp_port_handover_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover_max) ||
        (port >= NI_OPTS(ep->netif).udp_port_handover2_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover2_max) ||
        (port >= NI_OPTS(ep->netif).udp_port_handover3_min &&
         port <= NI_OPTS(ep->netif).udp_port_handover3_max) ) {
        /* Note: only the first range's bounds are reported, whichever of
         * the three ranges actually matched. */
        LOG_UC(log(FNS_FMT "HANDOVER (%d <= %d <= %d)",
                   FNS_PRI_ARGS(ep->netif, ep->s),
                   NI_OPTS(ep->netif).udp_port_handover_min,
                   port,
                   NI_OPTS(ep->netif).udp_port_handover_max));
        return 1;
    }

    laddr_be32 = ci_get_ip4_addr(ep->s->domain, addr);

    /* Addresses we keep: the wildcard, one of our own, or multicast. */
    if( laddr_be32 == CI_BSWAPC_BE32(INADDR_ANY) )
        return 0;
    if( cicp_user_addr_is_local_efab(CICP_HANDLE(ep->netif), &laddr_be32) )
        return 0;
    if( CI_IP_IS_MULTICAST(laddr_be32) )
        return 0;

    /* Either the bind/getsockname indicated that the OS must take this, or
     * the local address is not one of ours.  Handing over is safe because a
     * bind to a non-ANY address cannot be revoked.  Any filters have already
     * been removed, so there is nothing more to do here. */
    return 1;
}
Example #5
0
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be considered an internal error or failing of onload.
 *
 * [serv_addr] is the peer address passed to connect(); [os_sock] is the OS
 * socket on which the system connect() was performed.  [addrlen] is not
 * examined in this function.
 *
 * Returns:
 *   0                   on success (including the "route via OS socket"
 *                       case where filters are simply cleared)
 *   CI_SOCKET_HANDOVER  when the socket should be handed to the OS; filters
 *                       are cleared first
 *   other               the result of the failing helper
 *                       (ci_udp_disconnect, ci_udp_sys_getsockname or
 *                       ci_udp_set_filters) when it is not converted into a
 *                       handover by CITP_OPTS.no_fail
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
    const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
    ci_uint32 dst_be32;
    ci_udp_state* us = SOCK_TO_UDP(ep->s);
    int onloadable;
    int rc = 0;

    CHECK_UEP(ep);

    /* Start from a clean slate: drop the "has sent" flag and any pending
     * rx/tx error state. */
    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
    us->s.rx_errno = 0;
    us->s.tx_errno = 0;

    /* A disconnect request is delegated; its result leaves through the
     * common exit so that no_fail handling still applies. */
    if( IS_DISCONNECTING(serv_sin) ) {
        rc = ci_udp_disconnect(ep, us, os_sock);
        goto out;
    }
#if CI_CFG_FAKE_IPV6
    /* A genuine IPv6 destination cannot be handled here. */
    if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
        LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
        goto handover;
    }
#endif

    dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);
    /* Pick up the local name of the OS socket now that it is connected. */
    if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
        LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
                  FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                  CI_BSWAP_BE16(serv_sin->sin_port), errno));
        goto out;
    }

    /* Record the peer and look up how to reach it; the lookup result is
     * left in us->s.pkt.status. */
    us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
    ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
    cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

    /* Only a successful lookup, or one that merely lacks a MAC, counts as
     * onloadable.  Anything else is handed over when the
     * udp_connect_handover option is set, and otherwise carries on as
     * non-onloadable. */
    switch( us->s.pkt.status ) {
    case retrrc_success:
    case retrrc_nomac:
        onloadable = 1;
        break;
    default:
        onloadable = 0;
        if( NI_OPTS(ep->netif).udp_connect_handover ) {
            LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                       ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
            goto handover;
        }
        break;
    }

    /* Wildcard address or port 0: keep the socket but remove filters and
     * report success directly (this bypasses the final CONNECTED log). */
    if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
        LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
                   FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                   CI_BSWAP_BE16(serv_sin->sin_port)));
        ci_udp_clr_filters(ep);
        return 0;
    }
    if( CI_IP_IS_LOOPBACK(dst_be32) ) {
        /* After connecting via loopback it is not possible to connect anywhere
         * else.
         */
        LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                   ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
        goto handover;
    }

    if( onloadable ) {
#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
            /* Failed to set filters.  Most likely we've run out of h/w filters.
             * Handover to O/S to avoid breaking the app.
             *
             * TODO: Actually we probably won't break the app if we don't
             * handover, as packets will still get delivered via the kernel
             * stack.  Might be worth having a runtime option to choose whether
             * or not to handover in such cases.
             */
            LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                      FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                      CI_BSWAP_BE16(serv_sin->sin_port), rc));
            CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
            /* Note: the handover only happens at [out] when rc < 0 and
             * CITP_OPTS.no_fail is set; otherwise rc is returned. */
            goto out;
        }
    }
    else {
        /* Destination is not onloadable: make sure no filters remain. */
        ci_udp_clr_filters(ep);
    }

    LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
               SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
               ip_addr_str(udp_laddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
               ip_addr_str(udp_raddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
    return 0;

out:
    /* Common exit for helper results: with no_fail set, a negative result
     * becomes a handover instead of an error. */
    if( rc < 0 && CITP_OPTS.no_fail )
        goto handover;
    return rc;

handover:
    /* Always remove our filters before giving the socket to the OS. */
    ci_udp_clr_filters(ep);
    return CI_SOCKET_HANDOVER;
}
Example #6
0
/* In this bind handler we just check that the address to which we
 * are binding is either "any" or one of ours, and record the binding in
 * the socket state.
 * In the Linux kernel version [fd] is unused.
 *
 * [my_addr]/[addrlen] describe the requested local address; it must be
 * non-NULL, of the socket's own family, and at least as long as the
 * address structure for that family.
 *
 * Returns:
 *   0                   on success; the socket is flagged
 *                       CI_SOCK_FLAG_BOUND and its local address/port are
 *                       recorded, with the remote address/port zeroed
 *   CI_SOCKET_HANDOVER  (CI_CFG_FAKE_IPV6) for an AF_INET6 socket whose
 *                       address is not IPv4 per ci_tcp_ipv6_is_ipv4()
 *   via RET_WITH_ERRNO  EINVAL for a NULL/short address, a socket that is
 *                       not in CI_TCP_CLOSED state or was previously
 *                       established; EAFNOSUPPORT or EINVAL for a family
 *                       mismatch
 *   via CI_LOGLEVEL_TRY_RET  whatever that macro returns when __ci_bind()
 *                       fails
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   *       What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );


  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif
  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* If the requested port is on the configured list of forced-reuseport
   * ports, turn SO_REUSEPORT on for the OS socket and flag our socket. */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        int setsockopt_errno;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        /* Capture errno before releasing the OS socket: the release call
         * may overwrite it, and the test below needs the value left by
         * setsockopt. */
        setsockopt_errno = errno;
        ci_rel_os_sock_fd(os_sock);
        /* ENOPROTOOPT means the OS does not know SO_REUSEPORT; fall back
         * to the legacy handling. */
        if( rc != 0 && setsockopt_errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* __ci_bind() may update new_port (it is passed by pointer); it is
   * skipped entirely for legacy reuseport sockets. */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;
  /* A multicast bind address is not stored as the control-plane local
   * address; zero is stored instead. */
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}
Example #7
0
/* Start (or continue) a user-level TCP connect on [ep] towards [serv_addr].
 *
 * [*p_moved] is set to 1 when the loopback-connect operation reports that
 * the socket was moved (op.out_moved); it is not written otherwise.
 *
 * Returns:
 *          0                  on success
 *          
 *          CI_SOCKET_ERROR (and errno set)
 *                             this is a normal error that is returned to the
 *                             the application
 *
 *          CI_SOCKET_HANDOVER we tell the upper layers to handover, no need
 *                             to set errno since it isn't a real error
 *
 *          -EAGAIN            returned as-is when the loopback-connect
 *                             operation reports -EAGAIN in op.out_rc
 */
int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr,
		   socklen_t addrlen, ci_fd_t fd, int *p_moved)
{
  /* Address family is validated earlier. */
  struct sockaddr_in* inaddr = (struct sockaddr_in*) serv_addr;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* ts = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc = 0, crc;
  ci_uint32 dst_be32;

  /* Configured to always hand connects over: decided before taking the
   * stack lock. */
  if( NI_OPTS(ep->netif).tcp_connect_handover )
    return CI_SOCKET_HANDOVER;

  /* Make sure we're up-to-date. */
  ci_netif_lock(ep->netif);
  CHECK_TEP(ep);
  ci_netif_poll(ep->netif);

  /*
   * 1. Check if state of the socket is OK for connect operation.
   */

 start_again:

  /* A pending socket error is reported first. */
  if( (rc = ci_tcp_connect_handle_so_error(s)) != 0) {
    CI_SET_ERROR(rc, rc);
    goto unlock_out;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    /* see if progress can be made on this socket before
    ** determining status  (e.g. non-blocking connect and connect poll)*/
    if( s->b.state & CI_TCP_STATE_SYNCHRONISED ) {
      /* An earlier non-blocking connect has completed: clear the marker and
       * report success once. */
      if( ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT ) {
        ts->tcpflags &= ~CI_TCPT_FLAG_NONBLOCK_CONNECT;
	rc = 0;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC )
        LOG_E(ci_log("Onload does not support TCP disconnect via "

                     "connect(addr->sa_family==AF_UNSPEC)"));
      CI_SET_ERROR(rc, EISCONN);
    }
    else if( s->b.state == CI_TCP_LISTEN ) {
#if CI_CFG_POSIX_CONNECT_AFTER_LISTEN
      CI_SET_ERROR(rc, EOPNOTSUPP);
#else
      if( ci_tcp_validate_sa(s->domain, serv_addr, addrlen) ) {
        /* Request should be forwarded to OS */
        rc = CI_SOCKET_HANDOVER;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC ) {
        /* Linux does listen shutdown on disconnect (AF_UNSPEC) */
        /* The lock is released here before calling ci_tcp_shutdown(), so
         * the exit goes to [out], skipping the unlock. */
        ci_netif_unlock(ep->netif);
        rc = ci_tcp_shutdown(ep, SHUT_RD, fd);
	goto out;
      } else {
        /* Linux has curious error reporting in this case */
        CI_SET_ERROR(rc, EISCONN);
      }
#endif
    }
    else {
      /* Socket is in SYN-SENT state. Let's block for receiving SYN-ACK */
      ci_assert_equal(s->b.state, CI_TCP_SYN_SENT);
      if( s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) )
        CI_SET_ERROR(rc, EALREADY);
      else
        goto syn_sent;
    }
    goto unlock_out;
  }

  /* Check if we've ever been connected. */
  if( ts->tcpflags & CI_TCPT_FLAG_WAS_ESTAB ) {
    CI_SET_ERROR(rc, EISCONN);
    goto unlock_out;
  }

  /* 
   * 2. Check address parameter, if it's inappropriate for handover
   *    decision or handover should be done, try to call OS and
   *    do handover on success.
   */

  /* Note: dst_be32 is assigned inside this condition; it is only read
   * afterwards on the path where ci_tcp_validate_sa() returned zero and the
   * assignment was therefore evaluated. */
  if (
    /* At first, check that address family and length is OK. */
    ci_tcp_validate_sa(s->domain, serv_addr, addrlen)
    /* rfc793 p54 if the foreign socket is unspecified return          */
    /* "error: foreign socket unspecified" (EINVAL), but keep it to OS */
    || (dst_be32 = ci_get_ip4_addr(inaddr->sin_family, serv_addr)) == 0
    /* Zero destination port is tricky as well, keep it to OS */
    || inaddr->sin_port == 0 )
  {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }
  
  /* is this a socket that we can handle? */
  rc = ci_tcp_connect_check_dest(ep, dst_be32, inaddr->sin_port);
  if( rc )  goto unlock_out;

  if( (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) &&
      OO_SP_IS_NULL(ts->local_peer) ) {
    /* Try to connect to another stack; handover if can't */
    struct oo_op_loopback_connect op;
    op.dst_port = inaddr->sin_port;
    op.dst_addr = dst_be32;
    /* this operation unlocks netif */
    rc = oo_resource_op(fd, OO_IOC_TCP_LOOPBACK_CONNECT, &op);
    /* Every exit from this branch returns directly, without going through
     * [unlock_out], in line with the note above that the operation has
     * already released the lock. */
    if( rc < 0)
      return CI_SOCKET_HANDOVER;
    if( op.out_moved )
      *p_moved = 1;
    if( op.out_rc == -EINPROGRESS )
      RET_WITH_ERRNO( EINPROGRESS );
    else if( op.out_rc == -EAGAIN )
      return -EAGAIN;
    else if( op.out_rc != 0 )
      return CI_SOCKET_HANDOVER;
    return 0;
  }

  /* filters can't handle alien source address */
  if( (s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN) &&
      ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) ) {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }

  /* Kick off the connect.  [rc] is passed by pointer so the helper can set
   * the value to return; [crc] selects how to leave. */
  crc = ci_tcp_connect_ul_start(ep->netif, ts, dst_be32, inaddr->sin_port, &rc);
  if( crc != CI_CONNECT_UL_OK ) {
    switch( crc ) {
    case CI_CONNECT_UL_FAIL:
      goto unlock_out;
    case CI_CONNECT_UL_LOCK_DROPPED:
      /* Skips the unlock; presumably the helper has already released the
       * stack lock in this case (as the name suggests) - confirm against
       * ci_tcp_connect_ul_start(). */
      goto out;
    case CI_CONNECT_UL_START_AGAIN:
      goto start_again;
    }
  }
  CI_TCP_STATS_INC_ACTIVE_OPENS( ep->netif );

 syn_sent:
  /* Reached either after a fresh SYN was started above, or directly from
   * the state check when a blocking socket is already in SYN-SENT. */
  rc = ci_tcp_connect_ul_syn_sent(ep->netif, ts);

 unlock_out:
  ci_netif_unlock(ep->netif);
 out:
  return rc;
}