Example #1
static int ci_tcp_setsockopt_lk(citp_socket* ep, ci_fd_t fd, int level,
				int optname, const void* optval,
				socklen_t optlen )
{
  ci_sock_cmn* s = ep->s;
#if defined(__linux__) || \
    defined(__sun__) && defined(TCP_KEEPALIVE_THRESHOLD) || \
    defined(__sun__) && defined(TCP_KEEPALIVE_ABORT_THRESHOLD)
  ci_tcp_socket_cmn* c = &(SOCK_TO_WAITABLE_OBJ(s)->tcp.c);
#endif
  ci_netif* netif = ep->netif;
  int zeroval = 0;
  int rc;

  /* ?? what to do about optval and optlen checking
  ** Kernel can raise EFAULT, here we are a little in the dark.
  ** Note: If the OS sock is sync'd then we get this checking for free.
  */

  if (optlen == 0) {
    /* Match kernel behaviour: if length is 0, it treats the value as 0; and
     * some applications rely on this.
     */
    optval = &zeroval;
    optlen = sizeof(zeroval);
  }

  /* If you're adding to this please remember to look in common_sockopts.c
   * and decide if the option is common to all protocols. */

  if(level == SOL_SOCKET) {
    switch(optname) {
    case SO_KEEPALIVE:
      /* Override the default common handler.
       * Enable sending of keep-alive messages */
      if( (rc = opt_not_ok(optval, optlen, unsigned)) )
      	goto fail_inval;

      if( *(unsigned*) optval ) {
	unsigned prev_flags = s->s_flags;
	s->s_flags |= CI_SOCK_FLAG_KALIVE;
	/* Set the KEEPALIVE timer only if we are not in the
	** CLOSED or LISTEN state. */
	if( s->b.state != CI_TCP_CLOSED && s->b.state != CI_TCP_LISTEN &&
	    !(prev_flags & CI_SOCK_FLAG_KALIVE) ) {
	  ci_tcp_state* ts = SOCK_TO_TCP(s);
	  LOG_TV(log("%s: "NSS_FMT" run KEEPALIVE timer from setsockopt()",
		     __FUNCTION__, NSS_PRI_ARGS(netif, s)));
	  ci_assert(ts->ka_probes == 0);
	  ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
	}
      }
      else {
      	s->s_flags &=~ CI_SOCK_FLAG_KALIVE;
	if( s->b.state != CI_TCP_LISTEN ) {
	  ci_tcp_state* ts = SOCK_TO_TCP(s);
	  ci_tcp_kalive_check_and_clear(netif, ts);
	  ts->ka_probes = 0;
	}
      }
      break;

    default:
      {
        /* Common socket level options */
        return ci_set_sol_socket(netif, s, optname, optval, optlen);
      }
    }
  }
  else if( level == IPPROTO_IP ) {
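
For context, here is a minimal application-side sketch (standard POSIX only, nothing Onload-specific; error handling trimmed) of the kind of SO_KEEPALIVE call this handler services. The program below is a hypothetical caller, not part of the stack itself.

/* Hypothetical caller sketch: enable keep-alive on a TCP socket with the
 * plain POSIX API; under LD_PRELOAD interception this is the call that
 * reaches ci_tcp_setsockopt_lk() above. */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

int main(void)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  int one = 1;

  if( fd < 0 )
    return 1;
  /* Enable sending of keep-alive probes on this socket. */
  if( setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)) != 0 )
    perror("setsockopt(SO_KEEPALIVE)");
  close(fd);
  return 0;
}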
Example #2
/* In this bind handler we just check that the address to which we are
 * binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier, (TS_TCP( epi->tcpep.state )->tcp_source_be16) was used.
   *       Which is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );

  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in) 
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif
  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);
 
  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) ) 
    CI_LOGLEVEL_TRY_RET(LOG_TV,
		        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port; 
  sock_laddr_be32(s) = addr_be32;
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
	     (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
	     CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s)))); 

  return 0;
}
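
For context, a minimal application-side sketch (standard POSIX only, not Onload-specific; port 8080 is an arbitrary illustration) of the bind pattern this handler services. Note that the handler above applies SO_REUSEPORT to the backing OS socket based on CITP_OPTS.tcp_reuseports rather than on the caller's request; the explicit setsockopt() below is just the conventional application-side counterpart.

/* Hypothetical caller sketch: bind to the "any" address, optionally asking
 * for port sharing with SO_REUSEPORT. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

int main(void)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  int one = 1;
  struct sockaddr_in sa;

  if( fd < 0 )
    return 1;
  /* Request port sharing before binding. */
  setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

  memset(&sa, 0, sizeof(sa));
  sa.sin_family = AF_INET;
  sa.sin_addr.s_addr = htonl(INADDR_ANY);   /* "any" address, as checked above */
  sa.sin_port = htons(8080);
  if( bind(fd, (struct sockaddr*) &sa, sizeof(sa)) != 0 )
    perror("bind");
  close(fd);
  return 0;
}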
Example #3
/* [fd] is unused in the kernel version */
int ci_tcp_getsockopt(citp_socket* ep, ci_fd_t fd, int level,
		      int optname, void *optval, socklen_t *optlen )
{
  ci_sock_cmn* s = ep->s;
#if defined(__linux__) || \
    defined(__sun__) && defined(TCP_KEEPALIVE_THRESHOLD) || \
    defined(__sun__) && defined(TCP_KEEPALIVE_ABORT_THRESHOLD)
  ci_tcp_socket_cmn *c = &(SOCK_TO_WAITABLE_OBJ(s)->tcp.c);
#endif
  ci_netif* netif = ep->netif;
  unsigned u = 0;

  /* NOTE: The setsockopt() call is reflected into the OS socket to
   * keep the two in sync; it's assumed that we know enough to give
   * good answers here, so we don't bother the OS with the get call. */

  /* ?? what to do about optval and optlen checking
   * Kernel can raise EFAULT, here we are a little in the dark.
   *  - sockcall_intercept.c checks that optlen is non-NULL and if *optlen
   *    is non-zero that optval is non-NULL, returning EFAULT if false
   */

  if(level == SOL_SOCKET) {
    /* Common SOL_SOCKET handler */
    return ci_get_sol_socket(netif, s, optname, optval, optlen);

  } else if (level ==  IPPROTO_IP) {
    /* IP level options valid for TCP */
    return ci_get_sol_ip(ep, s, fd, optname, optval, optlen);

#if CI_CFG_FAKE_IPV6
  } else if (level ==  IPPROTO_IPV6 && s->domain == AF_INET6) {
    /* IP6 level options valid for TCP */
    return ci_get_sol_ip6(ep, s, fd, optname, optval, optlen);
#endif

  } else if (level == IPPROTO_TCP) {
    /* TCP level options valid for TCP */
    switch(optname){
    case TCP_NODELAY:
      /* gets status of TCP Nagle algorithm  */
      u = ((s->s_aflags & CI_SOCK_AFLAG_NODELAY) != 0);
      goto u_out;
    case TCP_MAXSEG:
      /* gets the MSS size for this connection */
      if ((s->b.state & CI_TCP_STATE_TCP_CONN)) {
        u = tcp_eff_mss(SOCK_TO_TCP(s));
      } else {
        u = 536;
      }
      goto u_out;
# ifdef TCP_CORK
    case TCP_CORK:
      /* don't send partial frames; all partial frames are sent
      ** when the option is cleared */
      u = ((s->s_aflags & CI_SOCK_AFLAG_CORK) != 0);
      goto u_out;
# endif

    case TCP_KEEPIDLE:
      {
        /* idle time for keepalives  */
        u = (unsigned) c->t_ka_time_in_secs;
      }
      goto u_out;
    case TCP_KEEPINTVL:
      {
        /* time between keepalives */
        u = (unsigned) c->t_ka_intvl_in_secs;
      }
      goto u_out;
    case TCP_KEEPCNT:
      {
        /* number of keepalives before giving up */
        u = c->ka_probe_th;
      }
      goto u_out;
    case TCP_INFO:
      /* struct tcp_info to be filled */
      return ci_tcp_info_get(netif, s, (struct ci_tcp_info*) optval);
    case TCP_DEFER_ACCEPT:
      {
        u = 0;
        if( c->tcp_defer_accept != OO_TCP_DEFER_ACCEPT_OFF ) {
          u = ci_ip_time_ticks2ms(netif, NI_CONF(netif).tconst_rto_initial);
          u = ((u + 500) / 1000) << c->tcp_defer_accept;
        }
        goto u_out;
      }
    case TCP_QUICKACK:
      {
        u = 0;
        if( s->b.state & CI_TCP_STATE_TCP_CONN ) {
          ci_tcp_state* ts = SOCK_TO_TCP(s);
          u = ci_tcp_is_in_faststart(ts);
        }
        goto u_out;
      }
    default:
      LOG_TC( log(LPF "getsockopt: unimplemented or bad option: %i", 
                  optname));
      RET_WITH_ERRNO(ENOPROTOOPT);
    }
  } else {
    SOCKOPT_RET_INVALID_LEVEL(s);
  }

  return 0;

 u_out:
  return ci_getsockopt_final(optval, optlen, level, &u, sizeof(u));
}
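
For context, a minimal application-side sketch (standard POSIX only, not Onload-specific) of the queries this handler answers locally without consulting the OS; on an unconnected socket the TCP_MAXSEG branch above reports the 536-byte default.

/* Hypothetical caller sketch: read TCP_MAXSEG and TCP_NODELAY with the
 * plain POSIX getsockopt() API. */
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <unistd.h>

int main(void)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  int mss = 0, nodelay = 0;
  socklen_t len;

  if( fd < 0 )
    return 1;
  len = sizeof(mss);
  if( getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, &len) == 0 )
    printf("TCP_MAXSEG = %d\n", mss);

  len = sizeof(nodelay);
  if( getsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, &len) == 0 )
    printf("TCP_NODELAY = %d\n", nodelay);

  close(fd);
  return 0;
}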
Example #4
/* Returns:
 *          0                  on success
 *          
 *          CI_SOCKET_ERROR (and errno set)
 *                             this is a normal error that is returned to
 *                             the application
 *
 *          CI_SOCKET_HANDOVER we tell the upper layers to hand over; no need
 *                             to set errno since it isn't a real error
 */
int ci_tcp_connect(citp_socket* ep, const struct sockaddr* serv_addr,
		   socklen_t addrlen, ci_fd_t fd, int *p_moved)
{
  /* Address family is validated earlier. */
  struct sockaddr_in* inaddr = (struct sockaddr_in*) serv_addr;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* ts = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc = 0, crc;
  ci_uint32 dst_be32;

  if( NI_OPTS(ep->netif).tcp_connect_handover )
    return CI_SOCKET_HANDOVER;

  /* Make sure we're up-to-date. */
  ci_netif_lock(ep->netif);
  CHECK_TEP(ep);
  ci_netif_poll(ep->netif);

  /*
   * 1. Check if state of the socket is OK for connect operation.
   */

 start_again:

  if( (rc = ci_tcp_connect_handle_so_error(s)) != 0) {
    CI_SET_ERROR(rc, rc);
    goto unlock_out;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    /* see if progress can be made on this socket before
    ** determining status  (e.g. non-blocking connect and connect poll)*/
    if( s->b.state & CI_TCP_STATE_SYNCHRONISED ) {
      if( ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT ) {
        ts->tcpflags &= ~CI_TCPT_FLAG_NONBLOCK_CONNECT;
	rc = 0;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC )
        LOG_E(ci_log("Onload does not support TCP disconnect via "

                     "connect(addr->sa_family==AF_UNSPEC)"));
      CI_SET_ERROR(rc, EISCONN);
    }
    else if( s->b.state == CI_TCP_LISTEN ) {
#if CI_CFG_POSIX_CONNECT_AFTER_LISTEN
      CI_SET_ERROR(rc, EOPNOTSUPP);
#else
      if( ci_tcp_validate_sa(s->domain, serv_addr, addrlen) ) {
        /* Request should be forwarded to OS */
        rc = CI_SOCKET_HANDOVER;
	goto unlock_out;
      }
      if( serv_addr->sa_family == AF_UNSPEC ) {
        /* Linux does listen shutdown on disconnect (AF_UNSPEC) */
        ci_netif_unlock(ep->netif);
        rc = ci_tcp_shutdown(ep, SHUT_RD, fd);
	goto out;
      } else {
        /* Linux has curious error reporting in this case */
        CI_SET_ERROR(rc, EISCONN);
      }
#endif
    }
    else {
      /* Socket is in the SYN-SENT state; block until the SYN-ACK arrives. */
      ci_assert_equal(s->b.state, CI_TCP_SYN_SENT);
      if( s->b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) )
        CI_SET_ERROR(rc, EALREADY);
      else
        goto syn_sent;
    }
    goto unlock_out;
  }

  /* Check if we've ever been connected. */
  if( ts->tcpflags & CI_TCPT_FLAG_WAS_ESTAB ) {
    CI_SET_ERROR(rc, EISCONN);
    goto unlock_out;
  }

  /* 
   * 2. Check the address parameter; if it's inappropriate for the handover
   *    decision, or if handover should be done, try to call the OS and
   *    hand over on success.
   */

  if (
    /* First, check that the address family and length are OK. */
    ci_tcp_validate_sa(s->domain, serv_addr, addrlen)
    /* rfc793 p54 if the foreign socket is unspecified return          */
    /* "error: foreign socket unspecified" (EINVAL), but keep it to OS */
    || (dst_be32 = ci_get_ip4_addr(inaddr->sin_family, serv_addr)) == 0
    /* Zero destination port is tricky as well, keep it to OS */
    || inaddr->sin_port == 0 )
  {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }
  
  /* is this a socket that we can handle? */
  rc = ci_tcp_connect_check_dest(ep, dst_be32, inaddr->sin_port);
  if( rc )  goto unlock_out;

  if( (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) &&
      OO_SP_IS_NULL(ts->local_peer) ) {
    /* Try to connect to another stack; handover if can't */
    struct oo_op_loopback_connect op;
    op.dst_port = inaddr->sin_port;
    op.dst_addr = dst_be32;
    /* this operation unlocks netif */
    rc = oo_resource_op(fd, OO_IOC_TCP_LOOPBACK_CONNECT, &op);
    if( rc < 0)
      return CI_SOCKET_HANDOVER;
    if( op.out_moved )
      *p_moved = 1;
    if( op.out_rc == -EINPROGRESS )
      RET_WITH_ERRNO( EINPROGRESS );
    else if( op.out_rc == -EAGAIN )
      return -EAGAIN;
    else if( op.out_rc != 0 )
      return CI_SOCKET_HANDOVER;
    return 0;
  }

  /* filters can't handle alien source address */
  if( (s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN) &&
      ! (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) ) {
    rc = CI_SOCKET_HANDOVER;
    goto unlock_out;
  }

  crc = ci_tcp_connect_ul_start(ep->netif, ts, dst_be32, inaddr->sin_port, &rc);
  if( crc != CI_CONNECT_UL_OK ) {
    switch( crc ) {
    case CI_CONNECT_UL_FAIL:
      goto unlock_out;
    case CI_CONNECT_UL_LOCK_DROPPED:
      goto out;
    case CI_CONNECT_UL_START_AGAIN:
      goto start_again;
    }
  }
  CI_TCP_STATS_INC_ACTIVE_OPENS( ep->netif );

 syn_sent:
  rc = ci_tcp_connect_ul_syn_sent(ep->netif, ts);

 unlock_out:
  ci_netif_unlock(ep->netif);
 out:
  return rc;
}
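
For context, a minimal application-side sketch (standard POSIX only, not Onload-specific; 192.0.2.1:80 is a documentation placeholder) of the non-blocking connect pattern whose EINPROGRESS/EALREADY/EISCONN semantics this handler reproduces.

/* Hypothetical caller sketch: non-blocking connect(), wait for writability,
 * then read SO_ERROR to learn the outcome. */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

int main(void)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  struct sockaddr_in sa;
  struct pollfd pfd;
  int err = 0;
  socklen_t len = sizeof(err);

  if( fd < 0 )
    return 1;
  fcntl(fd, F_SETFL, O_NONBLOCK);

  memset(&sa, 0, sizeof(sa));
  sa.sin_family = AF_INET;
  sa.sin_port = htons(80);
  inet_pton(AF_INET, "192.0.2.1", &sa.sin_addr);

  if( connect(fd, (struct sockaddr*) &sa, sizeof(sa)) != 0 &&
      errno == EINPROGRESS ) {
    /* Connection is in SYN-SENT: a second connect() now would see EALREADY,
     * and a connect() after success would see EISCONN. */
    pfd.fd = fd;
    pfd.events = POLLOUT;
    if( poll(&pfd, 1, 5000) == 1 &&
        getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err == 0 )
      printf("connected\n");
    else
      printf("connect failed: %s\n", strerror(err ? err : errno));
  }
  close(fd);
  return 0;
}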