예제 #1
0
/* Dissolve the association of a connected UDP socket.
 *
 * Calls ci_udp_sys_getsockname() on the OS socket, zeroes the remote
 * address and port, then removes and re-installs the socket's filters and
 * finally clears OO_SCP_CONNECTED.
 *
 * Returns the (non-zero) result of ci_udp_sys_getsockname() when that call
 * fails; otherwise 0.  A failure to re-install the filters is logged but is
 * not reported to the caller.
 */
static int
ci_udp_disconnect(citp_socket* ep, ci_udp_state* us, ci_fd_t os_sock)
{
    int status = ci_udp_sys_getsockname(os_sock, ep);

    if( status != 0 ) {
        LOG_E(log(FNS_FMT "ERROR: sys_getsockname failed (%d)",
                  FNS_PRI_ARGS(ep->netif, ep->s), errno));
        return status;
    }

    /* The socket no longer has a peer. */
    ci_udp_set_raddr(us, 0, 0);

    /* TODO: Clearing and then setting is not ideal; the wildcard filters
     * should be inserted before the full-match ones are removed (the
     * reverse of what connect() does).  Probably not worth worrying about
     * in this case.
     */
    ci_udp_clr_filters(ep);

#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL ) {
        us->s.ofe_code_start = ofe_socktbl_find(
                                   ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                   udp_laddr_be32(us), udp_raddr_be32(us),
                                   udp_lport_be16(us), udp_rport_be16(us));
    }
#endif

    status = ci_udp_set_filters(ep, us);
    if( status != 0 ) {
        /* Not too bad -- should still get packets via OS socket. */
        LOG_U(log(FNS_FMT "ERROR: ci_udp_set_filters failed (%d)",
                  FNS_PRI_ARGS(ep->netif, ep->s), errno));
    }

    us->s.cp.sock_cp_flags &= ~OO_SCP_CONNECTED;
    return 0;
}
예제 #2
0
/* Install the filters for a UDP socket.
 *
 * Does nothing (and returns 0) while the socket has no local port.
 * Otherwise asks ci_tcp_ep_set_filters() to install filters honouring the
 * socket's so_bindtodevice setting.
 *
 * A result of -EFILTERSSOME is tolerated when CITP_OPTS.no_fail is set;
 * when it is not set the filters are cleared again and the failure is
 * reported as ENOBUFS.
 *
 * Returns 0 on success (CI_UDPF_FILTERED is then set on the socket), or the
 * value left in the result variable by CI_SET_ERROR() on failure.
 */
static int ci_udp_set_filters(citp_socket* ep, ci_udp_state* us)
{
    int status;

    ci_assert(ep);
    ci_assert(us);

    /* Unbound socket: nothing to filter on yet. */
    if( udp_lport_be16(us) == 0 ) {
        return 0;
    }

    status = ci_tcp_ep_set_filters(ep->netif, S_SP(us),
                                   us->s.cp.so_bindtodevice, OO_SP_NULL);

    if( status == -EFILTERSSOME ) {
        if( ! CITP_OPTS.no_fail ) {
            /* Partial installation is not acceptable: undo it. */
            ci_tcp_ep_clear_filters(ep->netif, S_SP(us), 0);
            status = -ENOBUFS;
        }
        else {
            status = 0;
        }
    }

    if( status < 0 ) {
        LOG_UC(log(FNS_FMT "ci_tcp_ep_set_filters failed (%d)",
                   FNS_PRI_ARGS(ep->netif, ep->s), -status));
        CI_SET_ERROR(status, -status);
        return status;
    }

    UDP_SET_FLAG(us, CI_UDPF_FILTERED);
    return 0;
}
예제 #3
0
파일: udp.c 프로젝트: davenso/openonload
/* Record the local address and port of a UDP socket.
 *
 * The address and port are written both through the udp_laddr_be32() /
 * udp_lport_be16() accessors and into the socket's [cp] fields.  A
 * multicast local address is stored as 0 in cp.ip_laddr_be32.
 */
void ci_udp_set_laddr(citp_socket* ep, unsigned laddr_be32, int lport_be16)
{
  ci_udp_state* state = SOCK_TO_UDP(ep->s);

  udp_laddr_be32(state) = laddr_be32;
  udp_lport_be16(state) = (ci_uint16) lport_be16;

  state->s.cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(laddr_be32) ? 0 : laddr_be32;
  state->s.cp.lport_be16 = lport_be16;
}
예제 #4
0
/* Format the local and remote address/port of a UDP socket as
 * "L[addr:port] R[addr:port]".
 *
 * Returns a pointer to a function-local static buffer.  The contents are
 * overwritten by the next call, so the result must be consumed (or copied)
 * before calling again, and the function is not safe to call concurrently.
 */
static char * ci_udp_addr_str( ci_udp_state* us )
{
    static char buf[128];

    ci_assert(us);
    /* Bounded write: never run past the end of [buf], whatever
     * ip_addr_str() returns. */
    snprintf( buf, sizeof(buf), "L[%s:%d] R[%s:%d]",
              ip_addr_str( udp_laddr_be32(us)),
              CI_BSWAP_BE16(udp_lport_be16(us)),
              ip_addr_str( udp_raddr_be32(us)),
              CI_BSWAP_BE16(udp_rport_be16(us)) );
    return buf;
}
예제 #5
0
파일: udp.c 프로젝트: majek/openonload
/* Initialise all the fields that we can in the UDP state structure.
 * There are no IP options, no destination addresses and no ports at this
 * point: both the local and the remote port are set to 0.
 *
 * The common socket part (us->s) is initialised first by
 * ci_sock_cmn_init(); everything that follows it in *us is then set up
 * here.
 */
static void ci_udp_state_init(ci_netif* netif, ci_udp_state* us)
{
  ci_sock_cmn_init(netif, &us->s, 1);

  /* IP_MULTICAST_LOOP is 1 by default, so we should not send multicast
   * unless specially permitted */
  if( ! NI_OPTS(netif).force_send_multicast )
    us->s.cp.sock_cp_flags |= OO_SCP_NO_MULTICAST;

  /* Poison: fill every byte of *us that lies after the common socket part
   * with 0xf0, so that fields missed by the initialisation below stand out.
   * (Wrapped in CI_DEBUG -- presumably a debug-build-only operation.) */
  CI_DEBUG(memset(&us->s + 1, 0xf0, (char*) (us + 1) - (char*) (&us->s + 1)));

  /*! \todo This should be part of sock_cmn reinit, but the comment to that
   * function suggests that it's possibly not a good plan to move it there */

#if CI_CFG_TIMESTAMPING
  ci_udp_recv_q_init(&us->timestamp_q);
#endif

  /*! \todo These two should really be handled in ci_sock_cmn_init() */

  /* Make sure we don't hit any state assertions. Can use
   *  UDP_STATE_FROM_SOCKET_EPINFO() after this. */
  us->s.b.state = CI_TCP_STATE_UDP;

  /* Socket buffer sizes start at the per-stack UDP defaults. */
  us->s.so.sndbuf = NI_OPTS(netif).udp_sndbuf_def;
  us->s.so.rcvbuf = NI_OPTS(netif).udp_rcvbuf_def;

  /* Init the ip-caches (packet header templates). */
  ci_udp_hdrs_init(&us->s.pkt);
  ci_ip_cache_init(&us->ephemeral_pkt);
  ci_udp_hdrs_init(&us->ephemeral_pkt);
  udp_lport_be16(us) = 0;
  udp_rport_be16(us) = 0;

#if CI_CFG_ZC_RECV_FILTER
  us->recv_q_filter = 0;
  us->recv_q_filter_arg = 0;
#endif
  /* Receive queue, kernel-datagram bookkeeping and transmit queue all
   * start out empty. */
  ci_udp_recv_q_init(&us->recv_q);
  us->zc_kernel_datagram = OO_PP_NULL;
  us->zc_kernel_datagram_count = 0;
  us->tx_async_q = CI_ILL_END;
  oo_atomic_set(&us->tx_async_q_level, 0);
  us->tx_count = 0;
  /* The only UDP flag set initially is CI_UDPF_MCAST_LOOP. */
  us->udpflags = CI_UDPF_MCAST_LOOP;
  us->ip_pktinfo_cache.intf_i = -1;
  us->stamp = 0;
  memset(&us->stats, 0, sizeof(us->stats));
}
예제 #6
0
/* Finish binding a UDP endpoint.  Kept separate from the main bind code so
 * that it can also serve the implicit bind that happens when sendto() is
 * called on an OS socket.  [lport] need not equal CI_SIN(addr)->sin_port.
 *
 * Returns CI_SOCKET_HANDOVER when the socket should be handed over (either
 * ci_udp_should_handover() says so, or filter installation failed and
 * CITP_OPTS.no_fail is set); otherwise the result of ci_udp_set_filters().
 */
static int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr,
                                ci_uint16 lport )
{
    ci_udp_state* udp_state;
    ci_uint32 bind_ip_be32;
    int filter_rc;

    CHECK_UEP(ep);
    ci_assert(addr != NULL);

    if( ! ci_udp_should_handover(ep, addr, lport) ) {
        bind_ip_be32 = ci_get_ip4_addr(ep->s->domain, addr);

        ci_udp_set_laddr(ep, bind_ip_be32, lport);
        udp_state = SOCK_TO_UDP(ep->s);
        if( bind_ip_be32 != 0 ) {
            udp_state->s.cp.sock_cp_flags |= OO_SCP_LADDR_BOUND;
        }
        /* reset any rx/tx that have taken place already */
        UDP_CLR_FLAG(udp_state, CI_UDPF_EF_SEND);

#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL ) {
            udp_state->s.ofe_code_start = ofe_socktbl_find(
                                   ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                   udp_laddr_be32(udp_state),
                                   udp_raddr_be32(udp_state),
                                   udp_lport_be16(udp_state),
                                   udp_rport_be16(udp_state));
        }
#endif

        /* OS source addrs have already been handed-over, so this must be
         * one of our src addresses.
         */
        filter_rc = ci_udp_set_filters( ep, udp_state);
        ci_assert( !UDP_GET_FLAG(udp_state, CI_UDPF_EF_BIND) );
        /*! \todo FIXME isn't the port the thing to be testing here? */
        if( udp_laddr_be32(udp_state) != INADDR_ANY_BE32 ) {
            UDP_SET_FLAG(udp_state, CI_UDPF_EF_BIND);
        }
        CI_UDPSTATE_SHOW_EP( ep );

        /* Only a filter failure with no_fail set falls through to the
         * handover below; every other outcome is returned as-is. */
        if( filter_rc != CI_SOCKET_ERROR || ! CITP_OPTS.no_fail ) {
            return filter_rc;
        }
        CITP_STATS_NETIF(++ep->netif->state->stats.udp_bind_no_filter);
    }

    LOG_UV(log("%s: "SK_FMT" HANDOVER", __FUNCTION__, SK_PRI_ARGS(ep)));
    return CI_SOCKET_HANDOVER;
}
예제 #7
0
파일: udp.c 프로젝트: majek/openonload
/* Dump the state and statistics of a UDP socket through a logging callback.
 *
 *   ni       stack the socket belongs to (passed on to the queue and
 *            ip-cache dump helpers)
 *   us       UDP socket state to report on
 *   pf       prefix string emitted at the start of every output line
 *   logger   callback invoked once per output line with a printf-style
 *            format and arguments
 *   log_arg  opaque value passed as the first argument of every [logger]
 *            call
 *
 * The statistics are copied into a local snapshot ([uss]) up front and the
 * derived totals are computed from that snapshot.
 */
void ci_udp_state_dump(ci_netif* ni, ci_udp_state* us, const char* pf,
                      oo_dump_log_fn_t logger, void* log_arg)
{
  ci_udp_socket_stats uss = us->stats;
  unsigned rx_added = us->recv_q.pkts_added;
  unsigned rx_os = uss.n_rx_os + uss.n_rx_os_slow;
  /* Denominator for the receive percentages: queued + dropped + via OS. */
  unsigned rx_total = rx_added + uss.n_rx_mem_drop + uss.n_rx_overflow + rx_os;
  unsigned n_tx_onload = uss.n_tx_onload_uc + uss.n_tx_onload_c;
  /* Denominator for the send percentages: accelerated + via OS. */
  unsigned tx_total = n_tx_onload + uss.n_tx_os;
  ci_ip_cached_hdrs* ipcache;

  (void) rx_total;  /* unused on 32-bit builds in kernel */
  (void) tx_total;

#if CI_CFG_TIMESTAMPING
  /* The TX timestamp queue is only reported when hardware TX timestamping
   * is enabled on the socket. */
  if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_TX_HARDWARE )
    ci_udp_recvq_dump(ni, &us->timestamp_q, pf, "  TX timestamping queue:",
                      logger, log_arg);
#endif

  /* General. */
  logger(log_arg, "%s  udpflags: "CI_UDP_STATE_FLAGS_FMT, pf,
         CI_UDP_STATE_FLAGS_PRI_ARG(us));

  /* Receive path. */
  ci_udp_recvq_dump(ni, &us->recv_q, pf, "  rcv:", logger, log_arg);
  logger(log_arg,
         "%s  rcv: oflow_drop=%u(%u%%) mem_drop=%u eagain=%u pktinfo=%u "
         "q_max_pkts=%u", pf, uss.n_rx_overflow,
         percent(uss.n_rx_overflow, rx_total),
         uss.n_rx_mem_drop, uss.n_rx_eagain, uss.n_rx_pktinfo, 
         uss.max_recvq_pkts);
  logger(log_arg, "%s  rcv: os=%u(%u%%) os_slow=%u os_error=%u", pf,
         rx_os, percent(rx_os, rx_total), uss.n_rx_os_slow, uss.n_rx_os_error);

  /* Send path. */
  logger(log_arg, "%s  snd: q=%u+%u ul=%u os=%u(%u%%)", pf,
         us->tx_count, oo_atomic_read(&us->tx_async_q_level),
         n_tx_onload, uss.n_tx_os, percent(uss.n_tx_os, tx_total));
  /* Lock-related send counters, each as a share of accelerated sends. */
  logger(log_arg,
         "%s  snd: LOCK cp=%u(%u%%) pkt=%u(%u%%) snd=%u(%u%%) poll=%u(%u%%) "
         "defer=%u(%u%%)", pf,
         uss.n_tx_lock_cp,  percent(uss.n_tx_lock_cp,  n_tx_onload),
         uss.n_tx_lock_pkt,  percent(uss.n_tx_lock_pkt,  n_tx_onload),
         uss.n_tx_lock_snd,  percent(uss.n_tx_lock_snd,  n_tx_onload),
         uss.n_tx_lock_poll, percent(uss.n_tx_lock_poll, n_tx_onload),
         uss.n_tx_lock_defer, percent(uss.n_tx_lock_defer, n_tx_onload));

  /* Multicast send settings. */
  logger(log_arg, "%s  snd: MCAST if=%d src="OOF_IP4" ttl=%d", pf,
         us->s.cp.ip_multicast_if,
         OOFA_IP4(us->s.cp.ip_multicast_if_laddr_be32),
         (int) us->s.cp.ip_mcast_ttl);

  /* State relating to unconnected sends. */
  ipcache = &us->ephemeral_pkt;
  logger(log_arg,
         "%s  snd: TO n=%u match=%u(%u%%) "
         "lookup=%u+%u(%u%%) "OOF_IPCACHE_STATE,
         pf, uss.n_tx_onload_uc,
         uss.n_tx_cp_match, percent(uss.n_tx_cp_match, uss.n_tx_onload_uc),
         uss.n_tx_cp_uc_lookup, uss.n_tx_cp_a_lookup,
         percent(uss.n_tx_cp_uc_lookup + uss.n_tx_cp_a_lookup,
                 uss.n_tx_onload_uc),
         OOFA_IPCACHE_STATE(ni, ipcache));
  logger(log_arg, "%s  snd: TO "OOF_IPCACHE_DETAIL, pf,
         OOFA_IPCACHE_DETAIL(ipcache));
  logger(log_arg, "%s  snd: TO "OOF_IP4PORT" => "OOF_IP4PORT, pf,
         OOFA_IP4PORT(ipcache->ip_saddr.ip4, udp_lport_be16(us)),
         OOFA_IP4PORT(ipcache->ip.ip_daddr_be32, ipcache->dport_be16));
   
  /* State relating to connected sends. */
  ipcache = &us->s.pkt;
  logger(log_arg, "%s  snd: CON n=%d lookup=%d "OOF_IPCACHE_STATE, pf,
         uss.n_tx_onload_c, uss.n_tx_cp_c_lookup,
         OOFA_IPCACHE_STATE(ni,ipcache));
  logger(log_arg, "%s  snd: CON "OOF_IPCACHE_DETAIL, pf,
         OOFA_IPCACHE_DETAIL(ipcache));

  /* Remaining send-side counters. */
  logger(log_arg, "%s  snd: eagain=%d spin=%d block=%d", pf,
         uss.n_tx_eagain, uss.n_tx_spin, uss.n_tx_block);
  logger(log_arg, "%s  snd: poll_avoids_full=%d fragments=%d confirm=%d", pf,
         uss.n_tx_poll_avoids_full, uss.n_tx_fragments, uss.n_tx_msg_confirm);
  logger(log_arg,
         "%s  snd: os_slow=%d os_late=%d unconnect_late=%d nomac=%u(%u%%)", pf,
         uss.n_tx_os_slow, uss.n_tx_os_late, uss.n_tx_unconnect_late,
         uss.n_tx_cp_no_mac, percent(uss.n_tx_cp_no_mac, tx_total));
}
예제 #8
0
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be considered an internal error or failing of onload.
 *
 * Returns 0 when the connect (or disconnect) completed, CI_SOCKET_HANDOVER
 * when the socket should be handed over (filters are cleared first), or a
 * non-zero error result.  When CITP_OPTS.no_fail is set, negative error
 * results are converted into a handover.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
    const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
    ci_uint32 dst_be32;
    ci_udp_state* us = SOCK_TO_UDP(ep->s);
    int onloadable;
    int rc = 0;

    CHECK_UEP(ep);

    /* Start from a clean slate: forget earlier sends and pending errors. */
    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
    us->s.rx_errno = 0;
    us->s.tx_errno = 0;

    /* A "disconnecting" address dissolves the association instead. */
    if( IS_DISCONNECTING(serv_sin) ) {
        rc = ci_udp_disconnect(ep, us, os_sock);
        goto out;
    }
#if CI_CFG_FAKE_IPV6
    /* An IPv6 socket connecting to a non-IPv4 address is handed over. */
    if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
        LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
        goto handover;
    }
#endif

    dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);
    if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
        LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
                  FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                  CI_BSWAP_BE16(serv_sin->sin_port), errno));
        goto out;
    }

    /* Record the peer, then do the lookup that fills in us->s.pkt; its
     * status field is inspected below. */
    us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
    ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
    cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

    /* Only a lookup status of "success" or "nomac" counts as onloadable.
     * Anything else is handed over if the udp_connect_handover option is
     * set. */
    switch( us->s.pkt.status ) {
    case retrrc_success:
    case retrrc_nomac:
        onloadable = 1;
        break;
    default:
        onloadable = 0;
        if( NI_OPTS(ep->netif).udp_connect_handover ) {
            LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                       ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
            goto handover;
        }
        break;
    }

    /* A wildcard destination address or port: drop our filters so that
     * traffic goes via the OS socket, and report success. */
    if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
        LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
                   FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                   CI_BSWAP_BE16(serv_sin->sin_port)));
        ci_udp_clr_filters(ep);
        return 0;
    }
    if( CI_IP_IS_LOOPBACK(dst_be32) ) {
        /* After connecting via loopback it is not possible to connect anywhere
         * else.
         */
        LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                   ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
        goto handover;
    }

    if( onloadable ) {
#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
            /* Failed to set filters.  Most likely we've run out of h/w filters.
             * Handover to O/S to avoid breaking the app.
             *
             * TODO: Actually we probably won't break the app if we don't
             * handover, as packets will still get delivered via the kernel
             * stack.  Might be worth having a runtime option to choose whether
             * or not to handover in such cases.
             */
            LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                      FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                      CI_BSWAP_BE16(serv_sin->sin_port), rc));
            CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
            goto out;
        }
    }
    else {
        /* Not onloadable and not handed over: make sure no filters remain. */
        ci_udp_clr_filters(ep);
    }

    LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
               SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
               ip_addr_str(udp_laddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
               ip_addr_str(udp_raddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
    return 0;

out:
    /* Common exit for the disconnect and error paths: with no_fail set, a
     * negative result becomes a handover rather than an error. */
    if( rc < 0 && CITP_OPTS.no_fail )
        goto handover;
    return rc;

handover:
    ci_udp_clr_filters(ep);
    return CI_SOCKET_HANDOVER;
}
예제 #9
0
/* Slow-path checks for UDP recvmsg (by its name, called with the socket
 * lock held).
 *
 * In order, this:
 *   - notifies the stack when its rxq_low indication is set;
 *   - (user level only) services MSG_ERRQUEUE_CHK: if the TX timestamp queue
 *     holds an unconsumed packet, copies its UDP payload into the caller's
 *     iovec, attaches an ONLOAD_SCM_TIMESTAMPING control message and returns
 *     the result of the copy; otherwise defers to the OS socket and returns
 *     its result (-1 on failure);
 *   - reports a pending socket error;
 *   - rejects a NULL msg_iov with a non-zero msg_iovlen (EFAULT);
 *   - rejects MSG_OOB_CHK (EOPNOTSUPP) where that flag is defined non-zero;
 *   - with CI_CFG_POSIX_RECV, rejects an unbound socket (ENOTCONN);
 *   - handles msg_iovlen == 0 by returning IOVLEN_WORKAROUND_RC_VALUE.
 *
 * Returns 0 when none of the above applies.
 */
static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_iomsg_args* a, 
                                              ci_msghdr* msg,
                                              ci_iovec_ptr *piov, int flags)
{
  int rc = 0;
  ci_netif* ni = a->ni;
  ci_udp_state* us = a->us;

  if(CI_UNLIKELY( ni->state->rxq_low ))
    ci_netif_rxq_low_on_recv(ni, &us->s,
                             1 /* assume at least one pkt freed */);
  /* In the kernel recv() with flags is not called.
   * only read(). So flags may only contain MSG_DONTWAIT */
#ifdef __KERNEL__
  ci_assert_equal(flags, 0);
#endif

#ifndef __KERNEL__
  if( flags & MSG_ERRQUEUE_CHK ) {
    if( OO_PP_NOT_NULL(us->timestamp_q.extract) ) {
      ci_ip_pkt_fmt* pkt;
      struct timespec ts[3];
      struct cmsg_state cmsg_state;
      ci_udp_hdr* udp;
      int paylen;

      /* TODO is this necessary? - mirroring ci_udp_recvmsg_get() */
      ci_rmb();
      
      /* If the packet at the extract point has already been consumed, step
       * on to the next one; if there is none the queue is empty. */
      pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
      if( pkt->tx_hw_stamp.tv_sec == CI_PKT_TX_HW_STAMP_CONSUMED ) {
        if( OO_PP_IS_NULL(pkt->tsq_next) )
          goto errqueue_empty;
        us->timestamp_q.extract = pkt->tsq_next;
        pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
        ci_assert(pkt->tx_hw_stamp.tv_sec != CI_PKT_TX_HW_STAMP_CONSUMED);
      }

      /* Payload length = IP total length minus the IP and UDP headers.
       * The UDP header size must come from the pointed-to type: sizeof(udp)
       * would be the size of the pointer, which only matches the header
       * size on platforms with 8-byte pointers. */
      udp = oo_ip_data(pkt);
      paylen = CI_BSWAP_BE16(oo_ip_hdr(pkt)->ip_tot_len_be16) -
                        sizeof(ci_ip4_hdr) - sizeof(*udp);

      msg->msg_flags = 0;
      cmsg_state.msg = msg;
      cmsg_state.cm = msg->msg_control;
      cmsg_state.cmsg_bytes_used = 0;
      ci_iovec_ptr_init_nz(piov, msg->msg_iov, msg->msg_iovlen);
      memset(ts, 0, sizeof(ts));

      /* ts[2] carries the raw hardware stamp; ts[1] carries it only when
       * the stamp is flagged as in sync.  ts[0] is left zeroed. */
      if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_RAW_HARDWARE ) {
        ts[2].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[2].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      if( (us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_SYS_HARDWARE) &&
          (pkt->tx_hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) ) {
        ts[1].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[1].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING,
                  sizeof(ts), &ts);
      /* Point the packet's buffer at the UDP payload and copy it out. */
      oo_offbuf_set_start(&pkt->buf, udp + 1);
      oo_offbuf_set_len(&pkt->buf, paylen);
      rc = oo_copy_pkt_to_iovec_no_adv(ni, pkt, piov, paylen);

      /* Mark this packet/timestamp as consumed */
      pkt->tx_hw_stamp.tv_sec = CI_PKT_TX_HW_STAMP_CONSUMED;

      ci_ip_cmsg_finish(&cmsg_state);
      msg->msg_flags |= MSG_ERRQUEUE_CHK;
      return rc;
    }
  errqueue_empty:
    /* ICMP is handled via OS, so get OS error */
    rc = oo_os_sock_recvmsg(ni, SC_SP(&us->s), msg, flags);
    if( rc < 0 ) {
      ci_assert(-rc == errno);
      return -1;
    }
    else
      return rc;
  }
#endif
  /* A pending socket error takes priority over everything that follows. */
  if( (rc = ci_get_so_error(&us->s)) != 0 ) {
    CI_SET_ERROR(rc, rc);
    return rc;
  }
  if( msg->msg_iovlen > 0 && msg->msg_iov == NULL ) {
    CI_SET_ERROR(rc, EFAULT);
    return rc;
  }
#if MSG_OOB_CHK
  if( flags & MSG_OOB_CHK ) {
    CI_SET_ERROR(rc, EOPNOTSUPP);
    return rc;
  }
#endif
#if CI_CFG_POSIX_RECV  
  if( ! udp_lport_be16(us)) {
    LOG_UV(log("%s: -1 (ENOTCONN)", __FUNCTION__));
    CI_SET_ERROR(rc, ENOTCONN);
    return rc;
  }
#endif
  if( msg->msg_iovlen == 0 ) {
    /* We have a difference in behaviour from the Linux stack here.  When
    ** msg_iovlen is 0 Linux 2.4.21-15.EL does not set MSG_TRUNC when a
    ** datagram has non-zero length.  We do. */
    CI_IOVEC_LEN(&piov->io) = piov->iovlen = 0;
    return IOVLEN_WORKAROUND_RC_VALUE;
  }
  return 0;
}