Example 1
static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt,
                                 struct onload_zc_msg* zc_msg)
{
  int i, bytes_left = pkt->pf.udp.pay_len;
  ci_ip_pkt_fmt* frag;
  ci_ip_pkt_fmt* handle_frag;

  handle_frag = frag = pkt;
  i = 0;
  ci_assert_nequal(zc_msg->iov, NULL);

  /* Ignore first frag if zero length and there is another frag, but
   * still pass the zero-length buffer as the onload_zc_handle so it
   * will get freed correctly
   */
  if( oo_offbuf_left(&frag->buf) == 0 && OO_PP_NOT_NULL(frag->frag_next) )
    frag = PKT_CHK_NNL(ni, frag->frag_next);

  do {
    zc_msg->iov[i].iov_len = CI_MIN(oo_offbuf_left(&frag->buf), 
                                    bytes_left);
    zc_msg->iov[i].iov_base = oo_offbuf_ptr(&frag->buf);
    zc_msg->iov[i].buf = (onload_zc_handle)handle_frag;
    zc_msg->iov[i].iov_flags = 0;
    bytes_left -= zc_msg->iov[i].iov_len;
    ++i;
    if( OO_PP_IS_NULL(frag->frag_next) || 
        (i == CI_UDP_ZC_IOVEC_MAX) ||
        (bytes_left == 0) )
      break;
    frag = PKT_CHK_NNL(ni, frag->frag_next);
    handle_frag = frag;
  } while( 1 );
  zc_msg->msghdr.msg_iovlen = i;
}
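
For context, here is a minimal standalone sketch of the chain-walking pattern the function above relies on: walk a fragment list, emit one iovec per fragment, and clamp each length against the bytes still unaccounted for. The frag type and names are illustrative only, not Onload's real types.

#include <stddef.h>
#include <sys/uio.h>

struct frag {
  char* data;
  size_t len;
  struct frag* next;
};

/* Fill at most max_iov iovecs from a fragment chain, clamping each
 * length against bytes_left; returns the number of iovecs used. */
static int frags_to_iov(struct frag* f, size_t bytes_left,
                        struct iovec* iov, int max_iov)
{
  int i;
  for( i = 0; f != NULL && i < max_iov && bytes_left > 0; f = f->next, ++i ) {
    iov[i].iov_base = f->data;
    iov[i].iov_len = f->len < bytes_left ? f->len : bytes_left;
    bytes_left -= iov[i].iov_len;
  }
  return i;
}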
Example 2
void ci_sock_cmn_timestamp_q_enqueue(ci_netif* ni, ci_sock_cmn* s,
                                     ci_ip_pkt_fmt* pkt)
{
  ci_ip_pkt_queue* qu = &s->timestamp_q;
  oo_pkt_p prev_head = qu->head;

  /* This is effectively ci_ip_queue_enqueue(ni, &s->timestamp_q, pkt),
   * but inlined so that the tsq_next field can be used.
   */
  pkt->tsq_next = OO_PP_NULL;
  if( ci_ip_queue_is_empty(qu) ) {
    ci_assert(OO_PP_IS_NULL(qu->head));
    qu->head = OO_PKT_P(pkt);
  }
  else {
    ci_assert(OO_PP_NOT_NULL(qu->head));
    /* This assumes the netif lock is held, so use
       ci_ip_queue_enqueue_nnl() if it's not */
    PKT(ni, qu->tail)->tsq_next = OO_PKT_P(pkt);
  }
  qu->tail = OO_PKT_P(pkt);
  qu->num++;


  if( OO_PP_IS_NULL(prev_head) ) {
    ci_assert(OO_PP_IS_NULL(s->timestamp_q_extract));
    s->timestamp_q_extract = qu->head;
  } 
  else {
    ci_sock_cmn_timestamp_q_reap(ni, s);
  }

  /* Tells post-poll loop to put socket on the [reap_list]. */
  s->b.sb_flags |= CI_SB_FLAG_RX_DELIVERED;
}
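
The enqueue above is a standard intrusive singly-linked FIFO with head/tail bookkeeping. A minimal pointer-based sketch of the same invariants, using hypothetical types rather than Onload's packet indices:

#include <assert.h>
#include <stddef.h>

struct node  { struct node* tsq_next; };
struct queue { struct node* head; struct node* tail; int num; };

static void queue_enqueue(struct queue* q, struct node* n)
{
  n->tsq_next = NULL;
  if( q->num == 0 ) {
    assert(q->head == NULL);
    q->head = n;             /* empty queue: new node becomes the head */
  }
  else {
    assert(q->head != NULL);
    q->tail->tsq_next = n;   /* append behind the current tail */
  }
  q->tail = n;
  q->num++;
}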
Example 3
static int
oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt,
                            ci_iovec_ptr* piov, int bytes_to_copy)
{
  /* Copy data from [pkt] to [piov], following [pkt->frag_next] as
   * necessary.  Does not modify [pkt].  May or may not advance [piov].
   * The packet must contain at least [bytes_to_copy] of data in the
   * [pkt->buf].  [piov] may contain an arbitrary amount of space.
   *
   * Returns number of bytes copied on success, or -EFAULT otherwise.
   */
  int n, pkt_left, pkt_off = 0;
  int bytes_copied = 0;

  while( 1 ) {
    pkt_left = oo_offbuf_left(&pkt->buf) - pkt_off;
    n = CI_MIN(pkt_left, CI_IOVEC_LEN(&piov->io));
    n = CI_MIN(n, bytes_to_copy);
    if(CI_UNLIKELY( do_copy(CI_IOVEC_BASE(&piov->io),
                            oo_offbuf_ptr(&pkt->buf) + pkt_off, n) != 0 ))
      return -EFAULT;

    bytes_copied += n;
    pkt_off += n;
    if( n == bytes_to_copy )
      return bytes_copied;

    bytes_to_copy -= n;
    if( n == pkt_left ) {
      /* Caller guarantees that packet contains at least [bytes_to_copy]. */
      ci_assert(OO_PP_NOT_NULL(pkt->frag_next));
      ci_iovec_ptr_advance(piov, n);
      pkt = PKT_CHK_NNL(ni, pkt->frag_next);
      pkt_off = 0;
      /* We're unlikely to hit end-of-pkt-buf and end-of-iovec at the same
       * time, and if we do, just go round the loop again.
       */
      continue;
    }

    ci_assert_equal(n, CI_IOVEC_LEN(&piov->io));
    if( piov->iovlen == 0 )
      return bytes_copied;
    piov->io = *piov->iov++;
    --piov->iovlen;
  }
}
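
The loop above is a two-cursor scatter copy: each iteration copies n bytes, where n is clamped to the space left in the current packet fragment, the space left in the current iovec, and the bytes still wanted. A standalone sketch of the same clamping, using a single flat source buffer instead of a fragment chain:

#include <string.h>
#include <sys/uio.h>

/* Copy up to bytes_to_copy bytes of src into an iovec array; returns
 * the number of bytes actually copied. */
static size_t copy_to_iov(const char* src, size_t src_len,
                          struct iovec* iov, int iovlen,
                          size_t bytes_to_copy)
{
  size_t copied = 0, src_off = 0, io_off = 0;
  int i = 0;

  while( copied < bytes_to_copy && src_off < src_len && i < iovlen ) {
    size_t n = src_len - src_off;              /* bytes left in source */
    size_t io_left = iov[i].iov_len - io_off;  /* space left in iovec  */
    if( n > io_left )
      n = io_left;
    if( n > bytes_to_copy - copied )
      n = bytes_to_copy - copied;
    memcpy((char*)iov[i].iov_base + io_off, src + src_off, n);
    src_off += n;  io_off += n;  copied += n;
    if( io_off == iov[i].iov_len ) {           /* iovec full: advance */
      ++i;
      io_off = 0;
    }
  }
  return copied;
}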
Example 4
static int efab_file_move_supported_tcp(ci_netif *ni, ci_tcp_state *ts)
{
#if CI_CFG_FD_CACHING
  /* Don't support moving cached sockets for now */
  if( ci_tcp_is_cached(ts) ||
      !ci_ni_dllist_is_self_linked(ni, &ts->epcache_link) )
    return false;
#endif

  /* TCP closed: supported */
  if( ts->s.b.state == CI_TCP_CLOSED )
    return true;

  /* Everything other than connected TCP states is unsupported */
  if( !(ts->s.b.state & CI_TCP_STATE_TCP_CONN) )
    return false;
  if( ts->local_peer != OO_SP_NULL )
    return false;
  if( !(ts->tcpflags & CI_TCPT_FLAG_PASSIVE_OPENED) )
    return false;

  /* A non-empty send path is not supported.
   * NB: retrans_ptr is uninitialised until the retransmit queue has been
   * used, so do not check for !OO_PP_IS_NULL(ts->retrans_ptr). */
  if( !ci_ip_queue_is_empty(&ts->send) ||
      ts->send_prequeue != OO_PP_ID_NULL ||
      oo_atomic_read(&ts->send_prequeue_in) != 0 ||
      !ci_ip_queue_is_empty(&ts->retrans) ||
      ci_ip_timer_pending(ni, &ts->rto_tid) ||
      ci_ip_timer_pending(ni, &ts->zwin_tid) ||
#if CI_CFG_TAIL_DROP_PROBE
      ci_ip_timer_pending(ni, &ts->taildrop_tid) ||
#endif
      ci_ip_timer_pending(ni, &ts->cork_tid) )
    return false;

  /* Sockets with allocated templates are not supported */
  if( OO_PP_NOT_NULL(ts->tmpl_head) )
    return false;

  return true;
}
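
A predicate like this typically gates the slow path before a socket is moved between stacks. A hypothetical caller sketch (efab_file_move_tcp is illustrative, not the real entry point):

/* Refuse the move unless the socket is in a state we can handle. */
static int efab_file_move_tcp(ci_netif* ni, ci_tcp_state* ts)
{
  if( ! efab_file_move_supported_tcp(ni, ts) )
    return -EOPNOTSUPP;  /* caller falls back to leaving the socket put */
  /* ... perform the actual move ... */
  return 0;
}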
Example 5
void ci_udp_all_fds_gone(ci_netif* netif, oo_sp sock_id, int do_free)
{
  /* All process references to this socket have gone.  So we should
   * shutdown() if necessary, and arrange for all resources to eventually
   * get cleaned up.
   *
   * This is called by the driver only.  [sock_id] is trusted.
   */
  ci_udp_state* us = SP_TO_UDP(netif, sock_id);

  ci_assert(ci_netif_is_locked(netif));
  ci_assert(us->s.b.state == CI_TCP_STATE_UDP);

  LOG_UC(ci_log("ci_udp_all_fds_gone: "NTS_FMT,
                NTS_PRI_ARGS(netif, us)));

  if( UDP_GET_FLAG(us, CI_UDPF_FILTERED) ) {
    UDP_CLR_FLAG(us, CI_UDPF_FILTERED);
    ci_tcp_ep_clear_filters(netif, S_SP(us), 0);
  }
  ci_udp_recv_q_drop(netif, &us->recv_q);
  ci_ni_dllist_remove(netif, &us->s.reap_link);

  if( OO_PP_NOT_NULL(us->zc_kernel_datagram) ) {
    ci_netif_pkt_release_rx(netif, PKT_CHK(netif, us->zc_kernel_datagram));
    us->zc_kernel_datagram = OO_PP_NULL;
    us->zc_kernel_datagram_count = 0;
  }

  /* Only free state if no outstanding tx packets: otherwise it'll get
   * freed by the tx completion event.
   */
  if( do_free ) {
    if( us->tx_count == 0 )
      ci_udp_state_free(netif, us);
    else
      CITP_STATS_NETIF_INC(netif, udp_free_with_tx_active);
  }
}
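
The do_free branch is a deferred-destruction idiom: the state may only be freed once the NIC holds no TX references to it, otherwise the TX completion path frees it later. A standalone sketch of the idea with hypothetical names:

#include <stdlib.h>

struct sock_state {
  int tx_count;      /* packets still owned by the NIC */
  int free_pending;  /* last fd went away while TX was outstanding */
};

static void sock_maybe_free(struct sock_state* s)
{
  if( s->tx_count == 0 )
    free(s);               /* safe: no completion can reference us now */
  else
    s->free_pending = 1;   /* TX completion path will free us */
}

/* Called from the TX completion handler for each completed packet. */
static void sock_tx_complete(struct sock_state* s)
{
  if( --s->tx_count == 0 && s->free_pending )
    free(s);
}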
Example 6
static void efab_ip_queue_copy(ci_netif *ni_to, ci_ip_pkt_queue *q_to,
                               ci_netif *ni_from, ci_ip_pkt_queue *q_from)
{
  ci_ip_pkt_fmt *pkt_to, *pkt_from;
  oo_pkt_p pp;

  ci_ip_queue_init(q_to);
  if( q_from->num == 0 )
    return;

  ci_assert( OO_PP_NOT_NULL(q_from->head) );
  pp = q_from->head;
  do {
    pkt_from = PKT_CHK(ni_from, pp);
    pkt_to = ci_netif_pkt_alloc(ni_to);
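    /* NB: this code assumes ci_netif_pkt_alloc() succeeds; if allocation
     * can fail in this context, a NULL check is needed before the copy. */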
    memcpy(&pkt_to->pay_len, &pkt_from->pay_len,
           CI_CFG_PKT_BUF_SIZE - CI_MEMBER_OFFSET(ci_ip_pkt_fmt, pay_len));
    ci_ip_queue_enqueue(ni_to, q_to, pkt_to);
    if( pp == q_from->tail )
      break;
    pp = pkt_from->next;
  } while(1);
}
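
The memcpy above uses the "copy from this member to the end of the struct" idiom: everything from pay_len onwards is duplicated in one call, leaving the fields before it untouched. A minimal standalone illustration of the offset arithmetic:

#include <stddef.h>
#include <string.h>

struct pkt {
  int id;          /* per-stack field that must NOT be copied */
  int pay_len;     /* first field to copy; the rest follows it */
  char data[2048];
};

static void pkt_copy_from_member(struct pkt* to, const struct pkt* from)
{
  /* Same shape as the CI_MEMBER_OFFSET() expression above. */
  memcpy(&to->pay_len, &from->pay_len,
         sizeof(struct pkt) - offsetof(struct pkt, pay_len));
}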
Example 7
static int ci_udp_ioctl_locked(ci_netif* ni, ci_udp_state* us,
                               ci_fd_t fd, int request, void* arg)
{
  switch( request ) {
  case FIONREAD: /* synonym of SIOCINQ */
    if( ! CI_IOCTL_ARG_OK(int, arg) )
      return -EFAULT;
    /* Return the size of the datagram at the head of the receive queue.
     *
     * Careful: extract side of receive queue is owned by sock lock,
     * which we don't have.  However, freeing of bufs is owned by netif
     * lock, which we do have.  So we're safe so long as we only read
     * [extract] once.
     */
    {
      oo_pkt_p extract = us->recv_q.extract;
      if( OO_PP_NOT_NULL(extract) ) {
        ci_ip_pkt_fmt* pkt = PKT_CHK(ni, extract);
        if( (pkt->rx_flags & CI_PKT_RX_FLAG_RECV_Q_CONSUMED) &&
            OO_PP_NOT_NULL(pkt->udp_rx_next) )
          pkt = PKT_CHK(ni, pkt->udp_rx_next);
        if( !(pkt->rx_flags & CI_PKT_RX_FLAG_RECV_Q_CONSUMED) ) {
          *(int*) arg = pkt->pf.udp.pay_len;
          return 0;
        }
      }
    }
    /* Nothing in the user-level receive queue, so return the value from
     * the O/S socket.
     */
    if( !(us->s.os_sock_status & OO_OS_STATUS_RX) ) {
      *(int*)arg = 0;
      return 0;
    }
    goto sys_ioctl;

  case TIOCOUTQ: /* synonym of SIOCOUTQ */
    if( ! CI_IOCTL_ARG_OK(int, arg) )
      return -EFAULT;

    *(int*)arg = us->tx_count + oo_atomic_read(&us->tx_async_q_level);
    return 0;

  case SIOCGSTAMP:
#if defined( __linux__) && defined(__KERNEL__)
/* The following code assumes the width of the timespec and timeval fields */
# error "Need to consider 32-on-64 bit setting of timeval arg" 
#endif
    if( ! (us->udpflags & CI_UDPF_LAST_RECV_ON) )
      return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
    return ci_udp_ioctl_siocgstamp(ni, us, arg, 1);
  case SIOCGSTAMPNS:
    if( ! (us->udpflags & CI_UDPF_LAST_RECV_ON) )
      return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
    return ci_udp_ioctl_siocgstamp(ni, us, arg, 0);
  }

  return ci_udp_ioctl_slow(ni, us, fd, request, arg);

 sys_ioctl:
  return oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL);
}
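
From the application's point of view these handlers implement the standard UDP queue-level ioctls. A minimal usage sketch against any Linux UDP socket:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>  /* SIOCOUTQ */

static void show_queue_levels(int fd)
{
  int inq = 0, outq = 0;
  if( ioctl(fd, FIONREAD, &inq) == 0 )   /* size of next datagram */
    printf("next datagram: %d bytes\n", inq);
  if( ioctl(fd, SIOCOUTQ, &outq) == 0 )  /* bytes not yet sent */
    printf("unsent: %d bytes\n", outq);
}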
Example 8
static int ci_udp_recvmsg_socklocked_slowpath(ci_udp_iomsg_args* a, 
                                              ci_msghdr* msg,
                                              ci_iovec_ptr *piov, int flags)
{
  int rc = 0;
  ci_netif* ni = a->ni;
  ci_udp_state* us = a->us;

  if(CI_UNLIKELY( ni->state->rxq_low ))
    ci_netif_rxq_low_on_recv(ni, &us->s,
                             1 /* assume at least one pkt freed */);
  /* In the kernel we are only ever called via read(), not recv() with
   * flags, so no flags should be set here. */
#ifdef __KERNEL__
  ci_assert_equal(flags, 0);
#endif

#ifndef __KERNEL__
  if( flags & MSG_ERRQUEUE_CHK ) {
    if( OO_PP_NOT_NULL(us->timestamp_q.extract) ) {
      ci_ip_pkt_fmt* pkt;
      struct timespec ts[3];
      struct cmsg_state cmsg_state;
      ci_udp_hdr* udp;
      int paylen;

      /* TODO is this necessary? - mirroring ci_udp_recvmsg_get() */
      ci_rmb();
      
      pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
      if( pkt->tx_hw_stamp.tv_sec == CI_PKT_TX_HW_STAMP_CONSUMED ) {
        if( OO_PP_IS_NULL(pkt->tsq_next) )
          goto errqueue_empty;
        us->timestamp_q.extract = pkt->tsq_next;
        pkt = PKT_CHK_NNL(ni, us->timestamp_q.extract);
        ci_assert(pkt->tx_hw_stamp.tv_sec != CI_PKT_TX_HW_STAMP_CONSUMED);
      }

      udp = oo_ip_data(pkt);
      paylen = CI_BSWAP_BE16(oo_ip_hdr(pkt)->ip_tot_len_be16) -
               sizeof(ci_ip4_hdr) - sizeof(ci_udp_hdr);

      msg->msg_flags = 0;
      cmsg_state.msg = msg;
      cmsg_state.cm = msg->msg_control;
      cmsg_state.cmsg_bytes_used = 0;
      ci_iovec_ptr_init_nz(piov, msg->msg_iov, msg->msg_iovlen);
      memset(ts, 0, sizeof(ts));

      if( us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_RAW_HARDWARE ) {
        ts[2].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[2].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      if( (us->s.timestamping_flags & ONLOAD_SOF_TIMESTAMPING_SYS_HARDWARE) &&
          (pkt->tx_hw_stamp.tv_nsec & CI_IP_PKT_HW_STAMP_FLAG_IN_SYNC) ) {
        ts[1].tv_sec = pkt->tx_hw_stamp.tv_sec;
        ts[1].tv_nsec = pkt->tx_hw_stamp.tv_nsec;
      }
      ci_put_cmsg(&cmsg_state, SOL_SOCKET, ONLOAD_SCM_TIMESTAMPING,
                  sizeof(ts), &ts);
      oo_offbuf_set_start(&pkt->buf, udp + 1);
      oo_offbuf_set_len(&pkt->buf, paylen);
      rc = oo_copy_pkt_to_iovec_no_adv(ni, pkt, piov, paylen);

      /* Mark this packet/timestamp as consumed */
      pkt->tx_hw_stamp.tv_sec = CI_PKT_TX_HW_STAMP_CONSUMED;

      ci_ip_cmsg_finish(&cmsg_state);
      msg->msg_flags |= MSG_ERRQUEUE_CHK;
      return rc;
    }
  errqueue_empty:
    /* ICMP is handled via OS, so get OS error */
    rc = oo_os_sock_recvmsg(ni, SC_SP(&us->s), msg, flags);
    if( rc < 0 ) {
      ci_assert(-rc == errno);
      return -1;
    }
    else
      return rc;
  }
#endif
  if( (rc = ci_get_so_error(&us->s)) != 0 ) {
    CI_SET_ERROR(rc, rc);
    return rc;
  }
  if( msg->msg_iovlen > 0 && msg->msg_iov == NULL ) {
    CI_SET_ERROR(rc, EFAULT);
    return rc;
  }
#if MSG_OOB_CHK
  if( flags & MSG_OOB_CHK ) {
    CI_SET_ERROR(rc, EOPNOTSUPP);
    return rc;
  }
#endif
#if CI_CFG_POSIX_RECV  
  if( ! udp_lport_be16(us)) {
    LOG_UV(log("%s: -1 (ENOTCONN)", __FUNCTION__));
    CI_SET_ERROR(rc, ENOTCONN);
    return rc;
  }
#endif
  if( msg->msg_iovlen == 0 ) {
    /* We have a difference in behaviour from the Linux stack here.  When
    ** msg_iovlen is 0 Linux 2.4.21-15.EL does not set MSG_TRUNC when a
    ** datagram has non-zero length.  We do. */
    CI_IOVEC_LEN(&piov->io) = piov->iovlen = 0;
    return IOVLEN_WORKAROUND_RC_VALUE;
  }
  return 0;
}
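
The MSG_ERRQUEUE branch above mirrors what a plain Linux application does to collect TX timestamps from the socket error queue; the ts[3] array layout (0 = software, 2 = raw hardware) is the same. A standard, non-Onload usage sketch:

#include <stdio.h>
#include <time.h>
#include <sys/socket.h>
#include <linux/errqueue.h>  /* struct scm_timestamping */

#ifndef SCM_TIMESTAMPING
# define SCM_TIMESTAMPING SO_TIMESTAMPING
#endif

static void read_tx_timestamp(int fd)
{
  char ctrl[256], data[1];
  struct iovec iov = { data, sizeof(data) };
  struct msghdr msg = { 0 };
  struct cmsghdr* cm;

  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctrl;
  msg.msg_controllen = sizeof(ctrl);

  if( recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT) < 0 )
    return;  /* nothing on the error queue */
  for( cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm) )
    if( cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMPING ) {
      struct scm_timestamping* ts = (void*)CMSG_DATA(cm);
      printf("hw stamp: %lld.%09ld\n",
             (long long)ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
    }
}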
Example 9
static int ci_zc_msg_to_udp_pkt(ci_netif* ni, 
                                struct onload_zc_msg* zc_msg,
                                ci_ip_pkt_fmt* pkt)
{
  int i, n_buffers = pkt->n_buffers, dropped_bytes = 0;
  ci_ip_pkt_fmt* frag;
  ci_ip_pkt_fmt* prev_frag = NULL;
  frag = pkt;
  i = 0;
  ci_assert_nequal(zc_msg->iov, NULL);

  /* Ignore first frag if zero length and there is another frag */
  if( oo_offbuf_left(&frag->buf) == 0 && OO_PP_NOT_NULL(frag->frag_next) ) {
    frag = PKT_CHK_NNL(ni, frag->frag_next);
    --n_buffers;
  }

  CI_TEST(zc_msg->msghdr.msg_iovlen <= n_buffers);
  CI_TEST(zc_msg->msghdr.msg_iovlen > 0);

  do {
    CI_TEST(zc_msg->iov[i].buf == (onload_zc_handle)frag);
    CI_TEST(zc_msg->iov[i].iov_len != 0);
    if( i < zc_msg->msghdr.msg_iovlen ) {
      if( zc_msg->iov[i].iov_base != oo_offbuf_ptr(&frag->buf) ) {
        ci_assert_gt((char*)zc_msg->iov[i].iov_base, 
                     oo_offbuf_ptr(&frag->buf));
        dropped_bytes += ((char*)zc_msg->iov[i].iov_base - 
                          oo_offbuf_ptr(&frag->buf) );
        oo_offbuf_set_start(&frag->buf, (char*)zc_msg->iov[i].iov_base);
      }
      if( zc_msg->iov[i].iov_len != oo_offbuf_left(&frag->buf) ) {
        ci_assert_lt(zc_msg->iov[i].iov_len, oo_offbuf_left(&frag->buf));
        dropped_bytes += (oo_offbuf_left(&frag->buf) - zc_msg->iov[i].iov_len);
        oo_offbuf_set_len(&frag->buf, zc_msg->iov[i].iov_len);
      }
    }
    else {
      /* All remaining fragments should be discarded.  We cannot get here
       * on the first frag because msg_iovlen > 0.
       */
      ci_assert(prev_frag != NULL);
      prev_frag->frag_next = OO_PP_NULL;
      /* remember frag so we can release it after counting dropped bytes */
      prev_frag = frag;
      do {
        dropped_bytes += oo_offbuf_left(&frag->buf);
        if( ++i == n_buffers )
          break;
        frag = PKT_CHK_NNL(ni, frag->frag_next);
      } while( 1 );
      ci_netif_pkt_release(ni, prev_frag);
      pkt->n_buffers -= (n_buffers - zc_msg->msghdr.msg_iovlen);
      return dropped_bytes;
    }

    ci_assert_lt(oo_offbuf_offset(&frag->buf) + oo_offbuf_left(&frag->buf),
                 CI_CFG_PKT_BUF_SIZE);

    if( ++i == n_buffers )
      break;
    prev_frag = frag;
    frag = PKT_CHK_NNL(ni, frag->frag_next);
  } while( 1 );

  return dropped_bytes;
}
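
The discard branch above is list surgery: cut the fragment chain after the last iovec the caller kept, count the payload bytes in the severed tail, then release it. A standalone sketch of the same truncation with a hypothetical fragment type (assumes n_keep >= 1, just as the code above assumes msg_iovlen > 0):

#include <stddef.h>
#include <stdlib.h>

struct frag { size_t len; struct frag* next; };

/* Keep the first n_keep fragments; free the rest and return how many
 * payload bytes were dropped with them. */
static size_t chain_truncate(struct frag* head, int n_keep)
{
  size_t dropped = 0;
  struct frag* f = head;
  struct frag* tail;
  int i;

  for( i = 1; i < n_keep; ++i )  /* walk to the last kept fragment */
    f = f->next;
  tail = f->next;
  f->next = NULL;                /* cut the chain */
  while( tail != NULL ) {
    struct frag* next = tail->next;
    dropped += tail->len;
    free(tail);
    tail = next;
  }
  return dropped;
}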