示例#1
0
/* Hand the file descriptor behind [fdi] over to the OS and dispose of [fdi].
 *
 * fdi:        fdinfo being handed over.  Its dtor is invoked and it is freed
 *             before this function returns.
 * fdt_locked: non-zero if the caller already holds the fdtable lock.
 *
 * The fdtable entry for fdi->fd must be busy on entry (asserted in debug
 * builds).  If the fd move succeeds the entry is switched to passthrough.
 * If the move fails the fd is re-probed, is expected to come back as a
 * CITP_PASSTHROUGH_FD, and the busy entry is not cleared here.
 */
static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked)
{
  int rc;
  citp_fdinfo* epoll_fdi = NULL;
  /* By default the non-blocking switch below is applied to the fd itself. */
  int os_fd = fdi->fd;
#ifndef NDEBUG
  /* Yuk: does for UDP too. */
  volatile citp_fdinfo_p* p_fdip;
  p_fdip = &citp_fdtable.table[fdi->fd].fdip;
  ci_assert(fdip_is_busy(*p_fdip));
#endif


  Log_V(ci_log("%s: fd=%d nonb_switch=%d", __FUNCTION__, fdi->fd,
               fdi->on_rcz.handover_nonb_switch));

  /* If the fd is a member of an epoll set, look the set up now.  EPOLLB sets
   * are notified before the move; EPOLL sets are notified after the dtor,
   * at the end of this function. */
  if( fdi->epoll_fd >= 0 ) {
    epoll_fdi = citp_epoll_fdi_from_member(fdi, fdt_locked);
    if( epoll_fdi->protocol->type == CITP_EPOLLB_FD )
      citp_epollb_on_handover(epoll_fdi, fdi);
  }
  rc = fdtable_fd_move(fdi->fd, OO_IOC_TCP_HANDOVER);
  if( rc == -EBUSY && fdi->epoll_fd >= 0 ) {
    ci_assert(fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags &
              CI_SB_AFLAG_MOVED_AWAY);
    /* If this is our epoll, we can do full handover: we manually add os
     * fd into the epoll set.
     * Fixme: ensure we are not in _other_ epoll sets */
    ci_bit_clear(&fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags,
                 CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);
    rc = fdtable_fd_move(fdi->fd, OO_IOC_FILE_MOVED);
  }
  if( rc != 0 ) {
    /* The move failed: re-probe the fd, which is expected to come back as a
     * passthrough fdinfo, and target its OS socket instead. */
    citp_fdinfo* new_fdi;
    if( ! fdt_locked ) CITP_FDTABLE_LOCK();
    new_fdi = citp_fdtable_probe_locked(fdi->fd, CI_TRUE, CI_TRUE);
    /* NOTE(review): new_fdi is still dereferenced after this release;
     * presumably the fdtable keeps its own reference - confirm. */
    citp_fdinfo_release_ref(new_fdi, 1);
    if( ! fdt_locked ) CITP_FDTABLE_UNLOCK();
    ci_assert_equal(citp_fdinfo_get_type(new_fdi), CITP_PASSTHROUGH_FD);
    os_fd = fdi_to_alien_fdi(new_fdi)->os_socket;
  }
  if( fdi->on_rcz.handover_nonb_switch >= 0 ) {
    int on_off = !! fdi->on_rcz.handover_nonb_switch;
    /* Kept separate from [rc]: a FIONBIO failure is only logged and must
     * not influence the move-result check below. */
    int ioctl_rc = ci_sys_ioctl(os_fd, FIONBIO, &on_off);
    if( ioctl_rc < 0 )
      Log_E(ci_log("%s: ioctl failed on_off=%d", __FUNCTION__, on_off));
  }
  /* [rc] here is still the result of the fd move. */
  if( rc != 0 )
    goto exit;
  citp_fdtable_busy_clear(fdi->fd, fdip_passthru, fdt_locked);
exit:
  citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
  if( epoll_fdi != NULL && epoll_fdi->protocol->type == CITP_EPOLL_FD )
    citp_epoll_on_handover(epoll_fdi, fdi, fdt_locked);
  if( epoll_fdi != NULL )
    citp_fdinfo_release_ref(epoll_fdi, fdt_locked);
  citp_fdinfo_free(fdi);
}
示例#2
0
/* Slow-path ioctl() handler for a UDP socket.
 *
 * ni:      stack the socket belongs to.
 * us:      UDP socket state.
 * fd:      file descriptor the ioctl was issued on.
 * request: ioctl request code.
 * arg:     request argument, passed through unchanged.
 *
 * Every request except FIOASYNC is first applied to the O/S socket; if that
 * fails its negative result is returned straight away.
 *
 * Returns the O/S socket failure code, -errno if FIOASYNC fails on [fd],
 * or otherwise whatever the request-specific handling yields.
 */
static int ci_udp_ioctl_slow(ci_netif* ni, ci_udp_state* us,
                             ci_fd_t fd, int request, void* arg)
{
  /* os_rc is only meaningful when request != FIOASYNC; it is initialised so
   * that it is never read indeterminate. */
  int os_rc = 0, rc = 0;

  /* Keep the O/S socket in sync.  Also checks that this is a valid ioctl()
   * for a UDP socket on this kernel.
   */
  if( request != FIOASYNC &&
      (os_rc = oo_os_sock_ioctl(ni, us->s.b.bufid, request, arg, NULL)) < 0 )
    return os_rc;

  switch( request ) {
  case FIONBIO:
    /* Set or clear non-blocking mode.
     * Want this to stay efficient, so we don't do the extra call to the
     * common ioctl handler. */
    CI_CMN_IOCTL_FIONBIO(&us->s, arg);
    break;

  case FIOASYNC:
    /* Need to apply this to [fd] so that our fasync file-op will be invoked.
     * Note that this request is not applied to the O/S socket above.
     */
    rc = ci_sys_ioctl(fd, request, arg);
    if( rc < 0 ) {
      /* Save errno before logging: the log call may overwrite it. */
      int saved_errno = errno;
      LOG_E(ci_log("%s: ERROR: FIOASYNC failed on fd=%d rc=%d errno=%d",
                   __FUNCTION__, fd, rc, saved_errno));
      rc = -saved_errno;
    }
    break;

  case SIOCSPGRP:
    /* Need to apply this to [fd] to get signal delivery to work.  However,
     * SIOCSPGRP is only supported on sockets, so we need to convert to
     * fcntl().
     */
    rc = ci_sys_fcntl(fd, F_SETOWN, CI_IOCTL_GETARG(int, arg));
    if( rc < 0 )
      /* This is very unexpected, as it worked on the OS socket. */
      LOG_E(ci_log("%s: ERROR: fcntl(F_SETOWN) failed on fd=%d rc=%d errno=%d",
                   __FUNCTION__, fd, rc, errno));
    /* A fcntl() failure is only logged; the result returned to the caller
     * comes from the common handler. */
    rc = ci_cmn_ioctl(ni, &us->s, request, arg, os_rc, 1);
    break;

  default:
    rc = ci_cmn_ioctl(ni, &us->s, request, arg, os_rc, 1);
  }

  return rc;
}
示例#3
0
/* Ask the driver behind [fd] to check this library's version.
 *
 * The request carries ONLOAD_VERSION, OO_UK_INTF_VER and a flag that is 1
 * for debug builds and 0 when NDEBUG is defined.
 *
 * Returns the ioctl result, or -errno when the ioctl returns -1.
 */
int oo_version_check_ul(ci_fd_t fd)
{
  oo_version_check_t vc;
  int rc;

#ifdef NDEBUG
  vc.debug = 0;
#else
  vc.debug = 1;
#endif

  /* Fixed-size fields: strncpy() zero-pads a short string and leaves the
   * field unterminated if the string fills it completely. */
  strncpy(vc.in_version, ONLOAD_VERSION, sizeof(vc.in_version));
  strncpy(vc.in_uk_intf_ver, OO_UK_INTF_VER, sizeof(vc.in_uk_intf_ver));

  rc = ci_sys_ioctl(fd, OO_IOC_CHECK_VERSION, &vc);
  return (rc == -1) ? -errno : rc;
}
示例#4
0
/* Return the onloadfs dev_t, querying the driver on first use.
 *
 * The value is cached in a function-local static; while it is still 0 the
 * driver is queried again on each call.  Returns 0 if the driver cannot be
 * opened.
 */
dev_t oo_onloadfs_dev_t(void)
{
  static ci_uint32 onloadfs_dev_t = 0;
  int fd;

  /* Already known: nothing to do. */
  if( onloadfs_dev_t != 0 )
    return onloadfs_dev_t;

  if( ef_onload_driver_open(&fd, OO_STACK_DEV, 1) != 0 ) {
    fprintf(stderr, "%s: Failed to open /dev/onload\n", __FUNCTION__);
    return 0;
  }

  /* A failed query is only logged; the cached value is returned as-is. */
  if( ci_sys_ioctl(fd, OO_IOC_GET_ONLOADFS_DEV, &onloadfs_dev_t) != 0 ) {
    LOG_E(ci_log("%s: Failed to find onloadfs dev_t", __FUNCTION__));
  }
  ci_sys_close(fd);

  return onloadfs_dev_t;
}
示例#5
0
/* Store the CPU frequency in kHz into [*cpu_khz].
 *
 * The driver is asked first via OO_IOC_GET_CPU_KHZ.  If the driver cannot
 * be opened, or the query fails, ci_get_cpu_khz() is called instead.
 * [*cpu_khz] is preset to 1000 before either attempt.
 */
void citp_oo_get_cpu_khz(ci_uint32* cpu_khz)
{
  ef_driver_handle fd;
  int rc;

  /* Constant fallback in case every method below fails. */
  *cpu_khz = 1000;

  rc = ef_onload_driver_open(&fd, OO_STACK_DEV, 1);
  if( rc != 0 ) {
    fprintf(stderr, "%s: Failed to open /dev/onload\n", __FUNCTION__);
    ci_get_cpu_khz(cpu_khz);
    return;
  }

  rc = ci_sys_ioctl(fd, OO_IOC_GET_CPU_KHZ, cpu_khz);
  if( rc != 0 ) {
    Log_E(log("%s: Failed to query cpu_khz", __FUNCTION__));
    ci_get_cpu_khz(cpu_khz);
  }

  ef_onload_driver_close(fd);
}
示例#6
0
/* splice() from the read end of an accelerated pipe into [alien_fd].
 *
 * fdi:         pipe fdinfo; must be the reader end, else EINVAL.
 * alien_fd:    destination file descriptor.
 * alien_off:   must be NULL; a non-NULL offset fails with ENOTSUP.
 * len:         maximum number of bytes to move; 0 returns 0 immediately.
 * flags:       splice flags; SPLICE_F_NONBLOCK forces a non-blocking read.
 * lib_context: library context handed to the read callback.
 *
 * Returns the result of the single ci_pipe_zc_read() pass, or -1 with errno
 * set for the argument errors above.
 */
int citp_pipe_splice_read(citp_fdinfo* fdi, int alien_fd, loff_t* alien_off,
                          size_t len, int flags,
                          citp_lib_context_t* lib_context)
{
  citp_pipe_fdi* epi = fdi_to_pipe_fdi(fdi);
  struct oo_splice_read_context ctx = {
    .alien_fd = alien_fd,
    .len = len,
    .lib_context = lib_context
  };
  int pipe_nonblock = epi->pipe->aflags &
       (CI_PFD_AFLAG_NONBLOCK << CI_PFD_AFLAG_READER_SHIFT);
  int non_block = (flags & SPLICE_F_NONBLOCK) || pipe_nonblock;
  int rc;

  if( ! fdi_is_reader(fdi) ) {
    errno = EINVAL;
    return -1;
  }
  if( alien_off ) {
    /* TODO support this */
    errno = ENOTSUP;
    return -1;
  }
  if( len == 0 )
    return 0;

  /* Only one read pass is made, so its result is also the byte total. */
  rc = ci_pipe_zc_read(epi->ni, epi->pipe, len,
                       non_block ? MSG_DONTWAIT : 0,
                       oo_splice_read_cb, &ctx);

  /* NOTE(review): [flags] holds splice flags yet is tested against
   * MSG_NOSIGNAL here - confirm this is intended. */
  if( rc < 0 && errno == EPIPE && ! (flags & MSG_NOSIGNAL) )
    ci_sys_ioctl(ci_netif_get_driver_handle(epi->ni),
                 OO_IOC_KILL_SELF_SIGPIPE, NULL);

  return rc;
}
示例#7
0
/* Issue an ioctl() on the O/S socket that backs socket [sock_p].
 *
 * ni:       stack the socket belongs to.
 * sock_p:   identifies the socket within [ni].
 * request:  ioctl request code.
 * arg:      request argument, passed through unchanged.
 * ioctl_rc: optional.  When non-NULL it receives the ioctl outcome (the
 *           ioctl's return value, or -errno on failure) and the function
 *           itself returns 0.
 *
 * Returns the non-zero result of oo_os_sock_get() if the O/S socket could
 * not be obtained; otherwise the ioctl outcome, or 0 when [ioctl_rc] is
 * supplied.
 */
int oo_os_sock_ioctl(ci_netif* ni, oo_sp sock_p, int request, void* arg,
                     int* ioctl_rc)
{
  oo_os_file os_sock_fd;
  int rc = oo_os_sock_get(ni, sock_p, &os_sock_fd);

  if( rc != 0 ) {
    LOG_E(ci_log("%s: [%d:%d] ERROR: failed to get kernel sock fd "
                 "(rc=%d req=%d)", __FUNCTION__, NI_ID(ni), OO_SP_FMT(sock_p),
                 rc, request));
    return rc;
  }

  rc = ci_sys_ioctl(os_sock_fd, request, arg);
  if( rc < 0 )
    rc = -errno;
  oo_os_sock_release(ni, os_sock_fd);

  if( ioctl_rc == NULL )
    return rc;

  /* Caller wants the ioctl outcome reported separately. */
  *ioctl_rc = rc;
  return 0;
}
示例#8
0
/* ioctl() handler for passthrough fds: forwards the request unchanged to
 * the fdinfo's O/S socket and returns the ioctl result. */
static int
citp_passthrough_ioctl(citp_fdinfo* fdi, int request, void* arg)
{
  int os_sock = fdi_to_alien_fdi(fdi)->os_socket;

  return ci_sys_ioctl(os_sock, request, arg);
}
示例#9
0
/* NOTE: in the kernel version [fd] is unused and, if it's a ptr, [arg] will
 * be in user-space and may need to be fetched into kernel memory. */
static int ci_tcp_ioctl_lk(citp_socket* ep, ci_fd_t fd, int request,
                           void* arg)
{
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* ts = NULL;
  int rc = 0;
  int os_socket_exists = s->b.sb_aflags & CI_SB_AFLAG_OS_BACKED;

  if( s->b.state != CI_TCP_LISTEN )
    ts = SOCK_TO_TCP(s);

  /* Keep the os socket in sync.  If this is a "get" request then the
   * return will be based on our support, not the os's (except for EFAULT
   * handling which we get for free).
   * Exceptions:
   *  - FIONBIO is applied just in time on handover if needed (listening
   *    sockets always have a non-blocking OS socket)
   *  - FIONREAD, TIOCOUTQ, SIOCOUTQNSD and SIOCATMARK are useless on OS
   *    socket, let's avoid syscall.
   */
  if( os_socket_exists && request != FIONREAD && request != SIOCATMARK &&
      request != FIOASYNC && request != TIOCOUTQ && request != SIOCOUTQNSD &&
      request != (int) FIONBIO ) {
    rc = oo_os_sock_ioctl(netif, s->b.bufid, request, arg, NULL);
    if( rc < 0 )
      return rc;
  }

  /* ioctl defines are listed in `man ioctl_list` and the CI equivalent
   * CI defines are in include/ci/net/ioctls.h  */
  LOG_TV( ci_log("%s:  request = %d, arg = %ld", __FUNCTION__, request, 
		 (long)arg));

  switch( request ) {
  case FIONBIO:
    if( CI_IOCTL_ARG_OK(int, arg) ) {
      CI_CMN_IOCTL_FIONBIO(ep->s, arg);
      rc = 0;
      break;
    }
    goto fail_fault;
  case FIONREAD: /* synonym of SIOCINQ */
    if( !CI_IOCTL_ARG_OK(int, arg) )
      goto fail_fault;
    if( s->b.state == CI_TCP_LISTEN )
      goto fail_inval;

    if( s->b.state == CI_TCP_SYN_SENT ) {
      CI_IOCTL_SETARG((int*)arg, 0);
    } else {
      /* In inline mode, return the total number of bytes in the receive queue.
         If SO_OOBINLINE isn't set then return the number of bytes up to the
         mark but without counting the mark */
      int bytes_in_rxq = tcp_rcv_usr(ts);
      if (bytes_in_rxq && ! (ts->s.s_flags & CI_SOCK_FLAG_OOBINLINE)) {

        if (tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID) {
          /*! \TODO: what if FIN has been received? */
          unsigned int readnxt = tcp_rcv_nxt(ts) - bytes_in_rxq;
          if (SEQ_LT(readnxt, tcp_rcv_up(ts))) {
            bytes_in_rxq = tcp_rcv_up(ts) - readnxt;
          } else if (SEQ_EQ(readnxt, tcp_rcv_up(ts))) {
            bytes_in_rxq--;
          }
        }

      }
      CI_IOCTL_SETARG((int*)arg, bytes_in_rxq);
    }
    break;

  case TIOCOUTQ: /* synonym of SIOCOUTQ */
  case SIOCOUTQNSD:
    {
    CI_BUILD_ASSERT(TIOCOUTQ == SIOCOUTQ);
    int outq_bytes = 0;

    if( !CI_IOCTL_ARG_OK(int, arg) )
      goto fail_fault;
    if( s->b.state == CI_TCP_LISTEN )
      goto fail_inval;

    if( s->b.state != CI_TCP_SYN_SENT ) {

      /* TIOCOUTQ counts all unacknowledged data, so includes retrans queue. */
      if( request == TIOCOUTQ )
        outq_bytes = SEQ_SUB(tcp_enq_nxt(ts), tcp_snd_una(ts));
      else
        outq_bytes = SEQ_SUB(tcp_enq_nxt(ts), tcp_snd_nxt(ts));
    }
    CI_IOCTL_SETARG((int*)arg, outq_bytes);
    }
    break;

  case SIOCATMARK:
    {
     if( !CI_IOCTL_ARG_OK(int, arg) )
       goto fail_fault;

      /* return true, if we are at the out-of-band byte */
      CI_IOCTL_SETARG((int*)arg, 0);
      if( s->b.state != CI_TCP_LISTEN ) {
	int readnxt;

        readnxt = SEQ_SUB(tcp_rcv_nxt(ts), tcp_rcv_usr(ts));
        if( ~ts->s.b.state & CI_TCP_STATE_ACCEPT_DATA )
          readnxt = SEQ_SUB(readnxt, 1);
        if( tcp_urg_data(ts) & CI_TCP_URG_PTR_VALID )
          CI_IOCTL_SETARG((int*)arg, readnxt == tcp_rcv_up(ts));
        LOG_URG(log(NTS_FMT "SIOCATMARK atmark=%d  readnxt=%u rcv_up=%u%s",
                    NTS_PRI_ARGS(ep->netif, ts), readnxt == tcp_rcv_up(ts), 
		    readnxt,  tcp_rcv_up(SOCK_TO_TCP(ep->s)),
                    (tcp_urg_data(ts)&CI_TCP_URG_PTR_VALID)?"":" (invalid)"));
      }
      break;
    }

#ifndef __KERNEL__
  case FIOASYNC:
    /* Need to apply this to [fd] so that our fasync file-op will be
     * invoked.
     */
    rc = ci_sys_ioctl(fd, request, arg);
    break;

  case SIOCSPGRP:
    if( !CI_IOCTL_ARG_OK(int, arg) )
      goto fail_fault;
    /* Need to apply this to [fd] to get signal delivery to work.  However,
     * SIOCSPGRP is only supported on sockets, so we need to convert to
     * fcntl().
     */
    rc = ci_sys_fcntl(fd, F_SETOWN, CI_IOCTL_GETARG(int, arg));
    if( rc == 0 ) {
      rc = ci_cmn_ioctl(netif, ep->s, request, arg, rc, os_socket_exists);
    }
    else {
      CI_SET_ERROR(rc, -rc);
    }
    break;
#endif

  default:
    return ci_cmn_ioctl(netif, ep->s, request, arg, rc, os_socket_exists);
  }
示例#10
0
文件: epoll_fd.c 项目: ido/openonload
/* ioctl() handler for epoll fds: forwards [cmd] and [arg] unchanged to the
 * system ioctl on fdi->fd and returns its result. */
static int citp_epoll_ioctl(citp_fdinfo *fdi, int cmd, void *arg)
{
  int rc = ci_sys_ioctl(fdi->fd, cmd, arg);

  return rc;
}
示例#11
0
/* Open a handle to the onload driver.
 *
 * pfd:        receives the new driver handle; must not be NULL.
 * dev_type:   which onload device to open.
 * do_cloexec: non-zero to request close-on-exec on the new handle.
 *
 * If the direct open fails for a reason other than EMFILE and a saved fd is
 * available for [dev_type], the handle is cloned from the saved fd instead.
 * On success the handle is moved above CITP_OPTS.fd_base when possible.
 *
 * Returns 0 on success, otherwise the non-zero result of the failed open or
 * clone.
 */
int ef_onload_driver_open(ef_driver_handle* pfd,
                          enum oo_device_type dev_type,
                          int do_cloexec)
{
  int rc;
  int flags = 0;
  int saved_errno = errno;

#ifdef O_CLOEXEC
  if( do_cloexec )
    flags = O_CLOEXEC;
#endif

  ci_assert(pfd);
  rc = oo_open(pfd, dev_type, flags);
  if( rc != 0 && errno != EMFILE && fd_is_saved[dev_type] >= 0 ) {
    ci_clone_fd_t op;
    op.do_cloexec = do_cloexec;
    LOG_NV(ci_log("%s: open failed, but cloning from saved fd", __func__));
    rc = ci_sys_ioctl((ci_fd_t) saved_fd[dev_type],
                      clone_ioctl[dev_type], &op);
    if( rc < 0 )
      return rc;
    /* The clone succeeded, so hide the errno left behind by oo_open(). */
    errno = saved_errno;
    *pfd = op.fd;
  }

  if( rc != 0 )
    return rc;

  /* Our internal driver handles are not visible to the application.  It may
   * make assumptions about the fd space available to it, and try to dup2/3
   * onto one of our driver fds.  To try and minimise this we allow the user
   * to specify a minimum value for us to use, to try and keep out of their
   * way.
   *
   * We have to be able to cope with them coming along and trying to dup onto
   * one of these fds anyway, as they may not have set the option up.  As such
   * we treat failure to shift the fd as acceptable, and just retain the old
   * one.
   */
  if( *pfd < CITP_OPTS.fd_base ) {
    if( ef_onload_handle_move_and_do_cloexec(pfd, do_cloexec) == 0 )
      return 0;
  }

  if( do_cloexec ) {
#if defined(O_CLOEXEC)
    /* -1: not yet probed; 0: the open honoured close-on-exec; 1: it did not,
     * so FD_CLOEXEC has to be set explicitly. */
    static int o_cloexec_fails = -1;
    if( o_cloexec_fails < 0 ) {
      /* F_GETFD reports the descriptor flags through its return value and
       * takes no argument, so the flag must be tested on the return value. */
      rc = ci_sys_fcntl(*(int *)pfd, F_GETFD, 0);
      if( rc >= 0 && (rc & FD_CLOEXEC) )
        o_cloexec_fails = 0;
      else
        o_cloexec_fails = 1;
    }
#else
    static const int o_cloexec_fails = 1;
#endif
    if( o_cloexec_fails )
      CI_DEBUG_TRY(ci_sys_fcntl(*(int *)pfd, F_SETFD, FD_CLOEXEC));
  }

  return 0;
}
示例#12
0
/* splice() from [alien_fd] into the write end of an accelerated pipe.
 *
 * fdi:         pipe fdinfo; must be the writer end, else EINVAL.
 * alien_fd:    source file descriptor, read with readv().
 * alien_off:   must be NULL; a non-NULL offset fails with ENOTSUP.
 * olen:        maximum number of bytes to move.
 * flags:       splice flags; SPLICE_F_NONBLOCK forces non-blocking buffer
 *              allocation.
 * lib_context: library context, exited around the readv() call.
 *
 * Pipe buffers are allocated, described by an iovec array, filled directly
 * by readv() and then committed to the pipe.  A small on-stack iovec array
 * is used when it is large enough; otherwise one is allocated from the heap.
 *
 * Returns the number of bytes written to the pipe, 0 if readv() returned 0,
 * or a negative value with errno set on failure.
 */
int citp_pipe_splice_write(citp_fdinfo* fdi, int alien_fd, loff_t* alien_off,
                           size_t olen, int flags,
                           citp_lib_context_t* lib_context)
{
  citp_pipe_fdi* epi = fdi_to_pipe_fdi(fdi);
  int len_in_bufs = OO_PIPE_SIZE_TO_BUFS(olen);
  struct iovec iov_on_stack[CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN];
  struct iovec* iov = iov_on_stack;
  int want_buf_count;
  int rc;
  int bytes_to_read;
  int len = olen;
  int no_more = 1; /* for now we only run single loop */
  int written_total = 0;
  int non_block = (flags & SPLICE_F_NONBLOCK) || (epi->pipe->aflags &
      (CI_PFD_AFLAG_NONBLOCK << CI_PFD_AFLAG_WRITER_SHIFT));
  if( fdi_is_reader(fdi) ) {
    errno = EINVAL;
    return -1;
  }
  if( alien_off ) {
    /* TODO support this */
    errno = ENOTSUP;
    return -1;
  }
  do {
    int count;
    int iov_num;
    int bytes_to_write;
    struct ci_pipe_pkt_list pkts = {};
    struct ci_pipe_pkt_list pkts2;
    want_buf_count = len_in_bufs;
    /* We might need to wait for buffers here on the first iteration */
    rc = ci_pipe_zc_alloc_buffers(epi->ni, epi->pipe, want_buf_count,
                                  MSG_NOSIGNAL | (non_block || written_total ?
                                  MSG_DONTWAIT : 0),
                                  &pkts);
    if( rc < 0 && written_total ) {
      /* whatever the error we need to report already written_bytes */
      rc = written_total;
      break;
    }
    else if( rc < 0 )
      break;
    else if( pkts.count == 0 && non_block ) {
      errno = EAGAIN;
      rc = -1;
      break;
    }
    else
      ci_assert_gt(pkts.count, 0);
    count = pkts.count;

    if( count > CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN ) {
      /* The on-stack array is too small: grow (or first allocate) a heap
       * array sized for the largest request. */
      void* niov = realloc(iov == iov_on_stack ? NULL : iov,
                           sizeof(*iov) * len_in_bufs);
      if( niov == NULL )
        /* we can still move quite a few pkts */
        count = CITP_PIPE_SPLICE_WRITE_STACK_IOV_LEN;
      else
        /* Adopt the new array.  Without this the allocation leaks and the
         * on-stack array is used with more entries than it can hold. */
        iov = niov;
    }

    ci_assert_ge(count, 1);

    iov_num = count;
    pkts2 = pkts;
    bytes_to_read = ci_pipe_list_to_iovec(epi->ni, epi->pipe, iov, &iov_num,
                                          &pkts2, len);

    citp_exit_lib_if(lib_context, TRUE);
    /* Note: the following call might be non-blocking as well as blocking */
    /* NOTE(review): readv() is given [count] rather than [iov_num], which
     * ci_pipe_list_to_iovec() receives by pointer - confirm they always
     * agree. */
    rc = readv(alien_fd, iov, count);
    citp_reenter_lib(lib_context);

    if( rc > 0 ) {
      bytes_to_write = rc;
      written_total += bytes_to_write;
      len -= bytes_to_write;
      no_more |= bytes_to_write < bytes_to_read;
    }
    else {
      bytes_to_write = 0;
      no_more = 1;
    }

    {
      /* pipe zc_write will write non_empty buffers and release the empty
       * ones */
      int rc2 = ci_pipe_zc_write(epi->ni, epi->pipe, &pkts, bytes_to_write,
                  CI_PIPE_ZC_WRITE_FLAG_FORCE | MSG_DONTWAIT | MSG_NOSIGNAL);
      (void) rc2;
      ci_assert_equal(rc2, bytes_to_write);
    }
    /* for now we will not be doing second iteration, to allow for that
     * we'd need to have guarantee that read will not block
     * e.g. insight into type of fd and a nonblocking operation
     * (to name a valid case: socket, recvmsg) */
  } while( ! no_more );

  if( iov != iov_on_stack )
    free(iov);
  if( rc > 0 )
    return written_total;
  /* NOTE(review): [flags] holds splice flags yet is tested against
   * MSG_NOSIGNAL here - confirm this is intended. */
  if( rc < 0 && errno == EPIPE && ! (flags & MSG_NOSIGNAL) ) {
    ci_sys_ioctl(ci_netif_get_driver_handle(epi->ni),
                 OO_IOC_KILL_SELF_SIGPIPE, NULL);
  }
  return rc;
}