Example #1
void citp_passthrough_init(citp_alien_fdi* epi)
{
  int rc = oo_os_sock_get(epi->netif, epi->ep->bufid, &epi->os_socket);

  /* No sensible way to handle oo_os_sock_get failure.  Just record it. */
  if( rc != 0 ) {
    Log_U(ci_log("%s: oo_os_sock_get([%d:%d]) returned %d",
                 __func__, NI_ID(epi->netif), epi->ep->bufid, rc));
    epi->os_socket = -1;
    return;
  }
  __citp_fdtable_reserve(epi->os_socket, 1);
  /* ci_tcp_helper_get_sock_fd takes the citp_dup2_lock: release it here. */
  oo_rwlock_unlock_read(&citp_dup2_lock);
}
Example #2
static citp_pipe_fdi *citp_pipe_epi_alloc(ci_netif *ni, int flags)
{
  citp_pipe_fdi* epi;

  epi = CI_ALLOC_OBJ(citp_pipe_fdi);
  if( ! epi ) {
    Log_U(ci_log(LPF "pipe: failed to allocate epi"));
    errno = ENOMEM;
    return NULL;
  }
  if( flags == O_WRONLY )
    citp_fdinfo_init(&epi->fdinfo, &citp_pipe_write_protocol_impl);
  else
    citp_fdinfo_init(&epi->fdinfo, &citp_pipe_read_protocol_impl);
  epi->ni = ni;

  return epi;
}
Example #3
/* This function does some simple tests to ensure that the fdtable makes sense.
 * There are many more tests we could do; feel free to add them at your
 * leisure!
 */
void
citp_fdtable_assert_valid(void)
{
  int i;

  if( ! citp_fdtable.table )  return;

  CITP_FDTABLE_LOCK_RD();

  for( i = 0; i < citp_fdtable.inited_count; i++ ) {
    citp_fdinfo_p fdip = citp_fdtable.table[i].fdip;

    if( fdip_is_normal(fdip) ) {
      citp_fdinfo * fdi = fdip_to_fdi(fdip);

      ci_assert(fdi);
      ci_assert(fdi->protocol);
      if( ( fdi->protocol->type == CITP_TCP_SOCKET ||
            fdi->protocol->type == CITP_UDP_SOCKET )
          && fdi_to_socket(fdi)->s )
        ci_assert(! (fdi_to_socket(fdi)->s->b.sb_aflags & CI_SB_AFLAG_ORPHAN));

      if (!fdi->is_special) {
        /* Ensure the "back pointer" makes sense */
        ci_assert (fdi->fd == i);
        /* Ensure that the reference count is in a vaguely sensible range */
        ci_assert ((oo_atomic_read (&fdi->ref_count) > 0) &&
                   (oo_atomic_read (&fdi->ref_count) < 10000));

        /* 10,000 threads is a bit mad, warn if more than 20 */
        if (oo_atomic_read (&fdi->ref_count) > 20) {
          Log_U (log ("Warning: fd %d's ref-count suspiciously large (%d)\n",
                      i, oo_atomic_read (&fdi->ref_count)));
        }
      }
    }
  }

  CITP_FDTABLE_UNLOCK_RD();
}
Example #4
int citp_fdtable_ctor()
{
  struct rlimit rlim;
  int rc;

  Log_S(log("%s:", __FUNCTION__));

  /* How big should our fdtable be by default?  It's pretty arbitrary, but we
   * have seen a few apps that use setrlimit to set the fdtable to 4096
   * entries on start-up (see bugs 3253 and 3373), so we choose that.  (Note:
   * we can't grow the table if the app later does setrlimit, and unused
   * entries consume virtual space only, so it's worth allocating a
   * reasonably sized table.)
   */
  citp_fdtable.size = 4096;

  if( getrlimit(RLIMIT_NOFILE, &rlim) == 0 ) {
    citp_fdtable.size = rlim.rlim_max;
    if( CITP_OPTS.fdtable_size != 0 &&
        CITP_OPTS.fdtable_size != rlim.rlim_max ) {
      Log_S(ci_log("Set the limits for the number of opened files "
                   "to EF_FDTABLE_SIZE=%u value.",
                   CITP_OPTS.fdtable_size));
      rlim.rlim_max = CITP_OPTS.fdtable_size;
      if( rlim.rlim_cur > rlim.rlim_max )
        rlim.rlim_cur = rlim.rlim_max;
      if( ci_sys_setrlimit(RLIMIT_NOFILE, &rlim) == 0 )
          citp_fdtable.size = rlim.rlim_max;
      else {
        /* Most probably, we've got EPERM */
        ci_assert_lt(citp_fdtable.size, CITP_OPTS.fdtable_size);
        ci_log("Can't set EF_FDTABLE_SIZE=%u; using %u",
               CITP_OPTS.fdtable_size, citp_fdtable.size);
        rlim.rlim_max = rlim.rlim_cur = citp_fdtable.size;
        CI_TRY(ci_sys_setrlimit(RLIMIT_NOFILE, &rlim));
      }
    }
  }
  else
    Log_S(ci_log("Assume EF_FDTABLE_SIZE=%u", citp_fdtable.size));

  citp_fdtable.inited_count = 0;

  citp_fdtable.table = ci_libc_malloc(sizeof (citp_fdtable_entry) *
                                      citp_fdtable.size);
  if( ! citp_fdtable.table ) {
    Log_U(log("%s: failed to allocate fdtable (0x%x)", __FUNCTION__,
              citp_fdtable.size));
    return -1;
  }

  /* The whole table is not initialised at start-of-day, but is initialised
  ** on demand.  citp_fdtable.inited_count counts the number of initialised
  ** entries.
  */

  if( (rc = oo_rwlock_ctor(&citp_ul_lock)) != 0 ) {
    Log_E(log("%s: oo_rwlock_ctor %d", __FUNCTION__, rc));
    return -1;
  }

  /* Install SIGONLOAD handler */
  {
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa)); /* sa_flags and sa_mask = 0 */
    sa.sa_handler = sighandler_do_nothing;
    sigaction(SIGONLOAD, &sa, NULL);
  }

  return 0;
}
Example #5
/* Find out what sort of thing [fd] is, and if it is a user-level socket
 * then map in the user-level state.
 */
static citp_fdinfo * citp_fdtable_probe_locked(unsigned fd, int print_banner,
                                               int fdip_is_already_busy)
{
  citp_fdinfo* fdi = NULL;
  struct stat64 st;
  ci_ep_info_t info;


  if( ! fdip_is_already_busy ) {
    volatile citp_fdinfo_p* p_fdip;
    citp_fdinfo_p fdip;
    /* ?? We're repeating some effort already expended in lookup() here, but
    ** this keeps it cleaner.  May optimise down the line when I understand
    ** what other code needs to call this.
    */
    
    p_fdip = &citp_fdtable.table[fd].fdip;
   again:
    fdip = *p_fdip;
    if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);
    if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) )  goto exit;
    if( fdip_cas_fail(p_fdip, fdip, fdip_busy) )  goto again;
    
    if( fdip_is_normal(fdip) ) {
      fdi = fdip_to_fdi(fdip);
      citp_fdinfo_ref(fdi);
      citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }
  }

  if( ci_sys_fstat64(fd, &st) != 0 ) {
    /* fstat() failed.  Must be a bad (closed) file descriptor, so
    ** leave this entry as unknown.  Return citp_the_closed_fd to avoid the
    ** caller passing through to an fd that is created asynchronously.
    */
    citp_fdtable_busy_clear(fd, fdip_unknown, 1);
    fdi = &citp_the_closed_fd;
    citp_fdinfo_ref(fdi);
    goto exit;
  }

  /* oo_get_st_rdev() and oo_onloadfs_dev_t() open-and-close fd, so
   * fdtable should be locked if strict mode requested. */
  if( fdtable_strict() )  { CITP_FDTABLE_ASSERT_LOCKED(1); }

  if(  st.st_dev == oo_onloadfs_dev_t() ) {
    /* Retrieve user-level endpoint info */
    if( oo_ep_info(fd, &info) < 0 ) {
      Log_V(log("%s: fd=%d type=%d unknown", __FUNCTION__,fd,info.fd_type));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }

    switch( info.fd_type ) {
    case CI_PRIV_TYPE_TCP_EP:
    case CI_PRIV_TYPE_UDP_EP:
    case CI_PRIV_TYPE_PASSTHROUGH_EP:
    case CI_PRIV_TYPE_ALIEN_EP:
#if CI_CFG_USERSPACE_PIPE
    case CI_PRIV_TYPE_PIPE_READER:
    case CI_PRIV_TYPE_PIPE_WRITER:
#endif
    {
      citp_fdinfo_p fdip;

      Log_V(log("%s: fd=%d %s restore", __FUNCTION__, fd,
		info.fd_type == CI_PRIV_TYPE_TCP_EP ? "TCP":
#if CI_CFG_USERSPACE_PIPE
                info.fd_type != CI_PRIV_TYPE_UDP_EP ? "PIPE" :
#endif
                "UDP"));
      fdip = citp_fdtable_probe_restore(fd, &info, print_banner);
      if( fdip_is_normal(fdip) )
        fdi = fdip_to_fdi(fdip);
      else
        citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }

    case CI_PRIV_TYPE_NETIF:
      /* This should never happen, because netif fds are close-on-exec.
      ** But let's leave this code here just in case my reasoning is bad.
      */
      Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_reserved, 1);
      fdi = &citp_the_reserved_fd;
      citp_fdinfo_ref(fdi);
      goto exit;

    case CI_PRIV_TYPE_NONE:
      /* This happens if a thread gets at an onload driver fd that has just
       * been created, but not yet specialised.  On Linux I think this
       * means it will shortly be a new netif internal fd.  (fds associated
       * with sockets and pipes are never unspecialised).
       */
      Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;

    default:
      CI_TEST(0);
      break;
    }
  }
  else if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_EPOLL_DEV)) ) {
    citp_epollb_fdi *epi = CI_ALLOC_OBJ(citp_epollb_fdi);
    if( ! epi ) {
      Log_E(log("%s: out of memory (epoll_fdi)", __FUNCTION__));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }
    oo_epollb_ctor(epi);
    fdi = &epi->fdinfo;
    citp_fdinfo_init(fdi, &citp_epollb_protocol_impl);
    citp_fdinfo_ref(fdi);
    citp_fdtable_insert(fdi, fd, 1);
    goto exit;
  }

#ifndef NDEBUG
  /* /dev/onload may be netif only; they are closed on fork or exec */
  if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_STACK_DEV)) )
    Log_U(log("%s: %d is /dev/onload", __FUNCTION__, fd));
#endif

  /* Not one of ours, so pass-through. */
  Log_V(log("%s: fd=%u non-efab", __FUNCTION__, fd));
  citp_fdtable_busy_clear(fd, fdip_passthru, 1);

 exit:
  return fdi;

}
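
The probe above claims a table slot by swapping its fdip for the busy marker with a compare-and-swap (`fdip_cas_fail`), retrying from `again:` if another thread wins the race, and later publishing a final state via `citp_fdtable_busy_clear()`.  Below is a minimal, self-contained sketch of that claim/publish idiom using C11 atomics; the names (slot_t, SLOT_BUSY, slot_claim, slot_publish) are hypothetical stand-ins, not Onload's fdip machinery.

/* Illustrative sketch only: claim a slot with CAS, publish when done. */
#include <stdatomic.h>
#include <stdint.h>

#define SLOT_BUSY  ((uintptr_t) 1)    /* sentinel: slot is in transition */

typedef _Atomic uintptr_t slot_t;

/* Wait until [slot] is not busy, then atomically swap in the busy marker.
 * Returns the value we displaced; the caller "owns" the slot until it
 * calls slot_publish(). */
static uintptr_t slot_claim(slot_t* slot)
{
  uintptr_t v;
 again:
  v = atomic_load(slot);
  if( v == SLOT_BUSY )
    goto again;                       /* another thread is probing: spin */
  if( ! atomic_compare_exchange_weak(slot, &v, SLOT_BUSY) )
    goto again;                       /* lost the race: re-read and retry */
  return v;
}

/* Publish the final state; this releases any spinning waiters. */
static void slot_publish(slot_t* slot, uintptr_t final_state)
{
  atomic_store(slot, final_state);
}

Only the thread that wins the CAS performs the expensive probe (the fstat64() path above); every other thread spins until the winner stores a stable value back into the slot.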
Example #6
int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags)
{
  volatile citp_fdinfo_p* p_tofdip;
  citp_fdinfo_p tofdip;
  unsigned max;

  Log_V(log("%s(%d, %d)", __FUNCTION__, fromfd, tofd));

  /* Must be checked by callers. */
  ci_assert(fromfd != tofd);

  /* Hack: if [tofd] is the fd we're using for logging, we'd better choose
  ** a different one!
  */
  if( tofd == citp.log_fd )  citp_log_change_fd();

  ci_assert(citp.init_level >= CITP_INIT_FDTABLE);

  max = CI_MAX(fromfd, tofd);
  if( max >= citp_fdtable.inited_count ) {
    ci_assert(max < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(max);
    CITP_FDTABLE_UNLOCK();
  }

  /* Bug1151: Concurrent threads doing dup2(x,y) and dup2(y,x) can deadlock
  ** against one another.  So we take out a fat lock to prevent concurrent
  ** dup2()s.
  */
  /* Lock tofd.  We need to interlock against select and poll etc, so we
  ** also grab the exclusive lock.  Also grab the bug1151 lock.
  */
  pthread_mutex_lock(&citp_dup_lock);
  CITP_FDTABLE_LOCK();
  p_tofdip = &citp_fdtable.table[tofd].fdip;
 lock_tofdip_again:
  tofdip = *p_tofdip;
  if( fdip_is_busy(tofdip) )
    tofdip = citp_fdtable_busy_wait(tofd, 1);
  if( fdip_is_closing(tofdip) )
    tofdip = citp_fdtable_closing_wait(tofd, 1);
  if( fdip_is_reserved(tofdip) ) {
    /* ?? FIXME: we can't cope with this at the moment */
    CITP_FDTABLE_UNLOCK();
    Log_U(log("%s(%d, %d): target is reserved", __FUNCTION__, fromfd, tofd));
    errno = EBUSY;
    tofd = -1;
    goto out;
  }
  if( fdip_cas_fail(p_tofdip, tofdip, fdip_busy) )
    goto lock_tofdip_again;
  CITP_FDTABLE_UNLOCK();
  ci_assert(fdip_is_normal(tofdip) | fdip_is_passthru(tofdip) |
            fdip_is_unknown(tofdip));

  if( fdip_is_normal(tofdip) ) {
    /* We're duping onto a user-level socket. */
    citp_fdinfo* tofdi = fdip_to_fdi(tofdip);
    if( tofdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(tofdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, tofdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }
    ci_assert_equal(tofdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    tofdi->on_ref_count_zero = FDI_ON_RCZ_DUP2;
    tofdi->on_rcz.dup3_args.fd = fromfd;
    tofdi->on_rcz.dup3_args.flags = flags;
    citp_fdinfo_release_ref(tofdi, 0);
    {
      int i = 0;
      /* We need to free this fdi.  If someone is using it right now,
       * we are in trouble.  So, we spin for a while and interrupt the
       * user.  See bug 28123. */
      while( tofdi->on_ref_count_zero != FDI_ON_RCZ_DONE ) {
        if( ci_is_multithreaded() && i % 10000 == 9999 ) {
          pthread_t pth = tofdi->thread_id;
          if( pth !=  pthread_self() && pth != PTHREAD_NULL ) {
            pthread_kill(pth, SIGONLOAD);
            sleep(1);
          }
        }
        ci_spinloop_pause();
        i++;
      }
      ci_rmb();
    }
    if( tofdi->on_rcz.dup2_result < 0 ) {
      errno = -tofdi->on_rcz.dup2_result;
      /* Need to re-insert [tofdi] into the table. */
      ci_assert_equal(oo_atomic_read(&tofdi->ref_count), 0);
      oo_atomic_set(&tofdi->ref_count, 1);
      CI_DEBUG(tofdi->on_ref_count_zero = FDI_ON_RCZ_NONE);
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else {
      ci_assert(tofdi->on_rcz.dup2_result == tofd);
      citp_fdinfo_get_ops(tofdi)->dtor(tofdi, 0);
      citp_fdinfo_free(tofdi);
    }
    goto out;
  }

  ci_assert(fdip_is_passthru(tofdip) | fdip_is_unknown(tofdip));

  { /* We're duping onto an O/S descriptor, or it may be closed.  Create a
    ** dummy [citp_fdinfo], just so we can share code with the case above.
    */
    citp_fdinfo fdi;
    fdi.fd = tofd;
    fdi.on_rcz.dup3_args.fd = fromfd;
    fdi.on_rcz.dup3_args.flags = flags;
    dup2_complete(&fdi, tofdip, 0);
    if( fdi.on_rcz.dup2_result < 0 ) {
      errno = -fdi.on_rcz.dup2_result;
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else
      ci_assert(fdi.on_rcz.dup2_result == tofd);
  }

 out:
  pthread_mutex_unlock(&citp_dup_lock);
  return tofd;
}
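
The spin loop above may have to wait for a thread that is blocked in a system call while still holding a reference to [tofdi]; it nudges that thread with pthread_kill() and SIGONLOAD, whose do-nothing handler (installed in citp_fdtable_ctor(), Example #4) is registered without SA_RESTART, so the blocked call returns EINTR and the thread can drop its reference.  Here is a minimal sketch of that technique, using SIGUSR1 and hypothetical names (do_nothing, install_interrupter, interrupt_thread) rather than Onload's SIGONLOAD machinery.

/* Illustrative sketch only: interrupt a thread stuck in a blocking call
 * by delivering a signal whose handler does nothing.  Because sigaction()
 * is called with sa_flags = 0 (no SA_RESTART), the interrupted call in
 * the target thread returns -1 with errno == EINTR instead of restarting. */
#include <signal.h>
#include <string.h>
#include <pthread.h>

static void do_nothing(int sig) { (void) sig; }

static void install_interrupter(void)
{
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));        /* sa_flags and sa_mask = 0 */
  sa.sa_handler = do_nothing;
  sigaction(SIGUSR1, &sa, NULL);
}

/* Kick [target] out of a blocking read()/poll()/etc. so it can notice a
 * change in shared state and release whatever it is holding. */
static void interrupt_thread(pthread_t target)
{
  pthread_kill(target, SIGUSR1);
}

Because the handler itself does nothing, the only observable effect is that blocking calls in the target thread return early with EINTR, giving it a chance to re-check shared state.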
Example #7
/* we don't register protocol impl */
int citp_pipe_create(int fds[2], int flags)
{
  citp_pipe_fdi* epi_read;
  citp_pipe_fdi* epi_write;
  struct oo_pipe* p = NULL;         /* make compiler happy */
  ci_netif* ni;
  int rc = -1;
  ef_driver_handle fd = -1;

  Log_V(log(LPF "pipe()"));

  /* citp_netif_exists() does not need citp_ul_lock here */
  if( CITP_OPTS.ul_pipe == CI_UNIX_PIPE_ACCELERATE_IF_NETIF &&
      ! citp_netif_exists() ) {
    return CITP_NOT_HANDLED;
  }

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail */
      return CITP_NOT_HANDLED;
    }
    /* may be lib mismatch - errno will be ELIBACC */
    goto fail1;
  }
  rc = -1;

  CI_MAGIC_CHECK(ni, NETIF_MAGIC);

  /* add another reference as we have 2 fdis */
  citp_netif_add_ref(ni);

  epi_read = citp_pipe_epi_alloc(ni, O_RDONLY);
  if( epi_read == NULL )
    goto fail2;
  epi_write = citp_pipe_epi_alloc(ni, O_WRONLY);
  if( epi_write == NULL )
    goto fail3;

  /* oo_pipe init code */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  rc = oo_pipe_ctor(ni, &p, fds, flags);
  if( rc < 0 )
      goto fail4;
  citp_fdtable_new_fd_set(fds[0], fdip_busy, fdtable_strict());
  citp_fdtable_new_fd_set(fds[1], fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  LOG_PIPE("%s: pipe=%p id=%d", __FUNCTION__, p, p->b.bufid);

  /* Now that the pipe has been created, attach it to both end-points. */
  epi_read->pipe = p;
  epi_write->pipe = p;

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi_read->pipe->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_assert(epi_write->pipe->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi_read->pipe->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi_read->pipe->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(&epi_read->fdinfo, fds[0], 0);
  citp_fdtable_insert(&epi_write->fdinfo, fds[1], 0);

  CI_MAGIC_CHECK(ni, NETIF_MAGIC);

  return 0;

fail4:
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
fail3:
  CI_FREE_OBJ(epi_write);
fail2:
  CI_FREE_OBJ(epi_read);
  citp_netif_release_ref(ni, 0);
  citp_netif_release_ref(ni, 0);
fail1:
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno));
    return CITP_NOT_HANDLED;
  }

  return rc;
}
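
citp_pipe_create() unwinds on error through the fail1..fail4 labels, releasing resources in the reverse order they were acquired: unlock the fdtable if held, free the write-side fdi, free the read-side fdi, then drop both netif references.  A minimal sketch of this goto-cleanup idiom, using hypothetical resources and a hypothetical build_object() rather than Onload code:

/* Illustrative sketch only: reverse-order goto cleanup on failure. */
#include <stdlib.h>

/* Returns 0 on success (in a real implementation a, b and c would be
 * handed to the caller via out-params), -1 on failure with everything
 * released. */
static int build_object(void)
{
  void *a, *b, *c;

  if( (a = malloc(16)) == NULL )
    goto fail1;                 /* nothing to undo yet */
  if( (b = malloc(16)) == NULL )
    goto fail2;                 /* undo 'a' only */
  if( (c = malloc(16)) == NULL )
    goto fail3;                 /* undo 'b', then 'a' */

  /* ... success: hand a/b/c off to the caller (omitted) ... */
  free(c); free(b); free(a);    /* placeholder so this sketch doesn't leak */
  return 0;

 fail3:
  free(b);
 fail2:
  free(a);
 fail1:
  return -1;
}

Each label undoes exactly the resources acquired before the jump that reaches it, which is why the labels appear in reverse acquisition order.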
Example #8
/* fixme kostik: this is partially copy-paste from citp_sock_fcntl */
static int citp_pipe_fcntl(citp_fdinfo* fdinfo, int cmd, long arg)
{
  int rc = 0;
  citp_pipe_fdi* epi = fdi_to_pipe_fdi(fdinfo);
  struct oo_pipe* p = epi->pipe;

  switch ( cmd ) {
  case F_GETFL: {
    ci_uint32 flag_nonb = CI_PFD_AFLAG_NONBLOCK;
    if( ! fdi_is_reader(fdinfo) ) {
      rc = O_WRONLY;
      flag_nonb <<= CI_PFD_AFLAG_WRITER_SHIFT;
    }
    else
      flag_nonb <<= CI_PFD_AFLAG_READER_SHIFT;
    if ( p->aflags & flag_nonb ) rc |= O_NONBLOCK;
    break;
  }
  case F_SETFL: {
    ci_uint32 bit;

    rc = ci_sys_fcntl(fdinfo->fd, cmd, arg);
    if( rc < 0 )
      break;

    bit = CI_PFD_AFLAG_NONBLOCK <<
                (fdi_is_reader(fdinfo) ? CI_PFD_AFLAG_READER_SHIFT :
                 CI_PFD_AFLAG_WRITER_SHIFT);
    if( arg & (O_NONBLOCK | O_NDELAY) )
      ci_bit_mask_set(&p->aflags, bit);
    else
      ci_bit_mask_clear(&p->aflags, bit);
    break;
  }
  case F_DUPFD:
    rc = citp_ep_dup(fdinfo->fd, citp_ep_dup_fcntl_dup, arg);
    break;
#ifdef F_DUPFD_CLOEXEC
  case F_DUPFD_CLOEXEC:
    rc = citp_ep_dup(fdinfo->fd, citp_ep_dup_fcntl_dup_cloexec, arg);
    break;
#endif
  case F_GETFD:
  case F_SETFD:
    rc = ci_sys_fcntl(fdinfo->fd, cmd, arg);
    break;
  case F_GETLK:
  case F_SETLK:
  case F_SETLKW:
    /* File locks are not supported on pipes */
    Log_U(ci_log("%s: cmd %d not supported on pipes!", __FUNCTION__,
                 cmd));
    errno = ENOTSUP;
    rc = CI_SOCKET_ERROR;
    break;
  case F_GETOWN:
  case F_SETOWN:
#ifdef F_GETOWN_EX
  case F_GETOWN_EX:
#endif
#ifdef F_SETOWN_EX
  case F_SETOWN_EX:
#endif
    rc = ci_sys_fcntl(fdinfo->fd, cmd, arg);
    if( rc != 0 )
        break;
    p->b.sigown = arg;
    if( p->b.sigown && (p->b.sb_aflags & CI_SB_AFLAG_O_ASYNC) )
      ci_bit_set(&p->b.wake_request, CI_SB_FLAG_WAKE_RX_B);
    break;
#ifdef F_SETPIPE_SZ
  case F_SETPIPE_SZ:
    /* The system pipe buffer size is rounded up to a power of two.  We
     * cannot replicate this.
     */
    rc = ci_pipe_set_size(epi->ni, p, arg);
    if( rc < 0 ) {
        errno = EINVAL;
        rc = CI_SOCKET_ERROR;
        break;
    }
    rc = 0;
    break;
#endif
#ifdef F_GETPIPE_SZ
  case F_GETPIPE_SZ:
    rc = (p->bufs_max - 1) * OO_PIPE_BUF_MAX_SIZE;
    break;
#endif
  default:
    /* fixme kostik: logging should include some pipe identification */
    errno = ENOTSUP;
    rc = CI_SOCKET_ERROR;
  }

  Log_VSC(log("%s(%d, %d, %ld) = %d  (errno=%d)",
              __FUNCTION__, fdinfo->fd, cmd, arg, rc, errno));

  return rc;
}
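
The F_GETFL/F_SETFL arms above reproduce, against the pipe's user-level aflags word, what the kernel normally does for O_NONBLOCK on an ordinary descriptor: one non-blocking bit per pipe end, selected by CI_PFD_AFLAG_READER_SHIFT / CI_PFD_AFLAG_WRITER_SHIFT.  For comparison, a sketch of the plain-kernel equivalent using standard fcntl(); set_nonblock is a hypothetical helper, not part of Onload.

/* Illustrative sketch only: toggle O_NONBLOCK on an ordinary kernel fd. */
#include <fcntl.h>

static int set_nonblock(int fd, int on)
{
  int flags = fcntl(fd, F_GETFL);
  if( flags < 0 )
    return -1;
  flags = on ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
  return fcntl(fd, F_SETFL, flags);
}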
Example #9
File: udp_fd.c  Project: ido/openonload
static int citp_udp_socket(int domain, int type, int protocol)
{
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ef_driver_handle fd;
  int rc;
  ci_netif* ni;

  Log_V(log(LPF "socket(%d, %d, %d)", domain, type, protocol));

  epi = CI_ALLOC_OBJ(citp_sock_fdi);
  if( ! epi ) {
    Log_U(ci_log(LPF "socket: failed to allocate epi"));
    errno = ENOMEM;
    goto fail1;
  }
  fdi = &epi->fdinfo;
  citp_fdinfo_init(fdi, &citp_udp_protocol_impl);

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail */
      CI_FREE_OBJ(epi);
      return rc;
    }
    goto fail2;
  }

  /* Protect the fdtable entry until we're done initialising. */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  if((fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0) {
    /*! ?? \TODO unpick the ci_udp_ep_ctor according to how it failed */
    Log_U(ci_log(LPF "socket: udp_ep_ctor failed"));
    errno = -fd;
    goto fail3;
  }

  citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  CI_DEBUG(epi->sock.s->pid = getpid());

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi->sock.s->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(fdi, fd, 0);

  Log_VSS(log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, type, protocol,
              EF_PRI_ARGS(epi,fd)));
  return fd;

 fail3:
  if( CITP_OPTS.no_fail && errno != ELIBACC )
    CITP_STATS_NETIF(++ni->state->stats.udp_handover_socket);
  citp_netif_release_ref(ni, 0);
 fail2:
  CI_FREE_OBJ(epi);
 fail1:
  /* BUG1408: Graceful failure. We'll only fail outright if there's a
   * driver/library mismatch */
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno));
    return CI_SOCKET_HANDOVER;
  }
  return -1;
}