Example #1
static int citp_udp_shutdown(citp_fdinfo* fdinfo, int how)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(ci_log("%s("EF_FMT", %d)", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd), how));

  ci_netif_lock_fdi(epi);
  rc = ci_udp_shutdown(&epi->sock, fdinfo->fd, how);
  ci_netif_unlock_fdi(epi);
  Log_V(log(LPF "shutdown: fd=%d rc=%d", fdinfo->fd, rc));
  return rc;
}
Example #2
static int citp_udp_ioctl(citp_fdinfo* fdinfo, int request, void* arg)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(log("%s("EF_FMT", %d, 0x%lx)", __FUNCTION__,
            EF_PRI_ARGS(epi, fdinfo->fd), request, (long) arg));

  rc = ci_udp_ioctl(&epi->sock, fdinfo->fd, request, arg);
  Log_V(log(LPF "ioctl()=%d", rc));
  if( rc < 0 )
    CI_SET_ERROR(rc, -rc);
  return rc;
}
Example #3
static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level,
		       int optname, const void* optval, socklen_t optlen)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  citp_socket* ep    = &epi->sock;
  ci_sock_cmn* s     = ep->s;
  int rc;

  Log_VSC(log("%s("EF_FMT", %d, %d)", __FUNCTION__,
	      EF_PRI_ARGS(epi, fdinfo->fd),  level, optname));

  rc = ci_udp_setsockopt(&epi->sock, fdinfo->fd,
			 level, optname, optval, optlen);

  Log_V(log(LPF "setsockopt: fd=%d rc=%d", fdinfo->fd, rc));

  if( rc == CI_SOCKET_HANDOVER ) {
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_setsockopt);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  if( ci_opt_is_setting_reuseport(level, optname, optval, optlen) != 0 &&
      ! CI_SOCK_NOT_BOUND(s) ) {
    ci_log("%s: setting reuseport after binding on udp not supported",
           __FUNCTION__);
    return -ENOSYS;
  }

  citp_fdinfo_release_ref(fdinfo, 0);
  return rc;
}
Example #4
citp_fdinfo_p citp_fdtable_busy_wait(unsigned fd, int fdt_locked)
{
  volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
  citp_fdtable_waiter waiter;
  int saved_errno = errno;

  Log_V(ci_log("%s: fd=%u", __FUNCTION__, fd));

  ci_assert(ci_is_multithreaded());

  oo_rwlock_cond_init(&waiter.cond);
  oo_rwlock_cond_lock(&waiter.cond);
 again:
  waiter.next = *p_fdip;
  if( fdip_is_busy(waiter.next) ) {
    /* we can replace one "busy" fdip by another without fdtable lock */
    if( fdip_cas_succeed(p_fdip, waiter.next, waiter_to_fdip(&waiter)) )
      oo_rwlock_cond_wait(&waiter.cond);
    goto again;
  }
  oo_rwlock_cond_unlock(&waiter.cond);
  oo_rwlock_cond_destroy(&waiter.cond);

  errno = saved_errno;
  return waiter.next;
}
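
The waiter-parking trick above relies on being able to swap one "busy" placeholder for a pointer to an on-stack waiter with a single compare-and-swap; the thread that eventually settles the entry is then expected to walk that chain and wake everyone (handled by the busy-clear path, not shown here). The sketch below reduces the same idea to a single tagged slot using portable C11 atomics; the names (slot_busy_wait, slot_settle, SLOT_BUSY) are illustrative and not part of the Onload API.

/* Sketch (not Onload code): the park-a-waiter-with-CAS pattern from
 * citp_fdtable_busy_wait(), reduced to one tagged slot with C11 atomics.
 * Odd values mean "busy"/"waiter chain"; even values are settled. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdint.h>

#define SLOT_BUSY ((uintptr_t) 1)

struct waiter {
  uintptr_t       next;    /* slot value this waiter displaced */
  pthread_mutex_t mtx;
  pthread_cond_t  cond;
  int             woken;
};

/* Block until the slot holds a settled (even) value, and return it. */
static uintptr_t slot_busy_wait(_Atomic uintptr_t* slot)
{
  struct waiter w;
  uintptr_t cur;

  pthread_mutex_init(&w.mtx, NULL);
  pthread_cond_init(&w.cond, NULL);
  pthread_mutex_lock(&w.mtx);
 again:
  cur = atomic_load(slot);
  if( cur & 1 ) {
    w.next = cur;
    w.woken = 0;
    /* One "busy" value can replace another without any table lock. */
    if( atomic_compare_exchange_strong(slot, &cur, (uintptr_t) &w | 1) )
      while( ! w.woken )
        pthread_cond_wait(&w.cond, &w.mtx);
    goto again;
  }
  pthread_mutex_unlock(&w.mtx);
  pthread_cond_destroy(&w.cond);
  pthread_mutex_destroy(&w.mtx);
  return cur;
}

/* Publish the settled (even) value and wake every parked waiter. */
static void slot_settle(_Atomic uintptr_t* slot, uintptr_t settled)
{
  uintptr_t cur = atomic_exchange(slot, settled);
  while( cur != SLOT_BUSY && (cur & 1) ) {
    struct waiter* w = (struct waiter*) (cur & ~(uintptr_t) 1);
    cur = w->next;
    pthread_mutex_lock(&w->mtx);
    w->woken = 1;
    pthread_cond_signal(&w->cond);
    pthread_mutex_unlock(&w->mtx);
  }
}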
Example #5
static citp_fdinfo_p citp_fdtable_closing_wait(unsigned fd, int fdt_locked)
{
  /* We're currently spinning in this case.  Not ideal, but implementing
  ** blocking here is slightly tricky.  (Can be done, but I want proof that
  ** it's needed first!)
  */
  volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
  citp_fdinfo_p fdip;

  Log_V(ci_log("%s: fd=%u", __FUNCTION__, fd));

 again:
  fdip = *p_fdip;
  if( fdip_is_busy(fdip)    )  fdip = citp_fdtable_busy_wait(fd, fdt_locked);
  if( fdip_is_closing(fdip) ) {
    if( fdt_locked ) {
      /* Need to drop the lock to avoid deadlock with the other thread
      ** trying to close this fd! */
      CITP_FDTABLE_UNLOCK();
      CITP_FDTABLE_LOCK();
    }
    goto again;
  }
  return fdip;
}
Example #6
static int citp_udp_connect(citp_fdinfo* fdinfo,
			    const struct sockaddr* sa, socklen_t sa_len,
                            citp_lib_context_t* lib_context)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(log(LPF "connect(%d, sa, %d)", fdinfo->fd, sa_len));

  if( (epi->sock.s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) != 0 ) {
    log("ERROR: connect of socket with SO_REUSEPORT not supported unless "
        "supported by the OS.");
    return -1;
  }

  ci_netif_lock_fdi(epi);
  rc = ci_udp_connect(&epi->sock, fdinfo->fd, sa, sa_len);
  ci_netif_unlock_fdi(epi);

  if( rc == CI_SOCKET_HANDOVER ) {
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_connect);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  citp_fdinfo_release_ref( fdinfo, 0 );
  return rc;
}
Example #7
static int citp_udp_sendmmsg(citp_fdinfo* fdinfo, struct mmsghdr* mmsg, 
                             unsigned vlen, int flags)
{
  citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo);
  ci_udp_iomsg_args a;
  int i, rc;

  Log_V(log(LPF "sendmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, 
            (unsigned) flags));

  if( vlen == 0 ) 
    return 0;

  a.ep = &epi->sock;
  a.fd = fdinfo->fd;
  a.ni = epi->sock.netif;
  a.us = SOCK_TO_UDP(epi->sock.s);

  i = 0;

  do {
    rc = ci_udp_sendmsg(&a, &mmsg[i].msg_hdr, flags);
    if(CI_LIKELY( rc >= 0 ) )
      mmsg[i].msg_len = rc;
    ++i;
  } while( rc >= 0 && i < vlen );
  return rc >= 0 ? i : rc;
}
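
For reference, this is the interposed implementation behind the application-facing sendmmsg(2) entry point. The caller-side sketch below uses plain glibc only (no Onload specifics) and shows how an application builds the struct mmsghdr array that arrives here as mmsg; the socket, destination and payloads are illustrative.

/* Sketch: application-side sendmmsg(2) usage; payloads and destination
 * are illustrative. */
#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>

static int send_two_datagrams(int sock, const struct sockaddr_in* dst)
{
  const char* payload[2] = { "first", "second" };
  struct iovec iov[2];
  struct mmsghdr mmsg[2];
  int i, sent;

  memset(mmsg, 0, sizeof(mmsg));
  for( i = 0; i < 2; ++i ) {
    iov[i].iov_base = (void*) payload[i];
    iov[i].iov_len  = strlen(payload[i]);
    mmsg[i].msg_hdr.msg_name    = (void*) dst;
    mmsg[i].msg_hdr.msg_namelen = sizeof(*dst);
    mmsg[i].msg_hdr.msg_iov     = &iov[i];
    mmsg[i].msg_hdr.msg_iovlen  = 1;
  }

  /* sendmmsg() returns the number of messages sent, with msg_len filled
   * in per message; -1 with errno set only if no message could be sent. */
  sent = sendmmsg(sock, mmsg, 2, 0);
  if( sent < 0 )
    perror("sendmmsg");
  return sent;
}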
Example #8
static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked)
{
  int rc;
  citp_fdinfo* epoll_fdi = NULL;
  int os_fd = fdi->fd;
#ifndef NDEBUG
  /* Yuk: does for UDP too. */
  volatile citp_fdinfo_p* p_fdip;
  p_fdip = &citp_fdtable.table[fdi->fd].fdip;
  ci_assert(fdip_is_busy(*p_fdip));
#endif


  Log_V(ci_log("%s: fd=%d nonb_switch=%d", __FUNCTION__, fdi->fd,
	       fdi->on_rcz.handover_nonb_switch));

  if( fdi->epoll_fd >= 0 ) {
    epoll_fdi = citp_epoll_fdi_from_member(fdi, fdt_locked);
    if( epoll_fdi->protocol->type == CITP_EPOLLB_FD )
      citp_epollb_on_handover(epoll_fdi, fdi);
  }
  rc = fdtable_fd_move(fdi->fd, OO_IOC_TCP_HANDOVER);
  if( rc == -EBUSY && fdi->epoll_fd >= 0 ) {
    ci_assert(fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags &
              CI_SB_AFLAG_MOVED_AWAY);
    /* If this is our epoll, we can do full handover: we manually add os
     * fd into the epoll set.
     * Fixme: ensure we are not in _other_ epoll sets */
    ci_bit_clear(&fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags,
                 CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);
    rc = fdtable_fd_move(fdi->fd, OO_IOC_FILE_MOVED);
  }
  if( rc != 0 ) {
    citp_fdinfo* new_fdi;
    if( ! fdt_locked ) CITP_FDTABLE_LOCK();
    new_fdi = citp_fdtable_probe_locked(fdi->fd, CI_TRUE, CI_TRUE);
    ci_assert_equal(citp_fdinfo_get_type(new_fdi), CITP_PASSTHROUGH_FD);
    os_fd = fdi_to_alien_fdi(new_fdi)->os_socket;
    citp_fdinfo_release_ref(new_fdi, 1);
    if( ! fdt_locked ) CITP_FDTABLE_UNLOCK();
  }
  if( fdi->on_rcz.handover_nonb_switch >= 0 ) {
    int on_off = !! fdi->on_rcz.handover_nonb_switch;
    int rc = ci_sys_ioctl(os_fd, FIONBIO, &on_off);
    if( rc < 0 )
      Log_E(ci_log("%s: ioctl failed on_off=%d", __FUNCTION__, on_off));
  }
  if( rc != 0 )
    goto exit;
  citp_fdtable_busy_clear(fdi->fd, fdip_passthru, fdt_locked);
exit:
  citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
  if( epoll_fdi != NULL && epoll_fdi->protocol->type == CITP_EPOLL_FD )
    citp_epoll_on_handover(epoll_fdi, fdi, fdt_locked);
  if( epoll_fdi != NULL )
    citp_fdinfo_release_ref(epoll_fdi, fdt_locked);
  citp_fdinfo_free(fdi);
}
Example #9
void __citp_fdinfo_ref_count_zero(citp_fdinfo* fdi, int fdt_locked)
{
  Log_V(log("%s: fd=%d on_rcz=%d", __FUNCTION__, fdi->fd,
	    fdi->on_ref_count_zero));

  citp_fdinfo_assert_valid(fdi);
  ci_assert(oo_atomic_read(&fdi->ref_count) == 0);
  ci_assert_ge(fdi->fd, 0);
  ci_assert_lt(fdi->fd, citp_fdtable.inited_count);
  ci_assert_nequal(fdi_to_fdip(fdi), citp_fdtable.table[fdi->fd].fdip);

  switch( fdi->on_ref_count_zero ) {
  case FDI_ON_RCZ_CLOSE:
#if CI_CFG_FD_CACHING
    if( citp_fdinfo_get_ops(fdi)->cache(fdi) == 1 ) {
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_LOCK();
      fdtable_swap(fdi->fd, fdip_closing, fdip_unknown,
                   fdt_locked | fdtable_strict());
      citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict());
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_UNLOCK();
      citp_fdinfo_free(fdi);
      break;
    }
    else 
#endif
    {
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_LOCK();
      ci_tcp_helper_close_no_trampoline(fdi->fd);
      /* The swap must occur after the close, otherwise another thread could
       * cause a probe of the old endpoint info, which is about to be freed.
       */
      fdtable_swap(fdi->fd, fdip_closing, fdip_unknown,
		   fdt_locked | fdtable_strict());
      citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked | fdtable_strict());
      if( ! fdt_locked && fdtable_strict() )  CITP_FDTABLE_UNLOCK();
      citp_fdinfo_free(fdi);
      break;
    }
  case FDI_ON_RCZ_DUP2:
    dup2_complete(fdi, fdi_to_fdip(fdi), fdt_locked);
    break;
  case FDI_ON_RCZ_HANDOVER:
    citp_fdinfo_do_handover(fdi, fdt_locked);
    break;
  case FDI_ON_RCZ_MOVED:
    citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
    citp_fdinfo_free(fdi);
    break;
  default:
    CI_DEBUG(ci_log("%s: fd=%d on_ref_count_zero=%d", __FUNCTION__,
		    fdi->fd, fdi->on_ref_count_zero));
    ci_assert(0);
  }
}
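
The function above is only the callee half of the pattern: the deferred action is recorded in on_ref_count_zero while a reference is still held, and whichever thread drops the last reference runs it. A minimal release-side sketch with generic C11 atomics follows; the enum and struct names are illustrative, not the Onload types.

/* Sketch (illustrative types): whoever drops the last reference runs the
 * action recorded earlier in on_ref_count_zero. */
#include <stdatomic.h>
#include <assert.h>
#include <stdlib.h>

enum on_rcz { RCZ_NONE, RCZ_CLOSE, RCZ_HANDOVER };

struct obj {
  atomic_int  ref_count;          /* starts at 1 for the creator */
  enum on_rcz on_ref_count_zero;  /* set before the last ref is dropped */
};

static void obj_release(struct obj* o)
{
  /* atomic_fetch_sub() returns the previous value, so 1 means this call
   * released the final reference and must run the deferred action. */
  if( atomic_fetch_sub(&o->ref_count, 1) == 1 ) {
    switch( o->on_ref_count_zero ) {
    case RCZ_CLOSE:
      /* close the underlying resource here, then free the wrapper */
      free(o);
      break;
    case RCZ_HANDOVER:
      /* pass the resource back to the OS here, then free the wrapper */
      free(o);
      break;
    default:
      assert(0);
    }
  }
}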
Example #10
int citp_ep_ioctl(citp_fdinfo* fdinfo, unsigned long request, long arg)
{
  CITP_FDINFO_ASSERT_VALID(fdinfo);

  Log_V(log(LPF "ioctl(%d, %lu, %ld)", fdinfo->fd, request, arg));

  /*! \TODO see /usr/include/bits/ioctls.h for lots of socketey ones */

  ci_fail(("?? not yet implemented"));
  errno = ENOTSUP;
  return -1;
}
Example #11
/* 2004/08/16 stg: added [fdt_locked] to allow the fd table to be
 * locked before this function.  [fdt_locked] = 0 for legacy operation
 */
void __citp_epinfo_ref_count_zero(citp_epinfo* epinfo,citp_fdinfo* last_fdinfo, 
				  int fdt_locked)
{
  Log_V(log(LPF "ref_count_zero(%p, %d)", epinfo, last_fdinfo->fd));

  ci_assert(epinfo);
  ci_assert(oo_atomic_read(&epinfo->ref_count) == 0);
  ci_assert(epinfo->protocol);
  ci_assert(last_fdinfo);
  ci_assert(last_fdinfo->ep == epinfo);

  epinfo->protocol->ops.dtor(epinfo, last_fdinfo, fdt_locked);
}
Example #12
static int citp_udp_getpeername(citp_fdinfo* fdinfo,
				struct sockaddr* sa, socklen_t* p_sa_len)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(log("%s("EF_FMT")", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd)));

  ci_netif_lock_fdi(epi);
  rc = ci_udp_getpeername(&epi->sock, sa, p_sa_len);
  ci_netif_unlock_fdi(epi);
  return rc;
}
Example #13
static int citp_udp_recv(citp_fdinfo* fdinfo, struct msghdr* msg, int flags)
{
  citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo);
  ci_udp_iomsg_args a;

  Log_V(log(LPF "recv(%d, msg, %#x)", fdinfo->fd, (unsigned) flags));

  a.fd = fdinfo->fd;
  a.ep = &epi->sock;
  a.ni = epi->sock.netif;
  a.us = SOCK_TO_UDP(epi->sock.s);

  return ci_udp_recvmsg( &a, msg, flags);
}
Example #14
static int citp_udp_getsockopt(citp_fdinfo* fdinfo, int level,
			       int optname, void* optval, socklen_t* optlen)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(log("%s("EF_FMT", %d, %d)", __FUNCTION__, EF_PRI_ARGS(epi,fdinfo->fd),
            level, optname ));

  ci_netif_lock_fdi(epi);
  rc = ci_udp_getsockopt(&epi->sock, fdinfo->fd,
			 level, optname, optval, optlen);
  ci_netif_unlock_fdi(epi);
  return rc;
}
Example #15
static int citp_udp_recvmmsg(citp_fdinfo* fdinfo, struct mmsghdr* msg, 
                             unsigned vlen, int flags, 
                             const struct timespec *timeout)
{
  citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo);
  ci_udp_iomsg_args a;

  Log_V(log(LPF "recvmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, 
            (unsigned) flags));

  a.fd = fdinfo->fd;
  a.ep = &epi->sock;
  a.ni = epi->sock.netif;
  a.us = SOCK_TO_UDP(epi->sock.s);

  return ci_udp_recvmmsg(&a, msg, vlen, flags, timeout);
}
Example #16
static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa,
			 socklen_t sa_len)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  citp_socket* ep = &epi->sock;
  ci_sock_cmn* s = ep->s;
  int rc;

  Log_V(log(LPF "bind(%d, sa, %d)", fdinfo->fd, sa_len));

  ci_udp_handle_force_reuseport(fdinfo->fd, ep, sa, sa_len);

  if( (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0 ) {
    if( (rc = ci_udp_reuseport_bind(ep, fdinfo->fd, sa, sa_len)) == 0 ) {
      /* The socket has moved, so we need to reprobe the fd.  This will also
       * map the new stack into the user space of the executing process.
       */
      fdinfo = citp_fdtable_lookup(fdinfo->fd);
      fdinfo = citp_reprobe_moved(fdinfo, CI_FALSE);
      epi = fdi_to_sock_fdi(fdinfo);
      ep = &epi->sock;
      ci_netif_cluster_prefault(ep->netif);
    }
    else {
      goto done;
    }
  }

  ci_netif_lock_fdi(epi);
  rc = ci_udp_bind(ep, fdinfo->fd, sa, sa_len);
  ci_netif_unlock_fdi(epi);

 done:
  if( rc == CI_SOCKET_HANDOVER ) {
    ci_assert_equal(s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY, 0);
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_bind);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  citp_fdinfo_release_ref( fdinfo, 0 );
  return rc;
}
Example #17
/* Find out what sort of thing [fd] is, and if it is a user-level socket
 * then map in the user-level state.
 */
static citp_fdinfo * citp_fdtable_probe_locked(unsigned fd, int print_banner,
                                               int fdip_is_already_busy)
{
  citp_fdinfo* fdi = NULL;
  struct stat64 st;
  ci_ep_info_t info;


  if( ! fdip_is_already_busy ) {
    volatile citp_fdinfo_p* p_fdip;
    citp_fdinfo_p fdip;
    /* ?? We're repeating some effort already expended in lookup() here, but
    ** this keeps it cleaner.  May optimise down the line when I understand
    ** what other code needs to call this.
    */
    
    p_fdip = &citp_fdtable.table[fd].fdip;
   again:
    fdip = *p_fdip;
    if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);
    if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) )  goto exit;
    if( fdip_cas_fail(p_fdip, fdip, fdip_busy) )  goto again;
    
    if( fdip_is_normal(fdip) ) {
      fdi = fdip_to_fdi(fdip);
      citp_fdinfo_ref(fdi);
      citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }
  }

  if( ci_sys_fstat64(fd, &st) != 0 ) {
    /* fstat() failed.  Must be a bad (closed) file descriptor, so
    ** leave this entry as unknown.  Return citp_the_closed_fd to avoid the
    ** caller passing through to an fd that is created asynchronously.
    */
    citp_fdtable_busy_clear(fd, fdip_unknown, 1);
    fdi = &citp_the_closed_fd;
    citp_fdinfo_ref(fdi);
    goto exit;
  }

  /* oo_get_st_rdev() and oo_onloadfs_dev_t() open and close an fd, so the
   * fdtable should be locked if strict mode is requested. */
  if( fdtable_strict() )  { CITP_FDTABLE_ASSERT_LOCKED(1); }

  if(  st.st_dev == oo_onloadfs_dev_t() ) {
    /* Retrieve user-level endpoint info */
    if( oo_ep_info(fd, &info) < 0 ) {
      Log_V(log("%s: fd=%d type=%d unknown", __FUNCTION__,fd,info.fd_type));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }

    switch( info.fd_type ) {
    case CI_PRIV_TYPE_TCP_EP:
    case CI_PRIV_TYPE_UDP_EP:
    case CI_PRIV_TYPE_PASSTHROUGH_EP:
    case CI_PRIV_TYPE_ALIEN_EP:
#if CI_CFG_USERSPACE_PIPE
    case CI_PRIV_TYPE_PIPE_READER:
    case CI_PRIV_TYPE_PIPE_WRITER:
#endif
    {
      citp_fdinfo_p fdip;

      Log_V(log("%s: fd=%d %s restore", __FUNCTION__, fd,
		info.fd_type == CI_PRIV_TYPE_TCP_EP ? "TCP":
#if CI_CFG_USERSPACE_PIPE
                info.fd_type != CI_PRIV_TYPE_UDP_EP ? "PIPE" :
#endif
                "UDP"));
      fdip = citp_fdtable_probe_restore(fd, &info, print_banner);
      if( fdip_is_normal(fdip) )
        fdi = fdip_to_fdi(fdip);
      else
        citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }

    case CI_PRIV_TYPE_NETIF:
      /* This should never happen, because netif fds are close-on-exec.
      ** But let's leave this code here just in case my reasoning is bad.
      */
      Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_reserved, 1);
      fdi = &citp_the_reserved_fd;
      citp_fdinfo_ref(fdi);
      goto exit;

    case CI_PRIV_TYPE_NONE:
      /* This happens if a thread gets at an onload driver fd that has just
       * been created, but not yet specialised.  On Linux I think this
       * means it will shortly be a new netif internal fd.  (fds associated
       * with sockets and pipes are never unspecialised).
       */
      Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;

    default:
      CI_TEST(0);
      break;
    }
  }
  else if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_EPOLL_DEV)) ) {
    citp_epollb_fdi *epi = CI_ALLOC_OBJ(citp_epollb_fdi);
    if( ! epi ) {
      Log_E(log("%s: out of memory (epoll_fdi)", __FUNCTION__));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }
    oo_epollb_ctor(epi);
    fdi = &epi->fdinfo;
    citp_fdinfo_init(fdi, &citp_epollb_protocol_impl);
    citp_fdinfo_ref(fdi);
    citp_fdtable_insert(fdi, fd, 1);
    goto exit;
  }

#ifndef NDEBUG
  /* /dev/onload may be netif only; they are closed on fork or exec */
  if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_STACK_DEV)) )
    Log_U(log("%s: %d is /dev/onload", __FUNCTION__, fd));
#endif

  /* Not one of ours, so pass-through. */
  Log_V(log("%s: fd=%u non-efab", __FUNCTION__, fd));
  citp_fdtable_busy_clear(fd, fdip_passthru, 1);

 exit:
  return fdi;

}
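
The probe's first decision, whether an unknown fd belongs to the onload driver at all, comes down to an fstat() and a device-number comparison. A stripped-down sketch of that classification step; DEV_MAJOR and fd_is_ours() are hypothetical names, not Onload ones.

/* Sketch: classify an fd by the character device backing it, in the
 * spirit of the oo_onloadfs_dev_t()/oo_get_st_rdev() checks above.
 * DEV_MAJOR is a hypothetical major number. */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

#define DEV_MAJOR 42u

/* Returns 1 if the fd is backed by "our" device, 0 if not, -1 if the fd
 * is bad (e.g. already closed). */
static int fd_is_ours(int fd)
{
  struct stat st;
  if( fstat(fd, &st) != 0 )
    return -1;
  return major(st.st_rdev) == DEV_MAJOR;
}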
Example #18
int citp_ep_close(unsigned fd)
{
  volatile citp_fdinfo_p* p_fdip;
  citp_fdinfo_p fdip;
  int rc, got_lock;
  citp_fdinfo* fdi;

  /* Do not touch shared fdtable when in vfork child. */
  if( oo_per_thread_get()->in_vfork_child )
    return ci_tcp_helper_close_no_trampoline(fd);

  /* Interlock against other closes, against the fdtable being extended,
  ** and against select and poll.
  */
  CITP_FDTABLE_LOCK();
  got_lock = 1;

  __citp_fdtable_extend(fd);

  if( fd >= citp_fdtable.inited_count ) {
    rc = ci_sys_close(fd);
    goto done;
  }

  p_fdip = &citp_fdtable.table[fd].fdip;
 again:
  fdip = *p_fdip;
  if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);

  if( fdip_is_closing(fdip) | fdip_is_reserved(fdip) ) {
    /* Concurrent close or attempt to close reserved. */
    Log_V(ci_log("%s: fd=%d closing=%d reserved=%d", __FUNCTION__, fd,
		 fdip_is_closing(fdip), fdip_is_reserved(fdip)));
    errno = EBADF;
    rc = -1;
    goto done;
  }

#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(fdip) ) {
    fdi = citp_fdtable_probe_locked(fd, CI_FALSE, CI_FALSE);
    if( fdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(fdi, CI_TRUE);
      errno = EBADF;
      rc = -1;
      goto done;
    }
    if( fdi )
      citp_fdinfo_release_ref(fdi, CI_TRUE);
  }
#endif

  ci_assert(fdip_is_normal(fdip) | fdip_is_passthru(fdip) |
	    fdip_is_unknown(fdip));

  /* Swap in the "closed" pseudo-fdinfo.  This lets any other thread know
  ** that we're in the middle of closing this fd.
  */
  if( fdip_cas_fail(p_fdip, fdip, fdip_closing) )
    goto again;

  if( fdip_is_normal(fdip) ) {
    fdi = fdip_to_fdi(fdip);

    CITP_FDTABLE_UNLOCK();
    got_lock = 0;

    if( fdi->is_special ) {
      Log_V(ci_log("%s: fd=%d is_special, returning EBADF", __FUNCTION__, fd));
      errno = EBADF;
      rc = -1;
      fdtable_swap(fd, fdip_closing, fdip, 0);
      goto done;
    }

    Log_V(ci_log("%s: fd=%d u/l socket", __FUNCTION__, fd));
    ci_assert_equal(fdi->fd, fd);
    ci_assert_equal(fdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    fdi->on_ref_count_zero = FDI_ON_RCZ_CLOSE;

    if( fdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(fdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, fdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }

    citp_fdinfo_release_ref(fdi, 0);
    rc = 0;
  }
  else {
    ci_assert(fdip_is_passthru(fdip) ||
	      fdip_is_unknown(fdip));
    if( ! fdtable_strict() ) {
      CITP_FDTABLE_UNLOCK();
      got_lock = 0;
    }
    Log_V(ci_log("%s: fd=%d passthru=%d unknown=%d", __FUNCTION__, fd,
		 fdip_is_passthru(fdip), fdip_is_unknown(fdip)));
    fdtable_swap(fd, fdip_closing, fdip_unknown, fdtable_strict());
    rc = ci_tcp_helper_close_no_trampoline(fd);
  }

 done:
  if( got_lock )  CITP_FDTABLE_UNLOCK();
  FDTABLE_ASSERT_VALID();
  return rc;
}
Example #19
int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags)
{
  volatile citp_fdinfo_p* p_tofdip;
  citp_fdinfo_p tofdip;
  unsigned max;

  Log_V(log("%s(%d, %d)", __FUNCTION__, fromfd, tofd));

  /* Must be checked by callers. */
  ci_assert(fromfd != tofd);

  /* Hack: if [tofd] is the fd we're using for logging, we'd better choose
  ** a different one!
  */
  if( tofd == citp.log_fd )  citp_log_change_fd();

  ci_assert(citp.init_level >= CITP_INIT_FDTABLE);

  max = CI_MAX(fromfd, tofd);
  if( max >= citp_fdtable.inited_count ) {
    ci_assert(max < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(max);
    CITP_FDTABLE_UNLOCK();
  }

  /* Bug1151: Concurrent threads doing dup2(x,y) and dup2(y,x) can deadlock
  ** against one another.  So we take out a fat lock to prevent concurrent
  ** dup2()s.
  */
  /* Lock tofd.  We need to interlock against select and poll etc, so we
  ** also grab the exclusive lock.  Also grab the bug1151 lock.
  */
  pthread_mutex_lock(&citp_dup_lock);
  CITP_FDTABLE_LOCK();
  p_tofdip = &citp_fdtable.table[tofd].fdip;
 lock_tofdip_again:
  tofdip = *p_tofdip;
  if( fdip_is_busy(tofdip) )
    tofdip = citp_fdtable_busy_wait(tofd, 1);
  if( fdip_is_closing(tofdip) )
    tofdip = citp_fdtable_closing_wait(tofd, 1);
  if( fdip_is_reserved(tofdip) ) {
    /* ?? FIXME: we can't cope with this at the moment */
    CITP_FDTABLE_UNLOCK();
    Log_U(log("%s(%d, %d): target is reserved", __FUNCTION__, fromfd, tofd));
    errno = EBUSY;
    tofd = -1;
    goto out;
  }
  if( fdip_cas_fail(p_tofdip, tofdip, fdip_busy) )
    goto lock_tofdip_again;
  CITP_FDTABLE_UNLOCK();
  ci_assert(fdip_is_normal(tofdip) | fdip_is_passthru(tofdip) |
 	    fdip_is_unknown(tofdip));

  if( fdip_is_normal(tofdip) ) {
    /* We're duping onto a user-level socket. */
    citp_fdinfo* tofdi = fdip_to_fdi(tofdip);
    if( tofdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(tofdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, tofdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }
    ci_assert_equal(tofdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    tofdi->on_ref_count_zero = FDI_ON_RCZ_DUP2;
    tofdi->on_rcz.dup3_args.fd = fromfd;
    tofdi->on_rcz.dup3_args.flags = flags;
    citp_fdinfo_release_ref(tofdi, 0);
    {
      int i = 0;
      /* We need to free this fdi.  If someone is using it right now,
       * we are in trouble.  So, we spin for a while and interrupt the
       * user.  See bug 28123. */
      while( tofdi->on_ref_count_zero != FDI_ON_RCZ_DONE ) {
        if( ci_is_multithreaded() && i % 10000 == 9999 ) {
          pthread_t pth = tofdi->thread_id;
          if( pth !=  pthread_self() && pth != PTHREAD_NULL ) {
            pthread_kill(pth, SIGONLOAD);
            sleep(1);
          }
        }
        ci_spinloop_pause();
        i++;
      }
      ci_rmb();
    }
    if( tofdi->on_rcz.dup2_result < 0 ) {
      errno = -tofdi->on_rcz.dup2_result;
      /* Need to re-insert [tofdi] into the table. */
      ci_assert_equal(oo_atomic_read(&tofdi->ref_count), 0);
      oo_atomic_set(&tofdi->ref_count, 1);
      CI_DEBUG(tofdi->on_ref_count_zero = FDI_ON_RCZ_NONE);
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else {
      ci_assert(tofdi->on_rcz.dup2_result == tofd);
      citp_fdinfo_get_ops(tofdi)->dtor(tofdi, 0);
      citp_fdinfo_free(tofdi);
    }
    goto out;
  }

  ci_assert(fdip_is_passthru(tofdip) | fdip_is_unknown(tofdip));

  { /* We're dupping onto an O/S descriptor, or it may be closed.  Create a
    ** dummy [citp_fdinfo], just so we can share code with the case above.
    */
    citp_fdinfo fdi;
    fdi.fd = tofd;
    fdi.on_rcz.dup3_args.fd = fromfd;
    fdi.on_rcz.dup3_args.flags = flags;
    dup2_complete(&fdi, tofdip, 0);
    if( fdi.on_rcz.dup2_result < 0 ) {
      errno = -fdi.on_rcz.dup2_result;
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else
      ci_assert(fdi.on_rcz.dup2_result == tofd);
  }

 out:
  pthread_mutex_unlock(&citp_dup_lock);
  return tofd;
}
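
The spin-and-signal loop above (see bug 28123 in the comment) works because delivering a signal to the thread that still holds the fdinfo forces any blocking system call it is sitting in to return with EINTR, letting it notice the state change. Below is a generic POSIX sketch of that mechanism, using SIGUSR1 as a stand-in for the library's private SIGONLOAD and installing the handler without SA_RESTART so interrupted calls are not transparently restarted.

/* Sketch (generic POSIX, not the SIGONLOAD plumbing): kick a specific
 * thread out of a blocking system call with pthread_kill(). */
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

#define WAKE_SIG SIGUSR1   /* stand-in for the library's private signal */

static void wake_handler(int sig) { (void) sig; /* no-op: EINTR is the point */ }

static void install_wake_handler(void)
{
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = wake_handler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = 0;             /* deliberately no SA_RESTART */
  sigaction(WAKE_SIG, &sa, NULL);
}

static void* blocked_thread_fn(void* arg)
{
  char buf[1];
  int fd = *(int*) arg;
  /* Blocks until data arrives or WAKE_SIG interrupts the read. */
  if( read(fd, buf, 1) < 0 && errno == EINTR )
    fprintf(stderr, "read interrupted; re-checking shared state\n");
  return NULL;
}

/* Waker side: pthread_kill(target_thread, WAKE_SIG) makes the read()
 * above fail with EINTR so the target can observe the updated state. */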
Example #20
/*
** Why do these live here?  Because they need to hack into the low-level
** dirty nastiness of the fdtable.
*/
int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg),
		long arg)
{
  /* This implements dup(oldfd) and fcntl(oldfd, F_DUPFD, arg). */

  volatile citp_fdinfo_p* p_oldfdip;
  citp_fdinfo_p oldfdip;
  citp_fdinfo* newfdi = 0;
  citp_fdinfo* oldfdi;
  int newfd;

  Log_V(log("%s(%d)", __FUNCTION__, oldfd));

  if(CI_UNLIKELY( citp.init_level < CITP_INIT_FDTABLE ||
                  oo_per_thread_get()->in_vfork_child ))
    /* Lib not initialised, so no U/L state, and therefore system dup()
    ** will do just fine. */
    return syscall(oldfd, arg);

  if( oldfd >= citp_fdtable.inited_count ) {
    /* NB. We can't just pass through in this case because we need to worry
    ** about other threads racing with us.  So we need to be able to lock
    ** this fd while we do the dup. */
    ci_assert(oldfd < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(oldfd);
    CITP_FDTABLE_UNLOCK();
  }

  p_oldfdip = &citp_fdtable.table[oldfd].fdip;
 again:
  oldfdip = *p_oldfdip;
  if( fdip_is_busy(oldfdip) )
    oldfdip = citp_fdtable_busy_wait(oldfd, 0);
  if( fdip_is_closing(oldfdip) | fdip_is_reserved(oldfdip) ) {
    errno = EBADF;
    return -1;
  }
#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(oldfdip) ) {
    CITP_FDTABLE_LOCK();
    oldfdi = citp_fdtable_probe_locked(oldfd, CI_FALSE, CI_FALSE);
    CITP_FDTABLE_UNLOCK();
    if( oldfdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
      errno = EBADF;
      return -1;
    }
    if( oldfdi )
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
  }
#endif
  if( fdip_cas_fail(p_oldfdip, oldfdip, fdip_busy) )
    goto again;

#if CI_CFG_FD_CACHING
  /* May end up with multiple refs to this, don't allow it to be cached. */
  if( fdip_is_normal(oldfdip) )
    fdip_to_fdi(oldfdip)->can_cache = 0;
#endif

  if( fdip_is_normal(oldfdip) &&
      (((oldfdi = fdip_to_fdi(oldfdip))->protocol->type) == CITP_EPOLL_FD) ) {
    newfdi = citp_fdinfo_get_ops(oldfdi)->dup(oldfdi);
    if( ! newfdi ) {
      citp_fdtable_busy_clear(oldfd, oldfdip, 0);
      errno = ENOMEM;
      return -1;
    }

    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    if( newfd >= 0 )
      citp_fdtable_new_fd_set(newfd, fdip_busy, fdtable_strict());
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
    if( newfd >= 0 ) {
      citp_fdtable_insert(newfdi, newfd, 0);
      newfdi = 0;
    }
  }
  else {
    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    if( newfd >= 0 && newfd < citp_fdtable.inited_count ) {
      /* Mark newfd as unknown.  When used, it'll get probed.
       *
       * We are not just being lazy here: Setting to unknown rather than
       * installing a proper fdi (when oldfd is accelerated) is essential to
       * vfork()+dup()+exec() working properly.  Reason is that child and
       * parent share address space, so child is modifying the parent's
       * fdtable.  Setting an entry to unknown is safe.
       */
      citp_fdtable_new_fd_set(newfd, fdip_unknown, fdtable_strict());
    }
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
  }

  citp_fdtable_busy_clear(oldfd, oldfdip, 0);
  if( newfdi )  citp_fdinfo_free(newfdi);
  return newfd;
}
Example #21
/*! Initialise - called on startup to save away any relevant current
**  environment variables.
** \return          0 for success, -1 for failure
*/
int citp_environ_init(void)
{
  char **env_ptr;
  size_t mem_needed = 0;
  unsigned int n;
  char *string_buf, *p;

  const char *lib_path = NULL;
  const char *ld_preload_value = NULL;

  env_ptr = __environ ? __environ : empty_env;
  saved_env_count = 0;
  while (*env_ptr != NULL) {
    if (is_our_env_var(*env_ptr)) {
      mem_needed += strlen(*env_ptr) + 1;
      saved_env_count++;
      if (env_is_ld_preload(*env_ptr))
        ld_preload_value = *env_ptr + 11;
    }
    /* temporary hack for djr */
    if (strcmp(*env_ptr, "EF_NO_PRELOAD_RESTORE=1") == 0) {
        Log_V(log("Environment restore disabled"));
        saved_env_count = 0;
        return 0;
    }
    /* end temporary hack */
    env_ptr++;
  }
  if (saved_env_count == 0) {
    Log_V(log("Invoked without LD_PRELOAD?  Environment restore disabled."));
    return 0;
  }

  /* Add ourself to LD_PRELOAD if we've been asked to. */
  if (getenv("EF_LD_PRELOAD")) {
    const char *full_path = citp_find_loaded_library();

    ci_assert(full_path);
    ci_assert(strrchr(full_path, '/'));
    lib_path = strrchr(full_path, '/') + 1;

    /* Correct LD_PRELOAD value should be the same as lib_path, or at least
     * start with lib_path+':'. */
    if ((!ld_preload_value ||
       (strlen(ld_preload_value) < strlen(lib_path) ||
       (strncmp(ld_preload_value, lib_path,
                strlen(lib_path)) != 0) ||
       ((ld_preload_value[strlen(lib_path)] != ':') &&
        (ld_preload_value[strlen(lib_path)] != '\0'))
      ))) {

      mem_needed += strlen(lib_path); /* Add our library */
      if (!ld_preload_value) {
        mem_needed += 12; /* Add "LD_PRELOAD=" line */
        saved_env_count++;
      }
      if (ld_preload_value && ld_preload_value[0] == '\0')
        ld_preload_value = NULL; /* Do not set ":" at the end */
      if (ld_preload_value)
        mem_needed++; /* Add ':' separator */
      Log_V(log("%s: LD_PRELOAD=\"%s\", but we are loaded as %s",
                __FUNCTION__, ld_preload_value ? : "", full_path));
    }
    else
Example #22
/* we don't register protocol impl */
int citp_pipe_create(int fds[2], int flags)
{
  citp_pipe_fdi* epi_read;
  citp_pipe_fdi* epi_write;
  struct oo_pipe* p = NULL;         /* make compiler happy */
  ci_netif* ni;
  int rc = -1;
  ef_driver_handle fd = -1;

  Log_V(log(LPF "pipe()"));

  /* citp_netif_exists() does not need citp_ul_lock here */
  if( CITP_OPTS.ul_pipe == CI_UNIX_PIPE_ACCELERATE_IF_NETIF &&
      ! citp_netif_exists() ) {
    return CITP_NOT_HANDLED;
  }

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail */
      return CITP_NOT_HANDLED;
    }
    /* may be lib mismatch - errno will be ELIBACC */
    goto fail1;
  }
  rc = -1;

  CI_MAGIC_CHECK(ni, NETIF_MAGIC);

  /* add another reference as we have 2 fdis */
  citp_netif_add_ref(ni);

  epi_read = citp_pipe_epi_alloc(ni, O_RDONLY);
  if( epi_read == NULL )
    goto fail2;
  epi_write = citp_pipe_epi_alloc(ni, O_WRONLY);
  if( epi_write == NULL )
    goto fail3;

  /* oo_pipe init code */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  rc = oo_pipe_ctor(ni, &p, fds, flags);
  if( rc < 0 )
      goto fail4;
  citp_fdtable_new_fd_set(fds[0], fdip_busy, fdtable_strict());
  citp_fdtable_new_fd_set(fds[1], fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  LOG_PIPE("%s: pipe=%p id=%d", __FUNCTION__, p, p->b.bufid);

  /* as pipe is created it should be attached to the end-points */
  epi_read->pipe = p;
  epi_write->pipe = p;

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi_read->pipe->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_assert(epi_write->pipe->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi_read->pipe->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi_write->pipe->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(&epi_read->fdinfo, fds[0], 0);
  citp_fdtable_insert(&epi_write->fdinfo, fds[1], 0);

  CI_MAGIC_CHECK(ni, NETIF_MAGIC);

  return 0;

fail4:
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
fail3:
  CI_FREE_OBJ(epi_write);
fail2:
  CI_FREE_OBJ(epi_read);
  citp_netif_release_ref(ni, 0);
  citp_netif_release_ref(ni, 0);
fail1:
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno));
    return CITP_NOT_HANDLED;
  }

  return rc;
}
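
Both the pipe path above and the UDP socket path below follow the same publication order: reserve the fdtable slot as busy, finish constructing the endpoint, clear CI_SB_AFLAG_NOT_READY, and only then insert the fdinfo so other threads can find it. A condensed sketch of that ordering with generic C11 atomics; the struct, flag and table names are illustrative, not Onload's.

/* Sketch: construct fully, clear the not-ready flag with release
 * semantics, then publish into the table.  Illustrative names only. */
#include <stdatomic.h>

#define FLAG_NOT_READY 0x1u

struct endpoint {
  atomic_uint flags;   /* created with FLAG_NOT_READY set */
  /* ... endpoint state initialised while still not ready ... */
};

static _Atomic(struct endpoint*) fd_table[1024];

static void publish_endpoint(int fd, struct endpoint* ep)
{
  /* Readers that see the cleared flag (or the table entry) must also see
   * the fully initialised endpoint state, hence the release ordering. */
  atomic_fetch_and_explicit(&ep->flags, ~FLAG_NOT_READY,
                            memory_order_release);
  atomic_store_explicit(&fd_table[fd], ep, memory_order_release);
}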
Example #23
static int citp_udp_socket(int domain, int type, int protocol)
{
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ef_driver_handle fd;
  int rc;
  ci_netif* ni;

  Log_V(log(LPF "socket(%d, %d, %d)", domain, type, protocol));

  epi = CI_ALLOC_OBJ(citp_sock_fdi);
  if( ! epi ) {
    Log_U(ci_log(LPF "socket: failed to allocate epi"));
    errno = ENOMEM;
    goto fail1;
  }
  fdi = &epi->fdinfo;
  citp_fdinfo_init(fdi, &citp_udp_protocol_impl);

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail */
      CI_FREE_OBJ(epi);
      return rc;
    }
    goto fail2;
  }

  /* Protect the fdtable entry until we're done initialising. */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  if((fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0) {
    /*! ?? \TODO unpick the ci_udp_ep_ctor according to how failed */
    Log_U(ci_log(LPF "socket: udp_ep_ctor failed"));
    errno = -fd;
    goto fail3;
  }

  citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  CI_DEBUG(epi->sock.s->pid = getpid());

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi->sock.s->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(fdi, fd, 0);

  Log_VSS(log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, type, protocol,
              EF_PRI_ARGS(epi,fd)));
  return fd;

 fail3:
  if( CITP_OPTS.no_fail && errno != ELIBACC )
    CITP_STATS_NETIF(++ni->state->stats.udp_handover_socket);
  citp_netif_release_ref(ni, 0);
 fail2:
  CI_FREE_OBJ(epi);
 fail1:
  /* BUG1408: Graceful failure. We'll only fail outright if there's a
   * driver/library mismatch */
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno));
    return CI_SOCKET_HANDOVER;
  }
  return -1;
}