Ejemplo n.º 1
0
/* Looks up the user-level 'FD info' for a given file descriptor.
** Returns pointer to the user-level 'FD info' for a given file
** descriptor, or NULL if the FD is not user-level.
** NOTE: The reference count of the 'FD info' is incremented, the
**       caller should ensure the reference is dropped when no
**       longer needed by calling citp_fdinfo_release_ref().
*/
citp_fdinfo* citp_fdtable_lookup_noprobe(unsigned fd)
{
  /* Need to be initialised before we can try and grab the lock at the
  ** moment.  TODO: make this more efficient by using a trylock to grab the
  ** fdtable lock, and on fail see if we need to initialise it.
  */
  if( CI_UNLIKELY(citp.init_level < CITP_INIT_FDTABLE) ) {
    if (_citp_do_init_inprogress == 0)
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */

    return NULL;
  }

  if( fd < citp_fdtable.inited_count ) {

    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    /* Swap in the busy marker. */
    fdip = *p_fdip;
    if( fdip_is_normal(fdip) ) {
      if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
	/* Bump the reference count. */
	citp_fdinfo* fdi = fdip_to_fdi(fdip);
	citp_fdinfo_ref(fdi);
	/* Swap the busy marker out again. */
	citp_fdtable_busy_clear(fd, fdip, 0);
        return fdi;
      }
      goto again;
    }
    /* Not normal! */
    else if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

  }

  return NULL;
}
Ejemplo n.º 2
0
/* This function does some simple tests to ensure that the fdtable makes sense.
 * There are many more tests we could do; feel free to add them at your
 * leisure!
 */
void
citp_fdtable_assert_valid(void)
{
  int i;

  if( ! citp_fdtable.table )  return;

  CITP_FDTABLE_LOCK_RD();

  for( i = 0; i < citp_fdtable.inited_count; i++ ) {
    citp_fdinfo_p fdip = citp_fdtable.table[i].fdip;

    if( fdip_is_normal(fdip) ) {
      citp_fdinfo * fdi = fdip_to_fdi(fdip);

      ci_assert(fdi);
      ci_assert(fdi->protocol);
      if( ( fdi->protocol->type == CITP_TCP_SOCKET ||
            fdi->protocol->type == CITP_UDP_SOCKET )
          && fdi_to_socket(fdi)->s )
	ci_assert(! (fdi_to_socket(fdi)->s->b.sb_aflags & CI_SB_AFLAG_ORPHAN));

      if (!fdi->is_special) {
        /* Ensure the "back pointer" makes sense */
        ci_assert (fdi->fd == i);
        /* Ensure that the reference count is in a vaguely sensible range */
        ci_assert ((oo_atomic_read (&fdi->ref_count) > 0) &&
                   (oo_atomic_read (&fdi->ref_count) < 10000));

        /* 10,000 threads is a bit mad, warn if more than 20 */
        if (oo_atomic_read (&fdi->ref_count) > 20) {
          Log_U (log ("Warning: fd %d's ref-count suspiciously large (%d)\n",
                      i, oo_atomic_read (&fdi->ref_count)));
        }
      }
    }
  }

  CITP_FDTABLE_UNLOCK_RD();
}
Ejemplo n.º 3
0
citp_fdinfo*
citp_fdtable_lookup_fast(citp_lib_context_t* ctx, unsigned fd)
{
  /* Note that if we haven't yet initialised this module, then
  ** [inited_count] will be zero, and the following test will fail.  So the
  ** test for initialisation is done further down...
  **
  ** This is highly performance critial.  DO NOT add any code between here
  ** and the first [return] statement.
  */
  citp_fdinfo* fdi;

  /* Try to avoid entering lib. */
  ctx->thread = NULL;

  if( fd < citp_fdtable.inited_count ) {
    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    fdip = *p_fdip;
    if( fdip_is_normal(fdip) ) {

      citp_enter_lib_if(ctx);
      if( citp_fdtable_is_mt_safe() ) {
	/* No need to use atomic ops or add a ref to the fdi when MT-safe.
         * The definition of "fds_mt_safe" is that the app does not change
         * the meaning of a file descriptor in one thread when it is being
         * used in another thread.
         */
        fdi = fdip_to_fdi(fdip);
        if( ! citp_fdinfo_is_consistent(fdi) )
          fdi = citp_reprobe_moved(fdi, CI_TRUE, CI_FALSE);

	return fdi;
      }
      else {
        /* Swap in the busy marker. */
	if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
	  fdi = fdip_to_fdi(fdip);

	  ci_assert(fdi);
	  ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0);
	  ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) ||
		    fdi->fd == fd);
	  /* Bump the reference count. */
	  citp_fdinfo_ref(fdi);

          if( ! citp_fdinfo_is_consistent(fdi) )
            fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE);
          else {
            /* Swap the busy marker out again. */
            citp_fdtable_busy_clear(fd, fdip, 0);
          }
	  return fdi;
	}
	goto again;
      }
    }

    /* Not normal! */
    if( fdip_is_passthru(fdip) )
      return NULL;

    citp_enter_lib_if(ctx);
    if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

    ci_assert(fdip_is_unknown(fdip));
    goto probe;
  }

  if( citp.init_level < CITP_INIT_FDTABLE ) {
    if( _citp_do_init_inprogress == 0 )
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */
  }

  if( fd >= citp_fdtable.size )
    return NULL;

 probe:
  citp_enter_lib_if(ctx);
  fdi = citp_fdtable_probe(fd);
  if( fdi && citp_fdtable_is_mt_safe() )
    citp_fdinfo_release_ref(fdi, 0);
  return fdi;
}
Ejemplo n.º 4
0
citp_fdinfo *
citp_fdtable_lookup(unsigned fd)
{
  /* Note that if we haven't yet initialised this module, then
  ** [inited_count] will be zero, and the following test will fail.  So the
  ** test for initialisation is done further down...
  **
  ** This is highly performance critial.  DO NOT add any code between here
  ** and the first [return] statement.
  */
  citp_fdinfo* fdi;

  /* In some cases, we'll lock fdtable.  Assert that it is possible: */
  ci_assert(oo_per_thread_get()->sig.inside_lib);

  if( fd < citp_fdtable.inited_count ) {

    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    /* Swap in the busy marker. */
    fdip = *p_fdip;

    if( fdip_is_normal(fdip) ) {
      if( citp_fdtable_not_mt_safe() ) {
	if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
	  fdi = fdip_to_fdi(fdip);
	  ci_assert(fdi);
	  ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0);
	  ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) ||
		    fdi->fd == fd);
	  /* Bump the reference count. */
	  citp_fdinfo_ref(fdi);

          if( ! citp_fdinfo_is_consistent(fdi) ) {
            /* Something is wrong.  Re-probe. */
            fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE);
          }
          else {
            /* Swap the busy marker out again. */
            citp_fdtable_busy_clear(fd, fdip, 0);
          }
	  return fdi;
	}
	goto again;
      }
      else {
	/* No need to use atomic ops when single-threaded.  The definition
         * of "fds_mt_safe" is that the app does not change the meaning of
         * a file descriptor in one thread when it is being used in another
         * thread.  In that case I'm hoping this should be safe, but at
         * time of writing I'm really not confident.  (FIXME).
         */
	fdi = fdip_to_fdi(fdip);
        if( ci_is_multithreaded() )
	  citp_fdinfo_ref(fdi);
        else
          ++fdi->ref_count.n;

        if( ! citp_fdinfo_is_consistent(fdi) )
          fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_FALSE);

	return fdi;
      }
    }

    /* Not normal! */
    if( fdip_is_passthru(fdip) )  return NULL;

    if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

    ci_assert(fdip_is_unknown(fdip));
    goto probe;
  }

  if (citp.init_level < CITP_INIT_FDTABLE) {
    if (_citp_do_init_inprogress == 0)
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */
  }

  if( fd >= citp_fdtable.size )  return NULL;

 probe:
  fdi = citp_fdtable_probe(fd);

  return fdi;
}
Ejemplo n.º 5
0
/* Find out what sort of thing [fd] is, and if it is a user-level socket
 * then map in the user-level state.
 */
static citp_fdinfo * citp_fdtable_probe_locked(unsigned fd, int print_banner,
                                               int fdip_is_already_busy)
{
  citp_fdinfo* fdi = NULL;
  struct stat64 st;
  ci_ep_info_t info;


  if( ! fdip_is_already_busy ) {
    volatile citp_fdinfo_p* p_fdip;
    citp_fdinfo_p fdip;
    /* ?? We're repeating some effort already expended in lookup() here, but
    ** this keeps it cleaner.  May optimise down the line when I understand
    ** what other code needs to call this.
    */
    
    p_fdip = &citp_fdtable.table[fd].fdip;
   again:
    fdip = *p_fdip;
    if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);
    if( ! fdip_is_unknown(fdip) && ! fdip_is_normal(fdip) )  goto exit;
    if( fdip_cas_fail(p_fdip, fdip, fdip_busy) )  goto again;
    
    if( fdip_is_normal(fdip) ) {
      fdi = fdip_to_fdi(fdip);
      citp_fdinfo_ref(fdi);
      citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }
  }

  if( ci_sys_fstat64(fd, &st) != 0 ) {
    /* fstat() failed.  Must be a bad (closed) file descriptor, so
    ** leave this entry as unknown.  Return citp_the_closed_fd to avoid the
    ** caller passing through to an fd that is created asynchronously.
    */
    citp_fdtable_busy_clear(fd, fdip_unknown, 1);
    fdi = &citp_the_closed_fd;
    citp_fdinfo_ref(fdi);
    goto exit;
  }

  /* oo_get_st_rdev() and oo_onloadfs_dev_t() open-and-close fd, so
   * fdtable should be locked if strict mode requested. */
  if( fdtable_strict() )  { CITP_FDTABLE_ASSERT_LOCKED(1); }

  if(  st.st_dev == oo_onloadfs_dev_t() ) {
    /* Retrieve user-level endpoint info */
    if( oo_ep_info(fd, &info) < 0 ) {
      Log_V(log("%s: fd=%d type=%d unknown", __FUNCTION__,fd,info.fd_type));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }

    switch( info.fd_type ) {
    case CI_PRIV_TYPE_TCP_EP:
    case CI_PRIV_TYPE_UDP_EP:
    case CI_PRIV_TYPE_PASSTHROUGH_EP:
    case CI_PRIV_TYPE_ALIEN_EP:
#if CI_CFG_USERSPACE_PIPE
    case CI_PRIV_TYPE_PIPE_READER:
    case CI_PRIV_TYPE_PIPE_WRITER:
#endif
    {
      citp_fdinfo_p fdip;

      Log_V(log("%s: fd=%d %s restore", __FUNCTION__, fd,
		info.fd_type == CI_PRIV_TYPE_TCP_EP ? "TCP":
#if CI_CFG_USERSPACE_PIPE
                info.fd_type != CI_PRIV_TYPE_UDP_EP ? "PIPE" :
#endif
                "UDP"));
      fdip = citp_fdtable_probe_restore(fd, &info, print_banner);
      if( fdip_is_normal(fdip) )
        fdi = fdip_to_fdi(fdip);
      else
        citp_fdtable_busy_clear(fd, fdip, 1);
      goto exit;
    }

    case CI_PRIV_TYPE_NETIF:
      /* This should never happen, because netif fds are close-on-exec.
      ** But let's leave this code here just in case my reasoning is bad.
      */
      Log_U(log("%s: fd=%d NETIF reserved", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_reserved, 1);
      fdi = &citp_the_reserved_fd;
      citp_fdinfo_ref(fdi);
      goto exit;

    case CI_PRIV_TYPE_NONE:
      /* This happens if a thread gets at an onload driver fd that has just
       * been created, but not yet specialised.  On Linux I think this
       * means it will shortly be a new netif internal fd.  (fds associated
       * with sockets and pipes are never unspecialised).
       */
      Log_V(log("%s: fd=%d TYPE_NONE", __FUNCTION__, fd));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;

    default:
      CI_TEST(0);
      break;
    }
  }
  else if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_EPOLL_DEV)) ) {
    citp_epollb_fdi *epi = CI_ALLOC_OBJ(citp_epollb_fdi);
    if( ! epi ) {
      Log_E(log("%s: out of memory (epoll_fdi)", __FUNCTION__));
      citp_fdtable_busy_clear(fd, fdip_passthru, 1);
      goto exit;
    }
    oo_epollb_ctor(epi);
    fdi = &epi->fdinfo;
    citp_fdinfo_init(fdi, &citp_epollb_protocol_impl);
    citp_fdinfo_ref(fdi);
    citp_fdtable_insert(fdi, fd, 1);
    goto exit;
  }

#ifndef NDEBUG
  /* /dev/onload may be netif only; they are closed on fork or exec */
  if( ci_major(st.st_rdev) == ci_major(oo_get_st_rdev(OO_STACK_DEV)) )
    Log_U(log("%s: %d is /dev/onload", __FUNCTION__, fd));
#endif

  /* Not one of ours, so pass-through. */
  Log_V(log("%s: fd=%u non-efab", __FUNCTION__, fd));
  citp_fdtable_busy_clear(fd, fdip_passthru, 1);

 exit:
  return fdi;

}
Ejemplo n.º 6
0
/* Re-probe fdinfo after endpoint was moved to another stack.
 * The function assumes that fdinfo was obtained via citp_fdtable_lookup()
 * or from citp_fdtable_lookup_fast(). */
citp_fdinfo* citp_reprobe_moved(citp_fdinfo* fdinfo, int from_fast_lookup,
                                int fdip_is_already_busy)
{
  int fd = fdinfo->fd;
  citp_fdinfo* new_fdinfo = NULL;

  CITP_FDTABLE_LOCK();

  if( ! fdip_is_already_busy ) {
    volatile citp_fdinfo_p* p_fdip;
    citp_fdinfo_p fdip;
    
    p_fdip = &citp_fdtable.table[fd].fdip;
   again:
    fdip = *p_fdip;
    if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);
    ci_assert( fdip_is_normal(fdip) || fdip_is_passthru(fdip) );
    if( fdip_cas_fail(p_fdip, fdip, fdip_busy) )  goto again;
    
    /* Possibly, a parrallel thread have already called
     * citp_reprobe_moved() for us. */
    if( fdip_is_passthru(fdip) ) {
      citp_fdtable_busy_clear(fd, fdip, 1);
      if( new_fdinfo != NULL )
        citp_fdinfo_ref(new_fdinfo);
      goto done;
    }
    ci_assert( fdip_is_normal(fdip) );
    new_fdinfo = fdip_to_fdi(fdip);
    if( new_fdinfo != fdinfo) {
      citp_fdtable_busy_clear(fd, fdip, 1);
      if( new_fdinfo != NULL )
        citp_fdinfo_ref(new_fdinfo);
      goto done;
    }
  }
  else
    ci_assert(fdip_is_busy(citp_fdtable.table[fd].fdip));

  /* re-probe new fd */
  new_fdinfo = citp_fdtable_probe_locked(fd, CI_TRUE, CI_TRUE);

  if( fdinfo->epoll_fd >= 0 ) {
    citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(fdinfo, 1);
    if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
      citp_epoll_on_move(epoll_fdi, fdinfo, new_fdinfo, 1);
    else
      citp_epollb_on_handover(epoll_fdi, fdinfo);
    citp_fdinfo_release_ref(epoll_fdi, 1);
  }

  /* Drop refcount from fdtable */
  fdinfo->on_ref_count_zero = FDI_ON_RCZ_MOVED;
  citp_fdinfo_release_ref(fdinfo, 1);

 done:
  /* One refcount from the caller */
  if( from_fast_lookup )
    citp_fdinfo_release_ref_fast(fdinfo);
  else
    citp_fdinfo_release_ref(fdinfo, 1);

  CITP_FDTABLE_UNLOCK();
  if( new_fdinfo == NULL )
    return NULL;

  if( from_fast_lookup ) {
    citp_fdinfo_ref_fast(new_fdinfo);
    citp_fdinfo_release_ref(new_fdinfo, 0);
  }

  return new_fdinfo;
}
Ejemplo n.º 7
0
int citp_ep_close(unsigned fd)
{
  volatile citp_fdinfo_p* p_fdip;
  citp_fdinfo_p fdip;
  int rc, got_lock;
  citp_fdinfo* fdi;

  /* Do not touch shared fdtable when in vfork child. */
  if( oo_per_thread_get()->in_vfork_child )
    return ci_tcp_helper_close_no_trampoline(fd);

  /* Interlock against other closes, against the fdtable being extended,
  ** and against select and poll.
  */
  CITP_FDTABLE_LOCK();
  got_lock = 1;

  __citp_fdtable_extend(fd);

  if( fd >= citp_fdtable.inited_count ) {
    rc = ci_sys_close(fd);
    goto done;
  }

  p_fdip = &citp_fdtable.table[fd].fdip;
 again:
  fdip = *p_fdip;
  if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);

  if( fdip_is_closing(fdip) | fdip_is_reserved(fdip) ) {
    /* Concurrent close or attempt to close reserved. */
    Log_V(ci_log("%s: fd=%d closing=%d reserved=%d", __FUNCTION__, fd,
		 fdip_is_closing(fdip), fdip_is_reserved(fdip)));
    errno = EBADF;
    rc = -1;
    goto done;
  }

#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(fdip) ) {
    fdi = citp_fdtable_probe_locked(fd, CI_FALSE, CI_FALSE);
    if( fdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(fdi, CI_TRUE);
      errno = EBADF;
      rc = -1;
      goto done;
    }
    if( fdi )
      citp_fdinfo_release_ref(fdi, CI_TRUE);
  }
#endif

  ci_assert(fdip_is_normal(fdip) | fdip_is_passthru(fdip) |
	    fdip_is_unknown(fdip));

  /* Swap in the "closed" pseudo-fdinfo.  This lets any other thread know
  ** that we're in the middle of closing this fd.
  */
  if( fdip_cas_fail(p_fdip, fdip, fdip_closing) )
    goto again;

  if( fdip_is_normal(fdip) ) {
    fdi = fdip_to_fdi(fdip);

    CITP_FDTABLE_UNLOCK();
    got_lock = 0;

    if( fdi->is_special ) {
      Log_V(ci_log("%s: fd=%d is_special, returning EBADF", __FUNCTION__, fd));
      errno = EBADF;
      rc = -1;
      fdtable_swap(fd, fdip_closing, fdip, 0);
      goto done;
    }

    Log_V(ci_log("%s: fd=%d u/l socket", __FUNCTION__, fd));
    ci_assert_equal(fdi->fd, fd);
    ci_assert_equal(fdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    fdi->on_ref_count_zero = FDI_ON_RCZ_CLOSE;

    if( fdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(fdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, fdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }

    citp_fdinfo_release_ref(fdi, 0);
    rc = 0;
  }
  else {
    ci_assert(fdip_is_passthru(fdip) ||
	      fdip_is_unknown(fdip));
    if( ! fdtable_strict() ) {
      CITP_FDTABLE_UNLOCK();
      got_lock = 0;
    }
    Log_V(ci_log("%s: fd=%d passthru=%d unknown=%d", __FUNCTION__, fd,
		 fdip_is_passthru(fdip), fdip_is_unknown(fdip)));
    fdtable_swap(fd, fdip_closing, fdip_unknown, fdtable_strict());
    rc = ci_tcp_helper_close_no_trampoline(fd);
  }

 done:
  if( got_lock )  CITP_FDTABLE_UNLOCK();
  FDTABLE_ASSERT_VALID();
  return rc;
}
Ejemplo n.º 8
0
int citp_ep_dup3(unsigned fromfd, unsigned tofd, int flags)
{
  volatile citp_fdinfo_p* p_tofdip;
  citp_fdinfo_p tofdip;
  unsigned max;

  Log_V(log("%s(%d, %d)", __FUNCTION__, fromfd, tofd));

  /* Must be checked by callers. */
  ci_assert(fromfd != tofd);

  /* Hack: if [tofd] is the fd we're using for logging, we'd better choose
  ** a different one!
  */
  if( tofd == citp.log_fd )  citp_log_change_fd();

  ci_assert(citp.init_level >= CITP_INIT_FDTABLE);

  max = CI_MAX(fromfd, tofd);
  if( max >= citp_fdtable.inited_count ) {
    ci_assert(max < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(max);
    CITP_FDTABLE_UNLOCK();
  }

  /* Bug1151: Concurrent threads doing dup2(x,y) and dup2(y,x) can deadlock
  ** against one another.  So we take out a fat lock to prevent concurrent
  ** dup2()s.
  */
  /* Lock tofd.  We need to interlock against select and poll etc, so we
  ** also grab the exclusive lock.  Also grab the bug1151 lock.
  */
  pthread_mutex_lock(&citp_dup_lock);
  CITP_FDTABLE_LOCK();
  p_tofdip = &citp_fdtable.table[tofd].fdip;
 lock_tofdip_again:
  tofdip = *p_tofdip;
  if( fdip_is_busy(tofdip) )
    tofdip = citp_fdtable_busy_wait(tofd, 1);
  if( fdip_is_closing(tofdip) )
    tofdip = citp_fdtable_closing_wait(tofd, 1);
  if( fdip_is_reserved(tofdip) ) {
    /* ?? FIXME: we can't cope with this at the moment */
    CITP_FDTABLE_UNLOCK();
    Log_U(log("%s(%d, %d): target is reserved", __FUNCTION__, fromfd, tofd));
    errno = EBUSY;
    tofd = -1;
    goto out;
  }
  if( fdip_cas_fail(p_tofdip, tofdip, fdip_busy) )
    goto lock_tofdip_again;
  CITP_FDTABLE_UNLOCK();
  ci_assert(fdip_is_normal(tofdip) | fdip_is_passthru(tofdip) |
 	    fdip_is_unknown(tofdip));

  if( fdip_is_normal(tofdip) ) {
    /* We're duping onto a user-level socket. */
    citp_fdinfo* tofdi = fdip_to_fdi(tofdip);
    if( tofdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(tofdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, tofdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }
    ci_assert_equal(tofdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    tofdi->on_ref_count_zero = FDI_ON_RCZ_DUP2;
    tofdi->on_rcz.dup3_args.fd = fromfd;
    tofdi->on_rcz.dup3_args.flags = flags;
    citp_fdinfo_release_ref(tofdi, 0);
    {
      int i = 0;
      /* We need to free this fdi.  If someone is using it right now,
       * we are in trouble.  So, we spin for a while and interrupt the
       * user.  See bug 28123. */
      while( tofdi->on_ref_count_zero != FDI_ON_RCZ_DONE ) {
        if( ci_is_multithreaded() && i % 10000 == 9999 ) {
          pthread_t pth = tofdi->thread_id;
          if( pth !=  pthread_self() && pth != PTHREAD_NULL ) {
            pthread_kill(pth, SIGONLOAD);
            sleep(1);
          }
        }
        ci_spinloop_pause();
        i++;
      }
      ci_rmb();
    }
    if( tofdi->on_rcz.dup2_result < 0 ) {
      errno = -tofdi->on_rcz.dup2_result;
      /* Need to re-insert [tofdi] into the table. */
      ci_assert_equal(oo_atomic_read(&tofdi->ref_count), 0);
      oo_atomic_set(&tofdi->ref_count, 1);
      CI_DEBUG(tofdi->on_ref_count_zero = FDI_ON_RCZ_NONE);
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else {
      ci_assert(tofdi->on_rcz.dup2_result == tofd);
      citp_fdinfo_get_ops(tofdi)->dtor(tofdi, 0);
      citp_fdinfo_free(tofdi);
    }
    goto out;
  }

  ci_assert(fdip_is_passthru(tofdip) | fdip_is_unknown(tofdip));

  { /* We're dupping onto an O/S descriptor, or it may be closed.  Create a
    ** dummy [citp_fdinfo], just so we can share code with the case above.
    */
    citp_fdinfo fdi;
    fdi.fd = tofd;
    fdi.on_rcz.dup3_args.fd = fromfd;
    fdi.on_rcz.dup3_args.flags = flags;
    dup2_complete(&fdi, tofdip, 0);
    if( fdi.on_rcz.dup2_result < 0 ) {
      errno = -fdi.on_rcz.dup2_result;
      citp_fdtable_busy_clear(tofd, tofdip, 0);
      tofd = -1;
    }
    else
      ci_assert(fdi.on_rcz.dup2_result == tofd);
  }

 out:
  pthread_mutex_unlock(&citp_dup_lock);
  return tofd;
}
Ejemplo n.º 9
0
static void dup2_complete(citp_fdinfo* prev_tofdi,
			  citp_fdinfo_p prev_tofdip, int fdt_locked)
{
  volatile citp_fdinfo_p *p_fromfdip;
  unsigned fromfd = prev_tofdi->on_rcz.dup3_args.fd;
  unsigned tofd = prev_tofdi->fd;
  citp_fdinfo_p fromfdip;
  int rc;
#if CI_LIBC_HAS_dup3 || !defined(NDEBUG)
  int flags = prev_tofdi->on_rcz.dup3_args.flags;
#endif

#ifndef NDEBUG
  volatile citp_fdinfo_p* p_tofdip;
  p_tofdip = &citp_fdtable.table[tofd].fdip;
  ci_assert(fdip_is_busy(*p_tofdip));
#endif
  citp_fdinfo* fromfdi;

  p_fromfdip = &citp_fdtable.table[fromfd].fdip;
 lock_fromfdip_again:
  fromfdip = *p_fromfdip;
  if( fdip_is_busy(fromfdip) )
    fromfdip = citp_fdtable_busy_wait(fromfd, fdt_locked);
  if( fdip_is_closing(fromfdip) | fdip_is_reserved(fromfdip) ) {
    prev_tofdi->on_rcz.dup2_result = -EBADF;
    ci_wmb();
    prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE;
    return;
  }
#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(fromfdip) ) {
    if( !fdt_locked ) CITP_FDTABLE_LOCK();
    fromfdi = citp_fdtable_probe_locked(fromfd, CI_FALSE, CI_FALSE);
    if( !fdt_locked ) CITP_FDTABLE_UNLOCK();
    if( fromfdi == &citp_the_closed_fd ) {
      prev_tofdi->on_rcz.dup2_result = -EBADF;
      ci_wmb();
      prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE;
      citp_fdinfo_release_ref(fromfdi, CI_TRUE);
      return;
    }
    if( fromfdi )
      citp_fdinfo_release_ref(fromfdi, CI_TRUE);
  }
#endif
  if( fdip_cas_fail(p_fromfdip, fromfdip, fdip_busy) )
    goto lock_fromfdip_again;

  oo_rwlock_lock_write(&citp_dup2_lock);
#if CI_LIBC_HAS_dup3
  rc = ci_sys_dup3(fromfd, tofd, flags);
#else
  ci_assert_equal(flags, 0);
  rc = ci_sys_dup2(fromfd, tofd);
#endif
  oo_rwlock_unlock_write(&citp_dup2_lock);
  if( rc < 0 ) {
    citp_fdtable_busy_clear(fromfd, fromfdip, fdt_locked);
    prev_tofdi->on_rcz.dup2_result = -errno;
    ci_wmb();
    prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE;
    return;
  }

  ci_assert(fdip_is_normal(fromfdip) | fdip_is_passthru(fromfdip) |
	    fdip_is_unknown(fromfdip));

  if( fdip_is_normal(fromfdip) &&
     (((fromfdi = fdip_to_fdi(fromfdip))->protocol->type) == CITP_EPOLL_FD) ) {
    citp_fdinfo* newfdi = citp_fdinfo_get_ops(fromfdi)->dup(fromfdi);
    if( newfdi ) {
      citp_fdinfo_init(newfdi, fdip_to_fdi(fromfdip)->protocol);
      citp_fdtable_insert(newfdi, tofd, fdt_locked);
    }
    else {
      /* Out of memory.  Can't probe epoll1 fd later on, so fail. */
      citp_fdtable_busy_clear(fromfd, fromfdip, fdt_locked);
      prev_tofdi->on_rcz.dup2_result = -ENOMEM;
      ci_wmb();
      prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE;
      return;
    }
  }
  else {
    /* Mark newfd as unknown.  When used, it'll get probed.
     *
     * We are not just being lazy here: Setting to unknown rather than
     * installing a proper fdi (when oldfd is accelerated) is essential to
     * vfork()+dup2()+exec() working properly.  Reason is that child and
     * parent share address space, so child is modifying the parent's
     * fdtable.  Setting an entry to unknown is safe.
     */
    citp_fdtable_busy_clear(tofd, fdip_unknown, fdt_locked);

#if CI_CFG_FD_CACHING
    /* Multiple refs to this now, don't allow it to be cached. */
    if( fdip_is_normal(fromfdip) )
      fdip_to_fdi(fromfdip)->can_cache = 0;
#endif
  }

  citp_fdtable_busy_clear(fromfd, fromfdip, fdt_locked);
  prev_tofdi->on_rcz.dup2_result = tofd;
  ci_wmb();
  prev_tofdi->on_ref_count_zero = FDI_ON_RCZ_DONE;
}
Ejemplo n.º 10
0
/*
** Why do these live here?  Because they need to hack into the low-level
** dirty nastiness of the fdtable.
*/
int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg),
		long arg)
{
  /* This implements dup(oldfd) and fcntl(oldfd, F_DUPFD, arg). */

  volatile citp_fdinfo_p* p_oldfdip;
  citp_fdinfo_p oldfdip;
  citp_fdinfo* newfdi = 0;
  citp_fdinfo* oldfdi;
  int newfd;

  Log_V(log("%s(%d)", __FUNCTION__, oldfd));

  if(CI_UNLIKELY( citp.init_level < CITP_INIT_FDTABLE ||
                  oo_per_thread_get()->in_vfork_child ))
    /* Lib not initialised, so no U/L state, and therefore system dup()
    ** will do just fine. */
    return syscall(oldfd, arg);

  if( oldfd >= citp_fdtable.inited_count ) {
    /* NB. We can't just pass through in this case because we need to worry
    ** about other threads racing with us.  So we need to be able to lock
    ** this fd while we do the dup. */
    ci_assert(oldfd < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(oldfd);
    CITP_FDTABLE_UNLOCK();
  }

  p_oldfdip = &citp_fdtable.table[oldfd].fdip;
 again:
  oldfdip = *p_oldfdip;
  if( fdip_is_busy(oldfdip) )
    oldfdip = citp_fdtable_busy_wait(oldfd, 0);
  if( fdip_is_closing(oldfdip) | fdip_is_reserved(oldfdip) ) {
    errno = EBADF;
    return -1;
  }
#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(oldfdip) ) {
    CITP_FDTABLE_LOCK();
    oldfdi = citp_fdtable_probe_locked(oldfd, CI_FALSE, CI_FALSE);
    CITP_FDTABLE_UNLOCK();
    if( oldfdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
      errno = EBADF;
      return -1;
    }
    if( oldfdi )
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
  }
#endif
  if( fdip_cas_fail(p_oldfdip, oldfdip, fdip_busy) )
    goto again;

#if CI_CFG_FD_CACHING
  /* May end up with multiple refs to this, don't allow it to be cached. */
  if( fdip_is_normal(oldfdip) )
    fdip_to_fdi(oldfdip)->can_cache = 0;
#endif

  if( fdip_is_normal(oldfdip) &&
      (((oldfdi = fdip_to_fdi(oldfdip))->protocol->type) == CITP_EPOLL_FD) ) {
    newfdi = citp_fdinfo_get_ops(oldfdi)->dup(oldfdi);
    if( ! newfdi ) {
      citp_fdtable_busy_clear(oldfd, oldfdip, 0);
      errno = ENOMEM;
      return -1;
    }

    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    if( newfd >= 0 )
      citp_fdtable_new_fd_set(newfd, fdip_busy, fdtable_strict());
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
    if( newfd >= 0 ) {
      citp_fdtable_insert(newfdi, newfd, 0);
      newfdi = 0;
    }
  }
  else {
    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    if( newfd >= 0 && newfd < citp_fdtable.inited_count ) {
      /* Mark newfd as unknown.  When used, it'll get probed.
       *
       * We are not just being lazy here: Setting to unknown rather than
       * installing a proper fdi (when oldfd is accelerated) is essential to
       * vfork()+dup()+exec() working properly.  Reason is that child and
       * parent share address space, so child is modifying the parent's
       * fdtable.  Setting an entry to unknown is safe.
       */
      citp_fdtable_new_fd_set(newfd, fdip_unknown, fdtable_strict());
    }
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
  }

  citp_fdtable_busy_clear(oldfd, oldfdip, 0);
  if( newfdi )  citp_fdinfo_free(newfdi);
  return newfd;
}
Ejemplo n.º 11
0
static void ufd_fmt(int fd, char* buf, int* buf_n, int buf_len)
{
  citp_fdinfo_p fdip;
  citp_fdinfo* fdi;
  char s[30];

  if( fd >= citp_fdtable.inited_count ) {
    bprintf("unknown");
    return;
  }
  fdip = citp_fdtable.table[fd].fdip;
  if( fdip_is_passthru(fdip) ) {
    bprintf("passthru");
    return;
  }
  else if( fdip_is_busy(fdip) ) {
    bprintf("busy");
    return;
  }
  else if( fdip_is_unknown(fdip) ) {
    bprintf("unknown");
    return;
  }

  fdi = fdip_to_fdi(fdip);

#if CI_CFG_FD_CACHING
  sprintf(s, "%s%s",
	  fdi->is_special ? "Special":"",
	  fdi->can_cache ? "Cancache":"");
#else
  sprintf(s, "%s",
	  fdi->is_special ? "Special":"");
#endif

  if( fdi == &citp_the_closed_fd ) {
    bprintf("closed_fd[%s]", s);
    return;
  }
  else if( fdi == &citp_the_reserved_fd ) {
    bprintf("reserved_fd[%s]", s);
    return;
  }

  if( fdi->protocol == &citp_tcp_protocol_impl ) {
    citp_sock_fdi* t = fdi_to_sock_fdi(fdi);
    if( citp_fdtable_dump_verbose )
      citp_waitable_dump(t->sock.netif, &t->sock.s->b, "");
    bprintf("tcp[%s]", s);
  }
  else if( fdi->protocol == &citp_udp_protocol_impl ) {
    citp_sock_fdi* u = fdi_to_sock_fdi(fdi);
    if( citp_fdtable_dump_verbose )
      citp_waitable_dump(u->sock.netif, &u->sock.s->b, "");
    bprintf("udp[%s]", s);
  }
#if CI_CFG_USERSPACE_EPOLL
  else if( fdi->protocol == &citp_epoll_protocol_impl ) {
    bprintf("epoll[%s]", s);
  }
#endif
  else {
    bprintf("bad[%s,%p] *****", s, fdi->protocol);
  }
}