Example #1
void
ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p,
		       unsigned laddr, unsigned lport,
		       unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2, tbl_i;
  ci_netif_filter_table* tbl;
  int hops = 0;
  unsigned first;

  ci_assert(ci_netif_is_locked(netif)
#ifdef __KERNEL__
            /* release_ep_tbl might be called without the stack lock.
             * Do not complain about this. */
            || (netif2tcp_helper_resource(netif)->k_ref_count &
                TCP_HELPER_K_RC_DEAD)
#endif
            );


  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_TC(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u",
                __FUNCTION__, NI_ID(netif), OO_SP_FMT(sock_p),
                CI_IP_PROTOCOL_STR(protocol),
		ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
		ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
		hash1, hash2));

  tbl_i = hash1;
  while( 1 ) {
    entry = &tbl->table[tbl_i];
    if( entry->id == OO_SP_TO_INT(sock_p) ) {
      if( laddr == entry->laddr )
        break;
    }
    else if( entry->id == EMPTY ) {
      /* We allow multiple removes of the same filter -- helps avoid some
       * complexity in the filter module.
       */
      return;
    }
    tbl_i = (tbl_i + hash2) & tbl->table_size_mask;
    ++hops;
    if( tbl_i == first ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP [%d] %s %s:%u->%s:%u",
                   FN_PRI_ARGS(netif), OO_SP_FMT(sock_p),
                   CI_IP_PROTOCOL_STR(protocol),
                   ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                   ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport)));
      return;
    }
  }

  __ci_netif_filter_remove(netif, hash1, hash2, hops, tbl_i);
}
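The removal above relies on double hashing: hash1 picks the starting slot, hash2 supplies the probe stride, and the `& tbl->table_size_mask` step wraps the walk around a power-of-two table, with `first` detecting a full loop. A minimal self-contained sketch of that probing discipline (illustrative names, not the Onload API; for the stride to visit every slot it must be odd, i.e. coprime with the table size):

#include <stdio.h>

#define TABLE_SIZE 16                /* power of two, as in the filter table */
#define EMPTY      (-1)

struct entry { int id; unsigned key; };
static struct entry table[TABLE_SIZE];

/* Probe slot = (hash1 + n*hash2) mod TABLE_SIZE until the key is found,
 * an EMPTY slot proves it was never inserted, or the walk wraps around. */
static int probe_lookup(unsigned hash1, unsigned hash2, unsigned key)
{
  unsigned first = hash1 & (TABLE_SIZE - 1);
  unsigned i = first;
  do {
    if( table[i].id == EMPTY )
      return -1;                     /* hole: key cannot be further along */
    if( table[i].key == key )
      return (int) i;
    i = (i + hash2) & (TABLE_SIZE - 1);
  } while( i != first );
  return -1;                         /* wrapped: table full, key absent */
}

int main(void)
{
  unsigned i;
  for( i = 0; i < TABLE_SIZE; i++ )
    table[i].id = EMPTY;
  table[5].id = 1; table[5].key = 42;     /* pretend key 42 hashes to slot 5 */
  printf("lookup(42) -> slot %d\n", probe_lookup(5, 3, 42));  /* odd stride */
  return 0;
}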
Example #2
static int
efab_tcp_helper_stack_attach(ci_private_t* priv, void *arg)
{
  oo_stack_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  int rc;

  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }
  OO_DEBUG_TCPH(ci_log("%s: [%d]", __FUNCTION__, NI_ID(&trs->netif)));

  rc = oo_create_stack_fd(trs);
  if( rc < 0 ) {
    OO_DEBUG_ERR(ci_log("%s: oo_create_stack_fd failed (%d)",
                        __FUNCTION__, rc));
    return rc;
  }
  op->fd = rc;

  /* Re-read the OS socket buffer size settings.  This ensures we'll use
   * up-to-date values for this new socket.
   */
  efab_get_os_settings(&NI_OPTS_TRS(trs));
  op->out_nic_set = trs->netif.nic_set;
  op->out_map_size = trs->mem_mmap_bytes;
  return 0;
}
Example #3
void oo_os_sock_release(ci_netif* ni, oo_os_file fd)
{
  int rc = ci_sys_close(fd);
  oo_rwlock_unlock_read(&citp_dup2_lock);
  if( rc != 0 )
    LOG_E(ci_log("%s: [%d] ci_sys_close returned %d (errno=%d)",
                 __FUNCTION__, NI_ID(ni), rc, errno));
}
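There is no matching lock acquisition in sight because the pairing is split across calls: the path that hands out the O/S fd takes citp_dup2_lock for read (the passthrough example below notes that ci_tcp_helper_get_sock_fd() does this), so a concurrent dup2() cannot snatch the fd, and this release drops the lock once the fd is closed. A sketch of the intended discipline, assuming the usual get/use/release pairing shown in Example #14:

/* Sketch: citp_dup2_lock is held for read between get and release, so the
 * fd stays valid while we use it. */
oo_os_file os_fd;
if( oo_os_sock_get(ni, sock_p, &os_fd) == 0 ) {
  /* ... use os_fd ... */
  oo_os_sock_release(ni, os_fd);     /* closes os_fd, drops citp_dup2_lock */
}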
Example #4
int oo_os_sock_get(ci_netif* ni, oo_sp sock_p, oo_os_file* os_sock_out)
{
  int sock_id = OO_SP_TO_INT(sock_p);
  tcp_helper_endpoint_t* ep;

  if( sock_id != TRUSTED_SOCK_ID(ni, sock_id) ) {
    LOG_E(ci_log("%s: ERROR: %d:%d bad sock_id",
                 __FUNCTION__, NI_ID(ni), sock_id));
    return -EINVAL;
  }
  ep = ci_netif_ep_get(ni, sock_p);
  if( oo_os_sock_get_from_ep(ep, os_sock_out) == 0 )
    return 0;

  LOG_E(ci_log("%s: ERROR: %d:%d has no O/S socket",
               __FUNCTION__, NI_ID(ni), sock_id));
  return -ENOENT;
}
Example #5
static ci_tcp_state_synrecv*
ci_tcp_listenq_bucket_lookup(ci_netif* ni, ci_tcp_listen_bucket* bucket,
                             ciip_tcp_rx_pkt* rxp,
                             int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(rxp->hash, level);
  ci_tcp_state_synrecv* tsr;
  unsigned saddr, daddr, sport;
#ifdef __KERNEL__
  int i = 0;

  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d hash:%x l:%s r:%s:%d)", __func__,
                NI_ID(ni), level, rxp->hash,
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
                CI_BSWAP_BE16(rxp->tcp->tcp_source_be16)));
  if( OO_P_IS_NULL(bucket->bucket[idx]) )
    return NULL;

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET )
    return ci_tcp_listenq_bucket_lookup(ni, &aux->u.bucket, rxp, level);

  saddr = oo_ip_hdr(rxp->pkt)->ip_saddr_be32;
  daddr = oo_ip_hdr(rxp->pkt)->ip_daddr_be32;
  sport = rxp->tcp->tcp_source_be16;

  tsr = &aux->u.synrecv;
  do {
    if( ! ((saddr - tsr->r_addr) | (daddr - tsr->l_addr) |
           (sport - tsr->r_port)) )
      return tsr;
    if( OO_P_IS_NULL(tsr->bucket_link) )
      return NULL;
    aux = ci_ni_aux_p2aux(ni, tsr->bucket_link);
    tsr = &aux->u.synrecv;
#ifdef __KERNEL__
    if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return NULL;
    }
#endif
  } while(1);

  /* unreachable */
  return NULL;
}
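The match test above is a branch-light idiom: with unsigned arithmetic, `a - b` is zero only when `a == b` (wraparound keeps the subtraction well defined), so OR-ing the three differences yields zero iff remote address, local address and remote port all match at once. In isolation:

#include <assert.h>

/* Zero iff all three pairs are equal: each difference is zero only on
 * equality, and OR-ing them cannot cancel bits the way addition could. */
static int match3(unsigned a, unsigned x, unsigned b, unsigned y,
                  unsigned c, unsigned z)
{
  return ! ((a - x) | (b - y) | (c - z));
}

int main(void)
{
  assert(   match3(1, 1, 2, 2, 3, 3) );
  assert( ! match3(1, 1, 2, 2, 3, 4) );
  return 0;
}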
Example #6
static void
ci_tcp_listenq_bucket_insert(ci_netif* ni, ci_tcp_socket_listen* tls,
                             ci_tcp_listen_bucket* bucket,
                             ci_tcp_state_synrecv* tsr, int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(tsr->hash, level);
  oo_p tsr_p = ci_tcp_synrecv2p(ni, tsr);
#ifdef __KERNEL__
  int i = 0;
#endif

  LOG_TV(ci_log("%s([%d] level=%d "TSR_FMT")", __func__,
                NI_ID(ni), level, TSR_ARGS(tsr)));

  if( OO_P_IS_NULL(bucket->bucket[idx]) ) {
    bucket->bucket[idx] = tsr_p;
    return;
  }

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET ) {
    ci_tcp_listenq_bucket_insert(ni, tls, &aux->u.bucket, tsr, level);
    return;
  }

  /* So, this bucket contains a list of other synrecv states.  We add
   * our tsr to this list and try to improve things by allocating a
   * next-level bucket. */
  tsr->bucket_link = bucket->bucket[idx];
  bucket->bucket[idx] = tsr_p;

  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) )
    return;

  bucket->bucket[idx] = ci_ni_aux_alloc_bucket(ni);
  if( OO_P_IS_NULL(bucket->bucket[idx]) )
    return;
  bucket = ci_ni_aux_p2bucket(ni, bucket->bucket[idx]);
  tls->n_buckets++;

  while( OO_P_NOT_NULL(tsr_p) ) {
    tsr = &ci_ni_aux_p2aux(ni, tsr_p)->u.synrecv;
#ifdef __KERNEL__
    if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_tcp_listenq_bucket_insert(ni, tls, bucket, tsr, level);
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return;
    }
#endif
    tsr_p = tsr->bucket_link;
    tsr->bucket_link = OO_P_NULL;
    ci_tcp_listenq_bucket_insert(ni, tls, bucket, tsr, level);
  }
}
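The recursion descends one bucket level per call, so ci_tcp_listenq_hash2idx(hash, level) must give each level its own slice of the hash; otherwise entries that collide at one level would collide at every level and the next-level bucket could never separate them. A plausible shape for such a function (an assumption for illustration; the real Onload definition may differ):

#define LOG2_BUCKET_SIZE 4                      /* e.g. 16-way buckets */
#define BUCKET_SIZE      (1 << LOG2_BUCKET_SIZE)

/* Each level consumes its own LOG2_BUCKET_SIZE-bit slice of the hash, so
 * two states that share a slot at level N can still split at level N+1. */
static inline int hash2idx(unsigned hash, int level)
{
  return (hash >> (level * LOG2_BUCKET_SIZE)) & (BUCKET_SIZE - 1);
}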
Example #7
void citp_waitable_all_fds_gone(ci_netif* ni, oo_sp w_id)
{
  citp_waitable_obj* wo;

  ci_assert(ni);
  ci_assert(IS_VALID_SOCK_P(ni, w_id));
  ci_assert(ci_netif_is_locked(ni));

  wo = SP_TO_WAITABLE_OBJ(ni, w_id);
  ci_assert(wo->waitable.state != CI_TCP_STATE_FREE);

  LOG_NC(ci_log("%s: %d:%d %s", __FUNCTION__, NI_ID(ni), OO_SP_FMT(w_id),
		ci_tcp_state_str(wo->waitable.state)));

  /* A listening socket is closed in blocking context, see
   * efab_tcp_helper_close_endpoint().
   * CI_SB_AFLAG_ORPHAN is set earlier in this case. */
  CI_DEBUG(if( (wo->waitable.sb_aflags & CI_SB_AFLAG_ORPHAN) &&
               wo->waitable.state != CI_TCP_LISTEN )
	     ci_log("%s: %d:%d already orphan", __FUNCTION__,
                    NI_ID(ni), OO_SP_FMT(w_id)));

  /* It's essential that an ORPHANed socket not be on the deferred
   * socket list, because the same link field is used as timewait
   * list, free list etc.  We must purge the deferred list before
   * setting the orphan flag.
   *
   * NB. This socket cannot now be added to the deferred list, because
   * no-one has a reference to it.
   */
  ci_netif_purge_deferred_socket_list(ni);
  ci_bit_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

  /* We also need to remove the socket from the post-poll list.  It may
   * have been left there because the stack believes a wakeup is needed.
   */
  ci_ni_dllist_remove_safe(ni, &wo->waitable.post_poll_link);
  ci_ni_dllist_remove_safe(ni, &wo->waitable.ready_link);
  wo->waitable.ready_list_id = 0;

  citp_waitable_cleanup(ni, wo, 1);
}
Example #8
static void citp_waitable_dump2(ci_netif* ni, citp_waitable* w, const char* pf,
                                oo_dump_log_fn_t logger, void* log_arg)
{
  unsigned tmp;

  if( CI_TCP_STATE_IS_SOCKET(w->state) ) {
    ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w);
    logger(log_arg, "%s%s "NT_FMT"lcl="OOF_IP4PORT" rmt="OOF_IP4PORT" %s",
           pf, citp_waitable_type_str(w), NI_ID(ni), W_FMT(w),
           OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
           OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)),
           ci_tcp_state_str(w->state));
  }
  else
    logger(log_arg, "%s%s "NT_FMT, pf,
           citp_waitable_type_str(w), NI_ID(ni), W_FMT(w));

  if( w->state == CI_TCP_STATE_FREE || w->state == CI_TCP_STATE_AUXBUF )
    return;

  tmp = w->lock.wl_val;
  logger(log_arg, "%s  lock: %x %s%s", pf, tmp,
         (tmp & OO_WAITABLE_LK_LOCKED) ? "LOCKED" : "",
         (tmp & OO_WAITABLE_LK_NEED_WAKE) ? " CONTENDED": "");

  logger(log_arg, "%s  rx_wake=%08x%s tx_wake=%08x%s flags: "CI_SB_FLAGS_FMT,
         pf,
         w->sleep_seq.rw.rx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_RX_B) ? "(RQ)":"    ",
         w->sleep_seq.rw.tx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_TX_B) ? "(RQ)":"    ",
         CI_SB_FLAGS_PRI_ARG(w));

  if( w->spin_cycles == -1 )
    logger(log_arg, "%s  ul_poll: -1 spin cycles -1 usecs", pf);
  else
    logger(log_arg, "%s  ul_poll: %llu spin cycles %u usec", pf,
         w->spin_cycles, oo_cycles64_to_usec(ni, w->spin_cycles));
}
Example #9
File: udp.c Project: majek/openonload
ci_fd_t ci_udp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_udp_state* us;
  ci_fd_t fd;

  VERB( log(LPFIN "ctor( )" ) );

  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  us = ci_udp_get_state_buf(netif);
  if (!us) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers", __FUNCTION__,NI_ID(netif)));
    return -ENOMEM;
  }

  /* The protocol must be set before ci_tcp_helper_sock_attach() is called,
   * since it is used to determine whether TCP or UDP file operations should
   * be attached to the file descriptor in the kernel. */
  sock_protocol(&us->s) = IPPROTO_UDP;

  /* NB: this attach will close the os_sock_fd */
  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif),  
                                 SC_SP(&us->s), domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach (domain=%d, type=%d) "
                   "failed %d", __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach (domain=%d, type=%d) "
                   "failed %d", __FUNCTION__, domain, type, fd));
    ci_netif_unlock(netif);
    return fd;
  }

  ci_assert(~us->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);

  us->s.rx_errno = 0;
  us->s.tx_errno = 0;
  us->s.so_error = 0;
  us->s.cp.sock_cp_flags |= OO_SCP_UDP_WILD;

  ep->s = &us->s;
  ep->netif = netif;
  CHECK_UEP(ep);
  ci_netif_unlock(netif);
  return fd;
}
Example #10
void citp_passthrough_init(citp_alien_fdi* epi)
{
  int rc = oo_os_sock_get(epi->netif, epi->ep->bufid, &epi->os_socket);

  /* No sensible way to handle oo_os_sock_get failure.  Just record it. */
  if( rc != 0 ) {
    Log_U(ci_log("%s: oo_os_sock_get([%d:%d]) returned %d",
                 __func__, NI_ID(epi->netif), epi->ep->bufid, rc));
    epi->os_socket = -1;
    return;
  }
  __citp_fdtable_reserve(epi->os_socket, 1);
  /* ci_tcp_helper_get_sock_fd takes citp_dup2_lock; release it here. */
  oo_rwlock_unlock_read(&citp_dup2_lock);
}
Example #11
static int
efab_stacks_seq_show(struct seq_file *seq, void *v)
{
  ci_netif *ni = v;
  ci_netif_stats* s = &ni->state->stats;
  seq_printf(seq,
             "%d: %d %d %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
             NI_ID(ni), (int) ni->state->pid, (int) ni->state->uid,
             s->periodic_polls, s->periodic_evs,
             s->timeout_interrupts, s->interrupts, s->interrupt_polls,
             s->interrupt_wakes, s->interrupt_evs,
             s->interrupt_primes, s->select_primes,
             s->sock_wakes_rx + s->sock_wakes_tx +
             s->sock_wakes_rx_os + s->sock_wakes_tx_os,
             s->pkt_wakes, s->unlock_slow,
             s->lock_wakes, s->deferred_work, s->sock_lock_sleeps,
             s->rx_evs, s->tx_evs);
  return 0;
}
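efab_stacks_seq_show() is the show callback of a Linux seq_file iterator: the kernel drives start/next/stop to walk the sequence and calls show once per element (here, once per stack). A hedged sketch of the usual wiring; the start/next/stop callbacks named here are assumptions, not taken from the driver:

#include <linux/seq_file.h>
#include <linux/fs.h>

static const struct seq_operations efab_stacks_seq_ops = {
  .start = efab_stacks_seq_start,  /* assumed: position on the first stack */
  .next  = efab_stacks_seq_next,   /* assumed: advance to the next stack */
  .stop  = efab_stacks_seq_stop,   /* assumed: end-of-walk cleanup */
  .show  = efab_stacks_seq_show,   /* the routine above */
};

static int efab_stacks_seq_open(struct inode* inode, struct file* file)
{
  return seq_open(file, &efab_stacks_seq_ops);
}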
Example #12
struct oof_socket*
oof_cb_sw_filter_lookup(struct tcp_helper_resource_s* stack,
                        unsigned laddr, int lport,
                        unsigned raddr, int rport, int protocol)
{
  ci_netif* ni = &stack->netif;
  int sock_id, tbl_idx;
  tbl_idx = ci_netif_filter_lookup(ni, laddr, lport, raddr, rport, protocol);
  if( tbl_idx < 0 )
    return NULL;
  sock_id = ni->filter_table->table[tbl_idx].id;
  if( ! IS_VALID_SOCK_ID(ni, sock_id) ) {
    OO_DEBUG_ERR(ci_log("%s: ERROR: %d %s "IPPORT_FMT" "IPPORT_FMT,
                        __FUNCTION__, NI_ID(ni), FMT_PROTOCOL(protocol),
                        IPPORT_ARG(laddr, lport), IPPORT_ARG(raddr, rport));
                 ci_log("--> idx=%d sock_id=%d sock_id_max=%d", tbl_idx,
                        sock_id, ni->ep_tbl_n));
    return NULL;
  }
  /* [The original listing is truncated here: the success path, which
   * presumably returns the looked-up socket's oof_socket, is not shown.] */
}
Example #13
ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_tcp_state* ts;
  ci_fd_t fd;

  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  ts = ci_tcp_get_state_buf(netif);
  if( ts == NULL ) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers", __FUNCTION__,NI_ID(netif)));
    return -ENOMEM;
  }

  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif), S_SP(ts),
                                 domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach" \
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach" \
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
  }
  else {
    ci_assert(~ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);
    /* Apply default sockbuf sizes now we've updated them from the kernel
    ** defaults. */
    ts->s.so.sndbuf = NI_OPTS(netif).tcp_sndbuf_def;
    ts->s.so.rcvbuf = NI_OPTS(netif).tcp_rcvbuf_def;
    ep->netif = netif;
    ep->s = &ts->s;
    CHECK_TEP(ep);
  }

  ci_netif_unlock(netif);
  return fd;
}
Example #14
int oo_os_sock_ioctl(ci_netif* ni, oo_sp sock_p, int request, void* arg,
                     int* ioctl_rc)
{
  oo_os_file os_sock_fd;
  int rc;
  if( (rc = oo_os_sock_get(ni, sock_p, &os_sock_fd)) == 0 ) {
    rc = ci_sys_ioctl(os_sock_fd, request, arg);
    if( rc < 0 )
      rc = -errno;
    oo_os_sock_release(ni, os_sock_fd);
    if( ioctl_rc != NULL ) {
      *ioctl_rc = rc;
      rc = 0;
    }
  }
  else {
    LOG_E(ci_log("%s: [%d:%d] ERROR: failed to get kernel sock fd "
                 "(rc=%d req=%d)", __FUNCTION__, NI_ID(ni), OO_SP_FMT(sock_p),
                 rc, request));
  }
  return rc;
}
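Note the two-level error convention: the return value reports whether the kernel socket could be reached at all, while *ioctl_rc (when the caller passes a non-NULL pointer) receives the ioctl's own result, already converted to -errno. A hypothetical caller using the standard FIONREAD request:

/* Hypothetical caller: separate "no O/S socket" from "ioctl failed". */
int bytes_ready, ioctl_rc;
int rc = oo_os_sock_ioctl(ni, sock_p, FIONREAD, &bytes_ready, &ioctl_rc);
if( rc != 0 )
  ci_log("could not reach the O/S socket: rc=%d", rc);
else if( ioctl_rc < 0 )
  ci_log("FIONREAD failed: %d", ioctl_rc);    /* -errno from the ioctl */
else
  ci_log("%d bytes queued on the O/S socket", bytes_ready);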
Example #15
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page, 
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page.  (Seen on 2.6.18, but not on 2.6.26 or later.)
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif,SOCK_TO_TCP(s),page,offset,size,flags);
  else
    /* Closed or listening.  Return epipe.  Do not send SIGPIPE, because
    ** Linux will do it for us. */
    return -s->tx_errno;
}
Example #16
/* c_ni is assumed to be locked on entrance and is always unlocked on
 * exit. */
int ci_tcp_connect_lo_toconn(ci_netif *c_ni, oo_sp c_id, ci_uint32 dst,
                             ci_netif *l_ni, oo_sp l_id)
{
  ci_tcp_state *ts;
  ci_tcp_socket_listen *tls, *alien_tls;
  citp_waitable_obj *wo;
  citp_waitable *w;
  int rc;

  ci_assert(ci_netif_is_locked(c_ni));
  ci_assert(OO_SP_NOT_NULL(c_id));
  ci_assert(OO_SP_NOT_NULL(l_id));

  LOG_TC(log("%s: connect %d:%d to %d:%d", __FUNCTION__,
             c_ni->state->stack_id, OO_SP_TO_INT(c_id),
             l_ni->state->stack_id, OO_SP_TO_INT(l_id)));

  alien_tls = SP_TO_TCP_LISTEN(l_ni, l_id);
  if( (int)ci_tcp_acceptq_n(alien_tls) >= alien_tls->acceptq_max ) {
    ci_netif_unlock(c_ni);
    return -EBUSY;
  }

  /* In c_ni, create shadow listening socket tls (copy l_id) */
  ts = ci_tcp_get_state_buf(c_ni);
  if( ts == NULL ) {
    ci_netif_unlock(c_ni);
    LOG_E(ci_log("%s: [%d] out of socket buffers", __FUNCTION__, NI_ID(c_ni)));
    return -ENOMEM;
  }

  /* init common tcp fields */
  ts->s.so = alien_tls->s.so;
  ts->s.cp.ip_ttl = alien_tls->s.cp.ip_ttl;
  S_TCP_HDR(&ts->s)->tcp_source_be16 =
      S_TCP_HDR(&alien_tls->s)->tcp_source_be16;
  ts->s.domain = alien_tls->s.domain;
  ts->c = alien_tls->c;
  ts->c.tcp_defer_accept = OO_TCP_DEFER_ACCEPT_OFF;

  /* make sure nobody will ever connect to our "shadow" socket
   * except us */
  ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

  ci_tcp_set_slow_state(c_ni, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);
  /* no timer: */
  tls->s.s_flags = alien_tls->s.s_flags | CI_SOCK_FLAG_BOUND_ALIEN;

  tls->acceptq_max = 1;
  rc = ci_tcp_listen_init(c_ni, tls);
  if( rc != 0 ) {
    citp_waitable_obj_free(c_ni, &tls->s.b);
    return rc;
  }

  /* Connect c_id to tls */
  ts = SP_TO_TCP(c_ni, c_id);
  rc = ci_tcp_connect_lo_samestack(c_ni, ts, tls->s.b.bufid);

  /* Accept as from tls */
  if( !ci_tcp_acceptq_not_empty(tls) ) {
    /* This is possible if, for example, ci_tcp_listenq_try_promote() failed
     * because there are no endpoints. */
    ci_tcp_listenq_drop_all(c_ni, tls);
    citp_waitable_obj_free(c_ni, &tls->s.b);
    ci_netif_unlock(c_ni);
    return -EBUSY;
  }
  w = ci_tcp_acceptq_get(c_ni, tls);
  ci_assert(w);
  LOG_TV(ci_log("%s: %d:%d to %d:%d shadow %d:%d accepted %d:%d",
                __FUNCTION__,
                c_ni->state->stack_id, OO_SP_TO_INT(c_id),
                l_ni->state->stack_id, OO_SP_TO_INT(l_id),
                c_ni->state->stack_id, tls->s.b.bufid,
                c_ni->state->stack_id, w->bufid));

  ci_assert(w->state & CI_TCP_STATE_TCP);
  ci_assert(w->state != CI_TCP_LISTEN);

  /* Destroy tls.
   * NB: nobody could possibly connect to it, so no need to do proper
   * shutdown.
   */
  ci_assert_equal(ci_tcp_acceptq_n(tls), 0);
  ci_tcp_listenq_drop_all(c_ni, tls);
  citp_waitable_obj_free(c_ni, &tls->s.b);
  ci_netif_unlock(c_ni);

  /* Keep a port reference */
  {
    tcp_helper_endpoint_t *l_ep, *a_ep;
    struct oo_file_ref* os_sock_ref;
    ci_irqlock_state_t lock_flags;

    l_ep = ci_trs_ep_get(netif2tcp_helper_resource(l_ni), l_id);
    a_ep = ci_trs_ep_get(netif2tcp_helper_resource(c_ni), W_SP(w));
    ci_irqlock_lock(&l_ep->thr->lock, &lock_flags);
    os_sock_ref = l_ep->os_socket;
    ci_assert_equal(a_ep->os_port_keeper, NULL);
    if( os_sock_ref != NULL ) {
      os_sock_ref = oo_file_ref_add(os_sock_ref);
      os_sock_ref = oo_file_ref_xchg(&a_ep->os_port_keeper, os_sock_ref);
      ci_irqlock_unlock(&l_ep->thr->lock, &lock_flags);
      if( os_sock_ref != NULL )
        oo_file_ref_drop(os_sock_ref);
    }
    else {
      ci_irqlock_unlock(&l_ep->thr->lock, &lock_flags);
      goto cleanup;
    }
  }

  /* lock l_ni: Check that l_id is the same socket it used to be */
  /* create ref-sock in l_ni, put it into acc q */
  if( ci_netif_lock(l_ni) != 0 )
    goto cleanup;
  if( alien_tls->s.b.state != CI_TCP_LISTEN ||
      (alien_tls->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN) ||
      S_TCP_HDR(&alien_tls->s)->tcp_source_be16 != TS_TCP(ts)->tcp_dest_be16 ||
      (alien_tls->s.pkt.ip.ip_saddr_be32 != INADDR_ANY &&
       alien_tls->s.pkt.ip.ip_saddr_be32 != ts->s.pkt.ip.ip_daddr_be32) ) {
    ci_netif_unlock(l_ni);
    goto cleanup;
  }

  ci_bit_mask_set(&w->sb_aflags,
                  CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN);

  wo = citp_waitable_obj_alloc(l_ni);
  if( wo == NULL ) {
    ci_netif_unlock(l_ni);
    goto cleanup;
  }
  wo->waitable.state = CI_TCP_CLOSED;
  wo->waitable.sb_aflags |= CI_SB_AFLAG_MOVED_AWAY;
  wo->waitable.moved_to_stack_id = c_ni->state->stack_id;
  wo->waitable.moved_to_sock_id = W_SP(w);
  LOG_TC(log("%s: put to acceptq %d:%d referencing %d:%d", __func__,
             l_ni->state->stack_id, OO_SP_TO_INT(W_SP(&wo->waitable)),
             c_ni->state->stack_id, OO_SP_TO_INT(W_SP(w))));

  ci_tcp_acceptq_put(l_ni, alien_tls, &wo->waitable);
  citp_waitable_wake_not_in_poll(l_ni, &alien_tls->s.b, CI_SB_FLAG_WAKE_RX);
  ci_netif_unlock(l_ni);

  return rc;

cleanup:
  ci_assert(w->sb_aflags & CI_SB_AFLAG_ORPHAN);
  ci_bit_mask_clear(&w->sb_aflags,
                    CI_SB_AFLAG_TCP_IN_ACCEPTQ | CI_SB_AFLAG_ORPHAN);
  efab_tcp_helper_close_endpoint(netif2tcp_helper_resource(c_ni), w->bufid);
  /* We cannot guarantee the c_ni lock, so we can't call
   * ci_tcp_drop(c_ni, ts).  Instead we return an error; UL will hand the
   * socket over and close the ts endpoint. */
  return -EBUSY;
}
Example #17
/* Return 1 if the bucket is empty now */
static int
ci_tcp_listenq_bucket_remove(ci_netif* ni, ci_tcp_socket_listen* tls,
                             ci_tcp_listen_bucket* bucket,
                             ci_tcp_state_synrecv* tsr, int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(tsr->hash, level);
  oo_p tsr_p = ci_tcp_synrecv2p(ni, tsr);

  /* Fixme: we remove empty buckets only.  In theory, it might be useful to
   * remove a bucket with one non-empty list, but that would make the code
   * more complicated. */
  int empty = 0;
#ifdef __KERNEL__
  int i = 0;

  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE, __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d "TSR_FMT")", __func__,
                NI_ID(ni), level, TSR_ARGS(tsr)));
  ci_assert( OO_P_NOT_NULL(bucket->bucket[idx]) );
#ifdef __KERNEL__
  if( OO_P_IS_NULL(bucket->bucket[idx]) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET ) {
    empty = ci_tcp_listenq_bucket_remove(ni, tls, &aux->u.bucket, tsr, level);
    if( empty ) {
      bucket->bucket[idx] = OO_P_NULL;
      ci_ni_aux_free(ni, aux);
      tls->n_buckets--;
    }
  }
  else {
    if( bucket->bucket[idx] == tsr_p ) {
      bucket->bucket[idx] = tsr->bucket_link;
      empty = OO_P_IS_NULL(bucket->bucket[idx]);
    }
    else {
      ci_tcp_state_synrecv* prev = &aux->u.synrecv;
      while( prev->bucket_link != tsr_p ) {
        aux = ci_ni_aux_p2aux(ni, prev->bucket_link);
        prev = &aux->u.synrecv;
#ifdef __KERNEL__
        if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
          ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                                  __FUNCTION__);
          return 0;
        }
#endif
      }
      prev->bucket_link = tsr->bucket_link;
    }
  }

  if( empty ) {
    int i;
    for( i = 0; i < CI_TCP_LISTEN_BUCKET_SIZE; i++ )
      if( OO_P_NOT_NULL(bucket->bucket[i]) )
        return 0;
    return 1;
  }
  return 0;
}
Example #18
int
oof_cb_stack_id(struct tcp_helper_resource_s* stack)
{
  return stack ? NI_ID(&stack->netif) : -1;
}
Example #19
/*
** Promote a synrecv structure to an established socket.
**
** Assumes that the caller will handle a failure if we can't allocate a new
** tcp_state structure due to memory pressure or the like.
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* Grab a tcp_state structure that will go onto the accept queue.  We
     * take from the cache of EPs if any are available.
     */
    ts = get_ts_from_cache(netif, tsr, tls);
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached? */
        ts = get_ts_from_cache(netif, tsr, tls);
        if( ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }


      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if (rc < 0) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* Nothing needs to be done for SACK. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if (ts->c.user_mss && ts->c.user_mss < ts->smss)
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss>0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited. 
     * Note: Windows does not inherit rcvbuf until the call to accept 
     * completes. The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }
  /* [The original listing is truncated here: the path taken when the accept
   * queue is already full is not shown.] */
}