Example #1
static int thc_is_thr_name_taken(tcp_helper_cluster_t* thc, char* name)
{
  int i = 0;
  tcp_helper_resource_t* thr_walk = thc->thc_thr_head;
  while( thr_walk != NULL && i < thc->thc_cluster_size ) {
    if( strncmp(name, thr_walk->name, CI_CFG_STACK_NAME_LEN) == 0 )
      return 1;
    thr_walk = thr_walk->thc_thr_next;
    ++i;
  }
  ci_assert_le(i, thc->thc_cluster_size);
  return 0;
}
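All of these examples use ci_assert_le(a, b) as a debug-time check that a <= b. As a rough, hypothetical sketch of what such a macro family typically expands to (this is not Onload's actual definition, which normally compiles to a no-op in non-debug builds):

/* Hypothetical sketch only; the real ci_assert_le lives in the Onload
 * headers and is typically disabled in release builds. */
#include <stdio.h>
#include <stdlib.h>

#ifdef NDEBUG
# define my_assert_le(a, b)  do {} while( 0 )
#else
# define my_assert_le(a, b)                                             \
    do {                                                                \
      if( ! ((a) <= (b)) ) {                                            \
        fprintf(stderr, "assertion failed at %s:%d: %s <= %s\n",        \
                __FILE__, __LINE__, #a, #b);                            \
        abort();                                                        \
      }                                                                 \
    } while( 0 )
#endif

In example #1 the assertion documents the loop invariant: the walk over thc_thr_head can visit at most thc_cluster_size stacks, so i must never exceed that count when the loop exits.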
Example #2
File: iptimer.c  Project: majek/openonload
/* initialise the iptimer scheduler */
void ci_ip_timer_state_init(ci_netif* netif, unsigned cpu_khz)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  int i;
  int us2isn;

  /* initialise the cycle to tick constants */
  ipts->khz = cpu_khz;
  ipts->ci_ip_time_frc2tick = shift_for_gran(CI_IP_TIME_APP_GRANULARITY, ipts->khz);
  ipts->ci_ip_time_frc2us = shift_for_gran(1, ipts->khz);

  /* The Linux kernel ticks the initial sequence number that it would use for
   * a given tuple every 64 ns.  Onload does the same when using
   * EF_TCP_ISN_MODE=clocked.  In EF_TCP_ISN_MODE=clocked+cache, however, our
   * use of the clock-driven ISN is slightly different, as we remember old
   * sequence numbers in the case where the clock-driven ISN is not known to
   * be safe.  As such, we don't need it to tick so fast, so we let it tick
   * at most every 256 ns.  This means that it takes more than eight minutes
   * to wrap by half, while four minutes is our assumed maximum peer-MSL.
   * In practice this reduces the cases in which we have to remember old
   * sequence numbers. */
  us2isn = NI_OPTS(netif).tcp_isn_mode != 0 ? 2 : 4;
  ipts->ci_ip_time_frc2isn = ipts->ci_ip_time_frc2us > us2isn ?
                             ipts->ci_ip_time_frc2us - us2isn : 0;

  ci_ip_time_initial_sync(ipts);
  ipts->sched_ticks = ci_ip_time_now(netif);
  ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;

  /* To convert ms to ticks we use fixed-point arithmetic.  Calculate the
   * conversion factor, which is expected to be in the range (0.5, 1]. */
  ipts->ci_ip_time_ms2tick_fxp =
    (((ci_uint64)ipts->khz) << 32) /
    (1u << ipts->ci_ip_time_frc2tick);
  ci_assert_gt(ipts->ci_ip_time_ms2tick_fxp, 1ull<<31);
  ci_assert_le(ipts->ci_ip_time_ms2tick_fxp, 1ull<<32);

  /* set module specific time constants dependent on frc2tick */
  ci_tcp_timer_init(netif);

  ci_ni_dllist_init(netif, &ipts->fire_list,
                    oo_ptr_to_statep(netif, &ipts->fire_list),
                    "fire");

  /* Initialise the wheel lists. */
  for( i = 0; i < CI_IPTIME_WHEELSIZE; i++ )
    ci_ni_dllist_init(netif, &ipts->warray[i],
                      oo_ptr_to_statep(netif, &ipts->warray[i]),
                      "timw");
}
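The fixed-point factor computed near the end of ci_ip_timer_state_init() is easier to see with concrete numbers. A minimal sketch, assuming a 3 GHz TSC (cpu_khz = 3000000) and assuming shift_for_gran() picks frc2tick = 22 (2^22 cycles is roughly 1.4 ms, i.e. one timer tick); both values are illustrative:

/* Worked example under the assumptions above; prints 3072000000, which
 * lies in (2^31, 2^32] exactly as the ci_assert_gt/ci_assert_le pair
 * in ci_ip_timer_state_init() demands. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t khz = 3000000;        /* assumed TSC frequency: 3 GHz */
  unsigned frc2tick = 22;        /* assumed shift chosen by shift_for_gran() */
  uint64_t ms2tick_fxp = (khz << 32) / (1ull << frc2tick);

  printf("ms2tick_fxp = %llu (2^31 = %llu, 2^32 = %llu)\n",
         (unsigned long long) ms2tick_fxp, 1ull << 31, 1ull << 32);
  return 0;
}

In other words the factor is "ticks per millisecond" in 32.32 fixed point, and the two assertions pin the tick period to between 1 ms and 2 ms.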
Example #3
void ci_netif_filter_init(ci_netif_filter_table* tbl, int size_lg2)
{
  unsigned i;
  unsigned size = ci_pow2(size_lg2);

  ci_assert(tbl);
  ci_assert_gt(size_lg2, 0);
  ci_assert_le(size_lg2, 32);

  tbl->table_size_mask = size - 1;

  for( i = 0; i < size; ++i ) {
    tbl->table[i].id = EMPTY;
    tbl->table[i].route_count = 0;
    tbl->table[i].laddr = 0;
  }
}
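ci_netif_filter_init() sizes the lookup table as a power of two and stores size - 1 in table_size_mask, so later lookups can reduce a hash to a bucket index with a mask rather than a modulo. A small illustrative sketch of that idiom (the hash function is deliberately omitted; nothing here is Onload's actual lookup code):

/* Illustrative only: with a power-of-two table, "hash % size" reduces to
 * "hash & (size - 1)", which is what table_size_mask enables. */
#include <stdint.h>

static inline unsigned bucket_of(uint32_t hash, uint32_t table_size_mask)
{
  return hash & table_size_mask;
}

/* e.g. size_lg2 = 13 gives size = 8192 and table_size_mask = 0x1fff */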
Example #4
int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags)
{
  int done = 0, last_fd = -1, i;
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi = NULL;

  Log_CALL(ci_log("%s(%p, %d, %x)", __FUNCTION__, msgs, mlen, flags));

  citp_enter_lib(&lib_context);

  for( i = 0; i < mlen; ++i ) {
    if( msgs[i].fd != last_fd ) {
      if( fdi != NULL )
        citp_fdinfo_release_ref(fdi, 0);
      fdi = citp_fdtable_lookup(msgs[i].fd);
      if( fdi == NULL ) {
        msgs[i].rc = -ESOCKTNOSUPPORT;
        ++done;
        goto out;
      }
      last_fd = msgs[i].fd;
    }

    CI_TRY_EQ( citp_fdinfo_get_ops(fdi)->zc_send(fdi, &msgs[i], flags), 1);
    /* If we got an error, return the number of msgs that have had rc set
     * and exit.  The fd op should have updated msgs[i].rc appropriately. */
    ++done;
    if( msgs[i].rc < 0 )
      goto out;
  }

 out:

  if( fdi != NULL )
    citp_fdinfo_release_ref(fdi, 0);

  citp_exit_lib(&lib_context, TRUE);

  ci_assert_gt(done, 0);
  ci_assert_le(done, mlen);

  Log_CALL_RESULT(done);
  return done;
}
Example #5
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page, 
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page. - seen on 2.6.18, but not on 2.6.26 or later
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif,SOCK_TO_TCP(s),page,offset,size,flags);
  else
    /* Closed or listening.  Return epipe.  Do not send SIGPIPE, because
    ** Linux will do it for us. */
    return -s->tx_errno;
}
Example #6
File: ip_tx.c  Project: davenso/openonload
void ci_ip_send_tcp_slow(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt)
{
  /* We're here because the ipcache is not valid. */
  int rc, prev_mtu = ts->s.pkt.mtu;

  cicp_user_retrieve(ni, &ts->s.pkt, &ts->s.cp);

  if( ts->s.pkt.status == retrrc_success ) {
    if( ts->s.pkt.mtu != prev_mtu )
      CI_PMTU_TIMER_NOW(ni, &ts->pmtus);
    ci_ip_set_mac_and_port(ni, &ts->s.pkt, pkt);
    ci_netif_send(ni, pkt);
    return;
  }
  else if( ts->s.pkt.status == retrrc_localroute &&
           (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) )
    ci_ip_local_send(ni, pkt, &ts->s, OO_SP_NULL);

  /* For TCP, we want the ipcache to only be valid when onloadable. */
  ci_ip_cache_invalidate(&ts->s.pkt);

  switch( ts->s.pkt.status ) {
  case retrrc_nomac:
    rc = 0;
    /* If we resend SYN, and there is no MAC - it means ARP failed.
     * Connect() should return with EHOSTUNREACH.
     * We verify twice - on the first and the second retransmit.
     * Very hackish.
     */
    if( ts->s.b.state == CI_TCP_SYN_SENT ) {
      if( ts->retransmits == 1 )
        ts->tcpflags |= CI_TCPT_FLAG_NO_ARP;
      else if( (ts->tcpflags & CI_TCPT_FLAG_NO_ARP) &&
               ts->retransmits == 2 ) {
        ci_tcp_drop(ni, ts, EHOSTUNREACH);
        return;
      }
    }
    cicp_user_defer_send(ni, retrrc_nomac, &rc, OO_PKT_P(pkt), 
                         ts->s.pkt.ifindex);
    ++ts->stats.tx_nomac_defer;
    return;
  case retrrc_noroute:
    rc = -EHOSTUNREACH;
    break;
  case retrrc_alienroute:
  case retrrc_localroute:
    /* ?? TODO: inc some stat */
    return;
  default:
    ci_assert_lt(ts->s.pkt.status, 0);
    if( ts->s.pkt.status < 0 )
      rc = ts->s.pkt.status;
    else
      /* belt and braces... */
      rc = 0;
  }

  ci_assert_le(rc, 0);

  /* In most cases we should ignore the return code; the packet will be
   * resent later, because of the RTO.  However, in SYN-SENT we should pass
   * errors to the user.  At the same time, we should not pass ENOBUFS to
   * the user - it is an internal problem of the cplane, so we should just
   * try again.  Possibly there are other internal problems, such as ENOMEM.
   *
   * Also, do not break the connection when the first SYN fails:
   * - Linux does not do it;
   * - the cplane has some latency, so we get false positives here;
   * - ci_tcp_connect() does not expect it.
   */
  if( ts->s.b.state == CI_TCP_SYN_SENT && rc < 0 && ts->retransmits > 0 &&
      (rc == -EHOSTUNREACH || rc == -ENETUNREACH || rc == -ENETDOWN) )
    ci_tcp_drop(ni, ts, -rc);
}
Example #7
int
oo_iobufset_resource_alloc(struct oo_buffer_pages * pages, struct efrm_pd *pd,
                           struct oo_iobufset **iobrs_out, uint64_t *hw_addrs,
                           int reset_pending)
{
  struct oo_iobufset *iobrs;
  int rc;
  int gfp_flag = (in_atomic() || in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
  int size = sizeof(struct oo_iobufset) + pages->n_bufs * sizeof(dma_addr_t);
  int nic_order;
  void **addrs;
  unsigned int i;

  ci_assert(iobrs_out);
  ci_assert(pd);

  if( size <= PAGE_SIZE ) {
    iobrs = kmalloc(size, gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    iobrs->dma_addrs = (void *)(iobrs + 1);
  }
  else {
    /* Avoid multi-page allocations */
    iobrs = kmalloc(sizeof(struct oo_iobufset), gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    ci_assert_le(pages->n_bufs * sizeof(dma_addr_t), PAGE_SIZE);
    iobrs->dma_addrs = kmalloc(pages->n_bufs * sizeof(dma_addr_t), gfp_flag);
    if( iobrs->dma_addrs == NULL ) {
      kfree(iobrs);
      return -ENOMEM;
    }
  }

  oo_atomic_set(&iobrs->ref_count, 1);
  iobrs->pd = pd;
  iobrs->pages = pages;

  nic_order = EFHW_GFP_ORDER_TO_NIC_ORDER(compound_order(pages->pages[0]));

  ci_assert_le(sizeof(void *) * pages->n_bufs, PAGE_SIZE);
  addrs = kmalloc(sizeof(void *) * pages->n_bufs, gfp_flag);
  if( addrs == NULL ) {
    rc = -ENOMEM;
    goto fail;
  }

  for( i = 0; i < pages->n_bufs; i++ )
    addrs[i] = page_address(pages->pages[i]);

  rc = efrm_pd_dma_map(iobrs->pd, pages->n_bufs, nic_order,
                       addrs, sizeof(addrs[0]),
                       &iobrs->dma_addrs[0], sizeof(iobrs->dma_addrs[0]),
                       hw_addrs, sizeof(hw_addrs[0]),
                       put_user_fake, &iobrs->buf_tbl_alloc, reset_pending);
  kfree(addrs);

  if( rc < 0 )
    goto fail;

  OO_DEBUG_VERB(ci_log("%s: [%p] %d pages", __FUNCTION__,
                       iobrs, iobrs->pages->n_bufs));

  efrm_resource_ref(efrm_pd_to_resource(pd));
  oo_atomic_inc(&pages->ref_count);
  *iobrs_out = iobrs;
  return 0;

fail:
  oo_iobufset_free_memory(iobrs);
  return rc;
}
Example #8
static int oo_bufpage_alloc(struct oo_buffer_pages **pages_out,
                            int user_order, int low_order,
                            int *flags, int gfp_flag)
{
  int i;
  struct oo_buffer_pages *pages;
  int n_bufs = 1 << (user_order - low_order);
  int size = sizeof(struct oo_buffer_pages) + n_bufs * sizeof(struct page *);

  if( size < PAGE_SIZE ) {
    pages = kmalloc(size, gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    pages->pages = (void *)(pages + 1);
  }
  else {
    /* Avoid multi-page allocations */
    pages = kmalloc(sizeof(struct oo_buffer_pages), gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    ci_assert_le(n_bufs * sizeof(struct page *), PAGE_SIZE);
    pages->pages = kmalloc(n_bufs * sizeof(struct page *), gfp_flag);
    if( pages->pages == NULL ) {
      kfree(pages);
      return -ENOMEM;
    }
  }

  pages->n_bufs = n_bufs;
  oo_atomic_set(&pages->ref_count, 1);

#ifdef OO_DO_HUGE_PAGES
  if( (*flags & (OO_IOBUFSET_FLAG_HUGE_PAGE_TRY |
                 OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE)) &&
      gfp_flag == GFP_KERNEL &&
      low_order == HPAGE_SHIFT - PAGE_SHIFT ) {
    if (oo_bufpage_huge_alloc(pages, flags) == 0) {
      *pages_out = pages;
      return 0;
    }
  }
  pages->shmid = -1;
  if( *flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE ) {
    ci_assert_equal(low_order, HPAGE_SHIFT - PAGE_SHIFT);
    return -ENOMEM;
  }
#endif

  if( low_order > 0 ) {
#ifdef OO_HAVE_COMPOUND_PAGES
    /* __GFP_COMP hint stolen from http://samirdas.blog.com/
     * __GFP_NOWARN is necessary because we properly handle high-order page
     * allocation failure by allocating pages one-by-one. */
    gfp_flag |= __GFP_COMP | __GFP_NOWARN;
#else
    return -EINVAL;
#endif
  }

  for( i = 0; i < n_bufs; ++i ) {
    pages->pages[i] = alloc_pages_node(numa_node_id(), gfp_flag, low_order);
    if( pages->pages[i] == NULL ) {
      OO_DEBUG_VERB(ci_log("%s: failed to allocate page (i=%u) "
                           "user_order=%d page_order=%d",
                           __FUNCTION__, i, user_order, low_order));
      pages->n_bufs = i;
      oo_iobufset_free_pages(pages);
      return -ENOMEM;
    }
    memset(page_address(pages->pages[i]), 0, PAGE_SIZE << low_order);
  }
  
  *pages_out = pages;
  return 0;
}
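Examples #7 and #8 share the same allocation pattern: when the header plus its trailing pointer array fits in one page, the whole thing is kmalloc()ed as a single block with the array placed just past the header; otherwise the array is kmalloc()ed separately, and ci_assert_le() records the expectation that the array on its own still fits in a single page. A minimal userspace sketch of the same pattern, using malloc() and plain assert() in place of the kernel and Onload primitives (types and names here are illustrative):

/* Sketch of the "inline array if it fits, separate single-page block
 * otherwise" pattern; not the Onload code. */
#include <assert.h>
#include <stdlib.h>

#define MY_PAGE_SIZE 4096u

struct buf_set {
  size_t n;
  void **items;     /* points just past the header, or at a separate block */
};

static struct buf_set* buf_set_alloc(size_t n)
{
  size_t size = sizeof(struct buf_set) + n * sizeof(void *);
  struct buf_set *bs;

  if( size <= MY_PAGE_SIZE ) {
    bs = malloc(size);
    if( bs == NULL )
      return NULL;
    bs->items = (void *)(bs + 1);      /* array lives inline after the header */
  }
  else {
    /* Avoid an allocation spanning multiple pages: split header and array. */
    bs = malloc(sizeof(struct buf_set));
    if( bs == NULL )
      return NULL;
    assert(n * sizeof(void *) <= MY_PAGE_SIZE);   /* mirrors ci_assert_le() */
    bs->items = malloc(n * sizeof(void *));
    if( bs->items == NULL ) {
      free(bs);
      return NULL;
    }
  }
  bs->n = n;
  return bs;
}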