Ejemplo n.º 1
0
void
oof_cb_sw_filter_apply(ci_netif* ni)
{
  struct oof_cb_sw_filter_op* op;

  ci_assert(ci_netif_is_locked(ni));

  spin_lock_bh(&ni->swf_update_lock);
  for( op = ni->swf_update_first; op != NULL; op = ni->swf_update_first) {

    ni->swf_update_first = op->next;
    if( op->next == NULL )
      ni->swf_update_last = NULL;
    spin_unlock_bh(&ni->swf_update_lock);

    if( op->op == OOF_CB_SW_FILTER_OP_ADD ) {
      ci_netif_filter_insert(ni, op->sock_id, op->laddr, op->lport,
                             op->raddr, op->rport, op->protocol);
    }
    else {
      ci_netif_filter_remove(ni, op->sock_id, op->laddr, op->lport,
                             op->raddr, op->rport, op->protocol);
    }

    ci_free(op);
    spin_lock_bh(&ni->swf_update_lock);
  }
  spin_unlock_bh(&ni->swf_update_lock);
}
Ejemplo n.º 2
0
/****************************************************************************
 *
 * close - cleanup filedescriptor and private state
 *
 ****************************************************************************/
static int
ci_char_fop_close(struct inode *inode, struct file *filp) 
{  
  ci_private_char_t *priv = (ci_private_char_t *) filp->private_data;

  EFCH_TRACE("%s:", __FUNCTION__);

  /* cleanup private state */
  filp->private_data = 0;
  ci_resource_table_dtor(&priv->rt);
  ci_free(priv);

  return 0;  
} 
Ejemplo n.º 3
0
/*! Tear down a private_t */
void
ci_resource_table_dtor( ci_resource_table_t *rt )
{
    efch_resource_t *rs;
    unsigned i;

    ci_assert(rt);

#if CI_CFG_PRIVATE_T_DEBUG_LIST
    ci_lock_lock(&priv_list_lock);
    list_del(&(rt->priv_list));
    ci_lock_unlock(&priv_list_lock);
#endif

    for( i = 0; i < rt->resource_table_highwater; i++ ) {
        rs = rt->resource_table[i];
        ci_assert(rs != NULL);
        efch_resource_free(rs);
        CI_DEBUG(rt->resource_table[i] = NULL);
    }
    if( rt->resource_table != rt->resource_table_static )
        ci_free(rt->resource_table);
    CI_DEBUG_ZERO(rt);
}
Ejemplo n.º 4
0
static int
onload_alloc_file(tcp_helper_resource_t *thr, oo_sp ep_id, int flags,
                  int fd_type)
{
  struct qstr name = { .name = "" };
#ifdef EFX_HAVE_STRUCT_PATH
  struct path path;
#define my_dentry path.dentry
#else
  struct dentry *dentry;
#define my_dentry dentry
#endif
  struct file *file;
  int fd;
  struct inode *inode;
  ci_private_t *priv;
  struct file_operations *fops;

  fops = oo_fops_by_type(fd_type);
  if( fops == NULL )
    return -EINVAL;
  ci_assert_equal(fops->owner, THIS_MODULE);

  inode = new_inode(onload_mnt->mnt_sb);
  if( inode == NULL )
    return -ENOMEM;
#ifdef EFX_FSTYPE_HAS_MOUNT
  inode->i_ino = get_next_ino();
#endif
  if( fd_type == CI_PRIV_TYPE_NETIF )
    inode->i_mode = S_IRWXUGO;
  if( fd_type == CI_PRIV_TYPE_TCP_EP || fd_type == CI_PRIV_TYPE_UDP_EP )
    inode->i_mode = 
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
        /* in 2.6.18 this flag makes us "socket" and sendmsg crashes;
         * see sock_from_file() */
                    S_IFSOCK |
#endif
                    S_IRWXUGO;
  else
    inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
  inode->i_uid = current_fsuid();
  inode->i_gid = current_fsgid();
  priv = &container_of(inode, struct onload_inode, vfs_inode)->priv;
  priv->thr = thr;
  priv->sock_id = ep_id;
  priv->fd_type = fd_type;

  fd = get_unused_fd();
  if( fd < 0 ) {
    iput(inode);
    return fd;
  }
  /*ci_log("[%d]%s(%d:%d) return %d priv=%p", current->pid, __func__,
         thr->id, ep_id, fd, priv);*/

#ifdef EFX_FSTYPE_HAS_MOUNT
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37)
  path.dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  if( path.dentry != NULL )
    path.dentry->d_op = &onloadfs_dentry_operations;
#else
  path.dentry = d_alloc_pseudo(onload_mnt->mnt_sb, &name);
#endif
#else /* EFX_FSTYPE_HAS_MOUNT */
#ifdef EFX_HAVE_D_DNAME
  my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
#else
  {
    char str[32];
    name.len = onloadfs_name(&container_of(inode, struct onload_inode,
                                           vfs_inode)->priv,
                             str, sizeof(str));
    name.name = str;
    name.hash = inode->i_ino;
    my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  }
#endif
#endif /* EFX_FSTYPE_HAS_MOUNT */

  if( my_dentry == NULL ) {
    put_unused_fd(fd);
    iput(inode);
    return -ENOMEM;
  }

#if !defined(EFX_FSTYPE_HAS_MOUNT) || defined(EFX_OLD_MOUNT_PSEUDO)
  my_dentry->d_op = &onloadfs_dentry_operations;
#if !defined(EFX_HAVE_STRUCT_PATH) && defined(EFX_HAVE_D_DNAME)
  my_dentry->d_flags &= ~DCACHE_UNHASHED;
#endif
#endif
  d_instantiate(my_dentry, inode);
#ifndef EFX_HAVE_D_DNAME
  d_rehash(my_dentry);
#endif
  inode->i_fop = fops;

#ifdef EFX_HAVE_STRUCT_PATH
  path.mnt = mntget(onload_mnt);
  file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
#else
  file = alloc_file(onload_mnt, dentry, FMODE_READ | FMODE_WRITE, fops);
#endif
  if( file == NULL) {
#ifdef EFX_HAVE_STRUCT_PATH
    path_put(&path);
#else
    dput(dentry);
    iput(inode);
#endif
    put_unused_fd(fd);
    return -ENFILE;
  }

  priv->_filp = file;
  file->f_flags = O_RDWR | (flags & O_NONBLOCK);
  file->f_pos = 0;
  file->private_data = priv;

  if( flags & O_CLOEXEC ) {
    struct files_struct *files = current->files;
    struct fdtable *fdt;
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    rcu_assign_pointer(fdt->fd[fd], file);
    efx_set_close_on_exec(fd, fdt);
    spin_unlock(&files->file_lock);
  } else
    fd_install(fd, file);
  try_module_get(THIS_MODULE);

  ci_assert_equal(file->f_op, fops);
  return fd;
}

void onload_priv_free(ci_private_t *priv)
{
  if( priv->_filp->f_vfsmnt != onload_mnt)
    ci_free(priv);
  /* inode will free the priv automatically */
}


int
oo_create_fd(tcp_helper_endpoint_t* ep, int flags, int fd_type)
{
  int fd;
  tcp_helper_resource_t *trs = ep->thr;
  citp_waitable_obj *wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id);

  efab_thr_ref(trs);
  fd = onload_alloc_file(trs, ep->id, flags, fd_type);
  if( fd < 0 ) {
    efab_thr_release(trs);
    OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)", __FUNCTION__, fd));
    return fd;
  }
  ci_atomic32_and(&wo-> waitable.sb_aflags,
                  ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

  return fd;
}
Ejemplo n.º 5
0
/*
** promote a synrecv structure to an established socket
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* grab a tcp_state structure that will go onto the accept queue.  We take
     * from the cache of EPs if any are available
     */
    ts = get_ts_from_cache (netif, tsr, tls); 
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached */
        ts = get_ts_from_cache(netif, tsr, tls);
        if (ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }


      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if (rc < 0) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if (ts->c.user_mss && ts->c.user_mss < ts->smss)
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss>0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited. 
     * Note: Windows does not inherit rcvbuf until the call to accept 
     * completes. The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }