Esempio n. 1
0
/*
 * Send a reset on, and drop, a TCP connection that is still marked as being
 * in an accept queue (CI_SB_AFLAG_TCP_IN_ACCEPTQ) of a stack that has no
 * userland reference.
 *
 * priv is unused.  arg points to a struct oo_op_tcp_drop_from_acceptq whose
 * stack_id / sock_id select the stack and the socket.
 *
 * Returns 0 on success, the (negative) error from efab_thr_table_lookup()
 * when the stack cannot be looked up, -EINVAL when sock_id does not name a
 * non-listening TCP socket, or -ERESTARTSYS when taking the stack lock was
 * interrupted.
 */
static int
efab_tcp_drop_from_acceptq(ci_private_t *priv, void *arg)
{
  struct oo_op_tcp_drop_from_acceptq *carg = arg;
  tcp_helper_resource_t *thr;
  tcp_helper_endpoint_t *ep;
  citp_waitable *w;
  ci_tcp_state *ts;
  int rc;

  /* find stack */
  rc = efab_thr_table_lookup(NULL, carg->stack_id,
                             EFAB_THR_TABLE_LOOKUP_CHECK_USER |
                             EFAB_THR_TABLE_LOOKUP_NO_UL,
                             &thr);
  if( rc < 0 )
    return rc;
  ci_assert( thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND );

  /* find endpoint and drop OS socket */
  ep = ci_trs_get_valid_ep(thr, carg->sock_id);
  if( ep == NULL ) {
    /* rc is 0 after the successful lookup: make the failure visible. */
    rc = -EINVAL;
    goto fail_inconsistent;
  }

  w = SP_TO_WAITABLE(&thr->netif, carg->sock_id);
  if( !(w->state & CI_TCP_STATE_TCP) || w->state == CI_TCP_LISTEN ) {
    rc = -EINVAL;
    goto fail_inconsistent;
  }
  ts = SP_TO_TCP(&thr->netif, carg->sock_id);
  ci_assert(ep->os_port_keeper);
  ci_assert_equal(ep->os_socket, NULL);

  LOG_TV(ci_log("%s: send reset to non-accepted connection", __FUNCTION__));

  /* copy from ci_tcp_listen_shutdown_queues() */
  ci_assert(ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ);
  rc = ci_netif_lock(&thr->netif);
  if( rc != 0 ) {
    ci_assert_equal(rc, -EINTR);
    rc = -ERESTARTSYS;
    /* Interrupted, not inconsistent: drop our reference without logging. */
    goto fail_drop_ref;
  }
  ci_bit_clear(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
  /* We have no way to close this connection from the other side:
   * there was no RST from peer. */
  ci_assert_nequal(ts->s.b.state, CI_TCP_CLOSED);
  ci_assert_nequal(ts->s.b.state, CI_TCP_TIME_WAIT);
  ci_tcp_send_rst(&thr->netif, ts);
  ci_tcp_drop(&thr->netif, ts, ECONNRESET);
  ci_assert_equal(ep->os_port_keeper, NULL);
  ci_netif_unlock(&thr->netif);
  efab_tcp_helper_k_ref_count_dec(thr, 1);
  return 0;

fail_inconsistent:
  ci_log("%s: inconsistent ep %d:%d", __func__, carg->stack_id, carg->sock_id);
fail_drop_ref:
  /* The stack was looked up with EFAB_THR_TABLE_LOOKUP_NO_UL and is asserted
   * above to have no userland, so the lookup reference is dropped the same
   * way as on the success path. */
  efab_tcp_helper_k_ref_count_dec(thr, 1);
  return rc;
}
Esempio n. 2
0
/*
 * Allocate a stack on behalf of userland and attach it to priv.
 *
 * arg is the ci_resource_onload_alloc_t request handed to
 * tcp_helper_alloc_ul().  On success priv becomes a CI_PRIV_TYPE_NETIF
 * handle with no socket (OO_SP_NULL).  Returns 0 or the error from
 * tcp_helper_alloc_ul() / oo_priv_set_stack().
 */
static int
tcp_helper_alloc_rsop(ci_private_t *priv, void *arg)
{
  /* Serializes multiple processes trying to create stacks with the
   * same name. */
  static DEFINE_MUTEX(ctor_mutex);

  ci_resource_onload_alloc_t *request = arg;
  tcp_helper_resource_t *stack;
  int rc;

  mutex_lock(&ctor_mutex);

  rc = tcp_helper_alloc_ul(request, -1, &stack);
  if( rc != 0 )
    goto out;

  rc = oo_priv_set_stack(priv, stack);
  if( rc != 0 ) {
    /* Could not attach: give the freshly allocated stack back. */
    efab_thr_release(stack);
    goto out;
  }

  priv->fd_type = CI_PRIV_TYPE_NETIF;
  priv->sock_id = OO_SP_NULL;

 out:
  mutex_unlock(&ctor_mutex);
  return rc;
}
Esempio n. 3
0
/*
 * Look up the cluster named by cname (after gen_cluster_name() expansion)
 * for the caller's effective uid, create it when the search does not find
 * one, and then allocate a stack inside it.
 *
 * When a cluster was created here and its flags include THC_FLAG_TPROXY,
 * the tproxy filter is installed after both cluster mutexes are released.
 *
 * On success returns 0 and stores the stack in *thr_out; otherwise returns
 * the failing helper's error code and leaves *thr_out untouched.
 */
int tcp_helper_cluster_alloc_thr(const char* cname,
                                 int cluster_size,
                                 int cluster_restart,
                                 int ni_flags,
                                 const ci_netif_config_opts* ni_opts,
                                 tcp_helper_resource_t** thr_out)
{
  tcp_helper_cluster_t* cluster = NULL;
  tcp_helper_resource_t* stack = NULL;
  int created = 0;
  int rc;
  int cluster_flags = tcp_helper_cluster_thc_flags(ni_opts);
  char full_name[CI_CFG_CLUSTER_NAME_LEN + 1];

  mutex_lock(&thc_init_mutex);
  mutex_lock(&thc_mutex);

  gen_cluster_name(cname, full_name);

  rc = thc_search_by_name(full_name, 0, 0, ci_geteuid(), &cluster);
  if( rc < 0 )
    goto unlock;

  if( rc == 1 ) {
    /* Existing cluster found. */
    rc = 0;
  }
  else {
    /* Nothing found: create the cluster ourselves. */
    rc = thc_alloc(full_name, 0, 0, ci_geteuid(), cluster_size,
                   cluster_flags, &cluster);
    if( rc < 0 )
      goto unlock;
    created = 1;
  }

  /* Find a suitable stack within the cluster to use */
  if( rc == 0 )
    rc = thc_alloc_thr(cluster, cluster_restart, ni_opts, ni_flags, &stack);

 unlock:
  mutex_unlock(&thc_mutex);
  mutex_unlock(&thc_init_mutex);

  if( rc == 0 && created && (cluster_flags & THC_FLAG_TPROXY) ) {
    rc = thc_install_tproxy(cluster, ni_opts->scalable_filter_ifindex);
    if( rc != 0 ) {
      efab_thr_release(stack);
      /* this should have freed the cluster without other references */
      created = 0;
    }
  }

  if( rc != 0 && created )
    tcp_helper_cluster_release(cluster, NULL);
  if( rc == 0 )
    *thr_out = stack;
  return rc;
}
Esempio n. 4
0
/*
 * Create a close-on-exec file descriptor of type CI_PRIV_TYPE_NETIF for the
 * stack thr, with no socket attached (OO_SP_NULL).
 *
 * Returns the new descriptor, or the negative error from
 * onload_alloc_file().
 */
int
oo_create_stack_fd(tcp_helper_resource_t *thr)
{
  int fd;

  /* Reference taken for the new file; given back below if no file is made. */
  efab_thr_ref(thr);

  fd = onload_alloc_file(thr, OO_SP_NULL, O_CLOEXEC, CI_PRIV_TYPE_NETIF);
  if( fd >= 0 )
    return fd;

  efab_thr_release(thr);
  OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)", __FUNCTION__, fd));
  return fd;
}
Esempio n. 5
0
/*
 * Look up a stack by name / id (subject to the CHECK_USER access check) and
 * attach it to priv.
 *
 * On success priv becomes a CI_PRIV_TYPE_NETIF handle with no socket
 * (OO_SP_NULL) and 0 is returned.  Otherwise the error from the lookup or
 * from oo_priv_set_stack() is returned and the lookup reference is dropped.
 */
static int
oo_priv_lookup_and_attach_stack(ci_private_t* priv, const char* name,
                                unsigned id)
{
  tcp_helper_resource_t* stack;
  int rc;

  rc = efab_thr_table_lookup(name, id, EFAB_THR_TABLE_LOOKUP_CHECK_USER,
                             &stack);
  if( rc != 0 )
    return rc;

  rc = oo_priv_set_stack(priv, stack);
  if( rc != 0 ) {
    efab_thr_release(stack);
    return rc;
  }

  priv->fd_type = CI_PRIV_TYPE_NETIF;
  priv->sock_id = OO_SP_NULL;
  return rc;
}
/*
 * Try to connect the socket behind priv to a listening socket that lives in
 * some Onload stack on this host ("loopback acceleration").
 *
 * arg points to a struct oo_op_loopback_connect: dst_addr / dst_port are the
 * destination, out_rc receives the result of the connect helper and
 * out_moved is set to 1 when the socket has been moved to another stack.
 *
 * Locking policy:
 * Entry: priv->thr->netif is assumed to be locked.
 * Exit: all stacks (the client stack and the listener's stack) are
 * unlocked, on every return path.
 *
 * Returns 0 when a connect helper was run (see carg->out_rc), -EINVAL for a
 * non-endpoint priv or an unsupported tcp_client_loopback setting, -ENOENT
 * when no suitable listener was found, -ECONNREFUSED or the error from
 * efab_file_move_to_alien_stack() when the socket could not be prepared.
 */
int efab_tcp_loopback_connect(ci_private_t *priv, void *arg)
{
  struct oo_op_loopback_connect *carg = arg;
  ci_netif *alien_ni = NULL;
  oo_sp tls_id;

  ci_assert(ci_netif_is_locked(&priv->thr->netif));
  carg->out_moved = 0;

  if( !CI_PRIV_TYPE_IS_ENDPOINT(priv->fd_type) ) {
    /* Honour the exit contract: the caller cannot tell this -EINVAL from
     * the one below, which does unlock. */
    ci_netif_unlock(&priv->thr->netif);
    return -EINVAL;
  }
  if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_CONNSTACK &&
      NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_LISTSTACK &&
      NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
      CITP_TCP_LOOPBACK_TO_NEWSTACK) {
    ci_netif_unlock(&priv->thr->netif);
    return -EINVAL;
  }

  while( iterate_netifs_unlocked(&alien_ni) == 0 ) {

    if( !efab_thr_can_access_stack(netif2tcp_helper_resource(alien_ni),
                                   EFAB_THR_TABLE_LOOKUP_CHECK_USER) )
      continue; /* no permission to look in here */

    if( NI_OPTS(alien_ni).tcp_server_loopback == CITP_TCP_LOOPBACK_OFF )
      continue; /* server does not accept loopback connections */

    if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
        CITP_TCP_LOOPBACK_TO_LISTSTACK &&
        NI_OPTS(alien_ni).tcp_server_loopback !=
        CITP_TCP_LOOPBACK_ALLOW_ALIEN_IN_ACCEPTQ )
      continue; /* options of the stacks do not match */

    if( NI_OPTS(&priv->thr->netif).tcp_client_loopback !=
        CITP_TCP_LOOPBACK_TO_LISTSTACK &&
        !efab_thr_user_can_access_stack(alien_ni->uid, alien_ni->euid,
                                        &priv->thr->netif) )
      continue; /* server can't accept our socket */

    tls_id = ci_tcp_connect_find_local_peer(alien_ni, carg->dst_addr,
                                            carg->dst_port);

    if( OO_SP_NOT_NULL(tls_id) ) {
      int rc;

      /* We are going to exit in this or other way: get ref and
       * drop kref of alien_ni */
      efab_thr_ref(netif2tcp_helper_resource(alien_ni));
      iterate_netifs_unlocked_dropref(alien_ni);

      switch( NI_OPTS(&priv->thr->netif).tcp_client_loopback ) {
      case CITP_TCP_LOOPBACK_TO_CONNSTACK:
        /* connect_lo_toconn unlocks priv->thr->netif */
        carg->out_rc =
            ci_tcp_connect_lo_toconn(&priv->thr->netif, priv->sock_id,
                                     carg->dst_addr, alien_ni, tls_id);
        efab_thr_release(netif2tcp_helper_resource(alien_ni));
        return 0;

      case CITP_TCP_LOOPBACK_TO_LISTSTACK:
        /* Nobody should be using this socket, so trylock should succeed.
         * Otherwise we hand over the socket and do not accelerate this
         * loopback connection. */
        rc = ci_sock_trylock(&priv->thr->netif,
                             SP_TO_WAITABLE(&priv->thr->netif,
                                            priv->sock_id));
        if( rc == 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          return -ECONNREFUSED;
        }

        /* move_to_alien changes locks - see comments near it */
        rc = efab_file_move_to_alien_stack(priv, alien_ni);
        if( rc != 0 ) {
          /* error - everything is already unlocked */
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          /* if we return error, UL will hand the socket over. */
          return rc;
        }
        /* now alien_ni is locked */

        /* Connect again, using new endpoint */
        carg->out_rc =
            ci_tcp_connect_lo_samestack(
                            alien_ni,
                            SP_TO_TCP(alien_ni,
                                      SP_TO_WAITABLE(&priv->thr->netif,
                                                     priv->sock_id)
                                      ->moved_to_sock_id),
                            tls_id);
        ci_netif_unlock(alien_ni);
        carg->out_moved = 1;
        return 0;


      case CITP_TCP_LOOPBACK_TO_NEWSTACK:
      {
        tcp_helper_resource_t *new_thr;
        ci_resource_onload_alloc_t alloc;

        /* create new stack
         * todo: no hardware interfaces are necessary */
        strcpy(alloc.in_version, ONLOAD_VERSION);
        strcpy(alloc.in_uk_intf_ver, oo_uk_intf_ver);
        alloc.in_name[0] = '\0';
        alloc.in_flags = 0;

        rc = tcp_helper_alloc_kernel(&alloc, &NI_OPTS(&priv->thr->netif), 0,
                                     &new_thr);
        if( rc != 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          return -ECONNREFUSED;
        }

        rc = ci_sock_trylock(&priv->thr->netif,
                             SP_TO_WAITABLE(&priv->thr->netif,
                                            priv->sock_id));
        if( rc == 0 ) {
          ci_netif_unlock(&priv->thr->netif);
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          efab_thr_release(new_thr);
          return -ECONNREFUSED;
        }

        /* move connecting socket to the new stack */
        rc = efab_file_move_to_alien_stack(priv, &new_thr->netif);
        if( rc != 0 ) {
          /* error - everything is already unlocked */
          efab_thr_release(netif2tcp_helper_resource(alien_ni));
          efab_thr_release(new_thr);
          return -ECONNREFUSED;
        }
        /* now new_thr->netif is locked */
        carg->out_moved = 1;
        carg->out_rc = -ECONNREFUSED;

        /* now connect via CITP_TCP_LOOPBACK_TO_CONNSTACK */
        /* connect_lo_toconn unlocks new_thr->netif */
        carg->out_rc =
            ci_tcp_connect_lo_toconn(
                            &new_thr->netif,
                            SP_TO_WAITABLE(&priv->thr->netif,
                                           priv->sock_id)->moved_to_sock_id,
                            carg->dst_addr, alien_ni, tls_id);
        efab_thr_release(netif2tcp_helper_resource(alien_ni));
        return 0;
      }
      }
    }
    else if( tls_id == OO_SP_INVALID )
      /* NOTE(review): this leaves the loop without calling
       * iterate_netifs_unlocked_dropref(alien_ni), unlike the branch above;
       * confirm whether the iterator's reference is leaked here. */
      break;
  }

  ci_netif_unlock(&priv->thr->netif);
  return -ENOENT;
}
/*
 * Move the socket behind a file descriptor into the stack attached to
 * stack_priv.
 *
 * arg points to a ci_fixed_descriptor_t holding the socket's fd.
 *
 * Returns 0 on success or when the socket already lives in the target
 * stack, -EINVAL when the fd is not an endpoint socket, stack_priv is not a
 * CI_PRIV_TYPE_NETIF handle or the socket's stack is clustered, or the
 * error from ci_sock_lock() / ci_netif_lock() /
 * efab_file_move_to_alien_stack().
 */
int efab_file_move_to_alien_stack_rsop(ci_private_t *stack_priv, void *arg)
{
  ci_fixed_descriptor_t sock_fd = *(ci_fixed_descriptor_t *)arg;
  struct file *sock_file = fget(sock_fd);
  ci_private_t *sock_priv;
  tcp_helper_resource_t *src_thr;
  tcp_helper_resource_t *dst_thr;
  citp_waitable *sock_w;
  int rc;

  if( sock_file == NULL )
    return -EINVAL;

  rc = -EINVAL;
  if( !FILE_IS_ENDPOINT_SOCK(sock_file) ||
      stack_priv->fd_type != CI_PRIV_TYPE_NETIF )
    goto out_fput;

  sock_priv = sock_file->private_data;
  ci_assert(sock_priv->fd_type == CI_PRIV_TYPE_TCP_EP ||
            sock_priv->fd_type == CI_PRIV_TYPE_UDP_EP);

  src_thr = sock_priv->thr;
  dst_thr = stack_priv->thr;
  ci_assert(src_thr);
  ci_assert(dst_thr);

  /* Already in the requested stack: nothing to move. */
  if( src_thr == dst_thr ) {
    rc = 0;
    goto out_fput;
  }

  if( tcp_helper_cluster_from_cluster(src_thr) != 0 ) {
    LOG_S(ci_log("%s: move_fd() not permitted on clustered stacks", __func__));
    rc = -EINVAL;
    goto out_fput;
  }

  /* Take the socket lock first, then the lock of the stack it lives in. */
  sock_w = SP_TO_WAITABLE(&src_thr->netif, sock_priv->sock_id);
  rc = ci_sock_lock(&src_thr->netif, sock_w);
  if( rc != 0 )
    goto out_fput;

  rc = ci_netif_lock(&src_thr->netif);
  if( rc != 0 ) {
    ci_sock_unlock(&src_thr->netif, sock_w);
    goto out_fput;
  }

  /* Reference for the moved socket; handed back if the move fails. */
  efab_thr_ref(dst_thr);
  rc = efab_file_move_to_alien_stack(sock_priv, &stack_priv->thr->netif);
  fput(sock_file);

  if( rc == 0 )
    ci_netif_unlock(&dst_thr->netif);
  else
    efab_thr_release(dst_thr);

  return rc;

 out_fput:
  fput(sock_file);
  return rc;
}
Esempio n. 8
0
static int
efab_tcp_helper_get_info(ci_private_t *unused, void *arg)
{
  ci_netif_info_t *info = arg;
  int index, rc=0;
  tcp_helper_resource_t* thr = NULL;
  ci_netif* ni = NULL;
  int flags = EFAB_THR_TABLE_LOOKUP_CHECK_USER | EFAB_THR_TABLE_LOOKUP_NO_WARN; 

#if CI_CFG_EFAB_EPLOCK_RECORD_CONTENTIONS
  int j;
  eplock_resource_t* eplock_rs;
#endif

  info->ni_exists = 0;
  info->ni_no_perms_exists = 0;
  if( info->ni_orphan ) {
    flags |= EFAB_THR_TABLE_LOOKUP_NO_UL;
    info->ni_orphan = 0;
  }
  rc = efab_thr_table_lookup(NULL, info->ni_index, flags, &thr);
  if( rc == 0 ) {
    info->ni_exists = 1;
    info->ni_orphan = (thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND);
    ni = &thr->netif;
    info->mmap_bytes = thr->mem_mmap_bytes;
    info->k_ref_count = thr->k_ref_count;
    info->rs_ref_count = oo_atomic_read(&thr->ref_count);
    memcpy(info->ni_name, ni->state->name, sizeof(ni->state->name));
  } else if( rc == -EACCES ) {
    info->ni_no_perms_id = info->ni_index;
    if( efab_thr_get_inaccessible_stack_info(info->ni_index, 
                                             &info->ni_no_perms_uid,
                                             &info->ni_no_perms_euid,
                                             &info->ni_no_perms_share_with,
                                             info->ni_no_perms_name) == 0 )
      info->ni_no_perms_exists = 1;
  }

  /* sub-ops that do not need the netif to exist */
  if( info->ni_subop == CI_DBG_NETIF_INFO_GET_NEXT_NETIF ) {
    tcp_helper_resource_t* next_thr;

    info->u.ni_next_ni.index = -1;
    for( index = info->ni_index + 1;
         index < 10000 /* FIXME: magic! */;
         ++index ) {
      rc = efab_thr_table_lookup(NULL, index, flags, &next_thr);
      if( rc == 0 ) {
        if( next_thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
          efab_tcp_helper_k_ref_count_dec(next_thr, 1);
        else
          efab_thr_release(next_thr);
        info->u.ni_next_ni.index = index;
        break;
      }
      if( rc == -EACCES ) {
        info->u.ni_next_ni.index = index;
        break;
      }
    }
    rc = 0;
  }
  else if( info->ni_subop == CI_DBG_NETIF_INFO_NOOP ) {
    rc = 0;
  }

  if (!info->ni_exists)
    return 0;

  /* sub-ops that need the netif to exist */
  switch (info->ni_subop)
  {

    case CI_DBG_NETIF_INFO_GET_ENDPOINT_STATE:
      index = info->u.ni_endpoint.index;
      info->u.ni_endpoint.max = thr->netif.ep_tbl_n;
      if ((index < 0) || (index >= (int)thr->netif.ep_tbl_n)) {
        info->u.ni_endpoint.state = CI_TCP_STATE_FREE;
      }
      else {
        citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, index);

        info->u.ni_endpoint.state = wo->waitable.state;

        if( wo->waitable.state == CI_TCP_STATE_UDP ) {
          ci_udp_state* us = &wo->udp;
          info->u.ni_endpoint.udpstate = us->udpflags;
          info->u.ni_endpoint.rx_pkt_ul = us->recv_q.pkts_delivered;
          info->u.ni_endpoint.rx_pkt_kn = us->stats.n_rx_os;
        }
        else if( wo->waitable.state & CI_TCP_STATE_TCP_CONN ) {
          ci_tcp_state* ts = &wo->tcp;
          info->u.ni_endpoint.tx_pkts_max = ts->so_sndbuf_pkts;
          info->u.ni_endpoint.tx_pkts_num = ts->send.num;
        }
        if( wo->waitable.state & CI_TCP_STATE_SOCKET ) {
          ci_sock_cmn* s = &wo->sock;
          info->u.ni_endpoint.protocol = (int) sock_protocol(s);
          info->u.ni_endpoint.laddr = sock_laddr_be32(s);
          info->u.ni_endpoint.lport = (int) sock_lport_be16(s);
          info->u.ni_endpoint.raddr = sock_raddr_be32(s);
          info->u.ni_endpoint.rport = (int) sock_rport_be16(s);
        }
      }
      break;

    case CI_DBG_NETIF_INFO_GET_NEXT_NETIF:
      /* If the current netif is found, we need to succeed */
      break;

    case CI_DBG_NETIF_INFO_NOOP:
      /* Always succeeds, rc already set */
      break;

    default:
      rc = -EINVAL;
      break;
  }
  if( thr ) {
    /* Lookup needs a matching efab_thr_release() in case of ordinary
     * stack but just a ref_count_dec in case of orphan
     */
    if( thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
      efab_tcp_helper_k_ref_count_dec(thr, 1);
    else
      efab_thr_release(thr);
  }
  return rc;
}
Esempio n. 9
0
/*
 * Re-point priv from its current TCP endpoint (op->ep_id in priv->thr) to an
 * endpoint op->new_ep_id in the stack op->new_trs_id.
 *
 * The OS socket pointer is handed from the old endpoint to the new one, the
 * old TCP state is flagged CI_SB_AFLAG_ORPHAN and the new one has
 * CI_SB_AFLAG_ORPHAN and CI_SB_AFLAG_TCP_IN_ACCEPTQ cleared.
 *
 * Returns 0 on success, the error from efab_ioctl_get_ep() or
 * efab_thr_table_lookup(), or -EINVAL when the endpoint states do not allow
 * the move.
 */
static int
efab_tcp_helper_move_state(ci_private_t* priv, void *arg)
{
  oo_tcp_move_state_t *op = arg;
  tcp_helper_resource_t *dst_trs = NULL;
  tcp_helper_resource_t *prev_trs;
  tcp_helper_endpoint_t *src_ep;
  tcp_helper_endpoint_t *dst_ep;
  ci_netif *src_ni, *dst_ni;
  ci_tcp_state *src_ts, *dst_ts;
  int rc;

  rc = efab_ioctl_get_ep(priv, op->ep_id, &src_ep);
  if( rc != 0 )
    return rc;

  OO_DEBUG_TCPH(ci_log("%s: (trs=%p (%u), priv=%p, ep_id=%u, new_trs_id=%u, "
                       "new_ep_id=%u", __FUNCTION__, priv->thr, priv->thr->id,
                       priv, OO_SP_FMT(op->ep_id), op->new_trs_id,
                       OO_SP_FMT(op->new_ep_id)));

  /* check that the existing id is valid */
  src_ni = &priv->thr->netif;
  src_ts = SP_TO_TCP(src_ni, src_ep->id);

  /* TODO: check this endpoint belongs to the tcp helper resource of priv
   * and not somewhere else */

  /* This function changes neither fd_type nor the fd ops, so it cannot cope
   * with a change of socket type.  Only TCP is expected to make sense here,
   * hence the assertions. */
  ci_assert_equal(src_ts->s.pkt.ip.ip_protocol, IPPROTO_TCP);
  ci_assert_equal(priv->fd_type, CI_PRIV_TYPE_TCP_EP);

  /* get pointer to resource from handle - increments ref count */
  rc = efab_thr_table_lookup(NULL, op->new_trs_id,
                             EFAB_THR_TABLE_LOOKUP_CHECK_USER, &dst_trs);
  if( rc < 0 ) {
    OO_DEBUG_ERR( ci_log("%s: invalid new resource handle", __FUNCTION__) );
    return rc;
  }
  ci_assert(dst_trs != NULL);

  /* check valid endpoint in new netif */
  /* NOTE(review): dst_ep is dereferenced without a NULL check; confirm that
   * ci_netif_get_valid_ep() cannot return NULL for a bad op->new_ep_id. */
  dst_ni = &dst_trs->netif;
  dst_ep = ci_netif_get_valid_ep(dst_ni, op->new_ep_id);
  dst_ts = SP_TO_TCP(dst_ni, dst_ep->id);

  /* check the two endpoint states look valid */
  if( (src_ts->s.pkt.ip.ip_protocol != dst_ts->s.pkt.ip.ip_protocol) ||
      (src_ts->s.b.state != CI_TCP_CLOSED) ||
      (src_ep->oofilter.sf_local_port != NULL) ) {
    efab_thr_release(dst_trs);
    OO_DEBUG_ERR(ci_log("%s: invalid endpoint states", __FUNCTION__));
    return -EINVAL;
  }

  /* should be fine to complete */
  ci_assert(dst_trs);

  /* Swap priv->thr over to the new stack with compare-and-swap, retrying
   * until the swap succeeds, then drop the reference on the old stack. */
  do {
    prev_trs = priv->thr;
  } while( ci_cas_uintptr_fail((ci_uintptr_t *)&priv->thr,
                               (ci_uintptr_t)prev_trs,
                               (ci_uintptr_t)dst_trs) );
  efab_thr_release(prev_trs);

  /* move file to hold details of new resource, new endpoint */
  ci_assert(OO_SP_EQ(priv->sock_id, op->ep_id));
  priv->sock_id = dst_ep->id;

  OO_DEBUG_TCPH(ci_log("%s: set epid %u", __FUNCTION__,
                       OO_SP_FMT(priv->sock_id)));

  /* copy across any necessary state */
  ci_assert_equal(dst_ep->os_socket, NULL);
  dst_ep->os_socket = src_ep->os_socket;
  src_ep->os_socket = NULL;

  /* set ORPHAN flag in current as not attached to an FD */
  ci_bit_set(&src_ts->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
  /* remove ORPHAN flag in new TCP state */
  ci_atomic32_and(&dst_ts->s.b.sb_aflags,
                  ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

  return 0;
}
Esempio n. 10
0
/*
 * Build a new inode, dentry and struct file on the onload_mnt filesystem
 * for stack thr / socket ep_id and install the file in the current
 * process's fd table.
 *
 * thr, ep_id and fd_type are stored in the ci_private_t embedded in the
 * inode.  This function does not take a reference on thr; oo_create_fd()
 * and oo_create_stack_fd() do so before calling it.
 *
 * flags: only O_NONBLOCK (copied into f_flags) and O_CLOEXEC (fd is marked
 * close-on-exec) are examined.
 * fd_type: selects the file operations (oo_fops_by_type()) and the inode
 * mode.
 *
 * Returns the new fd, or -EINVAL when fd_type has no file operations,
 * -ENOMEM when the inode or dentry cannot be allocated, the error from
 * get_unused_fd(), or -ENFILE when alloc_file() fails.
 */
static int
onload_alloc_file(tcp_helper_resource_t *thr, oo_sp ep_id, int flags,
                  int fd_type)
{
  struct qstr name = { .name = "" };
  /* my_dentry names the dentry in both build variants. */
#ifdef EFX_HAVE_STRUCT_PATH
  struct path path;
#define my_dentry path.dentry
#else
  struct dentry *dentry;
#define my_dentry dentry
#endif
  struct file *file;
  int fd;
  struct inode *inode;
  ci_private_t *priv;
  struct file_operations *fops;

  fops = oo_fops_by_type(fd_type);
  if( fops == NULL )
    return -EINVAL;
  ci_assert_equal(fops->owner, THIS_MODULE);

  inode = new_inode(onload_mnt->mnt_sb);
  if( inode == NULL )
    return -ENOMEM;
#ifdef EFX_FSTYPE_HAS_MOUNT
  inode->i_ino = get_next_ino();
#endif
  /* NOTE(review): the assignment for CI_PRIV_TYPE_NETIF below is always
   * overwritten by the else branch of the following if (there is no
   * "else" joining the two), so a NETIF inode ends up with
   * S_IFIFO | S_IRUSR | S_IWUSR.  Confirm which mode is intended. */
  if( fd_type == CI_PRIV_TYPE_NETIF )
    inode->i_mode = S_IRWXUGO;
  if( fd_type == CI_PRIV_TYPE_TCP_EP || fd_type == CI_PRIV_TYPE_UDP_EP )
    inode->i_mode = 
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
        /* in 2.6.18 this flag makes us "socket" and sendmsg crashes;
         * see sock_from_file() */
                    S_IFSOCK |
#endif
                    S_IRWXUGO;
  else
    inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
  inode->i_uid = current_fsuid();
  inode->i_gid = current_fsgid();
  /* The private data lives inside the onload_inode that wraps the VFS
   * inode. */
  priv = &container_of(inode, struct onload_inode, vfs_inode)->priv;
  priv->thr = thr;
  priv->sock_id = ep_id;
  priv->fd_type = fd_type;

  fd = get_unused_fd();
  if( fd < 0 ) {
    iput(inode);
    return fd;
  }
  /*ci_log("[%d]%s(%d:%d) return %d priv=%p", current->pid, __func__,
         thr->id, ep_id, fd, priv);*/

  /* Allocate the dentry; the call used depends on kernel version and on
   * which compat features the build detected. */
#ifdef EFX_FSTYPE_HAS_MOUNT
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37)
  path.dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  if( path.dentry != NULL )
    path.dentry->d_op = &onloadfs_dentry_operations;
#else
  path.dentry = d_alloc_pseudo(onload_mnt->mnt_sb, &name);
#endif
#else /* EFX_FSTYPE_HAS_MOUNT */
#ifdef EFX_HAVE_D_DNAME
  my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
#else
  {
    /* Without d_dname support the dentry name is generated up front by
     * onloadfs_name(). */
    char str[32];
    name.len = onloadfs_name(&container_of(inode, struct onload_inode,
                                           vfs_inode)->priv,
                             str, sizeof(str));
    name.name = str;
    name.hash = inode->i_ino;
    my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  }
#endif
#endif /* EFX_FSTYPE_HAS_MOUNT */

  if( my_dentry == NULL ) {
    put_unused_fd(fd);
    iput(inode);
    return -ENOMEM;
  }

#if !defined(EFX_FSTYPE_HAS_MOUNT) || defined(EFX_OLD_MOUNT_PSEUDO)
  my_dentry->d_op = &onloadfs_dentry_operations;
#if !defined(EFX_HAVE_STRUCT_PATH) && defined(EFX_HAVE_D_DNAME)
  my_dentry->d_flags &= ~DCACHE_UNHASHED;
#endif
#endif
  d_instantiate(my_dentry, inode);
#ifndef EFX_HAVE_D_DNAME
  d_rehash(my_dentry);
#endif
  inode->i_fop = fops;

#ifdef EFX_HAVE_STRUCT_PATH
  path.mnt = mntget(onload_mnt);
  file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
#else
  file = alloc_file(onload_mnt, dentry, FMODE_READ | FMODE_WRITE, fops);
#endif
  if( file == NULL) {
    /* NOTE(review): the two variants clean up differently - path_put()
     * only, versus dput() followed by iput() on an inode that was already
     * attached to the dentry by d_instantiate().  Confirm the second
     * variant does not drop the inode twice. */
#ifdef EFX_HAVE_STRUCT_PATH
    path_put(&path);
#else
    dput(dentry);
    iput(inode);
#endif
    put_unused_fd(fd);
    return -ENFILE;
  }

  priv->_filp = file;
  file->f_flags = O_RDWR | (flags & O_NONBLOCK);
  file->f_pos = 0;
  file->private_data = priv;

  if( flags & O_CLOEXEC ) {
    /* Publish the file in the fd table by hand so that the close-on-exec
     * bit is set under the same file_lock hold. */
    struct files_struct *files = current->files;
    struct fdtable *fdt;
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    rcu_assign_pointer(fdt->fd[fd], file);
    efx_set_close_on_exec(fd, fdt);
    spin_unlock(&files->file_lock);
  } else
    fd_install(fd, file);
  /* NOTE(review): the result of try_module_get() is ignored. */
  try_module_get(THIS_MODULE);

  ci_assert_equal(file->f_op, fops);
  return fd;
}

/*
 * Free priv unless its file lives on the onload_mnt mount: in that case the
 * priv is part of the inode and is freed automatically with it.
 */
void onload_priv_free(ci_private_t *priv)
{
  if( priv->_filp->f_vfsmnt == onload_mnt )
    return;

  ci_free(priv);
}


/*
 * Create a file descriptor of type fd_type for endpoint ep.
 *
 * flags is passed through to onload_alloc_file().  On success the
 * endpoint's CI_SB_AFLAG_ORPHAN and CI_SB_AFLAG_TCP_IN_ACCEPTQ flags are
 * cleared and the new descriptor is returned; on failure the negative error
 * from onload_alloc_file() is returned.
 */
int
oo_create_fd(tcp_helper_endpoint_t* ep, int flags, int fd_type)
{
  tcp_helper_resource_t *stack = ep->thr;
  citp_waitable_obj *sock_obj = SP_TO_WAITABLE_OBJ(&stack->netif, ep->id);
  int fd;

  /* Reference taken for the new file; given back below if no file is made. */
  efab_thr_ref(stack);

  fd = onload_alloc_file(stack, ep->id, flags, fd_type);
  if( fd >= 0 ) {
    ci_atomic32_and(&sock_obj->waitable.sb_aflags,
                    ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));
    return fd;
  }

  efab_thr_release(stack);
  OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)", __FUNCTION__, fd));
  return fd;
}
Esempio n. 11
0
/*
 * Prepare a socket for SO_REUSEPORT-style clustering: reserve the
 * proto:port[:ip] tuple, find or create the cluster that should serve it,
 * pick or create a stack inside that cluster and move the socket there.
 *
 * arg points to an oo_tcp_reuseport_bind_t (addr_be32, port_be16,
 * cluster_name, cluster_size, cluster_restart_opt).
 *
 * This function must be called with netif lock not held and it always
 * returns with the netif lock not held.
 *
 * Returns 0 on success and also when clustering is skipped (socket bound
 * to an alien address, or the cluster_ignore option is set); -EINVAL for
 * unsupported requests (port 0, cluster size < 2, scalable-filter sockets,
 * sockets that already have a filter); -EEXIST when the named cluster and
 * the cluster owning the tuple conflict; -EADDRINUSE when the tuple's
 * cluster belongs to another euid; or the error of the failing helper.
 */
int efab_tcp_helper_reuseport_bind(ci_private_t *priv, void *arg)
{
  oo_tcp_reuseport_bind_t* trb = arg;
  ci_netif* ni = &priv->thr->netif;
  tcp_helper_cluster_t* thc;
  tcp_helper_resource_t* thr = NULL;
  citp_waitable* waitable;
  ci_sock_cmn* sock = SP_TO_SOCK(ni, priv->sock_id);
  struct oof_manager* fm = efab_tcp_driver.filter_manager;
  struct oof_socket* oofilter;
  struct oof_socket dummy_oofilter;
  int protocol = thc_get_sock_protocol(sock);
  char name[CI_CFG_CLUSTER_NAME_LEN + 1];
  int rc, rc1;
  int flags = 0;
  tcp_helper_cluster_t* named_thc,* ported_thc;
  int alloced = 0;

  /* No clustering on sockets bound to alien addresses */
  if( sock->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
    return 0;

  if( NI_OPTS(ni).cluster_ignore == 1 ) {
    LOG_NV(ci_log("%s: Ignored attempt to use clusters due to "
                  "EF_CLUSTER_IGNORE option.", __FUNCTION__));
    return 0;
  }

  if( trb->port_be16 == 0 ) {
    ci_log("%s: Reuseport on port=0 is not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( trb->cluster_size < 2 ) {
    ci_log("%s: Cluster sizes < 2 are not supported", __FUNCTION__);
    return -EINVAL;
  }

  if( sock->s_flags & (CI_SOCK_FLAG_TPROXY | CI_SOCK_FLAG_MAC_FILTER) ) {
    ci_log("%s: Scalable filter sockets cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  oofilter = &ci_trs_ep_get(priv->thr, priv->sock_id)->oofilter;

  if( oofilter->sf_local_port != NULL ) {
    ci_log("%s: Socket that already have filter cannot be clustered",
           __FUNCTION__);
    return -EINVAL;
  }

  /* The socket's stack already belongs to a cluster: only the tuple needs
   * reserving, no stack move is required. */
  if( priv->thr->thc ) {
    /* Reserve proto:port[:ip] until bind (or close)*/
    rc = oof_socket_add(fm, oofilter,
                       OOF_SOCKET_ADD_FLAG_CLUSTERED |
                       OOF_SOCKET_ADD_FLAG_DUMMY,
                       protocol, trb->addr_be32, trb->port_be16, 0, 0,
                       &ported_thc);
    if( rc > 0 )
      rc = 0;
    if( rc == 0 )
      sock->s_flags |= CI_SOCK_FLAG_FILTER;
    return rc;
  }

  mutex_lock(&thc_init_mutex);
  /* We are going to be iterating over clusters, make sure they don't
   * change.
   */
  mutex_lock(&thc_mutex);

  /* Lookup a suitable cluster to use */

  /* We try to add dummy filter to oof to reserve proto:port[:ip] tuple,
   * if there is already a cluster at the tuple we will get reference to it,
   */
  oof_socket_ctor(&dummy_oofilter);
  rc = oof_socket_add(fm, &dummy_oofilter,
                      OOF_SOCKET_ADD_FLAG_CLUSTERED |
                      OOF_SOCKET_ADD_FLAG_DUMMY |
                      OOF_SOCKET_ADD_FLAG_NO_STACK,
                      protocol, trb->addr_be32, trb->port_be16, 0, 0,
                      &ported_thc);
  if( rc < 0 ) /* non-clustered socket on the tuple */
    goto alloc_fail0;

  if( ! gen_cluster_name(trb->cluster_name, name) ) {
    /* user requested a cluster by name.  But we need to make sure
     * that the oof_local_port that the user is interested in is not
     * being used by another cluster.  We search for cluster by name
     * and use results of prior proto:port[:ip] search oof_local_port
     * to then do some sanity checking.
     */
    rc1 = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &named_thc);
    if( rc1 < 0 ) {
      rc = rc1;
      goto alloc_fail;
    }

    if( rc1 == 0 ) {
      if( rc == 1 ) {
        /* search by oof_local_port found a cluster which search by
         * name didn't find. */
        LOG_E(ci_log("Error: Cluster with requested name %s already "
                     "bound to %s", name, ported_thc->thc_name));
        rc = -EEXIST;
        goto alloc_fail;
      }
      else {
        /* Neither searches found a cluster.  So allocate one below.
         */
      }
    }
    else {
      if( rc == 1 ) {
        /* Both searches found clusters.  Fine if they are the same or
         * else error. */
        if( named_thc != ported_thc ) {
          /* The cluster that does own the tuple is the one found by the
           * tuple search, so that is the name to report last. */
          LOG_E(ci_log("Error: Cluster %s does not handle socket %s:%d.  "
                       "Cluster %s does", name, FMT_PROTOCOL(protocol),
                       trb->port_be16, ported_thc->thc_name));
          rc = -EEXIST;
          goto alloc_fail;
        }
      }
      /* Search by name found a cluster no conflict with search by tuple
       * (the ported cluster is either none or the same as named)*/
      thc = named_thc;
      goto cont;
    }
  }
  else {
    /* No cluster name requested.  We have already looked for a cluster handling
     * the tuple.  If none found, then try to use an existing
     * cluster this process created.  If none found, then allocate one.
     */
    /* If rc == 0, then no cluster found - try to allocate one.
     * If rc == 1, we found cluster - make sure that euids match and continue. */
    if( rc == 1 ) {
      thc = ported_thc;
      if( thc->thc_euid != ci_geteuid() ) {
        rc = -EADDRINUSE;
        goto alloc_fail;
      }
      goto cont;
    }
    rc = thc_search_by_name(name, protocol, trb->port_be16, ci_geteuid(),
                            &thc);
    if( rc < 0 )
      goto alloc_fail;
    if( rc == 1 )
      goto cont;
  }
  /* When an interface is in tproxy mode, all clustered listening socket
   * are assumed to be part of tproxy passive side.  This requires
   * rss context to use altered rss hashing based solely on src ip:port.
   */
  flags = tcp_helper_cluster_thc_flags(&NI_OPTS(ni));

  if( (rc = thc_alloc(name, protocol, trb->port_be16, ci_geteuid(),
                      trb->cluster_size, flags, &thc)) != 0 )
      goto alloc_fail;

  alloced = 1;

 cont:
  tcp_helper_cluster_ref(thc);

  /* At this point we have our cluster with one additional reference */

  /* Find a suitable stack within the cluster to use */
  rc = thc_get_thr(thc, &dummy_oofilter, &thr);
  if( rc != 0 )
    rc = thc_alloc_thr(thc, trb->cluster_restart_opt,
                       &ni->opts, ni->flags, &thr);

  /* If get or alloc succeeded thr holds reference to the cluster,
   * so the cluster cannot go away.  We'll drop our reference and also
   * will not be accessing state within the cluster anymore so we can
   * drop the lock. */
  mutex_unlock(&thc_mutex);

  if( alloced && rc == 0 && (flags & THC_FLAG_TPROXY) != 0 ) {
    /* Tproxy filter is allocated as late as here,
     * the reason is that this needs to be preceded by stack allocation
     * (firmware needs initialized vi) */
    rc = thc_install_tproxy(thc, NI_OPTS(ni).scalable_filter_ifindex);
    if( rc != 0 )
      efab_thr_release(thr);
  }

  tcp_helper_cluster_release(thc, NULL);

  if( rc != 0 ) {
    oof_socket_del(fm, &dummy_oofilter);
    goto alloc_fail_unlocked;
  }

  /* We have thr and we hold single reference to it. */

  /* Move the socket into the new stack */
  if( (rc = ci_netif_lock(ni)) != 0 )
    goto drop_and_done;
  waitable = SP_TO_WAITABLE(ni, priv->sock_id);
  rc = ci_sock_lock(ni, waitable);
  if( rc != 0 ) {
    ci_netif_unlock(ni);
    goto drop_and_done;
  }
  /* thr referencing scheme comes from efab_file_move_to_alien_stack_rsop */
  efab_thr_ref(thr);
  rc = efab_file_move_to_alien_stack(priv, &thr->netif, 0);
  if( rc != 0 )
    efab_thr_release(thr);
  else {
    /* beside us, socket now holds its own reference to thr */
    oofilter = &ci_trs_ep_get(thr, sock->b.moved_to_sock_id)->oofilter;
    oof_socket_replace(fm, &dummy_oofilter, oofilter);
    SP_TO_SOCK(&thr->netif, sock->b.moved_to_sock_id)->s_flags |= CI_SOCK_FLAG_FILTER;
    ci_netif_unlock(&thr->netif);
  }

 drop_and_done:
  if( rc != 0 )
    oof_socket_del(fm, &dummy_oofilter);
  /* Drop the reference we got from thc_get_thr or thc_alloc_thr().
   * If things went wrong both stack and cluster might disappear. */
  efab_thr_release(thr);
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;

 alloc_fail:
  oof_socket_del(fm, &dummy_oofilter);
 alloc_fail0:
  mutex_unlock(&thc_mutex);
 alloc_fail_unlocked:
  oof_socket_dtor(&dummy_oofilter);
  mutex_unlock(&thc_init_mutex);
  return rc;
}