/* Release the stack lock that the caller holds and wake any waiters.
 *
 * ni             stack whose lock word (ni->state->lock.lock) is released
 * in_dl_context  when non-zero, CI_NETIF_FLAG_IN_DL_CONTEXT is cleared from
 *                ni->flags, no extra packet buffers are allocated, and the
 *                value is forwarded to the lock callback
 *
 * Returns 0.  When NDEBUG is not defined, returns -EIO (without unlocking)
 * if the lock word is not marked locked or is marked unlocked.
 */
int efab_eplock_unlock_and_wake(ci_netif *ni, int in_dl_context)
{
  int lock_val = ni->state->lock.lock;
  tcp_helper_resource_t *thr = netif2tcp_helper_resource(ni);

  /* Top up the packet buffers if the stack is running short. */
  if( !in_dl_context && OO_STACK_NEEDS_MORE_PACKETS(ni) )
    efab_tcp_helper_more_bufs(thr);

  /* From here on only the in_dl_context argument is consulted; the
   * CI_NETIF_FLAG_IN_DL_CONTEXT flag has to be removed while the stack
   * lock is still held. */
  if( in_dl_context )
    ni->flags &= ~CI_NETIF_FLAG_IN_DL_CONTEXT;

  /* Retry until either the callback has dealt with the lock or the
   * compare-and-swap to the unlocked value succeeds. */
  for( ; ; ) {
#ifndef NDEBUG
    if( (~lock_val & CI_EPLOCK_LOCKED) || (lock_val & CI_EPLOCK_UNLOCKED) ) {
      OO_DEBUG_ERR(ci_log("efab_eplock_unlock_and_wake: corrupt"
                          " (value is %x)", (unsigned) lock_val));
      OO_DEBUG_ERR(dump_stack());
      return -EIO;
    }
#endif

    if( lock_val & CI_EPLOCK_CALLBACK_FLAGS ) {
      /* Run the callback while the lock is still ours.  The callback
      ** must either
      **  - drop the lock with ef_eplock_try_unlock() and return the lock
      **    value as it was just before unlocking, OR
      **  - leave the eplock locked and return CI_EPLOCK_LOCKED
      */
      lock_val = efab_tcp_helper_netif_lock_callback(&ni->eplock_helper,
                                                     lock_val,
                                                     in_dl_context);
      break;
    }

    if( ! ci_cas32_fail(&ni->state->lock.lock, lock_val,
                        CI_EPLOCK_UNLOCKED) )
      break;

    /* Somebody (probably) set a flag while we were trying to unlock, so
    ** re-read the lock word and handle the new flag(s). */
    lock_val = ni->state->lock.lock;
  }

  if( lock_val & CI_EPLOCK_FL_NEED_WAKE ) {
    CITP_STATS_NETIF_INC(ni, lock_wakes);
    wake_up_interruptible(&ni->eplock_helper.wq);
  }

  return 0;
}
/* Create a new file descriptor referring to the stack that priv is
 * attached to, and report it to the caller.
 *
 * priv  private data of the file the request arrived on; priv->thr is the
 *       stack to attach to
 * arg   oo_stack_attach_t; on success fd, out_nic_set and out_map_size
 *       are filled in
 *
 * Returns 0 on success, -EINVAL if priv is not attached to a stack, or
 * the negative value returned by oo_create_stack_fd().
 */
static int efab_tcp_helper_stack_attach(ci_private_t* priv, void *arg)
{
  oo_stack_attach_t* attach = arg;
  tcp_helper_resource_t* thr = priv->thr;
  int stack_fd;

  if( thr == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }
  OO_DEBUG_TCPH(ci_log("%s: [%d]", __FUNCTION__, NI_ID(&thr->netif)));

  stack_fd = oo_create_stack_fd(thr);
  if( stack_fd < 0 ) {
    OO_DEBUG_ERR(ci_log("%s: oo_create_stack_fd failed (%d)",
                        __FUNCTION__, stack_fd));
    return stack_fd;
  }
  attach->fd = stack_fd;

  /* Refresh the OS socket buffer size settings so that this new socket
   * picks up current values. */
  efab_get_os_settings(&NI_OPTS_TRS(thr));

  attach->out_nic_set = thr->netif.nic_set;
  attach->out_map_size = thr->mem_mmap_bytes;
  return 0;
}
/* Allocate a close-on-exec file descriptor of type CI_PRIV_TYPE_NETIF for
 * the given stack.
 *
 * A reference is taken on thr before the file is allocated; it is dropped
 * again if the allocation fails.
 *
 * Returns the new fd, or the negative value from onload_alloc_file().
 */
int oo_create_stack_fd(tcp_helper_resource_t *thr)
{
  int rc;

  efab_thr_ref(thr);

  rc = onload_alloc_file(thr, OO_SP_NULL, O_CLOEXEC, CI_PRIV_TYPE_NETIF);
  if( rc >= 0 )
    return rc;

  /* Allocation failed: give back the reference taken above. */
  efab_thr_release(thr);
  OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)",
                      __FUNCTION__, rc));
  return rc;
}
/* Re-target an open TCP endpoint file at an endpoint in another stack.
 *
 * priv  private data of the file the request was issued on; on success
 *       priv->thr and priv->sock_id are switched to the new stack and
 *       endpoint
 * arg   oo_tcp_move_state_t: ep_id names the current endpoint, new_trs_id
 *       and new_ep_id name the destination stack and endpoint
 *
 * Returns 0 on success, the error from efab_ioctl_get_ep() or
 * efab_thr_table_lookup() if either fails, or -EINVAL if the endpoint
 * states do not permit the move.
 *
 * Reference handling: the reference obtained by efab_thr_table_lookup()
 * on the new stack is handed over to priv.  The reference priv held on
 * the old stack is dropped only after the last access to old-stack state
 * (ep and ts), so that state cannot be released underneath us.
 */
static int efab_tcp_helper_move_state(ci_private_t* priv, void *arg)
{
  oo_tcp_move_state_t *op = arg;
  tcp_helper_endpoint_t *new_ep;
  tcp_helper_resource_t *new_trs = NULL;
  tcp_helper_resource_t *old_trs;
  ci_netif *ni, *new_ni;
  ci_tcp_state *ts, *new_ts;
  tcp_helper_endpoint_t *ep;
  int rc;

  rc = efab_ioctl_get_ep(priv, op->ep_id, &ep);
  if( rc != 0 )
    return rc;

  OO_DEBUG_TCPH(ci_log("%s: (trs=%p (%u), priv=%p, ep_id=%u, new_trs_id=%u, "
                       "new_ep_id=%u", __FUNCTION__, priv->thr, priv->thr->id,
                       priv, OO_SP_FMT(op->ep_id), op->new_trs_id,
                       OO_SP_FMT(op->new_ep_id)));

  /* check that the existing id is valid */
  ni = &priv->thr->netif;
  ts = SP_TO_TCP(ni, ep->id);

  /* TODO: check this endpoint belongs to the tcp helper resource of priv
   * and not somewhere else */

  /* this function does not change fd_type or fd ops, so it is not able
   * to cope with changing the socket type.  We think this only makes sense
   * for TCP, so assert we are taking a TCP endpoint.
   *
   * NOTE(review): these are assertions only; presumably they compile out
   * in non-debug builds, in which case a non-TCP file is not rejected
   * here -- confirm. */
  ci_assert_equal(ts->s.pkt.ip.ip_protocol, IPPROTO_TCP);
  ci_assert_equal(priv->fd_type, CI_PRIV_TYPE_TCP_EP);

  /* get pointer to resource from handle - increments ref count */
  rc = efab_thr_table_lookup(NULL, op->new_trs_id,
                             EFAB_THR_TABLE_LOOKUP_CHECK_USER, &new_trs);
  if( rc < 0 ) {
    OO_DEBUG_ERR( ci_log("%s: invalid new resource handle", __FUNCTION__) );
    return rc;
  }
  ci_assert(new_trs != NULL);

  /* check valid endpoint in new netif
   *
   * NOTE(review): op->new_ep_id comes from the caller and new_ep is used
   * without a NULL/range check here; this relies on
   * ci_netif_get_valid_ep() -- confirm what it does for a bad id. */
  new_ni = &new_trs->netif;
  new_ep = ci_netif_get_valid_ep(new_ni, op->new_ep_id);
  new_ts = SP_TO_TCP(new_ni, new_ep->id);

  /* check the two endpoint states look valid */
  if( (ts->s.pkt.ip.ip_protocol != new_ts->s.pkt.ip.ip_protocol) ||
      (ts->s.b.state != CI_TCP_CLOSED) ||
      (ep->oofilter.sf_local_port != NULL) ) {
    /* Not moving after all: drop the reference the lookup gave us. */
    efab_thr_release(new_trs);
    OO_DEBUG_ERR(ci_log("%s: invalid endpoint states", __FUNCTION__));
    return -EINVAL;
  }

  /* should be fine to complete */
  ci_assert(new_trs);

  /* Atomically repoint the file at the new stack, retrying if priv->thr
   * changed between the read and the compare-and-swap.  old_trs keeps the
   * value that was replaced; its reference is dropped at the end. */
  do {
    old_trs = priv->thr;
  } while( ci_cas_uintptr_fail((ci_uintptr_t *)&priv->thr,
                               (ci_uintptr_t)old_trs,
                               (ci_uintptr_t)new_trs) );

  /* move file to hold details of new resource, new endpoint */
  ci_assert(OO_SP_EQ(priv->sock_id, op->ep_id));
  priv->sock_id = new_ep->id;

  OO_DEBUG_TCPH(ci_log("%s: set epid %u", __FUNCTION__,
                       OO_SP_FMT(priv->sock_id)));

  /* copy across any necessary state */
  ci_assert_equal(new_ep->os_socket, NULL);
  new_ep->os_socket = ep->os_socket;
  ep->os_socket = NULL;

  /* set ORPHAN flag in current as not attached to an FD */
  ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
  /* remove ORPHAN flag in new TCP state */
  ci_atomic32_and(&new_ts->s.b.sb_aflags,
                  ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

  /* ep and ts live in the old stack, so its reference is released only
   * now that we have finished reading and writing them. */
  efab_thr_release(old_trs);

  return 0;
}
/* Create an inode, dentry and struct file on the onload mount for the
 * given stack/endpoint and bind the file to a newly allocated fd in the
 * current process.
 *
 * thr      stack stored in the file's private data (priv->thr).  No
 *          reference is taken here; the callers visible in this file
 *          (oo_create_stack_fd, oo_create_fd) call efab_thr_ref() first.
 * ep_id    endpoint id stored in priv->sock_id
 * flags    only O_NONBLOCK (copied into file->f_flags) and O_CLOEXEC
 *          (selects the close-on-exec install path) are examined
 * fd_type  selects the file operations via oo_fops_by_type() and the
 *          inode mode
 *
 * Returns the new fd (>= 0), or:
 *   -EINVAL  oo_fops_by_type() returned NULL
 *   -ENOMEM  inode or dentry allocation failed
 *   -ENFILE  alloc_file() returned NULL
 *   or the negative value returned by get_unused_fd().
 */
static int onload_alloc_file(tcp_helper_resource_t *thr, oo_sp ep_id, int flags, int fd_type)
{
  struct qstr name = { .name = "" };
  /* my_dentry names the dentry being built, whichever representation
   * (struct path or bare dentry pointer) this build uses. */
#ifdef EFX_HAVE_STRUCT_PATH
  struct path path;
#define my_dentry path.dentry
#else
  struct dentry *dentry;
#define my_dentry dentry
#endif
  struct file *file;
  int fd;
  struct inode *inode;
  ci_private_t *priv;
  struct file_operations *fops;

  fops = oo_fops_by_type(fd_type);
  if( fops == NULL )
    return -EINVAL;
  ci_assert_equal(fops->owner, THIS_MODULE);

  inode = new_inode(onload_mnt->mnt_sb);
  if( inode == NULL )
    return -ENOMEM;
#ifdef EFX_FSTYPE_HAS_MOUNT
  inode->i_ino = get_next_ino();
#endif
  /* NOTE(review): this `if` is not chained to the if/else that follows,
   * so for CI_PRIV_TYPE_NETIF the value assigned here is immediately
   * overwritten by the `else` branch below
   * (S_IFIFO | S_IRUSR | S_IWUSR).  Confirm which mode is intended. */
  if( fd_type == CI_PRIV_TYPE_NETIF )
    inode->i_mode = S_IRWXUGO;
  if( fd_type == CI_PRIV_TYPE_TCP_EP || fd_type == CI_PRIV_TYPE_UDP_EP )
    inode->i_mode =
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
        /* in 2.6.18 this flag makes us "socket" and sendmsg crashes;
         * see sock_from_file() */
        S_IFSOCK |
#endif
        S_IRWXUGO;
  else
    inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
  inode->i_uid = current_fsuid();
  inode->i_gid = current_fsgid();

  /* The private data is embedded in the onload_inode that contains this
   * VFS inode. */
  priv = &container_of(inode, struct onload_inode, vfs_inode)->priv;
  priv->thr = thr;
  priv->sock_id = ep_id;
  priv->fd_type = fd_type;

  fd = get_unused_fd();
  if( fd < 0 ) {
    iput(inode);
    return fd;
  }
  /*ci_log("[%d]%s(%d:%d) return %d priv=%p", current->pid, __func__, thr->id, ep_id, fd, priv);*/

  /* Allocate the dentry; the call used depends on the kernel version and
   * on which compatibility macros are defined. */
#ifdef EFX_FSTYPE_HAS_MOUNT
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37)
  path.dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  if( path.dentry != NULL )
    path.dentry->d_op = &onloadfs_dentry_operations;
#else
  path.dentry = d_alloc_pseudo(onload_mnt->mnt_sb, &name);
#endif
#else /* EFX_FSTYPE_HAS_MOUNT */
#ifdef EFX_HAVE_D_DNAME
  my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
#else
  {
    /* No d_dname support: build the name up front with onloadfs_name(). */
    char str[32];
    name.len = onloadfs_name(&container_of(inode, struct onload_inode, vfs_inode)->priv, str, sizeof(str));
    name.name = str;
    name.hash = inode->i_ino;
    my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  }
#endif
#endif /* EFX_FSTYPE_HAS_MOUNT */
  if( my_dentry == NULL ) {
    /* Undo the fd reservation and the inode allocation. */
    put_unused_fd(fd);
    iput(inode);
    return -ENOMEM;
  }

#if !defined(EFX_FSTYPE_HAS_MOUNT) || defined(EFX_OLD_MOUNT_PSEUDO)
  my_dentry->d_op = &onloadfs_dentry_operations;
#if !defined(EFX_HAVE_STRUCT_PATH) && defined(EFX_HAVE_D_DNAME)
  my_dentry->d_flags &= ~DCACHE_UNHASHED;
#endif
#endif
  d_instantiate(my_dentry, inode);
#ifndef EFX_HAVE_D_DNAME
  d_rehash(my_dentry);
#endif
  inode->i_fop = fops;

#ifdef EFX_HAVE_STRUCT_PATH
  path.mnt = mntget(onload_mnt);
  file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
#else
  file = alloc_file(onload_mnt, dentry, FMODE_READ | FMODE_WRITE, fops);
#endif
  if( file == NULL) {
    /* NOTE(review): in the non-struct-path branch the inode is put
     * explicitly after dput(), although d_instantiate() has already
     * attached it to the dentry -- confirm this is not a double put on
     * the kernels that take this branch. */
#ifdef EFX_HAVE_STRUCT_PATH
    path_put(&path);
#else
    dput(dentry);
    iput(inode);
#endif
    put_unused_fd(fd);
    return -ENFILE;
  }

  priv->_filp = file;
  file->f_flags = O_RDWR | (flags & O_NONBLOCK);
  file->f_pos = 0;
  file->private_data = priv;

  if( flags & O_CLOEXEC ) {
    /* Install the file in the fd table by hand, under files->file_lock,
     * so that close-on-exec is set in the same critical section. */
    struct files_struct *files = current->files;
    struct fdtable *fdt;
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    rcu_assign_pointer(fdt->fd[fd], file);
    efx_set_close_on_exec(fd, fdt);
    spin_unlock(&files->file_lock);
  } else
    fd_install(fd, file);
  /* NOTE(review): the return value of try_module_get() is ignored. */
  try_module_get(THIS_MODULE);

  ci_assert_equal(file->f_op, fops);
  return fd;
}

/* Free a ci_private_t with ci_free(), unless its file belongs to
 * onload_mnt; in that case nothing is freed here.
 */
void onload_priv_free(ci_private_t *priv)
{
  if( priv->_filp->f_vfsmnt != onload_mnt)
    ci_free(priv);
  /* inode will free the priv automatically */
}

/* Allocate a file descriptor of the given type for endpoint ep.
 *
 * A reference is taken on ep->thr before the file is allocated and is
 * dropped again if the allocation fails.  On success the ORPHAN and
 * TCP_IN_ACCEPTQ flags are cleared from the endpoint's sb_aflags.
 *
 * Returns the new fd, or the negative value from onload_alloc_file().
 */
int oo_create_fd(tcp_helper_endpoint_t* ep, int flags, int fd_type)
{
  int fd;
  tcp_helper_resource_t *trs = ep->thr;
  citp_waitable_obj *wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id);

  efab_thr_ref(trs);
  fd = onload_alloc_file(trs, ep->id, flags, fd_type);
  if( fd < 0 ) {
    /* Allocation failed: give back the reference taken above. */
    efab_thr_release(trs);
    OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)", __FUNCTION__, fd));
    return fd;
  }
  /* The endpoint now has an fd attached, so it is no longer an orphan. */
  ci_atomic32_and(&wo-> waitable.sb_aflags, ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));

  return fd;
}