/* Take one endpoint object off the free list of [netif].
 *
 * The caller must hold the netif lock (asserted below).  Endpoints queued
 * on the deferred-free list are first moved onto the free list.  If the
 * free list is empty after that, ci_tcp_helper_more_socks() is called and,
 * if that did not help, ci_netif_timeout_reap().
 *
 * Returns the object at the head of the free list (its state is
 * CI_TCP_STATE_FREE), or NULL if the free list is still empty.
 */
citp_waitable_obj* citp_waitable_obj_alloc(ci_netif* netif)
{
  citp_waitable_obj* obj;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));

  if( netif->state->deferred_free_eps_head != CI_ILL_END ) {
    ci_uint32 pending;

    /* Detach the whole deferred chain: swap the head for CI_ILL_END,
     * retrying for as long as ci_cas32_fail() reports failure. */
    for( ; ; ) {
      pending = netif->state->deferred_free_eps_head;
      if( ! ci_cas32_fail(&netif->state->deferred_free_eps_head,
                          pending, CI_ILL_END) )
        break;
    }

    /* Push every detached entry onto the front of the free list. */
    while( pending != CI_ILL_END ) {
      citp_waitable* freed = ID_TO_WAITABLE(netif, pending);

      pending = freed->next_id;
      CI_DEBUG(freed->next_id = CI_ILL_END);
      ci_assert_equal(freed->state, CI_TCP_STATE_FREE);
      ci_assert(OO_SP_IS_NULL(freed->wt_next));
      freed->wt_next = netif->state->free_eps_head;
      netif->state->free_eps_head = W_SP(freed);
    }
  }

  /* Nothing free: try to get more sockets, then try reaping, then give up. */
  if( OO_SP_IS_NULL(netif->state->free_eps_head) ) {
    ci_tcp_helper_more_socks(netif);
    if( OO_SP_IS_NULL(netif->state->free_eps_head) ) {
      ci_netif_timeout_reap(netif);
      if( OO_SP_IS_NULL(netif->state->free_eps_head) )
        return NULL;
    }
  }

  LOG_TV(ci_log("%s: allocating %d", __FUNCTION__,
                OO_SP_FMT(netif->state->free_eps_head)));

  ci_assert(IS_VALID_SOCK_P(netif, netif->state->free_eps_head));

#if !defined(__KERNEL__) && !defined (CI_HAVE_OS_NOPAGE)
  /* Block index is derived from the endpoint id at the head of the list. */
  ci_netif_mmap_shmbuf(netif,
                       (netif->state->free_eps_head >> EP_BUF_BLOCKSHIFT) + 1);
#endif

  obj = SP_TO_WAITABLE_OBJ(netif, netif->state->free_eps_head);

  /* Sanity-check that the head really is an untouched free endpoint. */
  ci_assert(OO_SP_EQ(W_SP(&obj->waitable), netif->state->free_eps_head));
  ci_assert_equal(obj->waitable.state, CI_TCP_STATE_FREE);
  ci_assert_equal(obj->waitable.sb_aflags,
                  (CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY));
  ci_assert_equal(obj->waitable.lock.wl_val, 0);

  /* Unlink it from the free list. */
  netif->state->free_eps_head = obj->waitable.wt_next;
  CI_DEBUG(obj->waitable.wt_next = OO_SP_NULL);
  ci_assert_equal(obj->waitable.state, CI_TCP_STATE_FREE);

  return obj;
}
/* Called when the last file descriptor referring to endpoint [w_id] of
 * stack [ni] has gone away.
 *
 * The caller must hold the netif lock and the endpoint must not be in
 * CI_TCP_STATE_FREE (both asserted).  The endpoint is marked as an orphan,
 * unlinked from the post-poll and ready lists, and then handed to
 * citp_waitable_cleanup().
 */
void citp_waitable_all_fds_gone(ci_netif* ni, oo_sp w_id)
{
  citp_waitable_obj* wo;
  citp_waitable* w;

  ci_assert(ni);
  ci_assert(IS_VALID_SOCK_P(ni, w_id));
  ci_assert(ci_netif_is_locked(ni));

  wo = SP_TO_WAITABLE_OBJ(ni, w_id);
  w = &wo->waitable;
  ci_assert(w->state != CI_TCP_STATE_FREE);

  LOG_NC(ci_log("%s: %d:%d %s", __FUNCTION__, NI_ID(ni), OO_SP_FMT(w_id),
                ci_tcp_state_str(w->state)));

  /* A listening socket is closed in blocking context (see
   * efab_tcp_helper_close_endpoint()), and in that case
   * CI_SB_AFLAG_ORPHAN has already been set, so it is not reported here.
   */
  CI_DEBUG(if( (w->sb_aflags & CI_SB_AFLAG_ORPHAN) &&
               w->state != CI_TCP_LISTEN )
             ci_log("%s: %d:%d already orphan", __FUNCTION__,
                    NI_ID(ni), OO_SP_FMT(w_id)));

  /* An orphaned socket must never sit on the deferred socket list: the
   * same link field doubles as the timewait-list link, the free-list link
   * and so on.  Hence the deferred list is purged before the orphan flag
   * is set.
   *
   * NB. The socket cannot be put back on the deferred list from now on,
   * because nobody holds a reference to it any more.
   */
  ci_netif_purge_deferred_socket_list(ni);
  ci_bit_set(&w->sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

  /* The socket may still be on the post-poll list because the stack
   * believes a wakeup is needed; take it off that list and off the ready
   * list as well.
   */
  ci_ni_dllist_remove_safe(ni, &w->post_poll_link);
  ci_ni_dllist_remove_safe(ni, &w->ready_link);
  w->ready_list_id = 0;

  citp_waitable_cleanup(ni, wo, 1);
}
/* Format the onloadfs name of the file described by [priv] into [buffer].
 *
 * The name is "[stack:<id>]" for a stack file, "[tcpl:<id>:<sock>]" for a
 * listening TCP endpoint (only when EFX_HAVE_D_DNAME is defined), and
 * "[<type>:<id>:<sock>]" otherwise.
 *
 * [buffer] is always NUL-terminated when [buflen] is positive; a name that
 * does not fit is silently truncated.
 *
 * Returns the number of bytes stored in [buffer] INCLUDING the terminating
 * NUL.  The result never exceeds [buflen], so it is safe to use as the
 * length of [buffer]'s contents.  Returns 0 if [buflen] is not positive.
 */
static int onloadfs_name(ci_private_t *priv, char *buffer, int buflen)
{
  int len;

  /* No room even for the terminator: do not touch the buffer. */
  if( buflen <= 0 )
    return 0;

  if( priv->fd_type == CI_PRIV_TYPE_NETIF)
    len = snprintf(buffer, buflen, "[stack:%d]", priv->thr->id);
#ifdef EFX_HAVE_D_DNAME
  /* without d_dname, this is called before listen(), so
   * we have no chance to print tcpl:N:N. */
  else if( priv->fd_type == CI_PRIV_TYPE_TCP_EP &&
           SP_TO_WAITABLE_OBJ(&priv->thr->netif,
                              priv->sock_id)->waitable.state ==
           CI_TCP_LISTEN)
    len = snprintf(buffer, buflen, "[tcpl:%d:%d]",
                   priv->thr->id, priv->sock_id);
#endif
  else
    len = snprintf(buffer, buflen, "[%s:%d:%d]",
                   priv_type_to_str(priv->fd_type),
                   priv->thr->id, priv->sock_id);

  /* snprintf() returns the length the complete string would have had, which
   * is larger than what was stored if the output was truncated.  Clamp it
   * so the value we return never describes bytes beyond the buffer. */
  if( len < 0 )
    len = 0;
  else if( len > buflen - 1 )
    len = buflen - 1;
  buffer[len] = '\0';

  return len + 1;
}
static int efab_tcp_helper_sock_attach(ci_private_t* priv, void *arg) { oo_sock_attach_t* op = arg; tcp_helper_resource_t* trs = priv->thr; tcp_helper_endpoint_t* ep = NULL; citp_waitable_obj *wo; int rc, flags, type = op->type; /* SOCK_CLOEXEC and SOCK_NONBLOCK exist from 2.6.27 both */ #ifdef SOCK_TYPE_MASK BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); flags = type & (SOCK_CLOEXEC | SOCK_NONBLOCK); type &= SOCK_TYPE_MASK; # ifdef SOCK_NONBLOCK if( SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK) ) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; # endif #else flags = 0; #endif OO_DEBUG_TCPH(ci_log("%s: ep_id=%d", __FUNCTION__, op->ep_id)); if( trs == NULL ) { LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__)); return -EINVAL; } /* Validate and find the endpoint. */ if( ! IS_VALID_SOCK_P(&trs->netif, op->ep_id) ) return -EINVAL; ep = ci_trs_get_valid_ep(trs, op->ep_id); if( tcp_helper_endpoint_set_aflags(ep, OO_THR_EP_AFLAG_ATTACHED) & OO_THR_EP_AFLAG_ATTACHED ) return -EBUSY; wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id); /* create OS socket */ if( op->domain != AF_UNSPEC ) { struct socket *sock; struct file *os_file; rc = sock_create(op->domain, type, 0, &sock); if( rc < 0 ) { LOG_E(ci_log("%s: ERROR: sock_create(%d, %d, 0) failed (%d)", __FUNCTION__, op->domain, type, rc)); tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED); return rc; } os_file = sock_alloc_file(sock, flags, NULL); if( IS_ERR(os_file) ) { LOG_E(ci_log("%s: ERROR: sock_alloc_file failed (%ld)", __FUNCTION__, PTR_ERR(os_file))); sock_release(sock); tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED); return PTR_ERR(os_file); } rc = efab_attach_os_socket(ep, os_file); if( rc < 0 ) { LOG_E(ci_log("%s: ERROR: efab_attach_os_socket failed (%d)", __FUNCTION__, rc)); /* NB. efab_attach_os_socket() consumes [os_file] even on error. 
*/ tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED); return rc; } wo->sock.domain = op->domain; wo->sock.ino = ep->os_socket->file->f_dentry->d_inode->i_ino; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) wo->sock.uid = ep->os_socket->file->f_dentry->d_inode->i_uid; #else wo->sock.uid = __kuid_val(ep->os_socket->file->f_dentry->d_inode->i_uid); #endif } /* Create a new file descriptor to attach the stack to. */ ci_assert((wo->waitable.state & CI_TCP_STATE_TCP) || wo->waitable.state == CI_TCP_STATE_UDP); rc = oo_create_fd(ep, flags, (wo->waitable.state & CI_TCP_STATE_TCP) ? CI_PRIV_TYPE_TCP_EP : CI_PRIV_TYPE_UDP_EP); if( rc < 0 ) { ci_irqlock_state_t lock_flags; struct oo_file_ref* os_socket; ci_irqlock_lock(&ep->thr->lock, &lock_flags); os_socket = ep->os_socket; ep->os_socket = NULL; ci_irqlock_unlock(&ep->thr->lock, &lock_flags); if( os_socket != NULL ) oo_file_ref_drop(os_socket); tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED); return rc; } op->fd = rc; #ifdef SOCK_NONBLOCK if( op->type & SOCK_NONBLOCK ) ci_bit_mask_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_O_NONBLOCK); #endif /* Re-read the OS socket buffer size settings. This ensures we'll use * up-to-date values for this new socket. */ efab_get_os_settings(&NI_OPTS_TRS(trs)); return 0; }
/* Create an onloadfs file for stack [thr] / endpoint [ep_id] and bind it to
 * a newly reserved file descriptor in the current process.
 *
 * An inode is allocated on onload_mnt's superblock, its embedded
 * ci_private_t is filled in with [thr], [ep_id] and [fd_type], a dentry is
 * allocated and instantiated, and a struct file using the file operations
 * returned by oo_fops_by_type(fd_type) is created on top.
 *
 * Of [flags], O_NONBLOCK is copied into file->f_flags and O_CLOEXEC selects
 * how the descriptor is installed.
 *
 * Returns the new descriptor, or a negative error:
 *   -EINVAL  oo_fops_by_type() has no operations for [fd_type]
 *   -ENOMEM  inode or dentry allocation failed
 *   -ENFILE  alloc_file() returned NULL
 *   or the error returned by get_unused_fd().
 */
static int onload_alloc_file(tcp_helper_resource_t *thr, oo_sp ep_id, int flags, int fd_type)
{
  struct qstr name = { .name = "" };
  /* my_dentry names the dentry lvalue in both build configurations. */
#ifdef EFX_HAVE_STRUCT_PATH
  struct path path;
#define my_dentry path.dentry
#else
  struct dentry *dentry;
#define my_dentry dentry
#endif
  struct file *file;
  int fd;
  struct inode *inode;
  ci_private_t *priv;
  struct file_operations *fops;

  fops = oo_fops_by_type(fd_type);
  if( fops == NULL )
    return -EINVAL;
  ci_assert_equal(fops->owner, THIS_MODULE);

  inode = new_inode(onload_mnt->mnt_sb);
  if( inode == NULL )
    return -ENOMEM;
#ifdef EFX_FSTYPE_HAS_MOUNT
  inode->i_ino = get_next_ino();
#endif

  /* NOTE(review): the assignment made for CI_PRIV_TYPE_NETIF is always
   * overwritten, because the if/else that follows assigns i_mode on both
   * of its branches; stack files therefore end up with the S_IFIFO mode.
   * Confirm whether an "else" was intended before the second "if". */
  if( fd_type == CI_PRIV_TYPE_NETIF )
    inode->i_mode = S_IRWXUGO;
  if( fd_type == CI_PRIV_TYPE_TCP_EP || fd_type == CI_PRIV_TYPE_UDP_EP )
    inode->i_mode =
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
        /* in 2.6.18 this flag makes us "socket" and sendmsg crashes;
         * see sock_from_file() */
        S_IFSOCK |
#endif
        S_IRWXUGO;
  else
    inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
  inode->i_uid = current_fsuid();
  inode->i_gid = current_fsgid();

  /* The private data is embedded in the onload_inode that wraps [inode]. */
  priv = &container_of(inode, struct onload_inode, vfs_inode)->priv;
  priv->thr = thr;
  priv->sock_id = ep_id;
  priv->fd_type = fd_type;

  fd = get_unused_fd();
  if( fd < 0 ) {
    iput(inode);
    return fd;
  }
  /*ci_log("[%d]%s(%d:%d) return %d priv=%p", current->pid, __func__, thr->id, ep_id, fd, priv);*/

  /* Allocate the dentry; how depends on the kernel features available. */
#ifdef EFX_FSTYPE_HAS_MOUNT
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37)
  path.dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  if( path.dentry != NULL )
    path.dentry->d_op = &onloadfs_dentry_operations;
#else
  path.dentry = d_alloc_pseudo(onload_mnt->mnt_sb, &name);
#endif
#else /* EFX_FSTYPE_HAS_MOUNT */
#ifdef EFX_HAVE_D_DNAME
  my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
#else
  {
    /* Without d_dname the dentry gets a fixed name generated here; the
     * value returned by onloadfs_name() is used as the name length. */
    char str[32];
    name.len = onloadfs_name(&container_of(inode, struct onload_inode, vfs_inode)->priv, str, sizeof(str));
    name.name = str;
    name.hash = inode->i_ino;
    my_dentry = d_alloc(onload_mnt->mnt_sb->s_root, &name);
  }
#endif
#endif /* EFX_FSTYPE_HAS_MOUNT */

  if( my_dentry == NULL ) {
    put_unused_fd(fd);
    iput(inode);
    return -ENOMEM;
  }

#if !defined(EFX_FSTYPE_HAS_MOUNT) || defined(EFX_OLD_MOUNT_PSEUDO)
  my_dentry->d_op = &onloadfs_dentry_operations;
#if !defined(EFX_HAVE_STRUCT_PATH) && defined(EFX_HAVE_D_DNAME)
  my_dentry->d_flags &= ~DCACHE_UNHASHED;
#endif
#endif
  d_instantiate(my_dentry, inode);
#ifndef EFX_HAVE_D_DNAME
  d_rehash(my_dentry);
#endif
  inode->i_fop = fops;

#ifdef EFX_HAVE_STRUCT_PATH
  path.mnt = mntget(onload_mnt);
  file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
#else
  file = alloc_file(onload_mnt, dentry, FMODE_READ | FMODE_WRITE, fops);
#endif
  if( file == NULL) {
    /* Undo the dentry/inode setup and give the reserved fd back. */
#ifdef EFX_HAVE_STRUCT_PATH
    path_put(&path);
#else
    dput(dentry);
    iput(inode);
#endif
    put_unused_fd(fd);
    return -ENFILE;
  }

  priv->_filp = file;
  file->f_flags = O_RDWR | (flags & O_NONBLOCK);
  file->f_pos = 0;
  file->private_data = priv;

  if( flags & O_CLOEXEC ) {
    /* Install the file by hand so that the close-on-exec bit is set while
     * files->file_lock is still held. */
    struct files_struct *files = current->files;
    struct fdtable *fdt;
    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    rcu_assign_pointer(fdt->fd[fd], file);
    efx_set_close_on_exec(fd, fdt);
    spin_unlock(&files->file_lock);
  } else
    fd_install(fd, file);
  /* NOTE(review): the result of try_module_get() is not checked. */
  try_module_get(THIS_MODULE);

  ci_assert_equal(file->f_op, fops);
  return fd;
}

/* Release [priv] when its file is gone.  ci_free() is called only if the
 * file does not live on onload_mnt; for files created by
 * onload_alloc_file() the private data is embedded in the inode. */
void onload_priv_free(ci_private_t *priv)
{
  if( priv->_filp->f_vfsmnt != onload_mnt)
    ci_free(priv);
  /* inode will free the priv automatically */
}

/* Create a file descriptor of type [fd_type] for endpoint [ep].
 *
 * A reference to the endpoint's stack is taken with efab_thr_ref() before
 * the file is allocated and released again if onload_alloc_file() fails.
 * On success the CI_SB_AFLAG_ORPHAN and CI_SB_AFLAG_TCP_IN_ACCEPTQ flags
 * are cleared on the endpoint.
 *
 * Returns the new descriptor, or the negative error from
 * onload_alloc_file().
 */
int oo_create_fd(tcp_helper_endpoint_t* ep, int flags, int fd_type)
{
  int fd;
  tcp_helper_resource_t *trs = ep->thr;
  citp_waitable_obj *wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id);

  efab_thr_ref(trs);
  fd = onload_alloc_file(trs, ep->id, flags, fd_type);
  if( fd < 0 ) {
    efab_thr_release(trs);
    OO_DEBUG_ERR(ci_log("%s: onload_alloc_file failed (%d)", __FUNCTION__, fd));
    return fd;
  }
  /* The endpoint now has a descriptor: it is no longer an orphan. */
  ci_atomic32_and(&wo-> waitable.sb_aflags, ~(CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_TCP_IN_ACCEPTQ));
  return fd;
}