/* Tear down Onload's signal interception for the current process.
 *
 * For every catchable signal, restore the application's original handler
 * in place of our trampoline.  Called when the per-process signal state
 * (tramp_data) is being destroyed.
 *
 * \param tramp_data  per-mm signal interception state for this process
 */
void efab_signal_process_fini(struct mm_signal_data *tramp_data)
{
  int sig;

  OO_DEBUG_SIGNAL(ci_log("%s(%p) pid %d: current->flags=%x, "
                         "tramp_data->user_data=%p", __func__, tramp_data,
                         current->pid, (int)current->flags,
                         CI_USER_PTR_GET(tramp_data->user_data)));

  /* Check if we should really do anything */
  if( current->flags & PF_EXITING )
    return; /* the process is exiting */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30)
  /* in_execve only exists on >= 2.6.30; during execve the old handlers are
   * being thrown away anyway, so restoring them would be pointless. */
  if( current->in_execve )
    return; /* in execve() */
#endif
  if( CI_USER_PTR_GET(tramp_data->user_data) == NULL )
    return; /* nothing was inited */

  OO_DEBUG_SIGNAL(ci_log("%s(%p) pid %d: uninstall interception",
                         __func__, tramp_data, current->pid));

  for( sig = 1; sig <= _NSIG; sig++ ) {
    /* Kernel-only signals (e.g. SIGKILL/SIGSTOP) were never intercepted,
     * so there is nothing to back out. */
    if( sig_kernel_only(sig) )
      continue;
    /* sa==NULL asks efab_signal_report_sigaction() to substitute the real
     * user handler back in-place; on failure we log and keep going so the
     * remaining signals are still restored. */
    if( efab_signal_report_sigaction(sig, NULL, tramp_data) != 0 ) {
      ci_log("%s: ERROR: pid %d failed to back off signal %d handler",
             __func__, current->pid, sig);
      continue;
    }
  }
}
/* Apply all postponed epoll_ctl and ignore the results (just print
 * a message), since there is nothing to do now.
 *
 * The userland library batches epoll_ctl() calls (EF_EPOLL_CTL_FAST) and
 * passes them down in op->epoll_ctl / op->epoll_ctl_n; this replays them
 * against the kernel epoll fd.
 *
 * Returns the rc of the LAST replayed epoll_ctl (0 if none failed or
 * none were replayed), or -EFAULT on a bad user buffer / count.
 */
static int oo_epoll2_apply_ctl(struct oo_epoll_private *priv,
                               struct oo_epoll2_action_arg *op)
{
  /* Fixed-size on-stack copy: bounded by CI_CFG_EPOLL_MAX_POSTPONED,
   * which is enforced just below before copy_from_user. */
  struct oo_epoll_item postponed_k[CI_CFG_EPOLL_MAX_POSTPONED];
  struct oo_epoll_item *postponed_u = CI_USER_PTR_GET(op->epoll_ctl);
  int i;
  int rc = 0;

  if( op->epoll_ctl_n > CI_CFG_EPOLL_MAX_POSTPONED )
    return -EFAULT;
  if( copy_from_user(postponed_k, postponed_u,
                     sizeof(struct oo_epoll_item) * op->epoll_ctl_n) )
    return -EFAULT;

  for( i = 0; i < op->epoll_ctl_n; i++ ) {
    /* fd == -1 marks a slot that was cancelled/unused; skip it. */
    if( postponed_k[i].fd != -1 ) {
      /* Note: the event structure is read from the *user* array, not the
       * kernel copy -- oo_epoll2_ctl takes a user pointer. */
      rc = oo_epoll2_ctl(priv, op->kepfd, postponed_k[i].op,
                         postponed_k[i].fd, &postponed_u[i].event);
      /* Errors are only reported (not returned) unless this is the last
       * entry of a pure-ctl call (maxevents == 0), in which case the rc
       * propagates to the caller via the "last rc" rule below. */
      if( rc && (i != op->epoll_ctl_n - 1 || op->maxevents != 0) ) {
        ci_log("postponed epoll_ctl(fd=%d) returned error %d; ignoring",
               (int)postponed_k[i].fd, rc);
        ci_log("consider disabling EF_EPOLL_CTL_FAST to get "
               "the correct behaviour");
      }
    }
  }

  /* Return the last rc */
  return rc;
}
/* Dispatch a debug ioctl sub-operation selected by op->what.
 * Restricted to sysadmin-capable callers; unknown sub-ops get -EINVAL. */
static int oo_ioctl_debug_op(ci_private_t *priv, void *arg)
{
  ci_debug_onload_op_t *dop = arg;
  int rc = -EINVAL;  /* default for unrecognised sub-ops */

  if( !ci_is_sysadmin() )
    return -EPERM;

  switch( dop->what ) {
  case __CI_DEBUG_OP_DUMP_INODE__:
    rc = efab_linux_dump_inode(dop->u.fd);
    break;
  case __CI_DEBUG_OP_TRAMPOLINE__:
    rc = efab_linux_trampoline_debug(&dop->u.tramp_debug);
    break;
  case __CI_DEBUG_OP_FDS_DUMP__:
    rc = efab_fds_dump(dop->u.fds_dump_pid);
    break;
  case __CI_DEBUG_OP_DUMP_STACK__:
    rc = tcp_helper_dump_stack(dop->u.dump_stack.stack_id,
                               dop->u.dump_stack.orphan_only,
                               CI_USER_PTR_GET(dop->u.dump_stack.user_buf),
                               dop->u.dump_stack.user_buf_len);
    break;
  case __CI_DEBUG_OP_KILL_STACK__:
    rc = tcp_helper_kill_stack_by_id(dop->u.stack_id);
    break;
  default:
    break;
  }
  return rc;
}
/* ioctl shim: dump cluster state for priv's stack into a user buffer. */
static int efab_cluster_dump(ci_private_t *priv, void *arg)
{
  oo_cluster_dump_t *req = arg;
  void *user_buf = CI_USER_PTR_GET(req->buf);

  return tcp_helper_cluster_dump(priv->thr, user_buf, req->buf_len);
}
/* Trampoline into userland failure - this function is never called, and
 * would need to know whether userspace was 64 or 32 bit in order to
 * work out how to buld the trampoline, so it does nothing for now - rrw
 * 2012-12-14
 */
void efab_linux_trampoline_ul_fail(void)
{
  struct pt_regs *regs = 0;  /* don't know how to do this on this platform */
  struct mm_hash *p;
  ci_uintptr_t trampoline_ul_fail = 0;

  /* regs is always NULL here, so this assert fires if the function is ever
   * actually reached -- consistent with the "never called" note above. */
  ci_assert(regs);

  if (current->mm) {
    read_lock (&oo_mm_tbl_lock);
    p = oo_mm_tbl_lookup(current->mm);
    read_unlock (&oo_mm_tbl_lock);
    if (p) {
      /* Looked-up user-space failure handler; unused beyond this point on
       * this platform (see log message below). */
      trampoline_ul_fail = (ci_uintptr_t) CI_USER_PTR_GET (p->trampoline_ul_fail);
    }
    else {
      ci_log("%s: no entry for pid %u", __FUNCTION__, current->tgid);
      return;
    }
  }
  else {
    ci_log("%s: pid %u is dying - no mm", __FUNCTION__, current->tgid);
    return;
  }

  ci_log("%s: syscall backtrace (pid %d)", __FUNCTION__, current->tgid);
  ci_backtrace();
  ci_log("%s: provoking user-level fail on syscall exit for pid %d",
         __FUNCTION__, current->tgid);
  ci_log("(not really, don't know how on this platform)");
  return;
}
/* ioctl shim: dump the filter state of one endpoint into a user buffer. */
static int efab_ep_filter_dump(ci_private_t *priv, void *arg)
{
  oo_tcp_filter_dump_t *req = arg;
  void *user_buf = CI_USER_PTR_GET(req->buf);

  return tcp_helper_endpoint_filter_dump(priv->thr, req->sock_id,
                                         user_buf, req->buf_len);
}
int efab_signal_mm_init(const ci_tramp_reg_args_t *args, struct mm_hash *p) { int i; if( args->max_signum < _NSIG ) return -E2BIG; p->signal_data.handler_postpone = CI_USER_PTR_GET(args->signal_handler_postpone); p->signal_data.sarestorer = CI_USER_PTR_GET(args->signal_sarestorer); for( i = 0; i <= OO_SIGHANGLER_DFL_MAX; i++ ) p->signal_data.handlers[i] = CI_USER_PTR_GET(args->signal_handlers[i]); p->signal_data.user_data = args->signal_data; p->signal_data.sa_onstack_intercept = args->sa_onstack_intercept; return 0; }
/* ioctl shim: update a MAC table row in the control plane.
 * Requires an attached stack (priv->thr). */
static int cicpos_mac_set_rsop(ci_private_t *priv, void *arg)
{
  cp_mac_set_t *req = arg;

  if (priv->thr == NULL)
    return -EINVAL;

  return cicpos_mac_set(CICP_HANDLE(&priv->thr->netif),
                        &req->rowinfo,
                        req->ifindex,
                        req->ip_be32,
                        (const ci_mac_addr_t *)req->mac,
                        CI_USER_PTR_GET(req->os_sync_ptr));
}
void efab_signal_process_init(struct mm_signal_data *tramp_data) { int sig; int rc; OO_DEBUG_SIGNAL(ci_log("%s(%p) pid %d", __func__, tramp_data, current->pid)); /* At start-of-day, we intercept all already-installed handlers * and deadly SIG_DFL */ for( sig = 1; sig <= _NSIG; sig++ ) { struct k_sigaction *k; tramp_data->signal_data[sig - 1].type = OO_SIGHANGLER_USER | OO_SIGHANGLER_IGN_BIT; CI_USER_PTR_SET(tramp_data->signal_data[sig - 1].handler, NULL); /* Never, never intercept SIGKILL. You'll get deadlock since exit_group * sends SIGKILL to all other threads. */ if( sig_kernel_only(sig) ) continue; /* If this is our handler, do nothing. This is second init from the * same process. It happens in fork hooks, when second netif is * created, etc. */ spin_lock_irq(¤t->sighand->siglock); k = ¤t->sighand->action[sig - 1]; if( k->sa.sa_handler == tramp_data->handler_postpone ) { spin_unlock_irq(¤t->sighand->siglock); OO_DEBUG_SIGNAL(ci_log("%s: double init pid=%d", __func__, current->pid)); rc = copy_from_user(tramp_data->signal_data, CI_USER_PTR_GET(tramp_data->user_data), sizeof(tramp_data->signal_data)); if( rc != 0 ) ci_log("%s: ERROR: failed to copy signal data (%d)", __func__, rc); break; } spin_unlock_irq(¤t->sighand->siglock); /* Ignore any errors */ (void) efab_signal_substitute(sig, NULL, tramp_data); } tramp_data->kernel_sighand = current->sighand; }
/* Look up the current process's signal interception state.
 *
 * On success takes a reference on the mm hash entry (via
 * efab_get_mm_hash_locked) and stores a pointer to its signal_data.
 *
 * \param[out] tramp_data  receives the per-mm signal data on success
 * \return 0 on success; -ENOSYS if no entry exists or interception was
 *         never initialised for this mm.
 */
static int efab_signal_get_tramp_data(struct mm_signal_data **tramp_data)
{
  struct mm_hash *entry;
  int rc = -ENOSYS;

  read_lock (&oo_mm_tbl_lock);
  entry = oo_mm_tbl_lookup(current->mm);
  if( entry != NULL &&
      CI_USER_PTR_GET(entry->signal_data.user_data) != NULL ) {
    efab_get_mm_hash_locked(entry);
    *tramp_data = &entry->signal_data;
    rc = 0;
  }
  read_unlock (&oo_mm_tbl_lock);

  return rc;
}
/* ioctl shim: bind the backing OS socket of an endpoint.
 * On success the port actually bound is reported back through op->addrlen
 * (the field is reused as an out-parameter by this interface). */
static int efab_tcp_helper_bind_os_sock_rsop(ci_private_t* priv, void *arg)
{
  oo_tcp_bind_os_sock_t *req = arg;
  struct sockaddr_storage kaddr;
  ci_uint16 bound_port;
  int rc;

  if (priv->thr == NULL)
    return -EINVAL;

  /* Copy the user-supplied address into kernel space with validation. */
  rc = move_addr_to_kernel(CI_USER_PTR_GET(req->address), req->addrlen,
                           (struct sockaddr *)&kaddr);
  if( rc < 0 )
    return rc;

  rc = efab_tcp_helper_bind_os_sock(priv->thr, req->sock_id,
                                    (struct sockaddr *)&kaddr,
                                    req->addrlen, &bound_port);
  if( rc < 0 )
    return rc;

  req->addrlen = bound_port;
  return 0;
}
static void oo_epoll2_wait(struct oo_epoll_private *priv, struct oo_epoll2_action_arg *op) { /* This function uses oo_timesync_cpu_khz but we do not want to * block here for it to stabilize. So we already blocked in * oo_epoll_fop_open(). */ ci_uint64 start_frc = 0, now_frc = 0; /* =0 to make gcc happy */ tcp_helper_resource_t* thr; ci_netif* ni; unsigned i; ci_int32 timeout = op->timeout; /* Get the start of time. */ if( timeout > 0 || ( timeout < 0 && op->spin_cycles ) ) ci_frc64(&start_frc); /* Declare that we are spinning - even if we are just polling */ OO_EPOLL_FOR_EACH_STACK(priv, i, thr, ni) ci_atomic32_inc(&ni->state->n_spinners); /* Poll each stack for events */ op->rc = -ENOEXEC; /* impossible value */ OO_EPOLL_FOR_EACH_STACK(priv, i, thr, ni) { if( ci_netif_may_poll(ni) && ci_netif_has_event(ni) && ci_netif_trylock(ni) ) { int did_wake; ni->state->poll_did_wake = 0; ci_netif_poll(ni); did_wake = ni->state->poll_did_wake; ci_netif_unlock(ni); /* Possibly, we've got necessary event. If true, exit */ if( did_wake ) { op->rc = efab_linux_sys_epoll_wait(op->kepfd, CI_USER_PTR_GET(op->events), op->maxevents, 0); if( op->rc != 0 ) goto do_exit; } } } /* Do we have anything to do? */ if( op->rc == -ENOEXEC ) { /* never called sys_epoll_wait() - do it! */ op->rc = efab_linux_sys_epoll_wait(op->kepfd, CI_USER_PTR_GET(op->events), op->maxevents, 0); } if( op->rc != 0 || timeout == 0 ) goto do_exit; /* Fixme: eventually, remove NO_USERLAND stacks from this list. * Here is a good moment: we are going to spin or block, so there are * a lot of time. But avoid locking! */ /* Spin for a while. 
*/ if( op->spin_cycles ) { ci_uint64 schedule_frc; ci_uint64 max_spin = op->spin_cycles; int spin_limited_by_timeout = 0; ci_assert(start_frc); if( timeout > 0) { ci_uint64 max_timeout_spin = (ci_uint64)timeout * oo_timesync_cpu_khz; if( max_timeout_spin <= max_spin ) { max_spin = max_timeout_spin; spin_limited_by_timeout = 1; } } /* spin */ now_frc = schedule_frc = start_frc; do { if(unlikely( signal_pending(current) )) { op->rc = -EINTR; /* epoll_wait returns EINTR, not ERESTARTSYS! */ goto do_exit; } OO_EPOLL_FOR_EACH_STACK(priv, i, thr, ni) { #if CI_CFG_SPIN_STATS ni->state->stats.spin_epoll_kernel++; #endif if( ci_netif_may_poll(ni) && ci_netif_need_poll_spinning(ni, now_frc) && ci_netif_trylock(ni) ) { ci_netif_poll(ni); ci_netif_unlock(ni); } } op->rc = efab_linux_sys_epoll_wait(op->kepfd, CI_USER_PTR_GET(op->events), op->maxevents, 0); if( op->rc != 0 ) goto do_exit; ci_frc64(&now_frc); if(unlikely( now_frc - schedule_frc > oo_timesync_cpu_khz )) { schedule(); /* schedule() every 1ms */ schedule_frc = now_frc; } else ci_spinloop_pause(); } while( now_frc - start_frc < max_spin );
/*! Run a signal handler
** \param  signum   Signal number
** \param  info     Saved info for sa_sigaction handler
** \param  context  Saved context for sa_sigaction handler
** \return sa_restart flag value
*/
static int citp_signal_run_app_handler(int sig, siginfo_t *info, void *context)
{
  struct oo_sigaction *p_data = &citp_signal_data[sig-1];
  struct oo_sigaction act;
  ci_int32 type1, type2;
  int ret;
  sa_sigaction_t handler;

  /* Lock-free consistent snapshot of the sigaction: re-read until the type
   * field is stable across the copy and not marked BUSY (i.e. the kernel is
   * not mid-update).  The type re-read before/after acts like a seqlock. */
  do {
    type1 = p_data->type;
    act = *p_data;
    type2 = p_data->type;
  } while( type1 != type2 ||
           (type1 & OO_SIGHANGLER_TYPE_MASK) == OO_SIGHANGLER_BUSY );

  /* When the signal was delivered and set pending, it was intercepted.
   * Now it is not.
   * It is possible if, for example, user-provided handler is replaced by
   * SIG_DFL for SIGABORT.
   *
   * We just run old handler in this case, so we drop
   * OO_SIGHANGLER_IGN_BIT.
   */

  /* Capture SA_RESTART before running the handler -- the handler itself may
   * reinstall a different sigaction. */
  ret = act.flags & SA_RESTART;
  LOG_SIG(log("%s: signal %d type %d run handler %p flags %x",
              __FUNCTION__, sig, act.type,
              CI_USER_PTR_GET(act.handler), act.flags));

  handler = CI_USER_PTR_GET(act.handler);
  ci_assert(handler);
  ci_assert_nequal(handler, citp_signal_intercept);
  ci_assert(info);
  ci_assert(context);

  /* Non-USER types and SA_SIGINFO handlers take the 3-argument form;
   * otherwise invoke via the classic 1-argument signature. */
  if( (act.type & OO_SIGHANGLER_TYPE_MASK) != OO_SIGHANGLER_USER ||
      (act.flags & SA_SIGINFO) ) {
    (*handler)(sig, info, context);
  }
  else {
    __sighandler_t handler1 = (void *)handler;
    (*handler1)(sig);
  }
  LOG_SIG(log("%s: returned from handler for signal %d: ret=%x",
              __FUNCTION__, sig, ret));

  /* If sighandler was reset because of SA_ONESHOT, we should properly
   * handle termination.
   * Also, signal flags possibly differs from the time when kernel was
   * running the sighandler: so, we should ensure that ONESHOT shoots
   * only once. */
  if( (act.flags & SA_ONESHOT) &&
      act.type == citp_signal_data[sig-1].type ) {
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = SIG_DFL;
    sigaction(sig, &sa, NULL);
    LOG_SIG(log("%s: SA_ONESHOT fixup", __func__));
  }

  return ret;
}
/* Substitute signal handler by our variant. */ static int efab_signal_substitute(int sig, struct sigaction *new_act, struct mm_signal_data *tramp_data) { int rc; __sighandler_t handler; struct k_sigaction *k; int type; __user struct oo_sigaction *user_data; struct oo_sigaction *signal_data = &(tramp_data->signal_data[sig - 1]); ci_int32 old_type; ci_int32 seq; user_data = &(((struct oo_sigaction *) (CI_USER_PTR_GET(tramp_data->user_data)))[sig - 1]); if( !access_ok(VERIFY_WRITE, user_data, sizeof(struct oo_sigaction) ) ) return -EFAULT; do { old_type = signal_data->type; seq = (old_type & OO_SIGHANGLER_SEQ_MASK) + (1 << OO_SIGHANGLER_SEQ_SHIFT); } while( ci_cas32_fail(&signal_data->type, old_type, OO_SIGHANGLER_BUSY | seq) ); /* We are going to change signal handler: UL should wait until we've * finished */ rc = __put_user(signal_data->type, &user_data->type); if( rc != 0 ) { signal_data->type = old_type; return -EFAULT; } spin_lock_irq(¤t->sighand->siglock); k = ¤t->sighand->action[sig - 1]; if( new_act ) k->sa = *new_act; type = efab_signal_handler_type(sig, k->sa.sa_handler); handler = type <= OO_SIGHANGLER_DFL_MAX ? 
tramp_data->handlers[type] : NULL; BUILD_BUG_ON(SIG_DFL != NULL); /* We do not handle this signal: */ if( type != OO_SIGHANGLER_USER && handler == NULL ) { spin_unlock_irq(¤t->sighand->siglock); signal_data->type = old_type | OO_SIGHANGLER_IGN_BIT | seq; ci_verify(__put_user(signal_data->type, &user_data->type) == 0); return 0; } OO_DEBUG_SIGNAL(ci_log("%s: %d change sig=%d handler %p flags %lx " "restorer %p type %d", __func__, current->pid, sig, k->sa.sa_handler, k->sa.sa_flags, k->sa.sa_restorer, type)); signal_data->flags = k->sa.sa_flags; k->sa.sa_flags |= SA_SIGINFO; if( type == OO_SIGHANGLER_USER ) CI_USER_PTR_SET(signal_data->handler, k->sa.sa_handler); else { CI_USER_PTR_SET(signal_data->handler, handler); if( tramp_data->sarestorer ) { k->sa.sa_flags |= SA_RESTORER; k->sa.sa_restorer = tramp_data->sarestorer; } } k->sa.sa_handler = tramp_data->handler_postpone; spin_unlock_irq(¤t->sighand->siglock); OO_DEBUG_SIGNAL(ci_log("%s: %d set sig=%d handler %p flags %lx restorer %p", __func__, current->pid, sig, k->sa.sa_handler, k->sa.sa_flags, k->sa.sa_restorer)); /* Copy signal_data to UL; type BUSY */ rc = __copy_to_user(user_data, signal_data, sizeof(*signal_data)); signal_data->type = type | seq; if( rc != 0 ) return -EFAULT; /* Fill in the real type */ ci_verify(__put_user(signal_data->type, &user_data->type) == 0); return 0; }
/* Change substituted sigaction to the structure really meant by user. * If sa is provided, copy user sigaction data here to pass to user. * If sa==NULL, substitute in-place. */ static int efab_signal_report_sigaction(int sig, struct sigaction *sa, struct mm_signal_data *tramp_data) { struct oo_sigaction *signal_data = &(tramp_data->signal_data[sig - 1]); ci_int32 type; #define MAX_TRIES_BUSY 1000 int tried_busy = 0; int tried_changed = 0; int sa_provided = (sa != NULL); re_read_data: do { tried_busy++; type = signal_data->type; } while( (type & OO_SIGHANGLER_TYPE_MASK) == OO_SIGHANGLER_BUSY && tried_busy <= MAX_TRIES_BUSY ); if( tried_busy > MAX_TRIES_BUSY ) { ci_log("%s(%d): pid %d signal() or sigaction() runs for too long", __func__, sig, current->pid); return -EBUSY; } report: spin_lock_irq(¤t->sighand->siglock); if( sa_provided ) *sa = current->sighand->action[sig - 1].sa; else sa = ¤t->sighand->action[sig - 1].sa; if( sa->sa_handler != tramp_data->handler_postpone ) { spin_unlock_irq(¤t->sighand->siglock); return 0; } OO_DEBUG_SIGNAL(ci_log("%s: %d process sig=%d type %d handler %p " "flags %lx restorer %p", __func__, current->pid, sig, type & OO_SIGHANGLER_TYPE_MASK, sa->sa_handler, sa->sa_flags, sa->sa_restorer)); if( (signal_data->type & OO_SIGHANGLER_TYPE_MASK) == OO_SIGHANGLER_USER) { sa->sa_handler = CI_USER_PTR_GET(signal_data->handler); if( ! (signal_data->flags & SA_SIGINFO) ) sa->sa_flags &= ~SA_SIGINFO; } else if( ! (signal_data->type & OO_SIGHANGLER_IGN_BIT) ) { sa->sa_handler = SIG_DFL; sa->sa_flags &= ~SA_RESTORER; if( ! (signal_data->flags & SA_SIGINFO) ) sa->sa_flags &= ~SA_SIGINFO; sa->sa_restorer = NULL; } OO_DEBUG_SIGNAL(ci_log("%s: %d to user sig=%d handler %p flags %lx " "restorer %p", __func__, current->pid, sig, sa->sa_handler, sa->sa_flags, sa->sa_restorer)); spin_unlock_irq(¤t->sighand->siglock); /* Re-check that UL have not changed signal_data. 
*/ if( type != signal_data->type ) { tried_changed++; if( tried_changed > MAX_TRIES_BUSY ) { ci_log("%s: signal() or sigaction() called too fast", __func__); return -EBUSY; } if( (signal_data->type & OO_SIGHANGLER_TYPE_MASK) == OO_SIGHANGLER_BUSY ) { tried_busy = 0; goto re_read_data; } else goto report; } return 0; }
/* Arrange for the current syscall to return into the registered user-space
 * trampoline instead of the original call site.
 *
 * NOTE(review): regs->nip suggests this is the PowerPC variant of this
 * function -- confirm against the surrounding file.
 *
 * \param regs    saved user registers for the interrupted syscall
 * \param opcode  trampoline opcode to pass to user space
 * \param arg     trampoline argument to pass to user space
 * \param bits    TRAMPOLINE_BITS_64 or (with CONFIG_COMPAT) 32-bit
 * \return 0 on success; -EBADF if the entry is unreadable or absent;
 *         -EBUSY if called from the registered excluded address;
 *         -ENOENT if this mm never registered a trampoline.
 */
static int setup_trampoline(struct pt_regs *regs, int opcode, int arg,
                            int bits)
{
  struct mm_hash *p;
  ci_uintptr_t trampoline_entry = 0, trampoline_exclude = 0,
               trampoline_toc = 0, trampoline_fixup = 0;
  int rc = -EBADF;

  /* Fetch the registered trampoline addresses under the table lock, then
   * drop it before touching user memory. */
  read_lock(&oo_mm_tbl_lock);
  p = oo_mm_tbl_lookup(current->mm);
  if (p) {
    trampoline_entry = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_entry);
    trampoline_exclude = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_exclude);
    trampoline_toc = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_toc);
    trampoline_fixup = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_user_fixup);
  }
  read_unlock(&oo_mm_tbl_lock);

  TRAMP_DEBUG("%s: trampoline_entry = %p \n", __func__,
              (void *)trampoline_entry);

  /* OK.  We have the entry - set up a trampoline to user space */
  if (trampoline_entry) {
    if (!access_ok(VERIFY_READ, trampoline_entry, 1)) {
      /* Can't read this address. Fail! */
      ci_log("Pid %d (mm=%p) has bad trampoline entry: %p",
             current->tgid, current->mm, (void *)trampoline_entry);
      return -EBADF;
    }

    /* Check for the excluded address: calls originating there must not be
     * trampolined. */
    if (regs->nip == trampoline_exclude) {
      TRAMP_DEBUG("Ignoring call from excluded address 0x%08lx",
                  (unsigned long)trampoline_exclude);
      return -EBUSY;
    }

    TRAMP_DEBUG("%s: bits = %d; set up trampoline. \n", __func__, bits);
    if (bits == TRAMPOLINE_BITS_64) {
      setup_trampoline64(regs, opcode, arg,
                         (void *)trampoline_entry, (void *)trampoline_toc,
                         (void *)trampoline_fixup);
    }
#ifdef CONFIG_COMPAT
    else {
      /* 32-bit compat process. */
      setup_trampoline32(regs, opcode, arg,
                         (void *)trampoline_entry, (void *)trampoline_toc,
                         (void *)trampoline_fixup);
    }
#endif
    rc = 0;
  }
  else {
    OO_DEBUG_VERB(ci_log("Error -- attempt to trampoline for unknown process"));
    rc = -ENOENT;
  }
  return rc;
}