/* Terminate context 'self', recording 'state' as its exit code.
 *
 * A context that still has a parent ends up in state 'X86ContextZombie' (it
 * stays around until the parent collects it through 'waitpid'); a context
 * without a parent goes straight to 'X86ContextFinished'.
 *
 * Every child of 'self' is orphaned (its 'parent' field is set to NULL), and
 * any orphaned child that was already a zombie is moved to
 * 'X86ContextFinished'.
 *
 * Calling this function on a context that is already finished or zombie has
 * no effect. */
void X86ContextFinish(X86Context *self, int state)
{
	X86Emu *emu = self->emu;
	X86Context *child;
	X86Context *parent;

	/* Nothing left to do for a context that already terminated. */
	if (X86ContextGetState(self, X86ContextFinished | X86ContextZombie))
	{
		return;
	}

	/* Cancel the host threads spawned on behalf of this context (the one
	 * used while suspended and the timer one). */
	X86ContextHostThreadSuspendCancel(self);
	X86ContextHostThreadTimerCancel(self);

	/* Orphan all children. A zombie child can no longer be collected by
	 * this context through 'waitpid', so it is finished right away. */
	DOUBLE_LINKED_LIST_FOR_EACH(emu, context, child)
	{
		if (child->parent == self)
		{
			child->parent = NULL;
			if (X86ContextGetState(child, X86ContextZombie))
			{
				X86ContextSetState(child, X86ContextFinished);
			}
		}
	}

	/* Notify the parent, if there is one and an exit signal was requested,
	 * and schedule event processing so that the signal gets handled. */
	parent = self->parent;
	if (self->exit_signal && parent)
	{
		x86_sys_debug(" sending signal %d to pid %d\n",
				self->exit_signal, parent->pid);
		x86_sigset_add(&parent->signal_mask_table->pending,
				self->exit_signal);
		X86EmuProcessEventsSchedule(emu);
	}

	/* When 'clear_child_tid' is set, write a 32-bit zero at that guest
	 * address and wake up one waiter on the futex located there. */
	if (self->clear_child_tid)
	{
		unsigned int cleared_tid = 0;

		mem_write(self->mem, self->clear_child_tid, 4, &cleared_tid);
		X86ContextFutexWake(self, self->clear_child_tid, 1, -1);
	}

	/* Process the robust futex list of the exiting context. */
	X86ContextExitRobustList(self);

	/* Leave the signal handler if the context is currently running one. */
	if (X86ContextGetState(self, X86ContextHandler))
	{
		X86ContextReturnFromSignalHandler(self);
	}

	/* Enter the final state: zombie while a parent exists, finished
	 * otherwise. */
	if (self->parent)
	{
		X86ContextSetState(self, X86ContextZombie);
	}
	else
	{
		X86ContextSetState(self, X86ContextFinished);
	}
	self->exit_code = state;
	X86EmuProcessEventsSchedule(emu);
}
/* Walk the robust futex list registered by context 'self' and trace every
 * entry through 'x86_sys_debug'. The function only reads guest memory and
 * prints debug information; it does not modify any lock word or wake up any
 * waiter.
 *
 * Guest-side layout (32-bit), as represented in the kernel:
 *
 *	struct robust_list {
 *		struct robust_list __user *next;
 *	}
 *	struct robust_list_head {
 *		struct robust_list list;			(offset 0)
 *		long futex_offset;				(offset 4)
 *		struct robust_list __user *list_op_pending;	(offset 8)
 *	}
 *
 * 'self->robust_list_head' holds the guest address of the head. The list is
 * circular: the last entry's 'next' points back at the head. The lock word
 * of an entry lives at 'entry + futex_offset'.
 *
 * See linux/Documentation/robust-futex-ABI.txt for details about robust
 * futex wake up at thread exit. */
void X86ContextExitRobustList(X86Context *self)
{
	/* Upper bound on the number of entries visited. The list lives in
	 * guest memory, so a corrupted or malicious list could contain a
	 * cycle that never reaches the head again. The Linux kernel bounds
	 * the same walk with ROBUST_LIST_LIMIT (2048). */
	enum { robust_list_limit = 2048 };

	unsigned int head;
	unsigned int entry = 0;
	unsigned int next = 0;
	unsigned int offset = 0;
	unsigned int lock_word = 0;
	int visited;

	/* No robust list registered for this context */
	head = self->robust_list_head;
	if (!head)
		return;
	x86_sys_debug("ctx %d: processing robust futex list\n", self->pid);

	/* Both the first entry and 'futex_offset' are stored in the head.
	 * 'futex_offset' exists only there: plain entries consist of nothing
	 * but their 'next' pointer, so it must not be re-read per entry. */
	mem_read(self->mem, head, 4, &entry);
	mem_read(self->mem, head + 4, 4, &offset);

	/* Visit entries until the list ends (null pointer), wraps around to
	 * the head, or the safety bound is reached. */
	for (visited = 0; entry && entry != head
			&& visited < robust_list_limit; visited++)
	{
		mem_read(self->mem, entry, 4, &next);

		/* 'offset' may encode a negative displacement; 32-bit unsigned
		 * wrap-around yields the intended guest address. */
		mem_read(self->mem, entry + offset, 4, &lock_word);
		x86_sys_debug(" lock_entry=0x%x: offset=%d, lock_word=0x%x\n",
				entry, (int) offset, lock_word);

		entry = next;
	}
}
/* Wake up at most 'count' contexts suspended on the futex at guest address
 * 'futex'.
 *
 * A suspended context is a candidate when it is in state 'X86ContextFutex',
 * its 'wakeup_futex' equals 'futex', and its 'wakeup_futex_bitset' shares at
 * least one bit with 'bitset'. Among the candidates, the one with the lowest
 * 'wakeup_futex_sleep' value is woken up first.
 *
 * Each woken context leaves states 'X86ContextSuspended' and
 * 'X86ContextFutex' and gets 0 in register 'eax' as the return value of its
 * system call.
 *
 * Returns the number of contexts actually woken up. */
int X86ContextFutexWake(X86Context *self, unsigned int futex,
		unsigned int count, unsigned int bitset)
{
	X86Emu *emu = self->emu;
	int woken = 0;

	/* One full scan of the suspended list per context to wake up */
	for (; count; count--)
	{
		X86Context *candidate;
		X86Context *chosen = NULL;

		/* Select the matching waiter with the lowest sleep stamp */
		for (candidate = emu->suspended_list_head; candidate;
				candidate = candidate->suspended_list_next)
		{
			if (X86ContextGetState(candidate, X86ContextFutex)
					&& candidate->wakeup_futex == futex
					&& (candidate->wakeup_futex_bitset & bitset)
					&& (!chosen || candidate->wakeup_futex_sleep
						< chosen->wakeup_futex_sleep))
			{
				chosen = candidate;
			}
		}

		/* Nobody else is waiting on this futex */
		if (!chosen)
			break;

		/* Resume the selected context */
		X86ContextClearState(chosen,
				X86ContextSuspended | X86ContextFutex);
		x86_sys_debug(" futex 0x%x: thread %d woken up\n",
				futex, chosen->pid);

		/* Return value of the system call the context was blocked in */
		chosen->regs->eax = 0;
		woken++;
	}

	return woken;
}
/* Check for events detected in spawned host threads, like waking up contexts or * sending signals. * The list is only processed if flag 'x86_emu->process_events_force' is set. */ void x86_emu_process_events() { struct x86_ctx_t *ctx, *next; long long now = esim_real_time(); /* Check if events need actually be checked. */ pthread_mutex_lock(&x86_emu->process_events_mutex); if (!x86_emu->process_events_force) { pthread_mutex_unlock(&x86_emu->process_events_mutex); return; } /* By default, no subsequent call to 'x86_emu_process_events' is assumed */ x86_emu->process_events_force = 0; /* * LOOP 1 * Look at the list of suspended contexts and try to find * one that needs to be waken up. */ for (ctx = x86_emu->suspended_list_head; ctx; ctx = next) { /* Save next */ next = ctx->suspended_list_next; /* Context is suspended in 'nanosleep' system call. */ if (x86_ctx_get_status(ctx, x86_ctx_nanosleep)) { uint32_t rmtp = ctx->regs->ecx; uint64_t zero = 0; uint32_t sec, usec; uint64_t diff; /* If 'x86_emu_host_thread_suspend' is still running for this context, do nothing. 
*/ if (ctx->host_thread_suspend_active) continue; /* Timeout expired */ if (ctx->wakeup_time <= now) { if (rmtp) mem_write(ctx->mem, rmtp, 8, &zero); x86_sys_debug("syscall 'nanosleep' - continue (pid %d)\n", ctx->pid); x86_sys_debug(" return=0x%x\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_nanosleep); continue; } /* Context received a signal */ if (ctx->signal_mask_table->pending & ~ctx->signal_mask_table->blocked) { if (rmtp) { diff = ctx->wakeup_time - now; sec = diff / 1000000; usec = diff % 1000000; mem_write(ctx->mem, rmtp, 4, &sec); mem_write(ctx->mem, rmtp + 4, 4, &usec); } ctx->regs->eax = -EINTR; x86_sys_debug("syscall 'nanosleep' - interrupted by signal (pid %d)\n", ctx->pid); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_nanosleep); continue; } /* No event available, launch 'x86_emu_host_thread_suspend' again */ ctx->host_thread_suspend_active = 1; if (pthread_create(&ctx->host_thread_suspend, NULL, x86_emu_host_thread_suspend, ctx)) fatal("syscall 'poll': could not create child thread"); continue; } /* Context suspended in 'rt_sigsuspend' system call */ if (x86_ctx_get_status(ctx, x86_ctx_sigsuspend)) { /* Context received a signal */ if (ctx->signal_mask_table->pending & ~ctx->signal_mask_table->blocked) { x86_signal_handler_check_intr(ctx); ctx->signal_mask_table->blocked = ctx->signal_mask_table->backup; x86_sys_debug("syscall 'rt_sigsuspend' - interrupted by signal (pid %d)\n", ctx->pid); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_sigsuspend); continue; } /* No event available. The context will never awake on its own, so no * 'x86_emu_host_thread_suspend' is necessary. */ continue; } /* Context suspended in 'poll' system call */ if (x86_ctx_get_status(ctx, x86_ctx_poll)) { uint32_t prevents = ctx->regs->ebx + 6; uint16_t revents = 0; struct x86_file_desc_t *fd; struct pollfd host_fds; int err; /* If 'x86_emu_host_thread_suspend' is still running for this context, do nothing. 
*/ if (ctx->host_thread_suspend_active) continue; /* Get file descriptor */ fd = x86_file_desc_table_entry_get(ctx->file_desc_table, ctx->wakeup_fd); if (!fd) fatal("syscall 'poll': invalid 'wakeup_fd'"); /* Context received a signal */ if (ctx->signal_mask_table->pending & ~ctx->signal_mask_table->blocked) { x86_signal_handler_check_intr(ctx); x86_sys_debug("syscall 'poll' - interrupted by signal (pid %d)\n", ctx->pid); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_poll); continue; } /* Perform host 'poll' call */ host_fds.fd = fd->host_fd; host_fds.events = ((ctx->wakeup_events & 4) ? POLLOUT : 0) | ((ctx->wakeup_events & 1) ? POLLIN : 0); err = poll(&host_fds, 1, 0); if (err < 0) fatal("syscall 'poll': unexpected error in host 'poll'"); /* POLLOUT event available */ if (ctx->wakeup_events & host_fds.revents & POLLOUT) { revents = POLLOUT; mem_write(ctx->mem, prevents, 2, &revents); ctx->regs->eax = 1; x86_sys_debug("syscall poll - continue (pid %d) - POLLOUT occurred in file\n", ctx->pid); x86_sys_debug(" retval=%d\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_poll); continue; } /* POLLIN event available */ if (ctx->wakeup_events & host_fds.revents & POLLIN) { revents = POLLIN; mem_write(ctx->mem, prevents, 2, &revents); ctx->regs->eax = 1; x86_sys_debug("syscall poll - continue (pid %d) - POLLIN occurred in file\n", ctx->pid); x86_sys_debug(" retval=%d\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_poll); continue; } /* Timeout expired */ if (ctx->wakeup_time && ctx->wakeup_time < now) { revents = 0; mem_write(ctx->mem, prevents, 2, &revents); x86_sys_debug("syscall poll - continue (pid %d) - time out\n", ctx->pid); x86_sys_debug(" return=0x%x\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_poll); continue; } /* No event available, launch 'x86_emu_host_thread_suspend' again */ ctx->host_thread_suspend_active = 1; if (pthread_create(&ctx->host_thread_suspend, NULL, 
x86_emu_host_thread_suspend, ctx)) fatal("syscall 'poll': could not create child thread"); continue; } /* Context suspended in a 'write' system call */ if (x86_ctx_get_status(ctx, x86_ctx_write)) { struct x86_file_desc_t *fd; int count, err; uint32_t pbuf; void *buf; struct pollfd host_fds; /* If 'x86_emu_host_thread_suspend' is still running for this context, do nothing. */ if (ctx->host_thread_suspend_active) continue; /* Context received a signal */ if (ctx->signal_mask_table->pending & ~ctx->signal_mask_table->blocked) { x86_signal_handler_check_intr(ctx); x86_sys_debug("syscall 'write' - interrupted by signal (pid %d)\n", ctx->pid); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_write); continue; } /* Get file descriptor */ fd = x86_file_desc_table_entry_get(ctx->file_desc_table, ctx->wakeup_fd); if (!fd) fatal("syscall 'write': invalid 'wakeup_fd'"); /* Check if data is ready in file by polling it */ host_fds.fd = fd->host_fd; host_fds.events = POLLOUT; err = poll(&host_fds, 1, 0); if (err < 0) fatal("syscall 'write': unexpected error in host 'poll'"); /* If data is ready in the file, wake up context */ if (host_fds.revents) { pbuf = ctx->regs->ecx; count = ctx->regs->edx; buf = xmalloc(count); mem_read(ctx->mem, pbuf, count, buf); count = write(fd->host_fd, buf, count); if (count < 0) fatal("syscall 'write': unexpected error in host 'write'"); ctx->regs->eax = count; free(buf); x86_sys_debug("syscall write - continue (pid %d)\n", ctx->pid); x86_sys_debug(" return=0x%x\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_write); continue; } /* Data is not ready to be written - launch 'x86_emu_host_thread_suspend' again */ ctx->host_thread_suspend_active = 1; if (pthread_create(&ctx->host_thread_suspend, NULL, x86_emu_host_thread_suspend, ctx)) fatal("syscall 'write': could not create child thread"); continue; } /* Context suspended in 'read' system call */ if (x86_ctx_get_status(ctx, x86_ctx_read)) { struct x86_file_desc_t *fd; 
uint32_t pbuf; int count, err; void *buf; struct pollfd host_fds; /* If 'x86_emu_host_thread_suspend' is still running for this context, do nothing. */ if (ctx->host_thread_suspend_active) continue; /* Context received a signal */ if (ctx->signal_mask_table->pending & ~ctx->signal_mask_table->blocked) { x86_signal_handler_check_intr(ctx); x86_sys_debug("syscall 'read' - interrupted by signal (pid %d)\n", ctx->pid); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_read); continue; } /* Get file descriptor */ fd = x86_file_desc_table_entry_get(ctx->file_desc_table, ctx->wakeup_fd); if (!fd) fatal("syscall 'read': invalid 'wakeup_fd'"); /* Check if data is ready in file by polling it */ host_fds.fd = fd->host_fd; host_fds.events = POLLIN; err = poll(&host_fds, 1, 0); if (err < 0) fatal("syscall 'read': unexpected error in host 'poll'"); /* If data is ready, perform host 'read' call and wake up */ if (host_fds.revents) { pbuf = ctx->regs->ecx; count = ctx->regs->edx; buf = xmalloc(count); count = read(fd->host_fd, buf, count); if (count < 0) fatal("syscall 'read': unexpected error in host 'read'"); ctx->regs->eax = count; mem_write(ctx->mem, pbuf, count, buf); free(buf); x86_sys_debug("syscall 'read' - continue (pid %d)\n", ctx->pid); x86_sys_debug(" return=0x%x\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_read); continue; } /* Data is not ready. 
Launch 'x86_emu_host_thread_suspend' again */ ctx->host_thread_suspend_active = 1; if (pthread_create(&ctx->host_thread_suspend, NULL, x86_emu_host_thread_suspend, ctx)) fatal("syscall 'read': could not create child thread"); continue; } /* Context suspended in a 'waitpid' system call */ if (x86_ctx_get_status(ctx, x86_ctx_waitpid)) { struct x86_ctx_t *child; uint32_t pstatus; /* A zombie child is available to 'waitpid' it */ child = x86_ctx_get_zombie(ctx, ctx->wakeup_pid); if (child) { /* Continue with 'waitpid' system call */ pstatus = ctx->regs->ecx; ctx->regs->eax = child->pid; if (pstatus) mem_write(ctx->mem, pstatus, 4, &child->exit_code); x86_ctx_set_status(child, x86_ctx_finished); x86_sys_debug("syscall waitpid - continue (pid %d)\n", ctx->pid); x86_sys_debug(" return=0x%x\n", ctx->regs->eax); x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_waitpid); continue; } /* No event available. Since this context won't wake up on its own, no * 'x86_emu_host_thread_suspend' is needed. */ continue; } /* Context suspended in a system call using a custom wake up check call-back * function. NOTE: this is a new mechanism. It'd be nice if all other system * calls started using it. It is nicer, since it allows for a check of wake up * conditions together with the system call itself, without having distributed * code for the implementation of a system call (e.g. 'read'). */ if (x86_ctx_get_status(ctx, x86_ctx_callback)) { assert(ctx->can_wakeup_callback_func); if (ctx->can_wakeup_callback_func(ctx, ctx->can_wakeup_callback_data)) { /* Set context status to 'running' again. 
*/ x86_ctx_clear_status(ctx, x86_ctx_suspended | x86_ctx_callback); /* Call wake up function */ if (ctx->wakeup_callback_func) ctx->wakeup_callback_func(ctx, ctx->wakeup_callback_data); /* Reset call-back info */ ctx->wakeup_callback_func = NULL; ctx->wakeup_callback_data = NULL; ctx->can_wakeup_callback_func = NULL; ctx->can_wakeup_callback_data = NULL; } continue; } } /* * LOOP 2 * Check list of all contexts for expired timers. */ for (ctx = x86_emu->context_list_head; ctx; ctx = ctx->context_list_next) { int sig[3] = { 14, 26, 27 }; /* SIGALRM, SIGVTALRM, SIGPROF */ int i; /* If there is already a 'ke_host_thread_timer' running, do nothing. */ if (ctx->host_thread_timer_active) continue; /* Check for any expired 'itimer': itimer_value < now * In this case, send corresponding signal to process. * Then calculate next 'itimer' occurrence: itimer_value = now + itimer_interval */ for (i = 0; i < 3; i++ ) { /* Timer inactive or not expired yet */ if (!ctx->itimer_value[i] || ctx->itimer_value[i] > now) continue; /* Timer expired - send a signal. * The target process might be suspended, so the host thread is canceled, and a new * call to 'x86_emu_process_events' is scheduled. Since 'ke_process_events_mutex' is * already locked, the thread-unsafe version of 'x86_ctx_host_thread_suspend_cancel' is used. */ __x86_ctx_host_thread_suspend_cancel(ctx); x86_emu->process_events_force = 1; x86_sigset_add(&ctx->signal_mask_table->pending, sig[i]); /* Calculate next occurrence */ ctx->itimer_value[i] = 0; if (ctx->itimer_interval[i]) ctx->itimer_value[i] = now + ctx->itimer_interval[i]; } /* Calculate the time when next wakeup occurs. 
*/ ctx->host_thread_timer_wakeup = 0; for (i = 0; i < 3; i++) { if (!ctx->itimer_value[i]) continue; assert(ctx->itimer_value[i] >= now); if (!ctx->host_thread_timer_wakeup || ctx->itimer_value[i] < ctx->host_thread_timer_wakeup) ctx->host_thread_timer_wakeup = ctx->itimer_value[i]; } /* If a new timer was set, launch ke_host_thread_timer' again */ if (ctx->host_thread_timer_wakeup) { ctx->host_thread_timer_active = 1; if (pthread_create(&ctx->host_thread_timer, NULL, x86_emu_host_thread_timer, ctx)) fatal("%s: could not create child thread", __FUNCTION__); } } /* * LOOP 3 * Process pending signals in running contexts to launch signal handlers */ for (ctx = x86_emu->running_list_head; ctx; ctx = ctx->running_list_next) { x86_signal_handler_check(ctx); } /* Unlock */ pthread_mutex_unlock(&x86_emu->process_events_mutex); }