/**
 * Give control back to the scheduler after main() exits.  This allows
 * remaining threads to continue running.
 * FIXME: we don't know whether the user explicitly called exit() or main()
 * returned normally.  In the former case we should exit immediately, while
 * in the latter we should join the other threads.
 * Overriding exit() does not work because returning normally from
 * main() also calls exit().
 **/
static void exit_func(void)
{
  // don't do anything if we're in a forked child process
  if( getpid() != capriccio_main_pid )
    return;

  exit_func_done = 1;
  main_exited = 1;

  if( !exit_whole_program )
    // this will block until all other threads finish
    thread_exit(NULL);

  // dump the blocking graph before we exit
  if( conf_dump_blocking_graph ) {
    tdebug("dumping blocking graph from exit_func()\n");
    dump_blocking_graph();
  }

  // FIXME: make sure to kill cloned children

  if( conf_dump_timing_info ) {
    if( main_timer.running )      stop_timer(&main_timer);
    if( scheduler_timer.running ) stop_timer(&scheduler_timer);
    if( app_timer.running )       stop_timer(&app_timer);
    print_timers();
  }
}
int thread_join(thread_t *t, void **ret)
{
  if (t == NULL)
    return_errno(FALSE, EINVAL);
  if ( !( t->joinable ) )
    return_errno(FALSE, EINVAL);

  assert(t->state != GHOST);

  // a thread can be joined only once
  if (t->join_thread)
    return_errno(FALSE, EACCES);
  t->join_thread = current_thread;

  // wait for the thread to complete
  tdebug("**** thread state: %d\n", t->state);
  if (t->state != ZOMBIE) {
    CAP_SET_SYSCALL();
    thread_suspend_self(0);
    CAP_CLEAR_SYSCALL();
  }

  // clean up the dead thread
  if (ret != NULL)
    *ret = t->ret;
  free_thread( t );

  return TRUE;
}
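/*
 * Illustrative usage sketch (not part of the original source): spawn a
 * joinable thread and collect its return value with thread_join().  The
 * thread_spawn(name, func, arg) creation call is assumed here; its exact
 * signature is not shown in this excerpt.  Note that thread_join() already
 * calls free_thread(), so the joiner must not free the TCB again.
 */
#if 0   /* example only */
static void* worker(void *arg)
{
  // do some work, then hand a result back to the joiner
  return arg;
}

static void join_example(void)
{
  void *result = NULL;
  thread_t *t = thread_spawn("worker", worker, (void*)42);   // assumed API
  if( thread_join(t, &result) )                              // blocks until worker exits
    tdebug("worker returned %p\n", result);
}
#endif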
/**
 * Wrapper function for new threads.  This allows us to clean up
 * correctly if a thread exits without calling thread_exit().
 **/
static void* new_thread_wrapper(void *arg)
{
  void *ret;
  (void) arg;

  // set up initial stats
  current_thread->curr_stats.files = 0;
  current_thread->curr_stats.sockets = 0;
  current_thread->curr_stats.heap = 0;
  bg_set_current_stats( &current_thread->curr_stats );
  current_thread->prev_stats = current_thread->curr_stats;

  // set up stack limit for the new thread
  stack_bottom = current_thread->stack_bottom;
  stack_fingerprint = current_thread->stack_fingerprint;

  // start the thread
  tdebug("Initial arg = %p\n", current_thread->initial_arg);
  ret = current_thread->initial_func(current_thread->initial_arg);

  // call thread_exit() to do the cleanup
  thread_exit(ret);

  return NULL;
}
void thread_exit(void *ret)
{
  thread_t *t = current_thread;

  sanity_check_threadcounts();
  tdebug("current=%s\n", current_thread ? current_thread->name : "NULL");

  if (current_thread == main_thread && main_exited == 0) {
    // The case where the user calls thread_exit() from the main thread is
    // complicated: we cannot simply terminate the main thread, because we
    // need its stack to terminate the whole program normally.  So we call
    // exit() and let the C runtime give us back a stack context from which
    // we can just return to terminate the whole program.  exit() will run
    // exit_func(), which in turn calls thread_exit() again.
    main_exited = 1;
    exit(0);
  }

  // note the thread exit in the blocking graph
  t->curr_stats.node = bg_exit_node;
  current_thread->prev_stats.node->num_here--;
  current_thread->curr_stats.node->num_here++;
  if( bg_save_stats ) {
    bg_update_stats();
  }

  // update thread counts
  num_runnable_threads--;
  if( t->daemon ) num_daemon_threads--;

  t->state = ZOMBIE;
  num_zombie_threads++;

  // Deallocate the TCB, unless the thread is joinable and the joiner
  // wants the return value for something.
  if ( !( t->joinable ) ) {
    // tell the scheduler thread to delete the current one
    current_thread_exited = 1;
  } else {
    t->ret = ret;
    if (t->join_thread)
      thread_resume(t->join_thread);
  }

  sanity_check_threadcounts();

  // squirrel away the stack limit--not that we'll need it again
  current_thread->stack_bottom = stack_bottom;
  current_thread->stack_fingerprint = stack_fingerprint;

  // give control back to the scheduler
#ifdef NO_SCHEDULER_THREAD
  do_scheduler(NULL);
#else
  co_call(scheduler_thread->coro, NULL);
#endif
}
// Resume the thread internally only:
// don't touch the timeout flag or the sleep queue.
static void _thread_resume(thread_t *t)
{
  tdebug("t=%p\n", t);
  if (t->state != SUSPENDED)
    return;

  num_suspended_threads--;
  num_runnable_threads++;
  sanity_check_threadcounts();
  assert(t->state == SUSPENDED);
  t->state = RUNNABLE;
  assert( t->sleep == -1 );
  sched_add_thread(t);
}
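/*
 * Hypothetical sketch (not from the original source) of the public
 * thread_resume() wrapper implied by the comment above: unlike
 * _thread_resume(), it would first pull the thread off the sleep queue and
 * clear its timeout flag before making it runnable.  The
 * sleepq_remove_thread() helper is an assumption for illustration only.
 */
#if 0   /* example only */
void thread_resume(thread_t *t)
{
  if( t->sleep != -1 )
    sleepq_remove_thread(t);   // assumed helper: drop the sleep-queue entry
  t->timeout = 0;              // the wakeup was explicit, not a timeout
  _thread_resume(t);
}
#endif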
/**
 * Entry point for the vcore.  The basic job is either to resume the thread
 * that was interrupted when a notification came in, or to find a new thread
 * from the user-level threading library and launch it.
 **/
void __attribute__((noreturn)) vcore_entry()
{
  /* Grab references to the current vcoreid, vcore preemption data, and the
   * vcoremap */
  assert(in_vcore_context());
  uint32_t vcoreid = vcore_id();
  struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
  struct vcore *vc = &__procinfo.vcoremap[vcoreid];
  tdebug("current=%s, vcore=%d\n",
         current_thread ? current_thread->name : "NULL", vcoreid);

  /* Assert that notifications are disabled.  We should always have
   * notifications disabled when coming in here. */
  assert(vcpd->notif_enabled == FALSE);

  /* Put this in the loop that deals with notifications.  It will return if
   * there is no preempt pending. */
  if (vc->preempt_pending)
    sys_yield(TRUE);

  /* When running vcore_entry(), we are using the TLS of the vcore, not any
   * particular thread.  If current_thread is set in the vcore's TLS, then
   * the thread did not yield voluntarily and was instead interrupted by a
   * notification.  We therefore need to restore the thread context from the
   * notification trapframe, not the one stored in the thread struct itself. */
  if (unlikely(current_thread)) {
    vcpd->notif_pending = 0;
    /* Do one last check for notifs after clearing pending */
    // TODO: call the handle_notif() here (first)

    /* Copy the notification trapframe into the current thread's trapframe */
    memcpy(&current_thread->context->utf, &vcpd->notif_tf,
           sizeof(struct user_trapframe));

    /* Restore the context from the current_thread's trapframe */
    restore_context(current_thread->context);
    assert(0);
  }

  /* Otherwise either a vcore is coming up for the first time, or a thread
   * has just yielded and vcore_entry() was called directly.  In this case we
   * need to figure out which thread to schedule next on the vcore. */
  run_next_thread();
  assert(0);
}
static int tc_x11_demultiplex(TCModuleInstance *self,
                              vframe_list_t *vframe, aframe_list_t *aframe)
{
    TCX11PrivateData *priv = NULL;
    uint64_t now = 0;
    int ret = 0;

    TC_MODULE_SELF_CHECK(self, "demultiplex");

    priv = self->userdata;
    priv->reftime = tc_gettime();

    tdebug(priv, "begin demultiplex");

    if (aframe != NULL) {
        aframe->audio_len = 0; /* no audio from here */
    }

    if (vframe != NULL) {
        tdebug(priv, " begin acquire");
        ret = tc_x11source_acquire(&priv->src, vframe->video_buf,
                                   vframe->video_size);
        tdebug(priv, " end acquire");

        if (ret > 0) {
            int64_t naptime = 0;
            uint64_t now = 0;

            vframe->attributes |= TC_FRAME_IS_KEYFRAME;
            vframe->video_len = ret;

            now = tc_gettime();
            naptime = (priv->frame_delay - (now - priv->reftime));

            if (priv->skew >= priv->skew_limit) {
                tc_log_info(MOD_NAME, " skew correction (naptime was %lu)",
                            (unsigned long)naptime);
                int64_t t = naptime;
                naptime -= priv->skew;
                priv->skew = TC_MAX(0, priv->skew - t);
            }

            if (naptime <= 0) {
                /* don't sleep at all if delay is already excessive */
                tc_log_info(MOD_NAME, "%-18s", " NO SLEEP!");
                priv->expired++;
            } else {
                tc_log_info(MOD_NAME, "%-18s %lu", " sleep time",
                            (unsigned long)(naptime));
                tc_timer_sleep(&priv->timer, (uint64_t)naptime);
            }
        }
    }

    now = tc_gettime();
    now -= priv->reftime;
    priv->skew += now - priv->frame_delay;

    tdebug(priv, "end demultiplex");
    tc_log_info(MOD_NAME, "%-18s %li", "detected skew",
                (long)(priv->skew));

    return (ret > 0) ? ret : -1;
}
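/*
 * Illustrative trace of the pacing arithmetic above (the numbers are made
 * up, not taken from the source): with frame_delay = 40000 us and an
 * acquire that took 15000 us, naptime = 40000 - 15000 = 25000 us.  If the
 * accumulated skew is 30000 us and skew_limit has been reached, the
 * correction sets naptime = 25000 - 30000 = -5000 and
 * skew = max(0, 30000 - 25000) = 5000, so no sleep happens, expired is
 * bumped, and the remaining 5000 us of skew is worked off on later frames.
 */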
void sockio_poll_poll(long long usecs)
{
  int ret, i;
  fdstruct_t *fds;
  iorequest_t *req;
  int timeout = (usecs == -1 ? -1 : usecs / 1000);  // translate from microseconds to milliseconds

  // get the lock
  thread_latch( sockio_poll_latch );

  assert(num_outstanding >= 0);
  if( num_outstanding == 0 ) {
    thread_unlatch( sockio_poll_latch );
    return;
  }

  // NOTE: why a 100us timeout here?  this is preventing all runnable
  // threads from proceeding - zf
  // removing it
  //while( (ret=syscall(SYS_poll, ufds, num_outstanding, timeout)) < 0 && errno == EINTR ) ;
  (void) timeout;
  while( (ret=syscall(SYS_poll, ufds, num_outstanding, 0)) < 0 && errno == EINTR ) ;

  if(ret < 0) {
    perror("FATAL ERROR. poll() failed. ");
    exit(1);
  }
  if(ret == 0) {
    // release the lock
    thread_unlatch( sockio_poll_latch );
    return;
  }

  tdebug("%d fds are ready for IO\n", ret);

  // process the returned events
  for(i=num_outstanding-1; i>=0; i--) {

    // skip past fds w/ no events
    if( ufds[i].revents == 0 )
      continue;

    // find the fdstruct & get the first request
    fds = get_fdstruct(ufds[i].fd);
    req = view_first_waiter(fds);

    // do the IO for as many waiters as possible
    //
    // FIXME: do this with just one syscall by using readv/writev!!!
    // FIXME: it's VERY VERY BAD to hold sockio_poll_latch across these
    // syscalls.  To fix, sockio_poll_add_request needs to drop waiting
    // requests into a list somewhere, so only the list needs to be locked.
    while( req != NULL ) {

      do {
        switch (req->type) {
        case READ:
          ret = syscall(SYS_read, fds->fd, req->args.rw.buf, req->args.rw.count);
          break;
        case WRITE:
          ret = syscall(SYS_write, fds->fd, req->args.rw.buf, req->args.rw.count);
          break;
        case POLL1:
          ret = 1;
          req->args.poll1.ufds[0].revents = ufds[i].revents;
          break;
        case CONNECT:
          ret = 0;  // system call already done in blocking_io.c
          break;
        case ACCEPT:
        case SEND:
        case RECV:
          ret = syscall(SYS_socketcall, req->args.scall.which, req->args.scall.argv);
          break;
        default:
          assert(0);
        }
      } while(ret==-1 && errno==EINTR);

      // the request would have blocked.  Keep the fd in the poll set, and try again later
      if( ret == -1 && (errno==EAGAIN || errno==EWOULDBLOCK) )
        req = NULL;

      // there was some other error - return this error to all waiters
      else if( ret == -1 ) {
        while( (req=remove_first_waiter(fds)) != NULL ) {
          req->ret = -1;
          req->err = errno;
          thread_resume( req->thread );
        }
      }

      // the call succeeded
      else {
        remove_first_waiter(fds);
        req->ret = ret;
        req->err = 0;
        thread_resume( req->thread );

        // a read or write succeeded, but we didn't get the full count
        if( (req->type == READ || req->type == WRITE)
            && (size_t) ret < req->args.rw.count )
          req = NULL;
        // for everything else, we get the next request
        else
          req = view_first_waiter(fds);
      }
    }

    // update the poll flags for the fd
    req = view_first_waiter(fds);
    if( req != NULL ) {
      // add flags for the next poll() call
      debug("more waiters for %d - will poll again", fds->fd);
      switch( req->type ) {
      case READ: case RECV: case ACCEPT:
        ufds[i].events = POLLIN|POLLPRI;
        break;
      case WRITE: case SEND: case CONNECT:
        ufds[i].events = POLLOUT;
        break;
      case POLL1:
        ufds[i].events = req->args.poll1.ufds[0].events;
        break;
      default:
        assert(0);
      }
    } else {
      // plug the hole in the request list
      num_outstanding--;
      ufds[i] = ufds[num_outstanding];
    }
  }

  // release the lock
  thread_unlatch( sockio_poll_latch );
}
/**
 * Perform the necessary management to yield the current thread.
 * If suspended == TRUE && timeout != 0, the thread is added to the sleep
 * queue and woken up later when the timeout expires.
 * Returns TIMEDOUT if the timeout actually fires, OK if the thread is
 * woken up by another thread, and INTERRUPTED if it is interrupted by a
 * signal.
 **/
static int thread_yield_internal(int suspended, unsigned long long timeout)
{
  // now we use a per-thread errno stored in thread_t
  int savederrno;
  int rv = OK;

  tdebug("current_thread=%p\n", current_thread);

  savederrno = errno;

  // decide what to do with the thread
  if( !suspended )        // just add it to the runlist
    sched_add_thread( current_thread );
  else if( timeout )      // add to the sleep list
    sleepq_add_thread( current_thread, timeout );

  {
#ifdef SHOW_EDGE_TIMES
    cpu_tick_t start, end, rstart, rend;
    GET_CPU_TICKS(start);
    GET_REAL_CPU_TICKS(rstart);
#endif

    // figure out the current node in the graph
    if( !conf_no_stacktrace )
      bg_backtrace_set_node();
    // FIXME: fake out what cil would do...
    current_thread->curr_stats.node = bg_dummy_node;

    // we should already have been told the node by CIL or directly by the programmer
    assert( current_thread->curr_stats.node != NULL );

    // update node counts
    current_thread->prev_stats.node->num_here--;
    current_thread->curr_stats.node->num_here++;

    // update the blocking graph info
    if( bg_save_stats )
      bg_update_stats();

#ifdef SHOW_EDGE_TIMES
    GET_CPU_TICKS(end);
    GET_REAL_CPU_TICKS(rend);
    {
      thread_stats_t *curr = &current_thread->curr_stats;
      thread_stats_t *prev = &current_thread->prev_stats;
      output(" %3d -> %-3d   %7lld ticks (%lld ms)   %7lld rticks (%lld ms)   ",
             prev->node->node_num, curr->node->node_num,
             curr->cpu_ticks - prev->cpu_ticks,
             (curr->cpu_ticks - prev->cpu_ticks) / ticks_per_millisecond,
# ifdef USE_PERFCTR
             curr->real_ticks - prev->real_ticks,
             (curr->real_ticks - prev->real_ticks) / ticks_per_millisecond
# else
             curr->cpu_ticks - prev->cpu_ticks,
             (curr->cpu_ticks - prev->cpu_ticks) / ticks_per_millisecond
# endif
             );

      output("update bg node %d: %lld (%lld ms)  real: %lld (%lld ms)\n",
             current_thread->curr_stats.node->node_num,
             (end-start), (end-start)/ticks_per_millisecond,
             (rend-rstart), (rend-rstart)/ticks_per_millisecond);
    }
#endif
  }

  // squirrel away the stack limit for next time
  current_thread->stack_bottom = stack_bottom;
  current_thread->stack_fingerprint = stack_fingerprint;

  // switch to the scheduler thread
#ifdef NO_SCHEDULER_THREAD
  do_scheduler(NULL);
#else
  co_call(scheduler_thread->coro, NULL);
#endif

  // set up the stack limit for the new thread
  stack_bottom = current_thread->stack_bottom;
  stack_fingerprint = current_thread->stack_fingerprint;

  // rotate the stats
  if( bg_save_stats ) {
    current_thread->prev_stats = current_thread->curr_stats;

    // update thread time, to skip time asleep
    GET_CPU_TICKS( current_thread->prev_stats.cpu_ticks );
    current_thread->prev_stats.cpu_ticks -= ticks_diff;  // FIXME: subtract out time to do debug output
#ifdef USE_PERFCTR
    GET_REAL_CPU_TICKS( current_thread->prev_stats.real_ticks );
    current_thread->prev_stats.real_ticks -= ticks_rdiff;  // FIXME: subtract out time to do debug output
#endif
  } else {
    current_thread->prev_stats.node = current_thread->curr_stats.node;
  }

  // check whether the timeout fired
  if (suspended && timeout && current_thread->timeout) {
    rv = TIMEDOUT;
    current_thread->timeout = 0;
  }

  // check for and process pending signals
  if ( likely(!current_thread->sig_waiting) ) {
    if (sig_process_pending())
      rv = INTERRUPTED;
  } else {
    // if sig_waiting is 1, sigwait() itself will handle the rest
    rv = INTERRUPTED;
  }

  errno = savederrno;
  return rv;
}
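/*
 * Illustrative sketch (not from the original source) of how a blocking
 * primitive built on thread_yield_internal() might map its return codes,
 * assuming the OK/TIMEDOUT/INTERRUPTED constants used above.  The
 * thread_usleep() name and its return convention are assumptions for
 * illustration only.
 */
#if 0   /* example only */
int thread_usleep(unsigned long long usecs)
{
  // suspend with a timeout; the sleep queue wakes us if nobody else does
  int rv = thread_yield_internal(TRUE, usecs);

  if( rv == TIMEDOUT )    return 0;        // the full sleep elapsed
  if( rv == INTERRUPTED ) return -EINTR;   // a signal cut the sleep short
  return 1;                                // woken explicitly by another thread
}
#endif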
/**
 * Main scheduling loop
 **/
static void* do_scheduler(void *arg)
{
  static cpu_tick_t next_poll=0, next_overload_check=0, next_info_dump=0, next_graph_stats=0, now=0;
  static int pollcount=1000;
  static int init_done = 0;

  (void) arg;  // suppress GCC "unused parameter" warning

  in_scheduler = 1;

  // make sure we start out by saving edge stats for a while
  if( !init_done ) {
    init_done = 1;
    if (conf_no_statcollect)
      bg_save_stats = 0;
    else
      bg_save_stats = 1;

    GET_REAL_CPU_TICKS( now );
    next_graph_stats = now + 1 * ticks_per_second;

    start_timer(&scheduler_timer);
  }

  while( 1 ) {

    //current_thread = scheduler_thread;
    sanity_check_threadcounts();
    sanity_check_io_stats();

    // wake up threads that have timeouts
    sleepq_check(0);
    sanity_check_threadcounts();

    // break out if there are only daemon threads
    if(unlikely (num_suspended_threads == 0 &&
                 num_runnable_threads == num_daemon_threads)) {

      // dump the blocking graph
      if( exit_func_done && conf_dump_blocking_graph ) {
        tdebug("dumping blocking graph from do_scheduler()\n");
        dump_blocking_graph();
      }

      // go back to the main thread, which should now be in exit_func()
      current_thread = main_thread;
      in_scheduler = 0;
      co_call(main_thread->coro, NULL);
      in_scheduler = 1;

      if( unlikely(current_thread_exited) ) {  // free memory from deleted threads
        current_thread_exited = 0;
        if (current_thread != main_thread)  // main_thread is needed for whole-program exit
          free_thread( current_thread );
      }

      return NULL;
    }

    // cheesy way of handling things with timing requirements
    {
      GET_REAL_CPU_TICKS( now );

      // toggle stats collection
      if( conf_no_statcollect == 0 && next_graph_stats < now ) {
        bg_save_stats = 1 - bg_save_stats;

        if( bg_save_stats ) {
          // record stats for 100 ms
          next_graph_stats = now + 100 * ticks_per_millisecond;

          // update the stats epoch, to allow proper handling of the first data items
          bg_stats_epoch++;
        } else {
          // avoid stats for 2000 ms
          next_graph_stats = now + 2000 * ticks_per_millisecond;
        }
        //output(" *********************** graph stats %s\n", bg_save_stats ? "ON" : "OFF" );
      }

      // resource utilization
      if( unlikely (next_overload_check < now) ) {
        check_overload( now );
        next_overload_check = now + OVERLOAD_CHECK_INTERVAL;
      }

      // poll
      if( likely( (int)io_polling_func) ) {
        if( num_runnable_threads==0 || --pollcount <= 0 || next_poll < now ) {
          //if( num_runnable_threads==0 ) {
          // poll
          long long timeout = 0;

          if( num_runnable_threads==0 ) {
            if (first_wake_usecs == 0) {
              timeout = -1;
            } else {
              // there are threads in the sleep queue,
              // so poll for I/O until at most that time
              unsigned long long now;
              now = current_usecs();
              tdebug("first_wake: %lld, now: %lld\n", first_wake_usecs, now);
              if (first_wake_usecs > now)
                timeout = first_wake_usecs - now;
            }
          }

          stop_timer(&scheduler_timer);
          //if( timeout != -1 ) output("timeout is not zero\n");
          io_polling_func( timeout );  // allow blocking
          start_timer(&scheduler_timer);

          sanity_check_threadcounts();

#ifndef USE_NIO
          // sleep for a bit, if there was nothing to do
          // FIXME: let the IO functions block instead??
          if( num_runnable_threads == 0 ) {
            syscall(SYS_sched_yield);
          }
#endif

          // vary the poll rate depending on the workload
#if 0
          if( num_runnable_threads < 5 ) {
            next_poll = now + (10*ticks_per_millisecond);
            pollcount = 1000;
          } else if( num_runnable_threads < 10 ) {
            next_poll = now + (50*ticks_per_millisecond);
            pollcount = 2000;
          } else {
            next_poll = now + (100*ticks_per_millisecond);
            pollcount = 3000;
          }
#else
          next_poll = now + (ticks_per_millisecond << 13);
          pollcount = 10000;
#endif
        }
      }

      // debug stats
      if( 0 && next_info_dump < now ) {
        dump_debug_info();
        next_info_dump = now + 5 * ticks_per_second;
      }
    }

    // get the head of the run list
    current_thread = sched_next_thread();

    // The scheduler gave back an invalid thread even though there are
    // runnable threads.  This indicates that every runnable thread is
    // likely to require use of an overloaded resource.
    if( !valid_thread(current_thread) ) {
      pollcount = 0;
      continue;
    }

    // barf, if the returned thread is still on the sleep queue
    assert( current_thread->sleep == -1 );

    tdebug("running TID %d (%s)\n", current_thread->tid,
           current_thread->name ? current_thread->name : "no name");

    sanity_check_threadcounts();

    // call the thread
    stop_timer(&scheduler_timer);
    start_timer(&app_timer);
    in_scheduler = 0;
    co_call(current_thread->coro, NULL);
    in_scheduler = 1;
    stop_timer(&app_timer);
    start_timer(&scheduler_timer);

    if( unlikely(current_thread_exited) ) {  // free memory from deleted threads
      current_thread_exited = 0;
      if (current_thread != main_thread)  // main_thread is needed for whole-program exit
        free_thread( current_thread );
    }

#ifdef NO_SCHEDULER_THREAD
    return NULL;
#endif
  }

  return NULL;
}