static void demo_task_new(struct task_struct *tsk, int on_runqueue,
                          int is_running)
{
        /* We'll use this to store IRQ flags. */
        unsigned long flags;
        struct demo_cpu_state *state = cpu_state_for(get_partition(tsk));
        lt_t now;

        TRACE_TASK(tsk, "is a new RT task %llu (on runqueue:%d, running:%d)\n",
                   litmus_clock(), on_runqueue, is_running);

        /* Acquire the lock protecting the state and disable interrupts. */
        raw_spin_lock_irqsave(&state->local_queues.ready_lock, flags);

        now = litmus_clock();

        /* Release the first job now. */
        release_at(tsk, now);

        if (is_running) {
                /* If tsk is running, then no other task can be running
                 * on the local CPU. */
                BUG_ON(state->scheduled != NULL);
                state->scheduled = tsk;
        } else if (on_runqueue) {
                demo_requeue(tsk, state);
        }

        if (edf_preemption_needed(&state->local_queues, state->scheduled))
                preempt_if_preemptable(state->scheduled, state->cpu);

        raw_spin_unlock_irqrestore(&state->local_queues.ready_lock, flags);
}
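/* NOTE (illustration, not part of the plugin code above): demo_task_new() and
 * demo_requeue() assume a per-CPU scheduler state of roughly the following
 * shape. The field names match their uses above (local_queues, scheduled,
 * cpu); the exact definition and the cpu_state_for() helper are a sketch and
 * may differ from the real DEMO plugin sources. */
struct demo_cpu_state {
        rt_domain_t         local_queues; /* per-CPU EDF ready/release queues */
        int                 cpu;          /* CPU this state belongs to */
        struct task_struct *scheduled;    /* task currently running on this CPU */
};

static DEFINE_PER_CPU(struct demo_cpu_state, demo_cpu_state);

#define cpu_state_for(cpu_id)  (&per_cpu(demo_cpu_state, cpu_id))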
/* Called when the state of tsk changes back to TASK_RUNNING.
 * We need to requeue the task.
 *
 * NOTE: If a sporadic task is suspended for a long time,
 * this might actually be an event-driven release of a new job.
 */
static void demo_task_resume(struct task_struct *tsk)
{
        unsigned long flags;
        struct demo_cpu_state *state = cpu_state_for(get_partition(tsk));
        lt_t now;

        TRACE_TASK(tsk, "wake_up at %llu\n", litmus_clock());

        raw_spin_lock_irqsave(&state->local_queues.ready_lock, flags);

        now = litmus_clock();

        if (is_sporadic(tsk) && is_tardy(tsk, now)) {
                /* This sporadic task was gone for a "long" time and woke up
                 * past its deadline. Give it a new budget by triggering a
                 * job release. */
                release_at(tsk, now);
        }

        /* This check is required to avoid races with tasks that resume before
         * the scheduler "noticed" that it resumed. That is, the wake up may
         * race with the call to schedule(). */
        if (state->scheduled != tsk) {
                demo_requeue(tsk, state);
                if (edf_preemption_needed(&state->local_queues, state->scheduled)) {
                        preempt_if_preemptable(state->scheduled, state->cpu);
                }
        }

        raw_spin_unlock_irqrestore(&state->local_queues.ready_lock, flags);
}
static struct task_struct* pfair_schedule(struct task_struct *prev)
{
        struct pfair_state* state = &__get_cpu_var(pfair_state);
        int blocks;
        struct task_struct* next = NULL;

        raw_spin_lock(&pfair_lock);

        blocks = is_realtime(prev) && !is_running(prev);

        if (state->local && safe_to_schedule(state->local, state->cpu))
                next = state->local;

        if (prev != next) {
                tsk_rt(prev)->scheduled_on = NO_CPU;
                if (next)
                        tsk_rt(next)->scheduled_on = state->cpu;
        }

        raw_spin_unlock(&pfair_lock);

        if (next)
                TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
                           tsk_pfair(next)->release, pfair_time, litmus_clock());
        else if (is_realtime(prev))
                TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock());

        return next;
}
static void pfair_task_wake_up(struct task_struct *t)
{
        unsigned long flags;
        lt_t now;

        TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
                   litmus_clock(), cur_release(t), pfair_time);

        raw_spin_lock_irqsave(&pfair_lock, flags);

        /* It is a little unclear how to deal with Pfair
         * tasks that block for a while and then wake. For now,
         * if a task blocks and wakes before its next job release,
         * then it may resume if it is currently linked somewhere
         * (as if it never blocked at all). Otherwise, we have a
         * new sporadic job release. */
        if (tsk_pfair(t)->sporadic_release) {
                now = litmus_clock();
                release_at(t, now);
                prepare_release(t, time2quanta(now, CEIL));
                sched_trace_task_release(t);
                /* FIXME: race with pfair_time advancing */
                pfair_add_release(t);
                tsk_pfair(t)->sporadic_release = 0;
        }

        check_preempt(t);

        raw_spin_unlock_irqrestore(&pfair_lock, flags);
        TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
}
void sobliv_on_blocked(struct task_struct* t)
{
        if (bt_flag_is_set(t, BTF_IS_TOP_M)) {
                /* There is a fraction of time where we're double-counting the
                 * time tracked by the rq and suspension time.
                 * TODO: Do this recording closer to suspension time. */
                tsk_rt(t)->budget.suspend_timestamp = litmus_clock();

                if (!tsk_rt(t)->budget.timer.armed) {
                        /* The budget exhaustion timer fired as t was waking up, so the
                         * budget routine thought t was running. We need to re-trigger
                         * the budget exhaustion routine via timer. Schedulers do not
                         * call job_completion() when a task blocks, even if t's budget
                         * has been exhausted. Unfortunately, we cannot rerun the
                         * exhaustion routine here due to spinlock ordering issues.
                         * Just re-arm the timer with the exhausted time, re-running
                         * the timer routine immediately once interrupts have been
                         * re-enabled. */

                        /* Clear the exhausted flag so the handler will re-run. This
                         * will not trigger another exhaustion signal since signals
                         * are controlled by BTF_SIG_BUDGET_SENT. */
                        bt_flag_clear(t, BTF_BUDGET_EXHAUSTED);

                        if (likely(!bt_flag_is_set(t, BTF_WAITING_FOR_RELEASE))) {
                                TRACE_TASK(t, "budget timer not armed. "
                                           "Raced with exhaustion-resched? Re-arming.\n");
                                arm_enforcement_timer(t, 1);
                        } else {
                                TRACE_TASK(t, "not arming timer because task is "
                                           "waiting for release.\n");
                        }
                }
        }
}
static void boost_priority(struct task_struct* t)
{
        unsigned long flags;
        psnedf_domain_t* pedf = task_pedf(t);
        lt_t now;

        raw_readyq_lock_irqsave(&pedf->slock, flags);
        now = litmus_clock();

        TRACE_TASK(t, "priority boosted at %llu\n", now);

        tsk_rt(t)->priority_boosted = 1;
        tsk_rt(t)->boost_start_time = now;

        if (pedf->scheduled != t) {
                /* holder may be queued: first stop queue changes */
                raw_spin_lock(&pedf->domain.release_lock);
                if (is_queued(t) &&
                    /* If it is queued, then we need to re-order. */
                    bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
                    /* If we bubbled to the top, then we need to check for
                     * preemptions. */
                    edf_preemption_needed(&pedf->domain, pedf->scheduled))
                        preempt(pedf);
                raw_spin_unlock(&pedf->domain.release_lock);
        } /* else: nothing to do since the job is not queued while scheduled */

        raw_readyq_unlock_irqrestore(&pedf->slock, flags);
}
/* Prepare a task for running in RT mode */
static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
{
        rt_domain_t* edf = task_edf(t);
        psnedf_domain_t* pedf = task_pedf(t);
        unsigned long flags;

        TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
                   t->rt_param.task_params.cpu);

        /* setup job parameters */
        release_at(t, litmus_clock());

        /* The task should be running in the queue, otherwise signal
         * code will try to wake it up with fatal consequences. */
        raw_readyq_lock_irqsave(&pedf->slock, flags);

        if (is_scheduled) {
                /* there shouldn't be anything else scheduled at the time */
                BUG_ON(pedf->scheduled);
                pedf->scheduled = t;
        } else {
                /* !is_scheduled means it is not scheduled right now, but it
                 * does not mean that it is suspended. If it is not suspended,
                 * it still needs to be requeued. If it is suspended, there is
                 * nothing that we need to do as it will be handled by the
                 * wake_up() handler. */
                if (is_running(t)) {
                        requeue(t, edf);
                        /* maybe we have to reschedule */
                        psnedf_preempt_check(pedf);
                }
        }
        raw_readyq_unlock_irqrestore(&pedf->slock, flags);
}
static void unboost_priority(struct task_struct* t)
{
        unsigned long flags;
        psnedf_domain_t* pedf = task_pedf(t);
        lt_t now;

        raw_readyq_lock_irqsave(&pedf->slock, flags);
        now = litmus_clock();

        /* Assumption: this only happens when the job is scheduled;
         * priority-boosted jobs must be scheduled. */
        BUG_ON(pedf->scheduled != t);

        TRACE_TASK(t, "priority restored at %llu\n", now);

        tsk_rt(t)->priority_boosted = 0;
        tsk_rt(t)->boost_start_time = 0;

        /* check if this changes anything */
        if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
                preempt(pedf);

        raw_readyq_unlock_irqrestore(&pedf->slock, flags);
}
inline static void arm_enforcement_timer(struct task_struct* t, int force)
{
        struct enforcement_timer* et;
        lt_t when_to_fire, remaining_budget;
        lt_t now;
        unsigned long flags;

        BUG_ON(!t);
        BUG_ON(!is_realtime(t));

        et = &tsk_rt(t)->budget.timer;
        if (et->armed) {
                TRACE_TASK(t, "timer already armed!\n");
                return;
        }

        if (!force) {
                if ((!budget_enforced(t) ||
                     (budget_enforced(t) &&
                      bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)))
                    &&
                    (!budget_signalled(t) ||
                     (budget_signalled(t) &&
                      bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)))) {
                        TRACE_TASK(t, "trying to arm timer when budget "
                                   "has already been exhausted.\n");
                        return;
                }
        }

        TRACE_TASK(t, "arming enforcement timer.\n");

        /* __hrtimer_start_range_ns() cancels the timer
         * anyway, so we don't have to check whether it is still armed */
        raw_spin_lock_irqsave(&et->lock, flags);

        if (et->armed) {
                TRACE_TASK(t, "timer already armed (race)!\n");
                goto out;
        }

        now = litmus_clock();
        remaining_budget = budget_remaining(t);
        when_to_fire = now + remaining_budget;

        TRACE_TASK(t, "budget remaining: %llu, when_to_fire: %llu\n",
                   remaining_budget, when_to_fire);

        __hrtimer_start_range_ns(&et->timer,
                                 ns_to_ktime(when_to_fire),
                                 0 /* delta */,
                                 HRTIMER_MODE_ABS_PINNED,
                                 /* TODO: need to use non-pinned? */
                                 0 /* no wakeup */);
        et->armed = 1;

out:
        raw_spin_unlock_irqrestore(&et->lock, flags);
}
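/* NOTE (illustration, not part of the plugin code above): the budget code in
 * arm_enforcement_timer(), __on_timeout(), and the sobliv_*()/simple_io_*()
 * callbacks assumes per-task budget state embedded in rt_param with roughly
 * the layout below. Field names are taken from their uses above; the exact
 * definition lives in the LITMUS^RT budget headers and may differ, so treat
 * this as a sketch. */
struct enforcement_timer {
        raw_spinlock_t  lock;   /* serializes arm/cancel against the callback */
        struct hrtimer  timer;  /* fires when the remaining budget runs out */
        int             armed;  /* protected by lock */
};

struct budget_tracker {
        struct enforcement_timer        timer;
        lt_t                            suspend_timestamp; /* start of a draining suspension */
        const struct budget_tracker_ops *ops;              /* e.g., ->on_exhausted() */
        /* BTF_* state bits (BTF_IS_TOP_M, BTF_BUDGET_EXHAUSTED, ...) are kept
         * in a separate flag word manipulated via bt_flag_set()/bt_flag_clear(). */
};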
static void psnedf_task_block(struct task_struct *t)
{
        /* only running tasks can block, thus t is in no queue */
        TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);

        BUG_ON(!is_realtime(t));
        BUG_ON(is_queued(t));
}
static void psnedf_task_wake_up(struct task_struct *task)
{
        unsigned long flags;
        psnedf_domain_t* pedf = task_pedf(task);
        rt_domain_t* edf = task_edf(task);
        lt_t now;

        TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

        raw_readyq_lock_irqsave(&pedf->slock, flags);

        set_task_state(task, TASK_RUNNING);

        BUG_ON(is_queued(task));
        now = litmus_clock();
        if (is_sporadic(task) && is_tardy(task, now)
#ifdef CONFIG_LITMUS_LOCKING
            /* We need to take suspensions because of semaphores into
             * account! If a job resumes after being suspended due to acquiring
             * a semaphore, it should never be treated as a new job release. */
            && !is_priority_boosted(task)
#endif
            ) {
                /* new sporadic release */
                release_at(task, now);
                sched_trace_task_release(task);
        }

        budget_state_machine(task,on_wakeup);

        /* Only add to ready queue if it is not the currently-scheduled
         * task. This could be the case if a task was woken up concurrently
         * on a remote CPU before the executing CPU got around to actually
         * de-scheduling the task, i.e., wake_up() raced with schedule()
         * and won. */
        if (pedf->scheduled != task) {
                requeue(task, edf);
                psnedf_preempt_check(pedf);
        }

        raw_readyq_unlock_irqrestore(&pedf->slock, flags);
        TRACE_TASK(task, "wake up done\n");
}
static void schedule_next_quantum(quanta_t time)
{
        int cpu;

        /* called with interrupts disabled */
        PTRACE("--- Q %lu at %llu PRE-SPIN\n", time, litmus_clock());
        raw_spin_lock(&pfair_lock);
        PTRACE("<<< Q %lu at %llu\n", time, litmus_clock());

        sched_trace_quantum_boundary();

        advance_subtasks(time);
        poll_releases(time);
        schedule_subtasks(time);

        for (cpu = 0; cpu < num_online_cpus(); cpu++)
                if (pstate[cpu]->linked)
                        PTRACE_TASK(pstate[cpu]->linked,
                                    " linked on %d.\n", cpu);
                else
                        PTRACE("(null) linked on %d.\n", cpu);

        /* We are done. Advance time. */
        mb();
        for (cpu = 0; cpu < num_online_cpus(); cpu++) {
                if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) {
                        TRACE("BAD Quantum not acked on %d "
                              "(l:%lu c:%lu p:%lu)\n",
                              cpu,
                              pstate[cpu]->local_tick,
                              pstate[cpu]->cur_tick,
                              pfair_time);
                        pstate[cpu]->missed_quanta++;
                }
                pstate[cpu]->cur_tick = time;
        }
        PTRACE(">>> Q %lu at %llu\n", time, litmus_clock());
        raw_spin_unlock(&pfair_lock);
}
/* Add the task `tsk` to the appropriate queue. Assumes the caller holds the
 * ready lock. */
static void demo_requeue(struct task_struct *tsk,
                         struct demo_cpu_state *cpu_state)
{
        if (is_released(tsk, litmus_clock())) {
                /* Uses __add_ready() instead of add_ready() because we
                 * already hold the ready lock. */
                __add_ready(&cpu_state->local_queues, tsk);
        } else {
                /* Uses add_release() because we DON'T have the release lock. */
                add_release(&cpu_state->local_queues, tsk);
        }
}
static void requeue(struct task_struct* t, rt_domain_t *edf)
{
        if (t->state != TASK_RUNNING)
                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");

        tsk_rt(t)->completed = 0;
        if (is_early_releasing(t) || is_released(t, litmus_clock()))
                __add_ready(edf, t);
        else
                add_release(edf, t); /* it has got to wait */
}
/* pfair_tick - this function is called for every local timer
 *              interrupt.
 */
static void pfair_tick(struct task_struct* t)
{
        struct pfair_state* state = &__get_cpu_var(pfair_state);
        quanta_t time, cur;
        int retry = 10;

        do {
                cur = current_quantum(state);
                PTRACE("q %lu at %llu\n", cur, litmus_clock());

                /* Attempt to advance time. First CPU to get here
                 * will prepare the next quantum.
                 */
                time = cmpxchg(&pfair_time,
                               cur - 1,   /* expected */
                               cur        /* next     */
                        );
                if (time == cur - 1) {
                        /* exchange succeeded */
                        wait_for_quantum(cur - 1, state);
                        schedule_next_quantum(cur);
                        retry = 0;
                } else if (time_before(time, cur - 1)) {
                        /* the whole system missed a tick !? */
                        catchup_quanta(time, cur, state);
                        retry--;
                } else if (time_after(time, cur)) {
                        /* our timer lagging behind!? */
                        TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
                        retry--;
                } else {
                        /* Some other CPU already started scheduling
                         * this quantum. Let it do its job and then update.
                         */
                        retry = 0;
                }
        } while (retry);

        /* Spin locally until time advances. */
        wait_for_quantum(cur, state);

        /* copy assignment */
        /* FIXME: what if we race with a future update? Corrupted state? */
        state->local = state->linked;

        /* signal that we are done */
        mb();
        state->local_tick = state->cur_tick;

        if (state->local != current
            && (is_realtime(current) || is_present(state->local)))
                set_tsk_need_resched(current);
}
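/* NOTE (illustration, not part of the plugin code above): the hand-off in
 * pfair_tick() relies on cmpxchg() so that, out of all CPUs ticking at the
 * same quantum boundary, exactly one observes the expected old value of
 * pfair_time and wins the right to call schedule_next_quantum(). The same
 * pattern, reduced to a stand-alone user-space sketch using GCC's
 * compare-and-swap builtin: */
#include <stdio.h>

static unsigned long pfair_time_demo = 41;

/* Returns non-zero iff this caller advanced pfair_time_demo to `cur`. */
static int try_advance(unsigned long cur)
{
        unsigned long old =
                __sync_val_compare_and_swap(&pfair_time_demo, cur - 1, cur);
        return old == cur - 1;
}

int main(void)
{
        printf("first caller wins:  %d\n", try_advance(42)); /* prints 1 */
        printf("second caller wins: %d\n", try_advance(42)); /* prints 0 */
        return 0;
}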
void simple_io_on_wakeup(struct task_struct* t)
{
        /* we're waking up from an io-based suspension */
        if (tsk_rt(t)->budget.suspend_timestamp) {
                lt_t suspend_cost = litmus_clock() -
                        tsk_rt(t)->budget.suspend_timestamp;
                tsk_rt(t)->budget.suspend_timestamp = 0;
                TRACE_TASK(t, "budget consumed while io-suspended: %llu\n",
                           suspend_cost);
                get_exec_time(t) += suspend_cost;
        } else {
                TRACE_TASK(t, "waking from non-io blocking\n");
        }
}
void sobliv_on_exit_top_m(struct task_struct* t)
{
        if (budget_precisely_tracked(t)) {
                if (tsk_rt(t)->budget.timer.armed) {
                        if (!is_running(t)) {
                                /* The time at which we started draining budget while
                                 * suspended is recorded in suspend_timestamp. It was
                                 * set either when 't' entered the top-m while suspended
                                 * or when 't' blocked. */
                                lt_t suspend_cost;
                                BUG_ON(!tsk_rt(t)->budget.suspend_timestamp);
                                suspend_cost = litmus_clock() -
                                        tsk_rt(t)->budget.suspend_timestamp;
                                TRACE_TASK(t, "budget consumed while suspended: %llu\n",
                                           suspend_cost);
                                get_exec_time(t) += suspend_cost;

                                /* timer should have fired before now */
                                if (get_exec_time(t) + 1000000/10 > get_exec_cost(t)) {
                                        TRACE_TASK(t,
                                                   "budget overrun while suspended by over 1/10 "
                                                   "millisecond! timer should have already fired!\n");
                                        WARN_ON(1);
                                }
                        }

                        TRACE_TASK(t, "stops draining budget\n");

                        /* the callback will handle it if it is executing */
                        if (!hrtimer_callback_running(&tsk_rt(t)->budget.timer.timer)) {
                                /* TODO: record a timestamp if the task isn't running */
                                cancel_enforcement_timer(t);
                        } else {
                                TRACE_TASK(t, "within callback context. skipping operation.\n");
                        }
                } else {
                        TRACE_TASK(t, "was not draining budget\n");
                }
        }
}
void simple_io_on_blocked(struct task_struct* t)
{
        /* hiding is turned on by locking protocols, so if there isn't any
         * hiding, then we're blocking for some other reason. assume it's I/O. */
        int for_io = 0;
#ifdef CONFIG_LITMUS_NESTED_LOCKING
        for_io |= !tsk_rt(t)->blocked_lock;
#endif
#ifdef CONFIG_REALTIME_AUX_TASKS
        for_io |= tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks;
#endif
#ifdef CONFIG_LITMUS_NVIDIA
        for_io |= tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu;
#endif

        /* we drain budget for io-based suspensions */
        if (for_io) {
                /* there is a fraction of time where we're double-counting the
                 * time tracked by the rq and suspension time.
                 * TODO: Do this recording closer to suspension time. */
                tsk_rt(t)->budget.suspend_timestamp = litmus_clock();

                TRACE_TASK(t, "blocking for I/O.\n");

                if (!tsk_rt(t)->budget.timer.armed) {
                        bt_flag_clear(t, BTF_BUDGET_EXHAUSTED);

                        if (likely(!bt_flag_is_set(t, BTF_WAITING_FOR_RELEASE))) {
                                TRACE_TASK(t, "budget timer not armed. "
                                           "Raced with exhaustion-resched? Re-arming.\n");
                                arm_enforcement_timer(t, 1);
                        } else {
                                TRACE_TASK(t, "not arming timer because task is "
                                           "waiting for release.\n");
                        }
                }
        } else {
                TRACE_TASK(t, "blocking for litmus lock. stop draining.\n");
                simple_on_blocked(t);
        }
}
feather_callback void save_task_latency(unsigned long event,
                                        unsigned long when_ptr)
{
        lt_t now = litmus_clock();
        lt_t *when = (lt_t*) when_ptr;
        unsigned int seq_no;
        int cpu = raw_smp_processor_id();
        struct timestamp *ts;

        seq_no = fetch_and_inc((int *) &ts_seq_no);
        if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
                ts->event     = event;
                ts->timestamp = now - *when;
                ts->seq_no    = seq_no;
                ts->cpu       = cpu;
                ts->task_type = TSK_RT;
                __save_irq_flags(ts);
                ft_buffer_finish_write(trace_ts_buf, ts);
        }
}
void sobliv_on_wakeup(struct task_struct* t)
{
        if (bt_flag_is_set(t, BTF_IS_TOP_M)) {
                /* We're waking up while in the top-m. Charge the time spent
                 * suspended while draining against exec_time. suspend_timestamp
                 * was either set when we entered the top-m while asleep, or
                 * when we blocked. */
                if (tsk_rt(t)->budget.suspend_timestamp) {
                        lt_t suspend_cost = litmus_clock() -
                                tsk_rt(t)->budget.suspend_timestamp;
                        tsk_rt(t)->budget.suspend_timestamp = 0;
                        TRACE_TASK(t, "budget consumed while suspended: %llu\n",
                                   suspend_cost);
                        get_exec_time(t) += suspend_cost;
                } else {
                        WARN_ON(!bt_flag_is_set(t, BTF_WAITING_FOR_RELEASE));
                }
        }
}
static enum hrtimer_restart __on_timeout(struct hrtimer *timer)
{
        enum hrtimer_restart restart = HRTIMER_NORESTART;
        unsigned long flags;

        struct budget_tracker* bt =
                container_of(
                        container_of(timer,
                                     struct enforcement_timer,
                                     timer),
                        struct budget_tracker,
                        timer);

        struct task_struct* t =
                container_of(
                        container_of(bt, struct rt_param, budget),
                        struct task_struct,
                        rt_param);

        TRACE_TASK(t, "budget timer interrupt fired at time %llu\n",
                   litmus_clock());

        raw_spin_lock_irqsave(&bt->timer.lock, flags);
        tsk_rt(t)->budget.timer.armed = 0;
        raw_spin_unlock_irqrestore(&bt->timer.lock, flags);

        if (unlikely(bt_flag_is_set(t, BTF_WAITING_FOR_RELEASE))) {
                TRACE_TASK(t,
                           "spurious exhaustion while waiting for release. dropping.\n");
                goto out;
        }

        restart = bt->ops->on_exhausted(t, !IN_SCHEDULE);

        raw_spin_lock_irqsave(&bt->timer.lock, flags);
        tsk_rt(t)->budget.timer.armed = (restart == HRTIMER_RESTART);
        raw_spin_unlock_irqrestore(&bt->timer.lock, flags);

out:
        return restart;
}
void sobliv_on_enter_top_m(struct task_struct* t)
{
        if (!bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) {
                if (tsk_rt(t)->budget.timer.armed)
                        TRACE_TASK(t, "budget timer already armed.\n");
                else {
                        /* if we're blocked, then record the time at which we
                         * started measuring */
                        if (!is_running(t))
                                tsk_rt(t)->budget.suspend_timestamp =
                                        litmus_clock();

                        /* the callback will handle it if it is executing */
                        if (!hrtimer_callback_running(&tsk_rt(t)->budget.timer.timer)) {
                                arm_enforcement_timer(t, 0);
                        } else {
                                TRACE_TASK(t,
                                           "within callback context. deferring timer arm.\n");
                        }
                }
        }
}
static quanta_t current_quantum(struct pfair_state* state)
{
        lt_t t = litmus_clock() - state->offset;
        return time2quanta(t, FLOOR);
}
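/* NOTE (illustration, not part of the plugin code above): time2quanta() maps
 * a time value in nanoseconds onto a quantum index by dividing by the quantum
 * (tick) length and rounding down (FLOOR) or up (CEIL). A simplified,
 * self-contained sketch of that conversion -- the real helper derives the
 * quantum length from the kernel tick period instead of taking it as a
 * parameter: */
static inline unsigned long ns_to_quanta(unsigned long long time_ns,
                                         unsigned long long quantum_ns,
                                         int round_up /* CEIL if non-zero */)
{
        unsigned long long q = time_ns / quantum_ns;
        if (round_up && (time_ns % quantum_ns))
                q++; /* a partially elapsed quantum counts as a full one */
        return (unsigned long) q;
}
/* Example, with 1 ms quanta: ns_to_quanta(2500000, 1000000, 0) == 2 (FLOOR),
 *                            ns_to_quanta(2500000, 1000000, 1) == 3 (CEIL). */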
static void pfair_task_block(struct task_struct *t)
{
        BUG_ON(!is_realtime(t));
        TRACE_TASK(t, "blocks at %llu, state:%d\n",
                   litmus_clock(), t->state);
}
static struct task_struct* psnedf_schedule(struct task_struct * prev)
{
        psnedf_domain_t* pedf = local_pedf;
        rt_domain_t* edf = &pedf->domain;
        struct task_struct* next;

        int out_of_time, sleep, preempt, np, exists, blocks, resched;

        raw_readyq_lock(&pedf->slock);

        /* sanity checking
         * differently from gedf, when a task exits (dead)
         * pedf->scheduled may be NULL and prev _is_ realtime
         */
        BUG_ON(pedf->scheduled && pedf->scheduled != prev);
        BUG_ON(pedf->scheduled && !is_realtime(prev));

        /* (0) Determine state */
        exists      = pedf->scheduled != NULL;
        blocks      = exists && !is_running(pedf->scheduled);
        out_of_time = exists && budget_enforced(pedf->scheduled) &&
                      bt_flag_is_set(pedf->scheduled, BTF_BUDGET_EXHAUSTED);
        np          = exists && is_np(pedf->scheduled);
        sleep       = exists && is_completed(pedf->scheduled);
        preempt     = edf_preemption_needed(edf, prev);

        /* If we need to preempt do so.
         * The following checks set resched to 1 in case of special
         * circumstances.
         */
        resched = preempt;

        /* Do budget stuff */
        if (blocks)
                budget_state_machine(prev,on_blocked);
        else if (sleep)
                budget_state_machine(prev,on_sleep);
        else if (preempt)
                budget_state_machine(prev,on_preempt);

        /* If a task blocks we have no choice but to reschedule. */
        if (blocks)
                resched = 1;

        /* Request a sys_exit_np() call if we would like to preempt but cannot.
         * Multiple calls to request_exit_np() don't hurt.
         */
        if (np && (out_of_time || preempt || sleep))
                request_exit_np(pedf->scheduled);

        /* Any task that is preemptable and either exhausts its execution
         * budget or wants to sleep completes. We may have to reschedule after
         * this.
         */
        if (!np && (out_of_time || sleep) && !blocks) {
                job_completion(pedf->scheduled, !sleep);
                resched = 1;
        }

        /* The final scheduling decision. Do we need to switch for some reason?
         * Switch if we are in RT mode and have no task or if we need to
         * resched.
         */
        next = NULL;
        if ((!np || blocks) && (resched || !exists)) {
                /* When preempting a task that does not block, then
                 * re-insert it into either the ready queue or the
                 * release queue (if it completed). requeue() picks
                 * the appropriate queue.
                 */
                if (pedf->scheduled && !blocks)
                        requeue(pedf->scheduled, edf);
                next = __take_ready(edf);
        } else
                /* Only override Linux scheduler if we have a real-time task
                 * scheduled that needs to continue.
                 */
                if (exists)
                        next = prev;

        if (next) {
                TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
        } else {
                TRACE("becoming idle at %llu\n", litmus_clock());
        }

        pedf->scheduled = next;
        sched_state_task_picked();
        raw_readyq_unlock(&pedf->slock);

        return next;
}