/*
 * task_cputime_adjusted - report @p's user/system time after smoothing.
 * @p:  task to query
 * @ut: out - adjusted user time
 * @st: out - adjusted system time
 *
 * Reads the raw per-task utime/stime plus sum_exec_runtime and lets
 * cputime_adjust() reconcile them against p->prev_cputime.
 */
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	/* Seed the scheduler runtime; utime/stime are filled in below. */
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

/*
 * Must be called with siglock held.
 *
 * Same as task_cputime_adjusted(), but for the whole thread group:
 * raw times are summed by thread_group_cputime() and then adjusted
 * against the group-wide p->signal->prev_cputime.
 */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/*
 * vtime_delta - time elapsed since @tsk's last vtime snapshot, as
 * measured by local_clock(). Returns 0 if the clock reads earlier than
 * the snapshot (clamps rather than returning a huge unsigned delta).
 */
static unsigned long long vtime_delta(struct task_struct *tsk)
{
	unsigned long long clock;

	clock = local_clock();
	/* Clock read below the snapshot: treat as no time elapsed. */
	if (clock < tsk->vtime_snap)
		return 0;

	return clock - tsk->vtime_snap;
}
/* * Accumulate raw cputime values of dead tasks (sig->[us]time) and live * tasks (sum on group iteration) belonging to @tsk's group. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; cputime_t utime, stime; struct task_struct *t; times->utime = sig->utime; times->stime = sig->stime; times->sum_exec_runtime = sig->sum_sched_runtime; rcu_read_lock(); /* make sure we can trust tsk->thread_group list */ if (!likely(pid_alive(tsk))) goto out; t = tsk; do { task_cputime(tsk, &utime, &stime); times->utime += utime; times->stime += stime; times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: rcu_read_unlock(); }
/*
 * virt_ticks - @p's accumulated user time converted with
 * cputime_to_expires(). The system-time half of task_cputime() is
 * fetched but deliberately unused.
 */
static inline unsigned long long virt_ticks(struct task_struct *p)
{
	cputime_t user_time, sys_time;

	task_cputime(p, &user_time, &sys_time);

	return cputime_to_expires(user_time);
}
/*
 * virt_ticks - @p's accumulated user time as a raw u64.
 * The system-time output of task_cputime() is required by its
 * signature but not used here.
 */
static inline u64 virt_ticks(struct task_struct *p)
{
	u64 user_time, sys_time;

	task_cputime(p, &user_time, &sys_time);

	return user_time;
}
void ParallelizerInternal::evaluate(int nfdir, int nadir){ // Let the first call (which may contain memory allocations) be serial when using OpenMP if(mode_== SERIAL || (first_call_ && mode_ == OPENMP)){ for(int task=0; task<funcs_.size(); ++task){ evaluateTask(task,nfdir,nadir); } } else if(mode_== OPENMP) { #ifdef WITH_OPENMP // Allocate some lists to collect statistics std::vector<int> task_allocation(funcs_.size()); std::vector<int> task_order(funcs_.size()); std::vector<double> task_cputime(funcs_.size()); std::vector<double> task_starttime(funcs_.size()); std::vector<double> task_endtime(funcs_.size()); // A private counter int cnt=0; #pragma omp parallel for firstprivate(cnt) for(int task=0; task<funcs_.size(); ++task) { if (gather_stats_ && task==0) { stats_["max_threads"] = omp_get_max_threads(); stats_["num_threads"] = omp_get_num_threads(); } task_allocation[task] = omp_get_thread_num(); task_starttime[task] = omp_get_wtime(); // Do the actual work evaluateTask(task,nfdir,nadir); task_endtime[task] = omp_get_wtime(); task_cputime[task] = task_endtime[task] - task_starttime[task]; task_order[task] = cnt++; } if (gather_stats_) { stats_["task_allocation"] = task_allocation; stats_["task_order"] = task_order; stats_["task_cputime"] = task_cputime; } // Measure all times relative to the earliest start_time. double start = *std::min_element(task_starttime.begin(),task_starttime.end()); for (int task=0; task<funcs_.size(); ++task) { task_starttime[task] = task_starttime[task] - start; task_endtime[task] = task_endtime[task] - start; } if (gather_stats_) { stats_["task_starttime"] = task_starttime; stats_["task_endtime"] = task_endtime; } #endif //WITH_OPENMP #ifndef WITH_OPENMP casadi_error("ParallelizerInternal::evaluate: OPENMP support was not available during CasADi compilation"); #endif //WITH_OPENMP } else if(mode_ == MPI){ casadi_error("ParallelizerInternal::evaluate: MPI not implemented"); } first_call_ = false; }
/*
 * posix_cpu_timers_exit_group - clean up the group's CPU timer lists
 * using the exiting task's times added on top of the totals already
 * accumulated in the signal struct for previously-exited threads.
 */
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	struct signal_struct *const sig = tsk->signal;
	cputime_t ut, st;

	task_cputime(tsk, &ut, &st);

	/* Group totals = this task's times + already-reaped threads' times. */
	cleanup_timers(sig->cpu_timers,
		       ut + sig->utime,
		       st + sig->stime,
		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
/*
 * These are both called with the siglock held, when the current thread
 * is being reaped. When the final (leader) thread in the group is reaped,
 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	cputime_t ut, st;

	/* Feed the exiting task's accumulated runtime into the entropy pool. */
	add_device_randomness((const void *) &tsk->se.sum_exec_runtime,
			      sizeof(unsigned long long));

	task_cputime(tsk, &ut, &st);
	cleanup_timers(tsk->cpu_timers, ut, st, tsk->se.sum_exec_runtime);
}
/*
 * __delayacct_add_tsk - fold @tsk's delay-accounting numbers into @d.
 *
 * Each running total in @d saturates: if adding the new contribution
 * wraps the s64 value below its previous reading, the total is reset to
 * 0 (a zero XXX_total with a non-zero XXX_count marks overflow).
 * Always returns 0.
 */
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
	cputime_t utime, stime, stimescaled, utimescaled;
	unsigned long long t2, t3;
	unsigned long flags, t1;
	s64 tmp;

	/* Real CPU time consumed (user + system), converted to nanoseconds. */
	task_cputime(tsk, &utime, &stime);
	tmp = (s64)d->cpu_run_real_total;
	tmp += cputime_to_nsecs(utime + stime);
	/* Wrapped below the old value => overflow: reset to 0. */
	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;

	/* Same accumulation for the frequency-scaled cputime variants. */
	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
	tmp = (s64)d->cpu_scaled_run_real_total;
	tmp += cputime_to_nsecs(utimescaled + stimescaled);
	d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

	/*
	 * No locking available for sched_info (and too expensive to add one)
	 * Mitigate by taking snapshot of values
	 */
	t1 = tsk->sched_info.pcount;
	t2 = tsk->sched_info.run_delay;
	t3 = tsk->se.sum_exec_runtime;

	d->cpu_count += t1;

	tmp = (s64)d->cpu_delay_total + t2;
	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;

	tmp = (s64)d->cpu_run_virtual_total + t3;
	d->cpu_run_virtual_total = (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;

	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
	/* tsk->delays fields are protected by delays->lock. */
	spin_lock_irqsave(&tsk->delays->lock, flags);
	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;
	d->freepages_count += tsk->delays->freepages_count;
	spin_unlock_irqrestore(&tsk->delays->lock, flags);

	return 0;
}
/*
 * check_for_tasks - warn about tasks still on @cpu.
 *
 * Walks every process under tasklist_lock and prints a warning for any
 * task that sits on @cpu, is TASK_RUNNING, and has non-zero user or
 * system time.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;
	cputime_t ut, st;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		task_cputime(p, &ut, &st);

		/* Guard clauses: skip tasks that are not a concern. */
		if (task_cpu(p) != cpu || p->state != TASK_RUNNING)
			continue;
		if (!ut && !st)
			continue;

		pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
			p->comm, task_pid_nr(p), cpu, p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}
/*
 * task_cputime_adjusted - report @p's user/system time after smoothing.
 * @p:  task to query
 * @ut: out - adjusted user time
 * @st: out - adjusted system time
 *
 * Reads the raw per-task utime/stime plus sum_exec_runtime and lets
 * cputime_adjust() reconcile them against p->prev_cputime.
 */
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	/* Seed the scheduler runtime; utime/stime are filled in below. */
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

/*
 * Must be called with siglock held.
 *
 * Group-wide counterpart: raw times are summed by thread_group_cputime()
 * and then adjusted against p->signal->prev_cputime.
 */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	u64 utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip account
	 * those pending times and rely only on values updated on tick or
	 * other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		/* Start from times accumulated for already-exited threads. */
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		/* Add each live thread's own contribution. */
		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	/*
	 * NOTE(review): this snippet appears truncated here — mainline
	 * continues with done_seqretry_irqrestore() and rcu_read_unlock();
	 * confirm against the complete file.
	 */
/*
 * __delayacct_add_tsk - fold @tsk's delay-accounting numbers into @d.
 *
 * Each running total in @d saturates: if adding the new contribution
 * wraps the s64 value below its previous reading, the total is reset to
 * 0 (a zero XXX_total with a non-zero XXX_count marks overflow).
 * Always returns 0, even when @tsk has no delays structure.
 */
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
	s64 tmp;
	unsigned long t1;
	unsigned long long t2, t3;
	unsigned long flags;
	struct timespec ts;
	cputime_t utime, stime, stimescaled, utimescaled;

	/* Though tsk->delays accessed later, early exit avoids
	 * unnecessary returning of other data
	 */
	if (!tsk->delays)
		goto done;

	/* Real CPU time consumed (user + system), via timespec -> ns. */
	tmp = (s64)d->cpu_run_real_total;
	task_cputime(tsk, &utime, &stime);
	cputime_to_timespec(utime + stime, &ts);
	tmp += timespec_to_ns(&ts);
	/* Wrapped below the old value => overflow: reset to 0. */
	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;

	/* Same accumulation for the frequency-scaled cputime variants. */
	tmp = (s64)d->cpu_scaled_run_real_total;
	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
	cputime_to_timespec(utimescaled + stimescaled, &ts);
	tmp += timespec_to_ns(&ts);
	d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

	/*
	 * No locking available for sched_info (and too expensive to add one)
	 * Mitigate by taking snapshot of values
	 */
	t1 = tsk->sched_info.pcount;
	t2 = tsk->sched_info.run_delay;
	t3 = tsk->se.sum_exec_runtime;

	d->cpu_count += t1;

	tmp = (s64)d->cpu_delay_total + t2;
	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;

	tmp = (s64)d->cpu_run_virtual_total + t3;
	d->cpu_run_virtual_total = (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;

	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
	/* tsk->delays fields are protected by delays->lock. */
	spin_lock_irqsave(&tsk->delays->lock, flags);
	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;
	d->freepages_count += tsk->delays->freepages_count;
	spin_unlock_irqrestore(&tsk->delays->lock, flags);

done:
	return 0;
}