static int
xenoprof_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
                 struct pt_regs *regs, unsigned long stamp)
{
    unsigned long ip = profile_pc(regs);
    int event = arg->pmd_eventid;
    struct vcpu *v = current;
    int mode = xenoprofile_get_mode(v, regs);

    // see pfm_do_interrupt_handler() in xen/arch/ia64/linux-xen/perfmon.c.
    // It always passes task as NULL; this is a workaround for that.
    BUG_ON(task != NULL);

    arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
    if (!allow_virq || !allow_ints)
        return 0;

    // Note that xenoprof_log_event() actually expects cpu_user_regs; cast
    // back appropriately when doing the backtrace implementation on ia64.
    xenoprof_log_event(v, regs, ip, mode, event);

    // send VIRQ_XENOPROF
    if (is_active(v->domain) && !xenoprof_is_xen_mode(v, regs) &&
        !is_idle_vcpu(v))
        send_guest_vcpu_virq(v, VIRQ_XENOPROF);

    return 0;
}
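Every snippet in this section keys off the is_idle_vcpu() predicate from xen/include/xen/sched.h. As a rough sketch of how it is defined (the exact form varies between Xen versions, so treat this as illustrative rather than authoritative), it simply checks whether the vCPU belongs to the idle domain:

static inline bool_t is_idle_domain(const struct domain *d)
{
    /* Sketch: the idle domain is built by Xen itself and uses a reserved ID. */
    return unlikely(d->domain_id == DOMID_IDLE);
}

static inline bool_t is_idle_vcpu(const struct vcpu *v)
{
    return is_idle_domain(v->domain);
}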
/**
 * This function allocates scheduler-specific data for a VCPU
 *
 * @param ops       Pointer to this instance of the scheduler structure
 *
 * @return          Pointer to the allocated data
 */
static void *
a653sched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
{
    a653sched_priv_t *sched_priv = SCHED_PRIV(ops);
    arinc653_vcpu_t *svc;
    unsigned int entry;
    unsigned long flags;

    /*
     * Allocate memory for the ARINC 653-specific scheduler data information
     * associated with the given VCPU (vc).
     */
    svc = xmalloc(arinc653_vcpu_t);
    if ( svc == NULL )
        return NULL;

    spin_lock_irqsave(&sched_priv->lock, flags);

    /*
     * Add every one of dom0's vcpus to the schedule, as long as there are
     * slots available.
     */
    if ( vc->domain->domain_id == 0 )
    {
        entry = sched_priv->num_schedule_entries;

        if ( entry < ARINC653_MAX_DOMAINS_PER_SCHEDULE )
        {
            sched_priv->schedule[entry].dom_handle[0] = '\0';
            sched_priv->schedule[entry].vcpu_id = vc->vcpu_id;
            sched_priv->schedule[entry].runtime = DEFAULT_TIMESLICE;
            sched_priv->schedule[entry].vc = vc;

            sched_priv->major_frame += DEFAULT_TIMESLICE;
            ++sched_priv->num_schedule_entries;
        }
    }

    /*
     * Initialize our ARINC 653 scheduler-specific information for the VCPU.
     * The VCPU starts "asleep." When Xen is ready for the VCPU to run, it
     * will call the vcpu_wake scheduler callback function and our scheduler
     * will mark the VCPU awake.
     */
    svc->vc = vc;
    svc->awake = 0;
    if ( !is_idle_vcpu(vc) )
        list_add(&svc->list, &SCHED_PRIV(ops)->vcpu_list);
    update_schedule_vcpus(ops);

    spin_unlock_irqrestore(&sched_priv->lock, flags);

    return svc;
}
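The ARINC 653 snippets here and further below rely on a few small accessor macros from sched_arinc653.c. The following is a sketch from memory of roughly how they read (the names appear in the code above, but the exact bodies may differ between versions): SCHED_PRIV() fetches the scheduler-wide private data hung off the scheduler instance, AVCPU() fetches the per-vCPU private data, and IDLETASK() returns the idle vCPU of a pCPU.

#define SCHED_PRIV(s) ((a653sched_priv_t *)((s)->sched_data))
#define AVCPU(vc)     ((arinc653_vcpu_t *)(vc)->sched_priv)
#define IDLETASK(cpu) (idle_vcpu[cpu])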
void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
    cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);

    /* Finally get off the boot stack. */
    reset_stack_and_jump(idle_loop);
}
void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_vcpu(v));
    /* TODO
       cpumask_set_cpu(v->processor, v->domain->domain_dirty_cpumask);
       cpumask_set_cpu(v->processor, v->vcpu_dirty_cpumask);
    */

    reset_stack_and_jump(idle_loop);
}
int virt_timer_restore(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Stop the Xen software timer and move both timers to the pCPU the
     * vCPU now runs on, then reload the virtual timer state into the
     * hardware registers. */
    stop_timer(&v->arch.virt_timer.timer);
    migrate_timer(&v->arch.virt_timer.timer, v->processor);
    migrate_timer(&v->arch.phys_timer.timer, v->processor);

    WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
    WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
    WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);

    return 0;
}
static void continue_new_vcpu(struct vcpu *prev)
{
    schedule_tail(prev);

    if ( is_idle_vcpu(current) )
        reset_stack_and_jump(idle_loop);
    else if ( is_32bit_domain(current->domain) )
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu32);
    else
        /* check_wakeup_from_wait(); */
        reset_stack_and_jump(return_to_new_vcpu64);
}
/**
 * This function frees scheduler-specific VCPU data
 *
 * @param ops       Pointer to this instance of the scheduler structure
 */
static void
a653sched_free_vdata(const struct scheduler *ops, void *priv)
{
    arinc653_vcpu_t *av = priv;

    if (av == NULL)
        return;

    if ( !is_idle_vcpu(av->vc) )
        list_del(&av->list);

    xfree(av);
    update_schedule_vcpus(ops);
}
int virt_timer_save(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Save the control register and disable the hardware timer while the
     * vCPU is descheduled; if it was enabled and not masked, arm a Xen
     * software timer for the equivalent deadline instead. */
    v->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
    WRITE_SYSREG32(v->arch.virt_timer.ctl & ~CNTx_CTL_ENABLE, CNTV_CTL_EL0);
    v->arch.virt_timer.cval = READ_SYSREG64(CNTV_CVAL_EL0);
    if ( (v->arch.virt_timer.ctl & CNTx_CTL_ENABLE) &&
         !(v->arch.virt_timer.ctl & CNTx_CTL_MASK) )
    {
        set_timer(&v->arch.virt_timer.timer,
                  ticks_to_ns(v->arch.virt_timer.cval +
                              v->domain->arch.virt_timer_base.offset -
                              boot_count));
    }

    return 0;
}
void save_rest_processor_state(void)
{
    if ( !is_idle_vcpu(current) )
        unlazy_fpu(current);

#if defined(CONFIG_X86_64)
    rdmsrl(MSR_CSTAR, saved_cstar);
    rdmsrl(MSR_LSTAR, saved_lstar);
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    {
        rdmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp);
        rdmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip);
    }
#endif
}
int vcpu_initialise(struct vcpu *v)
{
    int rc = 0;

    BUILD_BUG_ON( sizeof(struct cpu_info) > STACK_SIZE );

    v->arch.stack = alloc_xenheap_pages(STACK_ORDER, MEMF_node(vcpu_to_node(v)));
    if ( v->arch.stack == NULL )
        return -ENOMEM;

    v->arch.cpu_info = (struct cpu_info *)(v->arch.stack
                                           + STACK_SIZE
                                           - sizeof(struct cpu_info));

    memset(&v->arch.saved_context, 0, sizeof(v->arch.saved_context));
    v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
    v->arch.saved_context.pc = (register_t)continue_new_vcpu;

    /* Idle VCPUs don't need the rest of this setup */
    if ( is_idle_vcpu(v) )
        return rc;

    v->arch.sctlr = SCTLR_GUEST_INIT;

    /*
     * By default exposes an SMP system with AFF0 set to the VCPU ID
     * TODO: Handle multi-threading processor and cluster
     */
    v->arch.vmpidr = MPIDR_SMP | (v->vcpu_id << MPIDR_AFF0_SHIFT);

    v->arch.actlr = READ_SYSREG32(ACTLR_EL1);

    processor_vcpu_initialise(v);

    if ( (rc = vcpu_vgic_init(v)) != 0 )
        goto fail;

    if ( (rc = vcpu_vtimer_init(v)) != 0 )
        goto fail;

    return rc;

fail:
    vcpu_destroy(v);
    return rc;
}
void restore_rest_processor_state(void)
{
    struct vcpu *v = current;

    load_TR();

#if defined(CONFIG_X86_64)
    /* Recover syscall MSRs */
    wrmsrl(MSR_LSTAR, saved_lstar);
    wrmsrl(MSR_CSTAR, saved_cstar);
    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);

    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    {
        /* Recover sysenter MSRs */
        wrmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp);
        wrmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip);
        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    }
#else /* !defined(CONFIG_X86_64) */
    if ( supervisor_mode_kernel && cpu_has_sep )
        wrmsr(MSR_IA32_SYSENTER_ESP, &init_tss[smp_processor_id()].esp1, 0);
#endif

    /* Maybe load the debug registers. */
    BUG_ON(is_hvm_vcpu(v));
    if ( !is_idle_vcpu(v) && unlikely(v->arch.guest_context.debugreg[7]) )
    {
        write_debugreg(0, v->arch.guest_context.debugreg[0]);
        write_debugreg(1, v->arch.guest_context.debugreg[1]);
        write_debugreg(2, v->arch.guest_context.debugreg[2]);
        write_debugreg(3, v->arch.guest_context.debugreg[3]);
        write_debugreg(6, v->arch.guest_context.debugreg[6]);
        write_debugreg(7, v->arch.guest_context.debugreg[7]);
    }

    /* Reload FPU state on next FPU use. */
    stts();

    if (cpu_has_pat)
        wrmsrl(MSR_IA32_CR_PAT, host_pat);

    mtrr_ap_init();
    mcheck_init(&boot_cpu_data);
}
static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
    /*
     * Edge-triggered interrupts can be used for the virtual timer. Even
     * if the timer output signal is masked in the context switch, the
     * GIC will keep track of any interrupts raised while IRQs are
     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
     * will be injected into Xen.
     *
     * If an IDLE vCPU was scheduled next then we should ignore the
     * interrupt.
     */
    if ( unlikely(is_idle_vcpu(current)) )
        return;

    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
}
int vcpu_initialise(struct vcpu *v)
{
    int rc = 0;

    BUILD_BUG_ON( sizeof(struct cpu_info) > STACK_SIZE );

    v->arch.stack = alloc_xenheap_pages(STACK_ORDER, MEMF_node(vcpu_to_node(v)));
    if ( v->arch.stack == NULL )
        return -ENOMEM;

    v->arch.cpu_info = (struct cpu_info *)(v->arch.stack
                                           + STACK_SIZE
                                           - sizeof(struct cpu_info));

    memset(&v->arch.saved_context, 0, sizeof(v->arch.saved_context));
    v->arch.saved_context.sp = (register_t)v->arch.cpu_info;
    v->arch.saved_context.pc = (register_t)continue_new_vcpu;

    /* Idle VCPUs don't need the rest of this setup */
    if ( is_idle_vcpu(v) )
        return rc;

    v->arch.sctlr = SCTLR_GUEST_INIT;

    v->arch.vmpidr = MPIDR_SMP | vcpuid_to_vaffinity(v->vcpu_id);

    v->arch.actlr = READ_SYSREG32(ACTLR_EL1);

    processor_vcpu_initialise(v);

    if ( (rc = vcpu_vgic_init(v)) != 0 )
        goto fail;

    if ( (rc = vcpu_vtimer_init(v)) != 0 )
        goto fail;

    return rc;

fail:
    vcpu_destroy(v);
    return rc;
}
/*
 * This function is used by cpu_hotplug code from stop_machine context.
 * Hence we can avoid needing to take the
 */
void cpu_disable_scheduler(void)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int cpu = smp_processor_id();

    for_each_domain ( d )
    {
        for_each_vcpu ( d, v )
        {
            if ( is_idle_vcpu(v) )
                continue;

            if ( (cpus_weight(v->cpu_affinity) == 1) &&
                 cpu_isset(cpu, v->cpu_affinity) )
            {
                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
                       v->domain->domain_id, v->vcpu_id);
                cpus_setall(v->cpu_affinity);
            }

            /*
             * Migrate single-shot timers to CPU0. A new cpu will automatically
             * be chosen when the timer is next re-set.
             */
            if ( v->singleshot_timer.cpu == cpu )
                migrate_timer(&v->singleshot_timer, 0);

            if ( v->processor == cpu )
            {
                set_bit(_VPF_migrating, &v->pause_flags);
                vcpu_sleep_nosync(v);
                vcpu_migrate(v);
            }
        }
    }
}
static inline void vcpu_urgent_count_update(struct vcpu *v)
{
    if ( is_idle_vcpu(v) )
        return;

    if ( unlikely(v->is_urgent) )
    {
        if ( !test_bit(_VPF_blocked, &v->pause_flags) ||
             !test_bit(v->vcpu_id, v->domain->poll_mask) )
        {
            v->is_urgent = 0;
            atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
        }
    }
    else
    {
        if ( unlikely(test_bit(_VPF_blocked, &v->pause_flags) &&
                      test_bit(v->vcpu_id, v->domain->poll_mask)) )
        {
            v->is_urgent = 1;
            atomic_inc(&per_cpu(schedule_data, v->processor).urgent_count);
        }
    }
}
static void ctxt_switch_from(struct vcpu *p)
{
    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to save the context of an idle VCPU. */
    if ( is_idle_vcpu(p) )
        return;

    p2m_save_state(p);

    /* CP 15 */
    p->arch.csselr = READ_SYSREG(CSSELR_EL1);

    /* Control Registers */
    p->arch.cpacr = READ_SYSREG(CPACR_EL1);

    p->arch.contextidr = READ_SYSREG(CONTEXTIDR_EL1);
    p->arch.tpidr_el0 = READ_SYSREG(TPIDR_EL0);
    p->arch.tpidrro_el0 = READ_SYSREG(TPIDRRO_EL0);
    p->arch.tpidr_el1 = READ_SYSREG(TPIDR_EL1);

    /* Arch timer */
    p->arch.cntkctl = READ_SYSREG32(CNTKCTL_EL1);
    virt_timer_save(p);

    if ( is_32bit_domain(p->domain) && cpu_has_thumbee )
    {
        p->arch.teecr = READ_SYSREG32(TEECR32_EL1);
        p->arch.teehbr = READ_SYSREG32(TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    p->arch.joscr = READ_CP32(JOSCR);
    p->arch.jmcr = READ_CP32(JMCR);
#endif

    isb();

    /* MMU */
    p->arch.vbar = READ_SYSREG(VBAR_EL1);
    p->arch.ttbcr = READ_SYSREG(TCR_EL1);
    p->arch.ttbr0 = READ_SYSREG64(TTBR0_EL1);
    p->arch.ttbr1 = READ_SYSREG64(TTBR1_EL1);
    if ( is_32bit_domain(p->domain) )
        p->arch.dacr = READ_SYSREG(DACR32_EL2);
    p->arch.par = READ_SYSREG64(PAR_EL1);
#if defined(CONFIG_ARM_32)
    p->arch.mair0 = READ_CP32(MAIR0);
    p->arch.mair1 = READ_CP32(MAIR1);
    p->arch.amair0 = READ_CP32(AMAIR0);
    p->arch.amair1 = READ_CP32(AMAIR1);
#else
    p->arch.mair = READ_SYSREG64(MAIR_EL1);
    p->arch.amair = READ_SYSREG64(AMAIR_EL1);
#endif

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    p->arch.dfar = READ_CP32(DFAR);
    p->arch.ifar = READ_CP32(IFAR);
    p->arch.dfsr = READ_CP32(DFSR);
#elif defined(CONFIG_ARM_64)
    p->arch.far = READ_SYSREG64(FAR_EL1);
    p->arch.esr = READ_SYSREG64(ESR_EL1);
#endif

    if ( is_32bit_domain(p->domain) )
        p->arch.ifsr = READ_SYSREG(IFSR32_EL2);
    p->arch.afsr0 = READ_SYSREG(AFSR0_EL1);
    p->arch.afsr1 = READ_SYSREG(AFSR1_EL1);

    /* XXX MPU */

    /* VFP */
    vfp_save_state(p);

    /* VGIC */
    gic_save_state(p);

    isb();
}
/**
 * Xen scheduler callback function to select a VCPU to run.
 * This is the main scheduler routine.
 *
 * @param ops       Pointer to this instance of the scheduler structure
 * @param now       Current time
 *
 * @return          Address of the VCPU structure scheduled to be run next
 *                  Amount of time to execute the returned VCPU
 *                  Flag for whether the VCPU was migrated
 */
static struct task_slice
a653sched_do_schedule(
    const struct scheduler *ops,
    s_time_t now,
    bool_t tasklet_work_scheduled)
{
    struct task_slice ret;                      /* hold the chosen domain */
    struct vcpu * new_task = NULL;
    static unsigned int sched_index = 0;
    static s_time_t next_switch_time;
    a653sched_priv_t *sched_priv = SCHED_PRIV(ops);
    const unsigned int cpu = smp_processor_id();
    unsigned long flags;

    spin_lock_irqsave(&sched_priv->lock, flags);

    if ( sched_priv->num_schedule_entries < 1 )
        sched_priv->next_major_frame = now + DEFAULT_TIMESLICE;
    else if ( now >= sched_priv->next_major_frame )
    {
        /* time to enter a new major frame
         * the first time this function is called, this will be true */
        /* start with the first domain in the schedule */
        sched_index = 0;
        sched_priv->next_major_frame = now + sched_priv->major_frame;
        next_switch_time = now + sched_priv->schedule[0].runtime;
    }
    else
    {
        while ( (now >= next_switch_time)
                && (sched_index < sched_priv->num_schedule_entries) )
        {
            /* time to switch to the next domain in this major frame */
            sched_index++;
            next_switch_time += sched_priv->schedule[sched_index].runtime;
        }
    }

    /*
     * If we exhausted the domains in the schedule and still have time left
     * in the major frame then switch next at the next major frame.
     */
    if ( sched_index >= sched_priv->num_schedule_entries )
        next_switch_time = sched_priv->next_major_frame;

    /*
     * If there are more domains to run in the current major frame, set
     * new_task equal to the address of next domain's VCPU structure.
     * Otherwise, set new_task equal to the address of the idle task's VCPU
     * structure.
     */
    new_task = (sched_index < sched_priv->num_schedule_entries)
        ? sched_priv->schedule[sched_index].vc
        : IDLETASK(cpu);

    /* Check to see if the new task can be run (awake & runnable). */
    if ( !((new_task != NULL)
           && (AVCPU(new_task) != NULL)
           && AVCPU(new_task)->awake
           && vcpu_runnable(new_task)) )
        new_task = IDLETASK(cpu);
    BUG_ON(new_task == NULL);

    /*
     * Check to make sure we did not miss a major frame.
     * This is a good test for robust partitioning.
     */
    BUG_ON(now >= sched_priv->next_major_frame);

    spin_unlock_irqrestore(&sched_priv->lock, flags);

    /* Tasklet work (which runs in idle VCPU context) overrides all else. */
    if ( tasklet_work_scheduled )
        new_task = IDLETASK(cpu);

    /* Running this task would result in a migration */
    if ( !is_idle_vcpu(new_task)
         && (new_task->processor != cpu) )
        new_task = IDLETASK(cpu);

    /*
     * Return the amount of time the next domain has to run and the address
     * of the selected task's VCPU structure.
     */
    ret.time = next_switch_time - now;
    ret.task = new_task;
    ret.migrated = 0;

    BUG_ON(ret.time <= 0);

    return ret;
}
static void ctxt_switch_to(struct vcpu *n)
{
    /* When the idle VCPU is running, Xen will always stay in hypervisor
     * mode. Therefore we don't need to restore the context of an idle VCPU. */
    if ( is_idle_vcpu(n) )
        return;

    p2m_restore_state(n);

    WRITE_SYSREG32(n->domain->arch.vpidr, VPIDR_EL2);
    WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2);

    /* VGIC */
    gic_restore_state(n);

    /* VFP */
    vfp_restore_state(n);

    /* XXX MPU */

    /* Fault Status */
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.dfar, DFAR);
    WRITE_CP32(n->arch.ifar, IFAR);
    WRITE_CP32(n->arch.dfsr, DFSR);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.far, FAR_EL1);
    WRITE_SYSREG64(n->arch.esr, ESR_EL1);
#endif

    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.ifsr, IFSR32_EL2);
    WRITE_SYSREG(n->arch.afsr0, AFSR0_EL1);
    WRITE_SYSREG(n->arch.afsr1, AFSR1_EL1);

    /* MMU */
    WRITE_SYSREG(n->arch.vbar, VBAR_EL1);
    WRITE_SYSREG(n->arch.ttbcr, TCR_EL1);
    WRITE_SYSREG64(n->arch.ttbr0, TTBR0_EL1);
    WRITE_SYSREG64(n->arch.ttbr1, TTBR1_EL1);
    if ( is_32bit_domain(n->domain) )
        WRITE_SYSREG(n->arch.dacr, DACR32_EL2);
    WRITE_SYSREG64(n->arch.par, PAR_EL1);
#if defined(CONFIG_ARM_32)
    WRITE_CP32(n->arch.mair0, MAIR0);
    WRITE_CP32(n->arch.mair1, MAIR1);
    WRITE_CP32(n->arch.amair0, AMAIR0);
    WRITE_CP32(n->arch.amair1, AMAIR1);
#elif defined(CONFIG_ARM_64)
    WRITE_SYSREG64(n->arch.mair, MAIR_EL1);
    WRITE_SYSREG64(n->arch.amair, AMAIR_EL1);
#endif
    isb();

    /* Control Registers */
    WRITE_SYSREG(n->arch.cpacr, CPACR_EL1);

    WRITE_SYSREG(n->arch.contextidr, CONTEXTIDR_EL1);
    WRITE_SYSREG(n->arch.tpidr_el0, TPIDR_EL0);
    WRITE_SYSREG(n->arch.tpidrro_el0, TPIDRRO_EL0);
    WRITE_SYSREG(n->arch.tpidr_el1, TPIDR_EL1);

    if ( is_32bit_domain(n->domain) && cpu_has_thumbee )
    {
        WRITE_SYSREG32(n->arch.teecr, TEECR32_EL1);
        WRITE_SYSREG32(n->arch.teehbr, TEEHBR32_EL1);
    }

#ifdef CONFIG_ARM_32
    WRITE_CP32(n->arch.joscr, JOSCR);
    WRITE_CP32(n->arch.jmcr, JMCR);
#endif
    isb();

    /* CP 15 */
    WRITE_SYSREG(n->arch.csselr, CSSELR_EL1);

    isb();

    /* This could trigger a hardware interrupt from the virtual
     * timer. The interrupt needs to be injected into the guest. */
    WRITE_SYSREG32(n->arch.cntkctl, CNTKCTL_EL1);
    virt_timer_restore(n);
}