/* Trace a page fault taken by a PV guest, using a 32- or 64-bit record. */
void __trace_pv_page_fault(unsigned long addr, unsigned error_code)
{
    unsigned long eip = guest_cpu_user_regs()->eip;

    if ( is_pv_32bit_vcpu(current) )
    {
        struct __packed {
            u32 eip, addr, error_code;
        } d;

        d.eip = eip;
        d.addr = addr;
        d.error_code = error_code;

        __trace_var(TRC_PV_PAGE_FAULT, 1, sizeof(d), &d);
    }
    else
    {
        struct __packed {
            unsigned long eip, addr;
            u32 error_code;
        } d;
        unsigned event;

        d.eip = eip;
        d.addr = addr;
        d.error_code = error_code;
        event = TRC_PV_PAGE_FAULT;
        event |= TRC_64_FLAG;
        __trace_var(event, 1, sizeof(d), &d);
    }
}

/* Trace hypercall entry, recording the guest's six argument registers. */
void __trace_hypercall_entry(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    unsigned long args[6];

    if ( is_pv_32bit_vcpu(current) )
    {
        args[0] = regs->ebx;
        args[1] = regs->ecx;
        args[2] = regs->edx;
        args[3] = regs->esi;
        args[4] = regs->edi;
        args[5] = regs->ebp;
    }
    else
    {
        args[0] = regs->rdi;
        args[1] = regs->rsi;
        args[2] = regs->rdx;
        args[3] = regs->r10;
        args[4] = regs->r8;
        args[5] = regs->r9;
    }

    __trace_hypercall(TRC_PV_HYPERCALL_V2, regs->eax, args);
}

void __trace_trap_one_addr(unsigned event, unsigned long va)
{
    if ( is_pv_32bit_vcpu(current) )
    {
        u32 d = va;

        __trace_var(event, 1, sizeof(d), &d);
    }
    else
    {
        event |= TRC_64_FLAG;
        __trace_var(event, 1, sizeof(va), &va);
    }
}

void __trace_ptwr_emulation(unsigned long addr, l1_pgentry_t npte)
{
    unsigned long eip = guest_cpu_user_regs()->eip;

    /* We have a couple of different modes to worry about:
     * - 32-on-32: 32-bit pte, 32-bit virtual addresses
     * - pae-on-pae, pae-on-64: 64-bit pte, 32-bit virtual addresses
     * - 64-on-64: 64-bit pte, 64-bit virtual addresses
     * pae-on-64 is the only one that requires extra code; in all other
     * cases, "unsigned long" is the size of a guest virtual address. */

    if ( is_pv_32bit_vcpu(current) )
    {
        struct __packed {
            l1_pgentry_t pte;
            u32 addr, eip;
        } d;

        d.addr = addr;
        d.eip = eip;
        d.pte = npte;

        __trace_var(TRC_PV_PTWR_EMULATION_PAE, 1, sizeof(d), &d);
    }
    else
    {
        struct {
            l1_pgentry_t pte;
            unsigned long addr, eip;
        } d;
        unsigned event;

        d.addr = addr;
        d.eip = eip;
        d.pte = npte;
        event = TRC_PV_PTWR_EMULATION;
        event |= TRC_64_FLAG;
        __trace_var(event, 1/*tsc*/, sizeof(d), &d);
    }
}

/* Trace a trap delivered to a PV guest, with trap number and optional error code. */
void __trace_pv_trap(int trapnr, unsigned long eip,
                     int use_error_code, unsigned error_code)
{
    if ( is_pv_32bit_vcpu(current) )
    {
        struct __packed {
            unsigned eip:32,
                trapnr:15,
                use_error_code:1,
                error_code:16;
        } d;

        d.eip = eip;
        d.trapnr = trapnr;
        d.error_code = error_code;
        d.use_error_code = !!use_error_code;

        __trace_var(TRC_PV_TRAP, 1, sizeof(d), &d);
    }
    else
    {
        struct __packed {
            unsigned long eip;
            unsigned trapnr:15,
                use_error_code:1,
                error_code:16;
        } d;
        unsigned event;

        d.eip = eip;
        d.trapnr = trapnr;
        d.error_code = error_code;
        d.use_error_code = !!use_error_code;
        event = TRC_PV_TRAP;
        event |= TRC_64_FLAG;
        __trace_var(event, 1, sizeof(d), &d);
    }
}

/* Dispatch one multicall entry for the current PV vCPU, via the native or compat handler. */
enum mc_disposition arch_do_multicall_call(struct mc_state *state)
{
    struct vcpu *curr = current;
    unsigned long op;

    if ( !is_pv_32bit_vcpu(curr) )
    {
        struct multicall_entry *call = &state->call;

        op = call->op;
        if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
             pv_hypercall_table[op].native )
            call->result = pv_hypercall_table[op].native(
                call->args[0], call->args[1], call->args[2],
                call->args[3], call->args[4], call->args[5]);
        else
            call->result = -ENOSYS;
    }
#ifdef CONFIG_COMPAT
    else
    {
        struct compat_multicall_entry *call = &state->compat_call;

        op = call->op;
        if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
             pv_hypercall_table[op].compat )
            call->result = pv_hypercall_table[op].compat(
                call->args[0], call->args[1], call->args[2],
                call->args[3], call->args[4], call->args[5]);
        else
            call->result = -ENOSYS;
    }
#endif

    return unlikely(op == __HYPERVISOR_iret)
           ? mc_exit
           : likely(guest_kernel_mode(curr, guest_cpu_user_regs()))
             ? mc_continue : mc_preempt;
}

void __trace_trap_two_addr(unsigned event, unsigned long va1,
                           unsigned long va2)
{
    if ( is_pv_32bit_vcpu(current) )
    {
        struct __packed {
            u32 va1, va2;
        } d;

        d.va1 = va1;
        d.va2 = va2;
        __trace_var(event, 1, sizeof(d), &d);
    }
    else
    {
        struct __packed {
            unsigned long va1, va2;
        } d;

        d.va1 = va1;
        d.va2 = va2;
        event |= TRC_64_FLAG;
        __trace_var(event, 1, sizeof(d), &d);
    }
}

void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
    struct vlapic *vlapic;
    u32 vlapic_lvtpc;

    /*
     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
     * in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( is_pvh_vcpu(sampling) &&
             !(vpmu_mode & XENPMU_MODE_ALL) &&
             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( has_hvm_container_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->eflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->eflags;

            if ( !has_hvm_container_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.attr.fields.dpl;
                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->nmi_pending = 1;
        break;
    }
}

void pv_hypercall(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    unsigned long eax;

    ASSERT(guest_kernel_mode(curr, regs));

    eax = is_pv_32bit_vcpu(curr) ? regs->eax : regs->rax;

    BUILD_BUG_ON(ARRAY_SIZE(pv_hypercall_table) >
                 ARRAY_SIZE(hypercall_args_table));

    if ( (eax >= ARRAY_SIZE(pv_hypercall_table)) ||
         !pv_hypercall_table[eax].native )
    {
        regs->rax = -ENOSYS;
        return;
    }

    curr->hcall_preempted = false;

    if ( !is_pv_32bit_vcpu(curr) )
    {
        unsigned long rdi = regs->rdi;
        unsigned long rsi = regs->rsi;
        unsigned long rdx = regs->rdx;
        unsigned long r10 = regs->r10;
        unsigned long r8 = regs->r8;
        unsigned long r9 = regs->r9;

#ifndef NDEBUG
        /* Deliberately corrupt parameter regs not used by this hypercall. */
        switch ( hypercall_args_table[eax].native )
        {
        case 0: rdi = 0xdeadbeefdeadf00dUL;
        case 1: rsi = 0xdeadbeefdeadf00dUL;
        case 2: rdx = 0xdeadbeefdeadf00dUL;
        case 3: r10 = 0xdeadbeefdeadf00dUL;
        case 4: r8 = 0xdeadbeefdeadf00dUL;
        case 5: r9 = 0xdeadbeefdeadf00dUL;
        }
#endif
        if ( unlikely(tb_init_done) )
        {
            unsigned long args[6] = { rdi, rsi, rdx, r10, r8, r9 };

            __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
        }

        regs->rax = pv_hypercall_table[eax].native(rdi, rsi, rdx, r10, r8, r9);

#ifndef NDEBUG
        if ( !curr->hcall_preempted )
        {
            /* Deliberately corrupt parameter regs used by this hypercall. */
            switch ( hypercall_args_table[eax].native )
            {
            case 6: regs->r9  = 0xdeadbeefdeadf00dUL;
            case 5: regs->r8  = 0xdeadbeefdeadf00dUL;
            case 4: regs->r10 = 0xdeadbeefdeadf00dUL;
            case 3: regs->rdx = 0xdeadbeefdeadf00dUL;
            case 2: regs->rsi = 0xdeadbeefdeadf00dUL;
            case 1: regs->rdi = 0xdeadbeefdeadf00dUL;
            }
        }
#endif
    }
    else
    {
        unsigned int ebx = regs->ebx;
        unsigned int ecx = regs->ecx;
        unsigned int edx = regs->edx;
        unsigned int esi = regs->esi;
        unsigned int edi = regs->edi;
        unsigned int ebp = regs->ebp;

#ifndef NDEBUG
        /* Deliberately corrupt parameter regs not used by this hypercall. */
        switch ( hypercall_args_table[eax].compat )
        {
        case 0: ebx = 0xdeadf00d;
        case 1: ecx = 0xdeadf00d;
        case 2: edx = 0xdeadf00d;
        case 3: esi = 0xdeadf00d;
        case 4: edi = 0xdeadf00d;
        case 5: ebp = 0xdeadf00d;
        }
#endif

        if ( unlikely(tb_init_done) )
        {
            unsigned long args[6] = { ebx, ecx, edx, esi, edi, ebp };

            __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
        }

        curr->hcall_compat = true;
        regs->eax = pv_hypercall_table[eax].compat(ebx, ecx, edx, esi, edi, ebp);
        curr->hcall_compat = false;

#ifndef NDEBUG
        if ( !curr->hcall_preempted )
        {
            /* Deliberately corrupt parameter regs used by this hypercall. */
            switch ( hypercall_args_table[eax].compat )
            {
            case 6: regs->ebp = 0xdeadf00d;
            case 5: regs->edi = 0xdeadf00d;
            case 4: regs->esi = 0xdeadf00d;
            case 3: regs->edx = 0xdeadf00d;
            case 2: regs->ecx = 0xdeadf00d;
            case 1: regs->ebx = 0xdeadf00d;
            }
        }
#endif
    }

    /*
     * PV guests use SYSCALL or INT $0x82 to make a hypercall, both of which
     * have trap semantics.  If the hypercall has been preempted, rewind the
     * instruction pointer to reexecute the instruction.
     */
    if ( curr->hcall_preempted )
        regs->rip -= 2;

    perfc_incr(hypercalls);
}