pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);

	/*
	 * Pagetable pages must be mapped read-only before Xen will
	 * accept them; this is skipped when the hypervisor provides
	 * writable pagetables.
	 */
	if (pte)
		make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);

	return pte;
}
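/*
 * Sketch (an assumption, not an excerpt from this tree): the matching
 * free path has to make the PTE page writable again before handing it
 * back to the page allocator.  A plausible counterpart mirroring the
 * feature-gated pattern above; make_lowmem_page_writable() is assumed
 * to exist as the inverse of make_lowmem_page_readonly().
 */
void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	/* Undo the read-only pinning before freeing the page. */
	make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
	free_page((unsigned long)pte);
}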
static __cpuinit int cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;

	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __KERNEL_DS;
	ctxt->user_regs.es = __KERNEL_DS;
	ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
#endif
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt & ~PAGE_MASK);
	make_lowmem_page_readonly(gdt);

	ctxt->gdt_frames[0] = virt_to_mfn(gdt);
	ctxt->gdt_ents = GDT_ENTRIES;

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs = __KERNEL_CS;
	ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
	ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
static void xen_load_gdt(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long frames[pages];
	int f;

	/*
	 * A GDT can be up to 64k in size, which corresponds to 8192
	 * 8-byte entries, or 16 4k pages..
	 */
	BUG_ON(size > 65536);
	BUG_ON(va & ~PAGE_MASK);

	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
		int level;
		pte_t *ptep;
		unsigned long pfn, mfn;
		void *virt;

		/*
		 * The GDT is per-cpu and is in the percpu data area.
		 * That can be virtually mapped, so we need to do a
		 * page-walk to get the underlying MFN for the
		 * hypercall.  The page can also be in the kernel's
		 * linear range, so we need to RO that mapping too.
		 */
		ptep = lookup_address(va, &level);
		BUG_ON(ptep == NULL);

		pfn = pte_pfn(*ptep);
		mfn = pfn_to_mfn(pfn);
		virt = __va(PFN_PHYS(pfn));

		frames[f] = mfn;

		make_lowmem_page_readonly((void *)va);
		make_lowmem_page_readonly(virt);
	}

	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
		BUG();
}
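/*
 * For context: a minimal, abridged sketch of how xen_load_gdt() gets
 * wired up in kernels of this era -- it is installed as the load_gdt
 * hook in the paravirt-ops CPU table.  The exact struct name and
 * annotation varied across versions, so treat this as illustrative
 * rather than a verbatim excerpt.
 */
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
	.load_gdt = xen_load_gdt,
	/* ... the remaining CPU hooks are elided ... */
};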
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	int i;
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

	pgd_test_and_unpin(pgd);

	if (PTRS_PER_PMD == 1 || !pgd)
		return pgd;

	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
		if (!pmd)
			goto out_oom;
		/* The "+ 1" sets the low bit: the PMD is present. */
		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
	}

	if (!HAVE_SHARED_KERNEL_PMD) {
		unsigned long flags;

		for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
			if (!pmd)
				goto out_oom;
			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
		}

		spin_lock_irqsave(&pgd_lock, flags);

		/* Copy the kernel mappings into the private kernel PMDs. */
		for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
			unsigned long v = (unsigned long)i << PGDIR_SHIFT;
			pgd_t *kpgd = pgd_offset_k(v);
			pud_t *kpud = pud_offset(kpgd, v);
			pmd_t *kpmd = pmd_offset(kpud, v);
			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
			memcpy(pmd, kpmd, PAGE_SIZE);
			make_lowmem_page_readonly(
				pmd, XENFEAT_writable_page_tables);
		}

		pgd_list_add(pgd);
		spin_unlock_irqrestore(&pgd_lock, flags);
	}

	return pgd;

out_oom:
	for (i--; i >= 0; i--)
		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
	kmem_cache_free(pgd_cache, pgd);
	return NULL;
}
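/*
 * Sketch of the matching teardown (an assumption, mirroring pgd_alloc()
 * above; the real pgd_free() of this era presumably also unpins and
 * handles the !HAVE_SHARED_KERNEL_PMD kernel PMDs, omitted here).  The
 * "- 1" strips the present bit that pgd_alloc() added via
 * "1 + __pa(pmd)".
 */
void pgd_free(pgd_t *pgd)
{
	int i;

	pgd_test_and_unpin(pgd);

	/* Separate PMD pages only exist in the PAE case. */
	if (PTRS_PER_PMD > 1)
		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
			kmem_cache_free(pmd_cache,
					(void *)__va(pgd_val(pgd[i]) - 1));

	kmem_cache_free(pgd_cache, pgd);
}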
void make_page_readonly(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	if (rc) /* fallback? */
		xen_l1_entry_update(pte, pte_wrprotect(*pte));

	if ((unsigned long)va >= (unsigned long)high_memory) {
		unsigned long pfn = pte_pfn(*pte);
#ifdef CONFIG_HIGHMEM
		if (pfn >= highstart_pfn)
			kmap_flush_unused(); /* flush stale writable kmaps */
		else
#endif
			make_lowmem_page_readonly(
				phys_to_virt(pfn << PAGE_SHIFT), feature);
	}
}
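/*
 * make_lowmem_page_readonly() is called throughout this section but is
 * not shown.  A minimal sketch, assuming it follows the same pattern as
 * make_page_readonly() above minus the highmem handling.  Note the
 * snippets mix two calling conventions: a two-argument, feature-gated
 * variant (out-of-tree trees) and a one-argument variant (mainline);
 * this sketch follows the two-argument form.
 */
void make_lowmem_page_readonly(void *va, unsigned int feature)
{
	pte_t *pte;
	int rc;

	/* Nothing to do if the hypervisor allows writable pagetables. */
	if (xen_feature(feature))
		return;

	pte = virt_to_ptep(va);
	rc = HYPERVISOR_update_va_mapping(
		(unsigned long)va, pte_wrprotect(*pte), 0);
	if (rc)
		xen_l1_entry_update(pte, pte_wrprotect(*pte));
}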
static int cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	/* used to tell cpu_init() that it can proceed with initialization */
	cpumask_set_cpu(cpu, cpu_callout_mask);
	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_rw(cpu);

#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	/*
	 * Bring up the CPU in cpu_bringup_and_idle() with the stack
	 * pointing just below where pt_regs would be if it were a normal
	 * kernel entry.
	 */
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.ss = __KERNEL_DS;
	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt & ~PAGE_MASK);

	gdt_mfn = arbitrary_virt_to_mfn(gdt);
	make_lowmem_page_readonly(gdt);
	make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

	ctxt->gdt_frames[0] = gdt_mfn;
	ctxt->gdt_ents = GDT_ENTRIES;

	/*
	 * Set SS:SP that Xen will use when entering guest kernel mode
	 * from guest user mode.  Subsequent calls to load_sp0() can
	 * change this value.
	 */
	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = task_top_of_stack(idle);

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs = __KERNEL_CS;
	ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
static int __cpuinit cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	if (xen_feature(XENFEAT_auto_translated_physmap) &&
	    xen_feature(XENFEAT_supervisor_mode_kernel)) {
		/* Note: PVH is not supported on x86_32. */
#ifdef CONFIG_X86_64
		ctxt->user_regs.ds = __KERNEL_DS;
		ctxt->user_regs.es = 0;
		ctxt->user_regs.gs = 0;

		/* GUEST_GDTR_BASE and */
		ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
		/* GUEST_GDTR_LIMIT in the VMCS. */
		ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);

		ctxt->gs_base_user = (unsigned long)
			per_cpu(irq_stack_union.gs_base, cpu);
#endif
	} else {
		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
		ctxt->user_regs.ds = __USER_DS;
		ctxt->user_regs.es = __USER_DS;

		xen_copy_trap_info(ctxt->trap_ctxt);

		ctxt->ldt_ents = 0;

		BUG_ON((unsigned long)gdt & ~PAGE_MASK);

		gdt_mfn = arbitrary_virt_to_mfn(gdt);
		make_lowmem_page_readonly(gdt);
		make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

		ctxt->u.pv.gdt_frames[0] = gdt_mfn;
		ctxt->u.pv.gdt_ents = GDT_ENTRIES;

		ctxt->kernel_ss = __KERNEL_DS;
		ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
		ctxt->event_callback_cs = __KERNEL_CS;
		ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
		ctxt->event_callback_eip =
			(unsigned long)xen_hypervisor_callback;
		ctxt->failsafe_callback_eip =
			(unsigned long)xen_failsafe_callback;
	}
	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
void __init xen_start_kernel(void)
{
	unsigned int i;
	struct xen_machphys_mapping mapping;
	unsigned long machine_to_phys_nr_ents;
#ifdef CONFIG_X86_32
	struct xen_platform_parameters pp;
	extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
	unsigned long addr;
#endif

	xen_setup_features();

	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
		machine_to_phys_nr_ents = mapping.max_mfn + 1;
	} else
		machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
	while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents)
		machine_to_phys_order++;

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		phys_to_machine_mapping =
			(unsigned long *)xen_start_info->mfn_list;

	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_writable_pagetables));

	reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
		      __pa(xen_start_info->pt_base)
		      + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
		      "Xen provided");

#ifdef CONFIG_X86_32
	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_4gb_segments));

	init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;

	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
		hypervisor_virt_start = pp.virt_start;
		reserve_top_address(0UL - pp.virt_start);
	}

	BUG_ON(pte_index(hypervisor_virt_start));

	/* Do an early initialization of the fixmap area */
	make_lowmem_page_readonly(swapper_pg_fixmap,
				  XENFEAT_writable_page_tables);
	addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
	set_pmd(pmd_offset(pud_offset(swapper_pg_dir + pgd_index(addr), addr),
			   addr),
		__pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
#else
	check_efer();
	xen_init_pt();
#endif

#define __FIXADDR_TOP (-PAGE_SIZE)
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
				     != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
	FIX_BUG_ON(SHARED_INFO);
	FIX_BUG_ON(ISAMAP_BEGIN);
	FIX_BUG_ON(ISAMAP_END);
#undef pmd_index
#undef __FIXADDR_TOP

	/* Switch to the real shared_info page, and clear the dummy page. */
	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
	memset(empty_zero_page, 0, sizeof(empty_zero_page));

	setup_vcpu_info(0);

	/* Set up mapping of lowest 1MB of physical memory. */
	for (i = 0; i < NR_FIX_ISAMAPS; i++)
		if (is_initial_xendomain())
			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
		else
			__set_fixmap(FIX_ISAMAP_BEGIN - i,
				     virt_to_machine(empty_zero_page),
				     PAGE_KERNEL_RO);
}
static int cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	/* used to tell cpu_init() that it can proceed with initialization */
	cpumask_set_cpu(cpu, cpu_callout_mask);
	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_rw(cpu);

#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.ss = __KERNEL_DS;

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt & ~PAGE_MASK);

	gdt_mfn = arbitrary_virt_to_mfn(gdt);
	make_lowmem_page_readonly(gdt);
	make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

	ctxt->gdt_frames[0] = gdt_mfn;
	ctxt->gdt_ents = GDT_ENTRIES;

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs = __KERNEL_CS;
	ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
	ctxt->user_regs.cs = __KERNEL_CS;
	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
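/*
 * For orientation: an abridged sketch of the caller side.  Mainline's
 * xen_cpu_up() of this era does roughly the following; runstate setup,
 * PMU init, and the wait for the CPU to come online are omitted here,
 * so treat this as illustrative rather than a verbatim excerpt.
 */
static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int rc;

	/* Build the initial vCPU register/pagetable state shown above. */
	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	/* Ask Xen to start executing the newly initialised vCPU. */
	rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
	BUG_ON(rc);

	return 0;
}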