int switch_compat(struct domain *d)
{
    struct vcpu *v;
    int rc;

    if ( is_hvm_domain(d) || d->tot_pages != 0 )
        return -EACCES;
    if ( is_pv_32bit_domain(d) )
        return 0;

    d->arch.has_32bit_shinfo = 1;
    d->arch.is_32bit_pv = 1;

    for_each_vcpu( d, v )
    {
        if ( (rc = setup_compat_arg_xlat(v)) ||
             (rc = setup_compat_l4(v)) )
            goto undo_and_fail;
    }

    domain_set_alloc_bitsize(d);
    recalculate_cpuid_policy(d);

    d->arch.x87_fip_width = 4;

    return 0;

 undo_and_fail:
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    for_each_vcpu( d, v )
    {
        free_compat_arg_xlat(v);
        release_compat_l4(v);
    }

    return rc;
}
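/*
 * switch_compat() above uses a common all-or-nothing unwind idiom: flip the
 * 32-bit-PV flags first, try the per-vCPU setup, and on any failure clear
 * the flags and tear down *every* vCPU, including those whose setup never
 * ran -- implying free_compat_arg_xlat()/release_compat_l4() must be safe
 * in that case. A minimal standalone sketch of the same idiom follows
 * (hypothetical setup_one()/teardown_one() helpers, not part of the Xen
 * source):
 */
#include <stdio.h>

static int set_up[4];

static int setup_one(int i)      /* deliberately fails for unit 2 */
{
    if ( i == 2 )
        return -1;
    set_up[i] = 1;
    return 0;
}

static void teardown_one(int i)  /* must be safe on never-set-up units */
{
    set_up[i] = 0;
}

static int setup_all(int n)
{
    int i, rc = 0;

    for ( i = 0; i < n; i++ )
        if ( (rc = setup_one(i)) != 0 )
            goto undo_and_fail;

    return 0;

 undo_and_fail:
    /* Unwind every unit, not just the ones already set up. */
    for ( i = 0; i < n; i++ )
        teardown_one(i);

    return rc;
}

int main(void)
{
    printf("setup_all: %d\n", setup_all(4)); /* prints "setup_all: -1" */
    return 0;
}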
int __init construct_dom0(
    struct domain *d,
    unsigned long _image_start, unsigned long image_len,
    unsigned long _initrd_start, unsigned long initrd_len,
    char *cmdline)
{
    int i, rc, compatible, compat32, order, machine;
    struct cpu_user_regs *regs;
    unsigned long pfn, mfn;
    unsigned long nr_pages;
    unsigned long nr_pt_pages;
    unsigned long alloc_spfn;
    unsigned long alloc_epfn;
    unsigned long count;
    struct page_info *page = NULL;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];
    unsigned long long value;
#if defined(__i386__)
    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
#elif defined(__x86_64__)
    char *image_start  = __va(_image_start);
    char *initrd_start = __va(_initrd_start);
#endif
#if CONFIG_PAGING_LEVELS >= 4
    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
#endif
    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
    l1_pgentry_t *l1tab = NULL, *l1start = NULL;

    /*
     * This fully describes the memory layout of the initial domain. All
     * *_start address are page-aligned, except v_start (and v_end) which are
     * superpage-aligned.
     */
    struct elf_binary elf;
    struct elf_dom_parms parms;
    unsigned long vkern_start;
    unsigned long vkern_end;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_start;
    unsigned long v_end;

    /* Machine address of next candidate page-table page. */
    unsigned long mpt_alloc;

    /* Sanity! */
    BUG_ON(d->domain_id != 0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(v->is_initialised);

    printk("*** LOADING DOMAIN 0 ***\n");

    d->max_pages = ~0U;

    nr_pages = compute_dom0_nr_pages();

    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
        return rc;
#ifdef VERBOSE
    elf_set_verbose(&elf);
#endif
    elf_parse_binary(&elf);
    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
        return rc;

    /* compatibility check */
    compatible = 0;
    compat32 = 0;
    machine = elf_uval(&elf, elf.ehdr, e_machine);
    switch (CONFIG_PAGING_LEVELS) {
    case 3: /* x86_32p */
        if (parms.pae == PAEKERN_bimodal)
            parms.pae = PAEKERN_extended_cr3;
        printk(" Xen  kernel: 32-bit, PAE, lsb\n");
        if (elf_32bit(&elf) && parms.pae && machine == EM_386)
            compatible = 1;
        break;
    case 4: /* x86_64 */
        printk(" Xen  kernel: 64-bit, lsb, compat32\n");
        if (elf_32bit(&elf) && parms.pae == PAEKERN_bimodal)
            parms.pae = PAEKERN_extended_cr3;
        if (elf_32bit(&elf) && parms.pae && machine == EM_386)
        {
            compat32 = 1;
            compatible = 1;
        }
        if (elf_64bit(&elf) && machine == EM_X86_64)
            compatible = 1;
        break;
    }
    printk(" Dom0 kernel: %s%s, %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
           elf_64bit(&elf) ? "64-bit" : "32-bit",
           parms.pae       ? ", PAE"  : "",
           elf_msb(&elf)   ? "msb"    : "lsb",
           elf.pstart, elf.pend);
    if ( elf.bsd_symtab_pstart )
        printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
               elf.bsd_symtab_pstart, elf.bsd_symtab_pend);

    if ( !compatible )
    {
        printk("Mismatch between Xen and DOM0 kernel\n");
        return -EINVAL;
    }

#if defined(__x86_64__)
    if ( compat32 )
    {
        l1_pgentry_t gdt_l1e;

        d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
        v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];

        if ( nr_pages != (unsigned int)nr_pages )
            nr_pages = UINT_MAX;

        /*
         * Map compatibility Xen segments into every VCPU's GDT. See
         * arch_domain_create() for further comments.
         */
        gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
                                PAGE_HYPERVISOR);
        for ( i = 0; i < MAX_VIRT_CPUS; i++ )
            d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
                                     FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
        flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
    }
#endif

    if ( parms.pae == PAEKERN_extended_cr3 )
        set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);

    if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) )
    {
        unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
        value = (parms.virt_hv_start_low + mask) & ~mask;
        BUG_ON(!is_pv_32bit_domain(d));
#if defined(__i386__)
        if ( value > HYPERVISOR_VIRT_START )
            panic("Domain 0 expects too high a hypervisor start address.\n");
#else
        if ( value > __HYPERVISOR_COMPAT_VIRT_START )
            panic("Domain 0 expects too high a hypervisor start address.\n");
        HYPERVISOR_COMPAT_VIRT_START(d) =
            max_t(unsigned int, m2p_compat_vstart, value);
#endif
    }
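/*
 * The virt_hv_start_low handling above rounds the kernel-requested
 * hypervisor start address up to an L2 page-table boundary with the usual
 * (x + mask) & ~mask idiom. A standalone illustration, assuming the 2MB
 * superpage size implied by L2_PAGETABLE_SHIFT == 21 (the PAE value; the
 * request address below is an invented example):
 */
#include <stdio.h>

int main(void)
{
    unsigned long shift = 21;                     /* L2_PAGETABLE_SHIFT */
    unsigned long mask  = (1UL << shift) - 1;     /* 0x1fffff */
    unsigned long reqd  = 0xf5800001UL;           /* example request */
    unsigned long value = (reqd + mask) & ~mask;  /* round up to boundary */

    printf("%#lx -> %#lx\n", reqd, value);        /* 0xf5800001 -> 0xf5a00000 */
    return 0;
}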
int __init construct_dom0(
    struct domain *d,
    const module_t *image, unsigned long image_headroom,
    module_t *initrd,
    void *(*bootstrap_map)(const module_t *),
    char *cmdline)
{
    int i, cpu, rc, compatible, compat32, order, machine;
    struct cpu_user_regs *regs;
    unsigned long pfn, mfn;
    unsigned long nr_pages;
    unsigned long nr_pt_pages;
    unsigned long alloc_spfn;
    unsigned long alloc_epfn;
    unsigned long initrd_pfn = -1, initrd_mfn = 0;
    unsigned long count;
    struct page_info *page = NULL;
    start_info_t *si;
    struct vcpu *v = d->vcpu[0];
    unsigned long long value;
    char *image_base = bootstrap_map(image);
    unsigned long image_len = image->mod_end;
    char *image_start = image_base + image_headroom;
    unsigned long initrd_len = initrd ? initrd->mod_end : 0;
#if CONFIG_PAGING_LEVELS < 4
    module_t mpt;
    void *mpt_ptr;
#else
    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
#endif
    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
    l1_pgentry_t *l1tab = NULL, *l1start = NULL;

    /*
     * This fully describes the memory layout of the initial domain. All
     * *_start address are page-aligned, except v_start (and v_end) which are
     * superpage-aligned.
     */
    struct elf_binary elf;
    struct elf_dom_parms parms;
    unsigned long vkern_start;
    unsigned long vkern_end;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_start;
    unsigned long v_end;

    /* Machine address of next candidate page-table page. */
    paddr_t mpt_alloc;

    /* Sanity! */
    BUG_ON(d->domain_id != 0);
    BUG_ON(d->vcpu[0] == NULL);
    BUG_ON(v->is_initialised);

    printk("*** LOADING DOMAIN 0 ***\n");

    d->max_pages = ~0U;

    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
        return rc;

    if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
        return rc;
#ifdef VERBOSE
    elf_set_verbose(&elf);
#endif
    elf_parse_binary(&elf);
    if ( (rc = elf_xen_parse(&elf, &parms)) != 0 )
        return rc;

    /* compatibility check */
    compatible = compat32 = 0;
    machine = elf_uval(&elf, elf.ehdr, e_machine);
    switch (CONFIG_PAGING_LEVELS) {
    case 3: /* x86_32p */
        if (parms.pae == PAEKERN_bimodal)
            parms.pae = PAEKERN_extended_cr3;
        printk(" Xen  kernel: 32-bit, PAE, lsb\n");
        if (elf_32bit(&elf) && parms.pae && machine == EM_386)
            compatible = 1;
        break;
    case 4: /* x86_64 */
        printk(" Xen  kernel: 64-bit, lsb, compat32\n");
        if (elf_32bit(&elf) && parms.pae == PAEKERN_bimodal)
            parms.pae = PAEKERN_extended_cr3;
        if (elf_32bit(&elf) && parms.pae && machine == EM_386)
        {
            compat32 = 1;
            compatible = 1;
        }
        if (elf_64bit(&elf) && machine == EM_X86_64)
            compatible = 1;
        break;
    }
    printk(" Dom0 kernel: %s%s, %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
           elf_64bit(&elf) ? "64-bit" : "32-bit",
           parms.pae       ? ", PAE"  : "",
           elf_msb(&elf)   ? "msb"    : "lsb",
           elf.pstart, elf.pend);
    if ( elf.bsd_symtab_pstart )
        printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
               elf.bsd_symtab_pstart, elf.bsd_symtab_pend);

    if ( !compatible )
    {
        printk("Mismatch between Xen and DOM0 kernel\n");
        return -EINVAL;
    }

    if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
         !test_bit(XENFEAT_dom0, parms.f_supported) )
    {
        printk("Kernel does not support Dom0 operation\n");
        return -EINVAL;
    }

#if defined(__x86_64__)
    if ( compat32 )
    {
        d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
        v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];

        if ( setup_compat_arg_xlat(v) != 0 )
            BUG();
    }
#endif

    nr_pages = compute_dom0_nr_pages(d, &parms, initrd_len);

    if ( parms.pae == PAEKERN_extended_cr3 )
        set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);

    if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) )
    {
        unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
        value = (parms.virt_hv_start_low + mask) & ~mask;
        BUG_ON(!is_pv_32bit_domain(d));
#if defined(__i386__)
        if ( value > HYPERVISOR_VIRT_START )
            panic("Domain 0 expects too high a hypervisor start address.\n");
#else
        if ( value > __HYPERVISOR_COMPAT_VIRT_START )
            panic("Domain 0 expects too high a hypervisor start address.\n");
        HYPERVISOR_COMPAT_VIRT_START(d) =
            max_t(unsigned int, m2p_compat_vstart, value);
#endif
    }
static unsigned long __init compute_dom0_nr_pages(
    struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
    unsigned long avail = avail_domheap_pages() + initial_images_nrpages();
    unsigned long nr_pages = dom0_nrpages;
    unsigned long min_pages = dom0_min_nrpages;
    unsigned long max_pages = dom0_max_nrpages;

    /* Reserve memory for further dom0 vcpu-struct allocations... */
    avail -= (opt_dom0_max_vcpus - 1UL)
             << get_order_from_bytes(sizeof(struct vcpu));
    /* ...and compat_l4's, if needed. */
    if ( is_pv_32on64_domain(d) )
        avail -= opt_dom0_max_vcpus - 1;

    /* Reserve memory for iommu_dom0_init() (rough estimate). */
    if ( iommu_enabled )
    {
        unsigned int s;

        for ( s = 9; s < BITS_PER_LONG; s += 9 )
            avail -= max_pdx >> s;
    }

    /*
     * If domain 0 allocation isn't specified, reserve 1/16th of available
     * memory for things like DMA buffers. This reservation is clamped to
     * a maximum of 128MB.
     */
    if ( nr_pages == 0 )
        nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));

    /* Negative memory specification means "all memory - specified amount". */
    if ( (long)nr_pages  < 0 ) nr_pages  += avail;
    if ( (long)min_pages < 0 ) min_pages += avail;
    if ( (long)max_pages < 0 ) max_pages += avail;

    /* Clamp dom0 memory according to min/max limits and available memory. */
    nr_pages = max(nr_pages, min_pages);
    nr_pages = min(nr_pages, max_pages);
    nr_pages = min(nr_pages, avail);

#ifdef __x86_64__
    if ( (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
         ((dom0_min_nrpages <= 0) || (nr_pages > min_pages)) )
    {
        /*
         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
         * note) require that there is enough virtual space beyond the initial
         * allocation to set up their initial page tables. This space is
         * roughly the same size as the p2m table, so make sure the initial
         * allocation doesn't consume more than about half the space that's
         * available between params.virt_base and the address space end.
         */
        unsigned long vstart, vend, end;
        size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);

        vstart = parms->virt_base;
        vend = round_pgup(parms->virt_kend);
        if ( !parms->elf_notes[XEN_ELFNOTE_MOD_START_PFN].data.num )
            vend += round_pgup(initrd_len);
        end = vend + nr_pages * sizeof_long;

        if ( end > vstart )
            end += end - vstart;
        if ( end <= vstart ||
             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
        {
            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
            nr_pages = (end - vend) / (2 * sizeof_long);
            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
                nr_pages = min_pages;
            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
        }
    }
#endif

    d->max_pages = min_t(unsigned long, max_pages, UINT_MAX);

    return nr_pages;
}
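/*
 * Worked example of the "negative means all-but" convention above, under
 * assumed numbers (PAGE_SHIFT == 12, avail == 1048576 pages == 4GB, no
 * dom0_mem option set): the default reservation min(avail/16, 128MB) is
 * stored negated, then the sign check adds avail back in.
 */
#include <stdio.h>

int main(void)
{
    unsigned long avail = 1048576;               /* 4GB in 4KB pages */
    unsigned long dma_reserve = avail / 16;      /* 65536 pages = 256MB */
    unsigned long cap = 128UL << (20 - 12);      /* 32768 pages = 128MB */
    unsigned long nr_pages;

    /* "-min(...)": store the clamped reservation as a negative request. */
    nr_pages = -(dma_reserve < cap ? dma_reserve : cap);

    /* Negative specification means "all memory - specified amount". */
    if ( (long)nr_pages < 0 )
        nr_pages += avail;

    printf("dom0 gets %lu of %lu pages\n", nr_pages, avail); /* 1015808 */
    return 0;
}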
unsigned long __init dom0_compute_nr_pages(
    struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
    nodeid_t node;
    unsigned long avail = 0, nr_pages, min_pages, max_pages;
    bool_t need_paging;

    for_each_node_mask ( node, dom0_nodes )
        avail += avail_domheap_pages_region(node, 0, 0) +
                 initial_images_nrpages(node);

    /* Reserve memory for further dom0 vcpu-struct allocations... */
    avail -= (d->max_vcpus - 1UL)
             << get_order_from_bytes(sizeof(struct vcpu));
    /* ...and compat_l4's, if needed. */
    if ( is_pv_32bit_domain(d) )
        avail -= d->max_vcpus - 1;

    /* Reserve memory for iommu_dom0_init() (rough estimate). */
    if ( iommu_enabled )
    {
        unsigned int s;

        for ( s = 9; s < BITS_PER_LONG; s += 9 )
            avail -= max_pdx >> s;
    }

    need_paging = is_hvm_domain(d) &&
        (!iommu_hap_pt_share || !paging_mode_hap(d));
    for ( ; ; need_paging = 0 )
    {
        nr_pages = dom0_nrpages;
        min_pages = dom0_min_nrpages;
        max_pages = dom0_max_nrpages;

        /*
         * If allocation isn't specified, reserve 1/16th of available memory
         * for things like DMA buffers. This reservation is clamped to a
         * maximum of 128MB.
         */
        if ( nr_pages == 0 )
            nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));

        /* Negative specification means "all memory - specified amount". */
        if ( (long)nr_pages  < 0 ) nr_pages  += avail;
        if ( (long)min_pages < 0 ) min_pages += avail;
        if ( (long)max_pages < 0 ) max_pages += avail;

        /* Clamp according to min/max limits and available memory. */
        nr_pages = max(nr_pages, min_pages);
        nr_pages = min(nr_pages, max_pages);
        nr_pages = min(nr_pages, avail);

        if ( !need_paging )
            break;

        /* Reserve memory for shadow or HAP. */
        avail -= dom0_paging_pages(d, nr_pages);
    }

    if ( is_pv_domain(d) &&
         (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
         ((dom0_min_nrpages <= 0) || (nr_pages > min_pages)) )
    {
        /*
         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
         * note) require that there is enough virtual space beyond the initial
         * allocation to set up their initial page tables. This space is
         * roughly the same size as the p2m table, so make sure the initial
         * allocation doesn't consume more than about half the space that's
         * available between params.virt_base and the address space end.
         */
        unsigned long vstart, vend, end;
        size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);

        vstart = parms->virt_base;
        vend = round_pgup(parms->virt_kend);
        if ( !parms->unmapped_initrd )
            vend += round_pgup(initrd_len);
        end = vend + nr_pages * sizeof_long;

        if ( end > vstart )
            end += end - vstart;
        if ( end <= vstart ||
             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
        {
            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
            nr_pages = (end - vend) / (2 * sizeof_long);
            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
                nr_pages = min_pages;
            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
        }
    }

    d->max_pages = min_t(unsigned long, max_pages, UINT_MAX);

    return nr_pages;
}
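/*
 * The for(;;) loop above runs at most twice: when a paging-assisted dom0
 * needs shadow/HAP memory, it first sizes dom0 against the full pool, then
 * deducts the paging-structure estimate from 'avail' and recomputes once
 * with need_paging cleared. A condensed, compilable sketch of that control
 * flow -- paging_estimate() and the avail/16 sizing are stand-ins for
 * dom0_paging_pages() and the real min/max clamping, not the actual code:
 */
#include <stdio.h>

static unsigned long paging_estimate(unsigned long nr_pages)
{
    return nr_pages / 256;        /* assumed rough shadow/HAP overhead */
}

static unsigned long size_dom0(unsigned long avail, int need_paging)
{
    unsigned long nr_pages;

    for ( ; ; need_paging = 0 )   /* second pass has paging already costed */
    {
        nr_pages = avail / 16;    /* stand-in for the real clamping */
        if ( !need_paging )
            break;
        avail -= paging_estimate(nr_pages);
    }

    return nr_pages;
}

int main(void)
{
    /* First pass: 65536 pages; second pass, after deduction: 65520. */
    printf("dom0 pages: %lu\n", size_dom0(1048576, 1));
    return 0;
}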