int __init construct_dom0( struct domain *d, unsigned long _image_start, unsigned long image_len, unsigned long _initrd_start, unsigned long initrd_len, char *cmdline) { int i, rc, compatible, compat32, order, machine; struct cpu_user_regs *regs; unsigned long pfn, mfn; unsigned long nr_pages; unsigned long nr_pt_pages; unsigned long alloc_spfn; unsigned long alloc_epfn; unsigned long count; struct page_info *page = NULL; start_info_t *si; struct vcpu *v = d->vcpu[0]; unsigned long long value; #if defined(__i386__) char *image_start = (char *)_image_start; /* use lowmem mappings */ char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ #elif defined(__x86_64__) char *image_start = __va(_image_start); char *initrd_start = __va(_initrd_start); #endif #if CONFIG_PAGING_LEVELS >= 4 l4_pgentry_t *l4tab = NULL, *l4start = NULL; #endif l3_pgentry_t *l3tab = NULL, *l3start = NULL; l2_pgentry_t *l2tab = NULL, *l2start = NULL; l1_pgentry_t *l1tab = NULL, *l1start = NULL; /* * This fully describes the memory layout of the initial domain. All * *_start address are page-aligned, except v_start (and v_end) which are * superpage-aligned. */ struct elf_binary elf; struct elf_dom_parms parms; unsigned long vkern_start; unsigned long vkern_end; unsigned long vinitrd_start; unsigned long vinitrd_end; unsigned long vphysmap_start; unsigned long vphysmap_end; unsigned long vstartinfo_start; unsigned long vstartinfo_end; unsigned long vstack_start; unsigned long vstack_end; unsigned long vpt_start; unsigned long vpt_end; unsigned long v_start; unsigned long v_end; /* Machine address of next candidate page-table page. */ unsigned long mpt_alloc; /* Sanity! */ BUG_ON(d->domain_id != 0); BUG_ON(d->vcpu[0] == NULL); BUG_ON(v->is_initialised); printk("*** LOADING DOMAIN 0 ***\n"); d->max_pages = ~0U; nr_pages = compute_dom0_nr_pages(); if ( (rc = elf_init(&elf, image_start, image_len)) != 0 ) return rc; #ifdef VERBOSE elf_set_verbose(&elf); #endif elf_parse_binary(&elf); if ( (rc = elf_xen_parse(&elf, &parms)) != 0 ) return rc; /* compatibility check */ compatible = 0; compat32 = 0; machine = elf_uval(&elf, elf.ehdr, e_machine); switch (CONFIG_PAGING_LEVELS) { case 3: /* x86_32p */ if (parms.pae == PAEKERN_bimodal) parms.pae = PAEKERN_extended_cr3; printk(" Xen kernel: 32-bit, PAE, lsb\n"); if (elf_32bit(&elf) && parms.pae && machine == EM_386) compatible = 1; break; case 4: /* x86_64 */ printk(" Xen kernel: 64-bit, lsb, compat32\n"); if (elf_32bit(&elf) && parms.pae == PAEKERN_bimodal) parms.pae = PAEKERN_extended_cr3; if (elf_32bit(&elf) && parms.pae && machine == EM_386) { compat32 = 1; compatible = 1; } if (elf_64bit(&elf) && machine == EM_X86_64) compatible = 1; break; } printk(" Dom0 kernel: %s%s, %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n", elf_64bit(&elf) ? "64-bit" : "32-bit", parms.pae ? ", PAE" : "", elf_msb(&elf) ? "msb" : "lsb", elf.pstart, elf.pend); if ( elf.bsd_symtab_pstart ) printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n", elf.bsd_symtab_pstart, elf.bsd_symtab_pend); if ( !compatible ) { printk("Mismatch between Xen and DOM0 kernel\n"); return -EINVAL; } #if defined(__x86_64__) if ( compat32 ) { l1_pgentry_t gdt_l1e; d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1; v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; if ( nr_pages != (unsigned int)nr_pages ) nr_pages = UINT_MAX; /* * Map compatibility Xen segments into every VCPU's GDT. See * arch_domain_create() for further comments. */ gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR); for ( i = 0; i < MAX_VIRT_CPUS; i++ ) d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) + FIRST_RESERVED_GDT_PAGE)] = gdt_l1e; flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE); } #endif if ( parms.pae == PAEKERN_extended_cr3 ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) ) { unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; value = (parms.virt_hv_start_low + mask) & ~mask; BUG_ON(!is_pv_32bit_domain(d)); #if defined(__i386__) if ( value > HYPERVISOR_VIRT_START ) panic("Domain 0 expects too high a hypervisor start address.\n"); #else if ( value > __HYPERVISOR_COMPAT_VIRT_START ) panic("Domain 0 expects too high a hypervisor start address.\n"); HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value); #endif }
uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr) { uint64_t elf_round = (elf_64bit(elf) ? 8 : 4) - 1; return (addr + elf_round) & ~elf_round; }
static int xc_dom_load_elf_symtab(struct xc_dom_image *dom, struct elf_binary *elf, int load) { struct elf_binary syms; const elf_shdr *shdr, *shdr2; xen_vaddr_t symtab, maxaddr; char *hdr; size_t size; int h, count, type, i, tables = 0; if ( elf_swap(elf) ) { xc_dom_printf("%s: non-native byte order, bsd symtab not supported\n", __FUNCTION__); return 0; } if ( load ) { if ( !dom->bsd_symtab_start ) return 0; size = dom->kernel_seg.vend - dom->bsd_symtab_start; hdr = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start); *(int *)hdr = size - sizeof(int); } else { size = sizeof(int) + elf_size(elf, elf->ehdr) + elf_shdr_count(elf) * elf_size(elf, shdr); hdr = xc_dom_malloc(dom, size); if ( hdr == NULL ) return 0; dom->bsd_symtab_start = elf_round_up(&syms, dom->kernel_seg.vend); } memcpy(hdr + sizeof(int), elf->image, elf_size(elf, elf->ehdr)); memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr), elf->image + elf_uval(elf, elf->ehdr, e_shoff), elf_shdr_count(elf) * elf_size(elf, shdr)); if ( elf_64bit(elf) ) { Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int)); ehdr->e_phoff = 0; ehdr->e_phentsize = 0; ehdr->e_phnum = 0; ehdr->e_shoff = elf_size(elf, elf->ehdr); ehdr->e_shstrndx = SHN_UNDEF; } else { Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int)); ehdr->e_phoff = 0; ehdr->e_phentsize = 0; ehdr->e_phnum = 0; ehdr->e_shoff = elf_size(elf, elf->ehdr); ehdr->e_shstrndx = SHN_UNDEF; } if ( elf_init(&syms, hdr + sizeof(int), size - sizeof(int)) ) return -1; if ( xc_dom_logfile ) elf_set_logfile(&syms, xc_dom_logfile, 1); symtab = dom->bsd_symtab_start + sizeof(int); maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) + elf_shdr_count(&syms) * elf_size(&syms, shdr)); xc_dom_printf("%s/%s: bsd_symtab_start=%" PRIx64 ", kernel.end=0x%" PRIx64 " -- symtab=0x%" PRIx64 ", maxaddr=0x%" PRIx64 "\n", __FUNCTION__, load ? "load" : "parse", dom->bsd_symtab_start, dom->kernel_seg.vend, symtab, maxaddr); count = elf_shdr_count(&syms); for ( h = 0; h < count; h++ ) { shdr = elf_shdr_by_index(&syms, h); type = elf_uval(&syms, shdr, sh_type); if ( type == SHT_STRTAB ) { /* Look for a strtab @i linked to symtab @h. */ for ( i = 0; i < count; i++ ) { shdr2 = elf_shdr_by_index(&syms, i); if ( (elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) && (elf_uval(&syms, shdr2, sh_link) == h) ) break; } /* Skip symtab @h if we found no corresponding strtab @i. */ if ( i == count ) { if ( elf_64bit(&syms) ) *(Elf64_Off*)(&shdr->e64.sh_offset) = 0; else *(Elf32_Off*)(&shdr->e32.sh_offset) = 0; continue; } } if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) ) { /* Mangled to be based on ELF header location. */ if ( elf_64bit(&syms) ) *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab; else *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab; size = elf_uval(&syms, shdr, sh_size); maxaddr = elf_round_up(&syms, maxaddr + size); tables++; xc_dom_printf("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "\n", __FUNCTION__, h, type == SHT_SYMTAB ? "symtab" : "strtab", size, maxaddr); if ( load ) { shdr2 = elf_shdr_by_index(elf, h); memcpy((void*)elf_section_start(&syms, shdr), elf_section_start(elf, shdr2), size); } } /* Name is NULL. */ if ( elf_64bit(&syms) ) *(Elf64_Half*)(&shdr->e64.sh_name) = 0; else *(Elf32_Word*)(&shdr->e32.sh_name) = 0; } if ( tables == 0 ) { xc_dom_printf("%s: no symbol table present\n", __FUNCTION__); dom->bsd_symtab_start = 0; return 0; } if ( !load ) dom->kernel_seg.vend = maxaddr; return 0; }
int __init construct_dom0( struct domain *d, const module_t *image, unsigned long image_headroom, module_t *initrd, void *(*bootstrap_map)(const module_t *), char *cmdline) { int i, cpu, rc, compatible, compat32, order, machine; struct cpu_user_regs *regs; unsigned long pfn, mfn; unsigned long nr_pages; unsigned long nr_pt_pages; unsigned long alloc_spfn; unsigned long alloc_epfn; unsigned long initrd_pfn = -1, initrd_mfn = 0; unsigned long count; struct page_info *page = NULL; start_info_t *si; struct vcpu *v = d->vcpu[0]; unsigned long long value; char *image_base = bootstrap_map(image); unsigned long image_len = image->mod_end; char *image_start = image_base + image_headroom; unsigned long initrd_len = initrd ? initrd->mod_end : 0; #if CONFIG_PAGING_LEVELS < 4 module_t mpt; void *mpt_ptr; #else l4_pgentry_t *l4tab = NULL, *l4start = NULL; #endif l3_pgentry_t *l3tab = NULL, *l3start = NULL; l2_pgentry_t *l2tab = NULL, *l2start = NULL; l1_pgentry_t *l1tab = NULL, *l1start = NULL; /* * This fully describes the memory layout of the initial domain. All * *_start address are page-aligned, except v_start (and v_end) which are * superpage-aligned. */ struct elf_binary elf; struct elf_dom_parms parms; unsigned long vkern_start; unsigned long vkern_end; unsigned long vinitrd_start; unsigned long vinitrd_end; unsigned long vphysmap_start; unsigned long vphysmap_end; unsigned long vstartinfo_start; unsigned long vstartinfo_end; unsigned long vstack_start; unsigned long vstack_end; unsigned long vpt_start; unsigned long vpt_end; unsigned long v_start; unsigned long v_end; /* Machine address of next candidate page-table page. */ paddr_t mpt_alloc; /* Sanity! */ BUG_ON(d->domain_id != 0); BUG_ON(d->vcpu[0] == NULL); BUG_ON(v->is_initialised); printk("*** LOADING DOMAIN 0 ***\n"); d->max_pages = ~0U; if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 ) return rc; if ( (rc = elf_init(&elf, image_start, image_len)) != 0 ) return rc; #ifdef VERBOSE elf_set_verbose(&elf); #endif elf_parse_binary(&elf); if ( (rc = elf_xen_parse(&elf, &parms)) != 0 ) return rc; /* compatibility check */ compatible = compat32 = 0; machine = elf_uval(&elf, elf.ehdr, e_machine); switch (CONFIG_PAGING_LEVELS) { case 3: /* x86_32p */ if (parms.pae == PAEKERN_bimodal) parms.pae = PAEKERN_extended_cr3; printk(" Xen kernel: 32-bit, PAE, lsb\n"); if (elf_32bit(&elf) && parms.pae && machine == EM_386) compatible = 1; break; case 4: /* x86_64 */ printk(" Xen kernel: 64-bit, lsb, compat32\n"); if (elf_32bit(&elf) && parms.pae == PAEKERN_bimodal) parms.pae = PAEKERN_extended_cr3; if (elf_32bit(&elf) && parms.pae && machine == EM_386) { compat32 = 1; compatible = 1; } if (elf_64bit(&elf) && machine == EM_X86_64) compatible = 1; break; } printk(" Dom0 kernel: %s%s, %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n", elf_64bit(&elf) ? "64-bit" : "32-bit", parms.pae ? ", PAE" : "", elf_msb(&elf) ? "msb" : "lsb", elf.pstart, elf.pend); if ( elf.bsd_symtab_pstart ) printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n", elf.bsd_symtab_pstart, elf.bsd_symtab_pend); if ( !compatible ) { printk("Mismatch between Xen and DOM0 kernel\n"); return -EINVAL; } if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE && !test_bit(XENFEAT_dom0, parms.f_supported) ) { printk("Kernel does not support Dom0 operation\n"); return -EINVAL; } #if defined(__x86_64__) if ( compat32 ) { d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1; v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; if ( setup_compat_arg_xlat(v) != 0 ) BUG(); } #endif nr_pages = compute_dom0_nr_pages(d, &parms, initrd_len); if ( parms.pae == PAEKERN_extended_cr3 ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) ) { unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; value = (parms.virt_hv_start_low + mask) & ~mask; BUG_ON(!is_pv_32bit_domain(d)); #if defined(__i386__) if ( value > HYPERVISOR_VIRT_START ) panic("Domain 0 expects too high a hypervisor start address.\n"); #else if ( value > __HYPERVISOR_COMPAT_VIRT_START ) panic("Domain 0 expects too high a hypervisor start address.\n"); HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value); #endif }
static int __init pvh_load_kernel(struct domain *d, const module_t *image, unsigned long image_headroom, module_t *initrd, void *image_base, char *cmdline, paddr_t *entry, paddr_t *start_info_addr) { void *image_start = image_base + image_headroom; unsigned long image_len = image->mod_end; struct elf_binary elf; struct elf_dom_parms parms; paddr_t last_addr; struct hvm_start_info start_info = { 0 }; struct hvm_modlist_entry mod = { 0 }; struct vcpu *v = d->vcpu[0]; int rc; if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 ) { printk("Error trying to detect bz compressed kernel\n"); return rc; } if ( (rc = elf_init(&elf, image_start, image_len)) != 0 ) { printk("Unable to init ELF\n"); return rc; } #ifdef VERBOSE elf_set_verbose(&elf); #endif elf_parse_binary(&elf); if ( (rc = elf_xen_parse(&elf, &parms)) != 0 ) { printk("Unable to parse kernel for ELFNOTES\n"); return rc; } if ( parms.phys_entry == UNSET_ADDR32 ) { printk("Unable to find XEN_ELFNOTE_PHYS32_ENTRY address\n"); return -EINVAL; } printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os, parms.guest_ver, parms.loader, elf_64bit(&elf) ? "64-bit" : "32-bit"); /* Copy the OS image and free temporary buffer. */ elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base); elf.dest_size = parms.virt_kend - parms.virt_kstart; elf_set_vcpu(&elf, v); rc = elf_load_binary(&elf); if ( rc < 0 ) { printk("Failed to load kernel: %d\n", rc); printk("Xen dom0 kernel broken ELF: %s\n", elf_check_broken(&elf)); return rc; } last_addr = ROUNDUP(parms.virt_kend - parms.virt_base, PAGE_SIZE); if ( initrd != NULL ) { rc = hvm_copy_to_guest_phys(last_addr, mfn_to_virt(initrd->mod_start), initrd->mod_end, v); if ( rc ) { printk("Unable to copy initrd to guest\n"); return rc; } mod.paddr = last_addr; mod.size = initrd->mod_end; last_addr += ROUNDUP(initrd->mod_end, PAGE_SIZE); } /* Free temporary buffers. */ discard_initial_images(); if ( cmdline != NULL ) { rc = hvm_copy_to_guest_phys(last_addr, cmdline, strlen(cmdline) + 1, v); if ( rc ) { printk("Unable to copy guest command line\n"); return rc; } start_info.cmdline_paddr = last_addr; /* * Round up to 32/64 bits (depending on the guest kernel bitness) so * the modlist/start_info is aligned. */ last_addr += ROUNDUP(strlen(cmdline) + 1, elf_64bit(&elf) ? 8 : 4); } if ( initrd != NULL ) { rc = hvm_copy_to_guest_phys(last_addr, &mod, sizeof(mod), v); if ( rc ) { printk("Unable to copy guest modules\n"); return rc; } start_info.modlist_paddr = last_addr; start_info.nr_modules = 1; last_addr += sizeof(mod); } start_info.magic = XEN_HVM_START_MAGIC_VALUE; start_info.flags = SIF_PRIVILEGED | SIF_INITDOMAIN; rc = hvm_copy_to_guest_phys(last_addr, &start_info, sizeof(start_info), v); if ( rc ) { printk("Unable to copy start info to guest\n"); return rc; } *entry = parms.phys_entry; *start_info_addr = last_addr; return 0; }