示例#1
0
/*
 * Add a mapping for the machine page at the given virtual address.
 */
static void
map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = ma | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;

	if (map_debug && ma != va)
		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)ma, (uint64_t)va, pteval, level);

#if defined(__xpv)
	/*
	 * see if we can avoid find_pte() on the hypervisor
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * When paravirtualized, we must use hypervisor calls to modify the
	 * PTE, since paging is active. On real hardware we just write to
	 * the pagetables which aren't in use yet.
	 */
#if defined(__xpv)
	ptep = ptep;	/* shut lint up */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
		    (uint64_t)va, level, (uint64_t)ma, pteval);
#else
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;		/* for video RAM */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
#endif
}
示例#2
0
/*
 * Primitive version of panic, prints a message then resets the system
 */
void
dboot_panic(char *fmt, ...)
{
	va_list	args;

	va_start(args, fmt);
	do_dboot_printf(fmt, args);

	if (boot_console_type(NULL) == CONS_SCREEN_TEXT) {
		dboot_printf("Press any key to reboot\n");
		(void) bcons_getchar();
	}
	outb(0x64, 0xfe);	/* this resets the system, see pc_reset() */
	dboot_halt();		/* just in case */
}
示例#3
0
/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}
示例#4
0
/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical address are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}
示例#5
0
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

#ifdef __xpv
	/* Remove write permission to the new page table. */
	if (HYPERVISOR_update_va_mapping(new_table,
	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("HYP_update_va_mapping error");
#endif

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}
示例#6
0
/*
 * parse the elf file for program information
 */
int
dboot_elfload64(uintptr_t file_image)
{
	Elf64_Ehdr *eh;
	Elf64_Phdr *phdr;
	Elf64_Shdr *shdr;
	caddr_t allphdrs, sechdrs;
	int i;
	paddr_t src;
	paddr_t dst;
	paddr_t next_addr;

	elf_file = (caddr_t)file_image;

	allphdrs = NULL;

	eh = getehdr();
	if (eh == NULL)
		dboot_panic("getehdr() failed");

	if (eh->e_type != ET_EXEC)
		dboot_panic("not ET_EXEC, e_type = 0x%x", eh->e_type);

	if (eh->e_phnum == 0 || eh->e_phoff == 0)
		dboot_panic("no program headers");

	/*
	 * Get the program headers.
	 */
	allphdrs = PGETBYTES(eh->e_phoff);
	if (allphdrs == NULL)
		dboot_panic("Failed to get program headers e_phnum = %d",
		    eh->e_phnum);

	/*
	 * Get the section headers.
	 */
	sechdrs = PGETBYTES(eh->e_shoff);
	if (sechdrs == NULL)
		dboot_panic("Failed to get section headers e_shnum = %d",
		    eh->e_shnum);

	/*
	 * Next look for interesting program headers.
	 */
	for (i = 0; i < eh->e_phnum; i++) {
		/*LINTED [ELF program header alignment]*/
		phdr = (Elf64_Phdr *)(allphdrs + eh->e_phentsize * i);

		/*
		 * Dynamically-linked executable.
		 * Complain.
		 */
		if (phdr->p_type == PT_INTERP) {
			dboot_printf("warning: PT_INTERP section\n");
			continue;
		}

		/*
		 * at this point we only care about PT_LOAD segments
		 */
		if (phdr->p_type != PT_LOAD)
			continue;

		if (phdr->p_flags == (PF_R | PF_W) && phdr->p_vaddr == 0) {
			dboot_printf("warning: krtld reloc info?\n");
			continue;
		}

		/*
		 * If memory size is zero just ignore this header.
		 */
		if (phdr->p_memsz == 0)
			continue;

		/*
		 * If load address 1:1 then ignore this header.
		 */
		if (phdr->p_paddr == phdr->p_vaddr) {
			if (prom_debug)
				dboot_printf("Skipping PT_LOAD segment for "
				    "paddr = 0x%lx\n", (ulong_t)phdr->p_paddr);
			continue;
		}

		/*
		 * copy the data to kernel area
		 */
		if (phdr->p_paddr != FOUR_MEG && phdr->p_paddr != 2 * FOUR_MEG)
			dboot_panic("Bad paddr for kernel nucleus segment");
		src = (uintptr_t)PGETBYTES(phdr->p_offset);
		dst = ktext_phys + phdr->p_paddr - FOUR_MEG;
		if (prom_debug)
			dboot_printf("copying %ld bytes from ELF offset 0x%lx "
			    "to physaddr 0x%lx (va=0x%lx)\n",
			    (ulong_t)phdr->p_filesz, (ulong_t)phdr->p_offset,
			    (ulong_t)dst, (ulong_t)phdr->p_vaddr);
		(void) memcpy((void *)(uintptr_t)dst,
		    (void *)(uintptr_t)src, (size_t)phdr->p_filesz);

		next_addr = dst + phdr->p_filesz;
	}


	/*
	 * Next look for bss
	 */
	for (i = 0; i < eh->e_shnum; i++) {
		shdr = (Elf64_Shdr *)(sechdrs + eh->e_shentsize * i);

		/* zero out bss */
		if (shdr->sh_type == SHT_NOBITS) {
			if (prom_debug)
				dboot_printf("zeroing BSS %ld bytes from "
				    "physaddr 0x%llx (end=0x%llx)\n",
				    (ulong_t)shdr->sh_size,
				    (long long unsigned)next_addr,
				    next_addr + shdr->sh_size);
			(void) memset((void *)(uintptr_t)next_addr, 0,
			    shdr->sh_size);
			break;
		}
	}

	/*
	 * Ignore the intepreter (or should we die if there is one??)
	 */
	return (0);
}
示例#7
0
/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 *
 * We then build the phys_install memlist from the multiboot information.
 */
static void
init_mem_alloc(void)
{
	mb_memory_map_t *mmap;
	mb_module_t *mod;
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	extern char _end[];
	int i;

	DBG_MSG("Entered init_mem_alloc()\n");
	DBG((uintptr_t)mb_info);

	if (mb_info->mods_count > MAX_MODULES) {
		dboot_panic("Too many modules (%d) -- the maximum is %d.",
		    mb_info->mods_count, MAX_MODULES);
	}
	/*
	 * search the modules to find the last used address
	 * we'll build the module list while we're walking through here
	 */
	DBG_MSG("\nFinding Modules\n");
	check_higher((paddr_t)&_end);
	for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
	    i < mb_info->mods_count;
	    ++mod, ++i) {
		if (prom_debug) {
			dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n",
			    i, (char *)(mod->mod_name),
			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
		}
		modules[i].bm_addr = mod->mod_start;
		if (mod->mod_start > mod->mod_end) {
			dboot_panic("module[%d]: Invalid module start address "
			    "(0x%llx)", i, (uint64_t)mod->mod_start);
		}
		modules[i].bm_size = mod->mod_end - mod->mod_start;

		check_higher(mod->mod_end);
	}
	bi->bi_modules = (native_ptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = mb_info->mods_count;
	DBG(bi->bi_module_cnt);

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	DBG(mb_info->flags);
	max_mem = 0;
	if (mb_info->flags & 0x40) {
		int cnt = 0;

		DBG(mb_info->mmap_addr);
		DBG(mb_info->mmap_length);
		check_higher(mb_info->mmap_addr + mb_info->mmap_length);

		for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
		    (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length;
		    mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size
		    + sizeof (mmap->size))) {
			++cnt;
			start = ((uint64_t)mmap->base_addr_high << 32) +
			    mmap->base_addr_low;
			end = start + ((uint64_t)mmap->length_high << 32) +
			    mmap->length_low;

			if (prom_debug)
				dboot_printf("\ttype: %d %" PRIx64 "..%"
				    PRIx64 "\n", mmap->type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			/*
			 * only type 1 is usable RAM
			 */
			switch (mmap->type) {
			case 1:
				if (end > max_mem)
					max_mem = end;
				memlists[memlists_used].addr = start;
				memlists[memlists_used].size = end - start;
				++memlists_used;
				if (memlists_used > MAX_MEMLIST)
					dboot_panic("too many memlists");
				break;
			case 2:
				rsvdmemlists[rsvdmemlists_used].addr = start;
				rsvdmemlists[rsvdmemlists_used].size =
				    end - start;
				++rsvdmemlists_used;
				if (rsvdmemlists_used > MAX_MEMLIST)
					dboot_panic("too many rsvdmemlists");
				break;
			default:
				continue;
			}
		}
		build_pcimemlists((mb_memory_map_t *)mb_info->mmap_addr, cnt);
	} else if (mb_info->flags & 0x01) {
		DBG(mb_info->mem_lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = mb_info->mem_lower * 1024;
		++memlists_used;
		DBG(mb_info->mem_upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = mb_info->mem_upper * 1024;
		++memlists_used;

		/*
		 * Old platform - assume I/O space at the end of memory.
		 */
		pcimemlists[0].addr =
		    (mb_info->mem_upper * 1024) + (1024 * 1024);
		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
		pcimemlists[0].next = 0;
		pcimemlists[0].prev = 0;
		bi->bi_pcimem = (native_ptr_t)pcimemlists;
		DBG(bi->bi_pcimem);
	} else {
		dboot_panic("No memory info from boot loader!!!");
	}

	check_higher(bi->bi_cmdline);

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved mem lists
	 */
	build_rsvdmemlists();
}
示例#8
0
static void
build_pcimemlists(mmap_t *mem, int num)
{
	mmap_t *mmap;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint64_t start;
	uint64_t end;
	int i;

	/*
	 * initialize
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	/*
	 * Fill in PCI memlists.
	 */
	for (mmap = mem, i = 0; i < num; ++i, ++mmap) {
		start = ((uint64_t)mmap->base_addr_high << 32) +
		    mmap->base_addr_low;
		end = start + ((uint64_t)mmap->length_high << 32) +
		    mmap->length_low;

		if (prom_debug)
			dboot_printf("\ttype: %d %" PRIx64 "..%"
			    PRIx64 "\n", mmap->type, start, end);

		/*
		 * page align start and end
		 */
		start = (start + page_offset) & ~page_offset;
		end &= ~page_offset;
		if (end <= start)
			continue;

		exclude_from_pci(start, end);
	}

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}
示例#9
0
/*
 * dump out the contents of page tables...
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t	l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}
示例#10
0
/*
 * Either hypervisor-specific or grub-specific code builds the initial
 * memlists. This code does the sort/merge/link for final use.
 */
static void
sort_physinstall(void)
{
	int i;
#if !defined(__xpv)
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
	 */
	for (i = 0; i <= memlists_used - 1; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}
#endif	/* __xpv */

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)memlists;
	DBG(bi->bi_phys_install);
}
示例#11
0
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	uintptr_t addr;
#if defined(__xpv)
	physdev_set_iopl_t set_iopl;
#endif /* __xpv */

	/*
	 * At this point we are executing in a 32 bit real mode.
	 */
#if defined(__xpv)
	cmdline = (char *)xen_info->cmd_line;
#else /* __xpv */
	cmdline = (char *)mb_info->cmdline;
#endif /* __xpv */

	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
	map_debug = (strstr(cmdline, "map_debug") != NULL);

#if defined(__xpv)
	/*
	 * For dom0, before we initialize the console subsystem we'll
	 * need to enable io operations, so set I/O priveldge level to 1.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		set_iopl.iopl = 1;
		(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	}
#endif /* __xpv */

	bcons_init(cmdline);
	DBG_MSG("\n\nSolaris prekernel set: ");
	DBG_MSG(cmdline);
	DBG_MSG("\n");

	if (strstr(cmdline, "multiboot") != NULL) {
		dboot_panic(NO_MULTIBOOT);
	}

	/*
	 * boot info must be 16 byte aligned for 64 bit kernel ABI
	 */
	addr = (uintptr_t)boot_info;
	addr = (addr + 0xf) & ~0xf;
	bi = (struct xboot_info *)addr;
	DBG((uintptr_t)bi);
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#elif defined(__xpv)
	target_kernel_text = KERNEL_TEXT_i386_xpv;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

#if defined(__xpv)

	/*
	 * XXPV	Derive this stuff from CPUID / what the hypervisor has enabled
	 */

#if defined(_BOOT_TARGET_amd64)
	/*
	 * 64-bit hypervisor.
	 */
	amd64_support = 1;
	pae_support = 1;

#else	/* _BOOT_TARGET_amd64 */

	/*
	 * See if we are running on a PAE Hypervisor
	 */
	{
		xen_capabilities_info_t caps;

		if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
			dboot_panic("HYPERVISOR_xen_version(caps) failed");
		caps[sizeof (caps) - 1] = 0;
		if (prom_debug)
			dboot_printf("xen capabilities %s\n", caps);
		if (strstr(caps, "x86_32p") != NULL)
			pae_support = 1;
	}

#endif	/* _BOOT_TARGET_amd64 */
	{
		xen_platform_parameters_t p;

		if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
			dboot_panic("HYPERVISOR_xen_version(parms) failed");
		DBG(p.virt_start);
		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
	}

	/*
	 * The hypervisor loads stuff starting at 1Gig
	 */
	mfn_base = ONE_GIG;
	DBG(mfn_base);

	/*
	 * enable writable page table mode for the hypervisor
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_writable_pagetables) < 0)
		dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");

	/*
	 * check for NX support
	 */
	if (pae_support) {
		uint32_t eax = 0x80000000;
		uint32_t edx = get_cpuid_edx(&eax);

		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	}

#if !defined(_BOOT_TARGET_amd64)

	/*
	 * The 32-bit hypervisor uses segmentation to protect itself from
	 * guests. This means when a guest attempts to install a flat 4GB
	 * code or data descriptor the 32-bit hypervisor will protect itself
	 * by silently shrinking the segment such that if the guest attempts
	 * any access where the hypervisor lives a #gp fault is generated.
	 * The problem is that some applications expect a full 4GB flat
	 * segment for their current thread pointer and will use negative
	 * offset segment wrap around to access data. TLS support in linux
	 * brand is one example of this.
	 *
	 * The 32-bit hypervisor can catch the #gp fault in these cases
	 * and emulate the access without passing the #gp fault to the guest
	 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
	 * Seems like this should have been the default.
	 * Either way, we want the hypervisor -- and not Solaris -- to deal
	 * to deal with emulating these accesses.
	 */
	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
	    VMASST_TYPE_4gb_segments) < 0)
		dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
#endif	/* !_BOOT_TARGET_amd64 */

#else	/* __xpv */

	/*
	 * use cpuid to enable MMU features
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}
#endif /* __xpv */


#if defined(_BOOT_TARGET_amd64)
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting");
	else if (pae_support == 0)
		dboot_panic("long mode, but no PAE; rebooting");
#else
	/*
	 * Allow the command line to over-ride use of PAE for 32 bit.
	 */
	if (strstr(cmdline, "disablePAE=true") != NULL) {
		pae_support = 0;
		NX_support = 0;
		amd64_support = 0;
	}
#endif

	/*
	 * initialize the simple memory allocator
	 */
	init_mem_alloc();

#if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
	/*
	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
	 */
	if (max_mem < FOUR_GIG && NX_support == 0)
		pae_support = 0;
#endif

	/*
	 * configure mmu information
	 */
	if (pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

#if defined(__xpv)
	ktext_phys = ONE_GIG;		/* from UNIX Mapfile */
#else
	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */
#endif

#if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory");
	if (dboot_elfload64(mb_header.load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting");
#endif

	DBG(ktext_phys);

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;

#if defined(__xpv)

	bi->bi_next_paddr = next_avail_addr - mfn_base;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (native_ptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);

	/*
	 * unmap unused pages in start area to make them available for DMA
	 */
	while (next_avail_addr < scratch_end) {
		(void) HYPERVISOR_update_va_mapping(next_avail_addr,
		    0, UVMF_INVLPG | UVMF_LOCAL);
		next_avail_addr += MMU_PAGESIZE;
	}

	bi->bi_xen_start_info = (uintptr_t)xen_info;
	DBG((uintptr_t)HYPERVISOR_shared_info);
	bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;

#else /* __xpv */

	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_info = (uintptr_t)mb_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table;

#endif /* __xpv */

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

#ifndef __xpv
	if (map_debug)
		dump_tables();
#endif

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}
示例#12
0
/*
 * Build page tables to map all of memory used so far as well as the kernel.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
#if !defined(__xpv)
	uint32_t i;
	uint64_t end;
#endif	/* __xpv */

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 */
#if defined(__xpv)
	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
#else /* __xpv */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
#endif /* __xpv */
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

#if defined(__xpv)
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* If this is a domU we're done. */
		DBG_MSG("\nPage tables constructed\n");
		return;
	}
#endif /* __xpv */

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed device in low
	 * memory.  (Specifically the video frame buffer and keyboard
	 * status ports.)  If we're booting on raw hardware then GRUB
	 * created these mappings for us.  If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
#if defined(__xpv)
		map_ma_at_va(start, start, 0);
#else /* __xpv */
		map_pa_at_va(start, start, 0);
#endif /* __xpv */
	}

#if !defined(__xpv)
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;

		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}
#endif /* !__xpv */

	DBG_MSG("\nPage tables constructed\n");
}