Example #1
/*ARGSUSED*/
void
hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
	offset_t kpm_pages_off)
{
	pfn_t base, end;

	/*
	 * kphysm_add_memory_dynamic() does not set nkpmpgs
	 * when page_t memory is externally allocated.  That
	 * code must properly calculate nkpmpgs in all cases
	 * if nkpmpgs needs to be used at some point.
	 */

	/*
	 * The meta (page_t) pages for dynamically added memory are allocated
	 * either from the incoming memory itself or from existing memory.
	 * In the former case the base of the incoming pages will be different
	 * than the base of the dynamic segment so call memseg_get_start() to
	 * get the actual base of the incoming memory for each case.
	 */

	base = memseg_get_start(msp);
	end = msp->pages_end;

	hat_devload(kas.a_hat, kpm_vbase + mmu_ptob(base),
	    mmu_ptob(end - base), base, PROT_READ | PROT_WRITE,
	    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
}
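Every example in this listing leans on the kernel's page/byte conversion macros. As a quick reference, the following is a minimal user-land sketch of their conventional definitions, not the kernel headers themselves; the real definitions live in each platform's machine headers and may differ in exact types and casts, and MMU_PAGESHIFT == 12 (4 KB pages, as on x86) is an assumption here.

/*
 * Hypothetical stand-ins for the illumos mmu_ptob/mmu_btop/mmu_btopr macros,
 * assuming a 4 KB MMU page.  For illustration only.
 */
#include <stdio.h>

#define	MMU_PAGESHIFT	12
#define	MMU_PAGESIZE	(1UL << MMU_PAGESHIFT)
#define	MMU_PAGEOFFSET	(MMU_PAGESIZE - 1)

#define	mmu_ptob(x)	((unsigned long)(x) << MMU_PAGESHIFT)	/* pages -> bytes */
#define	mmu_btop(x)	((unsigned long)(x) >> MMU_PAGESHIFT)	/* bytes -> pages, rounded down */
#define	mmu_btopr(x)	(((unsigned long)(x) + MMU_PAGEOFFSET) >> MMU_PAGESHIFT) /* rounded up */

int
main(void)
{
	unsigned long len = 10000;	/* arbitrary byte count */

	printf("mmu_btop(%lu)  = %lu page(s)\n", len, mmu_btop(len));
	printf("mmu_btopr(%lu) = %lu page(s)\n", len, mmu_btopr(len));
	printf("mmu_ptob(3)    = %lu bytes\n", mmu_ptob(3));
	return (0);
}

Running it prints 2 pages, 3 pages, and 12288 bytes, showing the round-down (mmu_btop) versus round-up (mmu_btopr) behavior the examples below rely on.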
Example #2
/*
 * Return the virtual address of the mapping area
 */
caddr_t
i_cpr_map_setup(void)
{
	/*
	 * Allocate a virtual memory range spanned by an hmeblk.
	 * This would be 8 hments or 64k bytes.  Starting VA
	 * must be 64k (8-page) aligned.
	 */
	cpr_vaddr = vmem_xalloc(heap_arena,
	    mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
	    0, 0, NULL, NULL, VM_NOSLEEP);
	return (cpr_vaddr);
}
Example #3
/*
 * save/restore prom pages or free related allocs
 */
int
i_cpr_prom_pages(int action)
{
	int error;

	if (action == CPR_PROM_SAVE) {
		if (ppage_buf == NULL) {
			ASSERT(pphys_list == NULL);
			if (error = i_cpr_find_ppages())
				return (error);
			i_cpr_save_ppages();
		}
	} else if (action == CPR_PROM_RESTORE) {
		i_cpr_restore_ppages();
	} else if (action == CPR_PROM_FREE) {
		if (pphys_list) {
			ASSERT(pphys_list_size);
			kmem_free(pphys_list, pphys_list_size);
			pphys_list = NULL;
			pphys_list_size = 0;
		}
		if (ppage_buf) {
			ASSERT(ppage_count);
			kmem_free(ppage_buf, mmu_ptob(ppage_count));
			CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
			    ppage_count);
			ppage_buf = NULL;
			ppage_count = 0;
		}
	}
	return (0);
}
Example #4
pgcnt_t
i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
{
	pgcnt_t count = 0;

	if (i_cpr_storage_desc_base) {
		count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
		    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
		    mapflag, bitfunc, DBG_SHOWRANGE);
	}
	if (i_cpr_storage_data_base) {
		count += cpr_count_pages(i_cpr_storage_data_base,
		    (size_t)mmu_ptob(i_cpr_storage_data_sz),
		    mapflag, bitfunc, DBG_SHOWRANGE);
	}
	return (count);
}
Example #5
static paddr_t
mdb_ma_to_pa(uint64_t ma)
{
	pfn_t pfn = mdb_mfn_to_pfn(mmu_btop(ma));
	if (pfn == -(pfn_t)1)
		return (-(paddr_t)1);

	return (mmu_ptob((paddr_t)pfn) | (ma & (MMU_PAGESIZE - 1)));
}
Example #6
/*ARGSUSED*/
void
hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
{
	pfn_t base, end;

	/*
	 * The meta (page_t) pages for dynamically added memory are allocated
	 * either from the incoming memory itself or from existing memory.
	 * In the former case the base of the incoming pages will be different
	 * than the base of the dynamic segment so call memseg_get_start() to
	 * get the actual base of the incoming memory for each case.
	 */

	base = memseg_get_start(msp);
	end = msp->pages_end;

	hat_unload(kas.a_hat, kpm_vbase + mmu_ptob(base), mmu_ptob(end - base),
	    HAT_UNLOAD | HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP);
}
Example #7
/*
 * Derived from cpr_write_statefile().
 * Allocate (or reallocate after exhausting the supply) descriptors for each
 * chunk of contiguous sensitive kpages.
 */
static int
i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
    int retry)
{
	pgcnt_t npages;
	int chunks;
	csd_t	*descp, *end;
	size_t	len;
	char *str = "i_cpr_storage_desc_alloc:";

	/*
	 * On initial allocation, add some extra to cover overhead caused
	 * by the allocation for the storage area later.
	 */
	if (retry == 0) {
		chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
		    EXTRA_DESCS;
		npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
		CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
	} else {
		CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
		npages = *pgsp + 1;
	}
	/* Free old descriptors, if any */
	if (*basepp)
		kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));

	descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
	if (descp == NULL) {
		CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
		return (ENOMEM);
	}

	*pgsp = npages;
	len = mmu_ptob(npages);
	end = *endpp = descp + (len / (sizeof (**basepp)));
	CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
	    "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
	    (void *)*basepp, (void *)*endpp);
	i_cpr_storage_desc_init(descp, npages, end);
	return (0);
}
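The allocation above sizes the descriptor array in whole pages with mmu_btopr() and then derives how many descriptors actually fit from mmu_ptob() of that page count. The toy sketch below reproduces only that sizing arithmetic in user land; the csd_t layout and the chunk count are hypothetical stand-ins, malloc() replaces kmem_alloc(), and 4 KB pages are assumed.

/*
 * Toy model of the sizing logic in i_cpr_storage_desc_alloc():
 * round the descriptor array up to whole pages, then compute how many
 * descriptors fit in that allocation.
 */
#include <stdio.h>
#include <stdlib.h>

#define	MMU_PAGESHIFT	12
#define	MMU_PAGEOFFSET	((1UL << MMU_PAGESHIFT) - 1)
#define	mmu_btopr(x)	(((unsigned long)(x) + MMU_PAGEOFFSET) >> MMU_PAGESHIFT)
#define	mmu_ptob(x)	((unsigned long)(x) << MMU_PAGESHIFT)

typedef struct csd {			/* hypothetical stand-in for the real csd_t */
	unsigned long csd_dirty_spfn;
	unsigned long csd_npages;
} csd_t;

int
main(void)
{
	unsigned long chunks = 300;	/* hypothetical descriptor demand */
	unsigned long npages = mmu_btopr(sizeof (csd_t) * chunks);
	size_t len = mmu_ptob(npages);
	csd_t *base = malloc(len);
	csd_t *end;

	if (base == NULL)
		return (1);
	end = base + len / sizeof (csd_t);

	printf("need %lu descriptors -> %lu page(s), %zu bytes, room for %td\n",
	    chunks, npages, len, end - base);
	free(base);
	return (0);
}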
Example #8
/*
 * Replacement for devmap_devmem_setup() which will map a machine address
 * instead of a register set/offset.
 */
void
gfxp_map_devmem(devmap_cookie_t dhc, gfx_maddr_t maddr, size_t length,
    ddi_device_acc_attr_t *attrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	pfn_t pfn;


#ifdef __xpv
	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
	pfn = xen_assign_pfn(mmu_btop(maddr));
#else
	pfn = mmu_btop(maddr);
#endif

	dhp->dh_pfn = pfn;
	dhp->dh_len = mmu_ptob(mmu_btopr(length));
	dhp->dh_roff = 0;

#ifndef DEVMAP_DEVMEM_COOKIE
#define	DEVMAP_DEVMEM_COOKIE	((ddi_umem_cookie_t)0x1) /* XXPV */
#endif /* DEVMAP_DEVMEM_COOKIE */
	dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
	/*LINTED: E_EXPR_NULL_EFFECT*/
	dhp->dh_flags |= DEVMAP_DEFAULTS;
	dhp->dh_maxprot = PROT_ALL & dhp->dh_orig_maxprot;

	/* no callbacks needed */
	bzero(&dhp->dh_callbackops, sizeof (struct devmap_callback_ctl));

	switch (attrp->devacc_attr_dataorder) {
	case DDI_UNORDERED_OK_ACC:
		dhp->dh_hat_attr = HAT_UNORDERED_OK;
		break;
	case DDI_MERGING_OK_ACC:
		dhp->dh_hat_attr = HAT_MERGING_OK;
		break;
	case DDI_LOADCACHING_OK_ACC:
		dhp->dh_hat_attr = HAT_LOADCACHING_OK;
		break;
	case DDI_STORECACHING_OK_ACC:
		dhp->dh_hat_attr = HAT_STORECACHING_OK;
		break;
	case DDI_STRICTORDER_ACC:
	default:
		dhp->dh_hat_attr = HAT_STRICTORDER;
	}

	/* don't use large pages */
	dhp->dh_mmulevel = 0;
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_flags |= DEVMAP_SETUP_DONE;
}
Example #9
void
i_cpr_storage_free(void)
{
	/* Free descriptors */
	if (i_cpr_storage_desc_base) {
		kmem_free(i_cpr_storage_desc_base,
		    mmu_ptob(i_cpr_storage_desc_pgcnt));
		i_cpr_storage_desc_base = NULL;
		i_cpr_storage_desc_pgcnt = 0;
	}


	/* Data storage */
	if (i_cpr_storage_data_base) {
		kmem_free(i_cpr_storage_data_base,
		    mmu_ptob(i_cpr_storage_data_sz));
		i_cpr_storage_data_base = NULL;
		i_cpr_storage_data_sz = 0;
	}
}
Example #10
static void
i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
{
	size_t	len = mmu_ptob(npages);

	/* Initialize the descriptors to something impossible. */
	bzero(descp, len);
#ifdef	DEBUG
	/*
	 * This condition is tested by an ASSERT
	 */
	for (; descp < end; descp++)
		descp->csd_dirty_spfn = (uint_t)-1;
#endif
}
Example #11
/*
 * Take a retired page off the retired-pages vnode and clear the toxic flags.
 * If "free" is nonzero, lock it and put it back on the freelist. If "free"
 * is zero, the caller already holds SE_EXCL lock so we simply unretire it
 * and don't do anything else with it.
 *
 * Any unretire messages are printed from this routine.
 *
 * Returns 0 if page pp was unretired; else an error code.
 */
int
page_unretire_pp(page_t *pp, int free)
{
	/*
	 * To be retired, a page has to be hashed onto the retired_pages vnode
	 * and have PR_RETIRED set in p_toxic.
	 */
	if (free == 0 || page_try_reclaim_lock(pp, SE_EXCL, SE_RETIRED)) {
		ASSERT(PAGE_EXCL(pp));
		PR_DEBUG(prd_ulocked);
		if (!PP_RETIRED(pp)) {
			PR_DEBUG(prd_unotretired);
			page_unlock(pp);
			return (page_retire_done(pp, PRD_UNR_NOT));
		}

		PR_MESSAGE(CE_NOTE, 1, "unretiring retired"
		    " page 0x%08x.%08x", mmu_ptob((uint64_t)pp->p_pagenum));
		if (pp->p_toxic & PR_FMA) {
			PR_DECR_KSTAT(pr_fma);
		} else if (pp->p_toxic & PR_UE) {
			PR_DECR_KSTAT(pr_ue);
		} else {
			PR_DECR_KSTAT(pr_mce);
		}
		page_clrtoxic(pp, PR_ALLFLAGS);

		if (free) {
			PR_DEBUG(prd_udestroy);
			page_destroy(pp, 0);
		} else {
			PR_DEBUG(prd_uhashout);
			page_hashout(pp, NULL);
		}

		mutex_enter(&freemem_lock);
		availrmem++;
		mutex_exit(&freemem_lock);

		PR_DEBUG(prd_uunretired);
		PR_DECR_KSTAT(pr_retired);
		PR_INCR_KSTAT(pr_unretired);
		return (page_retire_done(pp, PRD_UNR_SUCCESS));
	}
	PR_DEBUG(prd_unotlocked);
	return (page_retire_done(pp, PRD_UNR_CANTLOCK));
}
Example #12
/*
 * Estimate how much memory we will need to save
 * the sensitive pages with compression.
 */
static caddr_t
i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
{
	pgcnt_t alloc_pcnt, last_pcnt;
	caddr_t addr;
	char *str;

	str = "i_cpr_storage_data_alloc:";
	if (retry_cnt == 0) {
		/*
		 * common compression ratio is about 3:1
		 * initial storage allocation is estimated at 40%
		 * to cover the majority of cases
		 */
		alloc_pcnt = INITIAL_ALLOC_PCNT;
		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
		CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
		CPR_DEBUG(CPR_DEBUG7,
		    "%s initial est pages: %ld, alloc %ld%%\n",
		    str, *alloc_pages, alloc_pcnt);
	} else {
		/*
		 * calculate the prior compression percentage (x100)
		 * from the last attempt to save sensitive pages
		 */
		ASSERT(sensitive_pages_saved != 0);
		last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
		    sensitive_pages_saved;
		CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);

		/*
		 * new estimated storage size is based on
		 * the larger ratio + 5% for each retry:
		 * pages * (last + [5%, 10%])
		 */
		alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
		    (retry_cnt * 5);
		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
		CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
		    str, *alloc_pages, alloc_pcnt);
	}

	addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
	CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
	return (addr);
}
Example #13
/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical addresses are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}
Example #14
static x86pte_t
get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
{
	x86pte_t buf;

	if (htable->ht_flags & HTABLE_COPIED) {
		uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes;
		ptr += va2entry(htable, addr) << mmu.pte_size_shift;
		return (*(x86pte_t *)ptr);
	}

	paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
	paddr += va2entry(htable, addr) << mmu.pte_size_shift;

	if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size)
		return (buf);

	return (0);
}
Example #15
File: xpvtap.c  Project: pcd1193182/openzfs
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
	blkif_request_t *req;
	struct seg *seg;
	struct as *as;
	caddr_t uaddr;
	int e;


	as = state->bt_map.um_as;
	if (as == NULL) {
		return;
	}

	/* get a copy of the original request */
	req = &state->bt_map.um_outstanding_reqs[uid];

	/* unmap the grefs for this request */
	if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
	    (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
	    (req->nr_segments != 0)) {
		uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
		AS_LOCK_ENTER(as, RW_READER);
		seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
		if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
		    (seg->s_base + seg->s_size))) {
			AS_LOCK_EXIT(as);
			xpvtap_rs_free(state->bt_map.um_rs, uid);
			return;
		}

		e = segmf_release_grefs(seg, uaddr, req->nr_segments);
		if (e != 0) {
			cmn_err(CE_WARN, "unable to release grefs");
		}

		AS_LOCK_EXIT(as);
	}

	/* free up the user ring id */
	xpvtap_rs_free(state->bt_map.um_rs, uid);
}
Example #16
caddr_t
psm_map_phys_new(paddr_t addr, size_t len, int prot)
{
	uint_t pgoffset;
	paddr_t base;
	pgcnt_t npages;
	caddr_t cvaddr;

	if (len == 0)
		return (0);

	pgoffset = addr & MMU_PAGEOFFSET;
	base = addr - pgoffset;
	npages = mmu_btopr(len + pgoffset);
	cvaddr = device_arena_alloc(ptob(npages), VM_NOSLEEP);
	if (cvaddr == NULL)
		return (0);
	hat_devload(kas.a_hat, cvaddr, mmu_ptob(npages), mmu_btop(base),
	    prot, HAT_LOAD_LOCK);
	return (cvaddr + pgoffset);
}
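psm_map_phys_new() above shows the common pattern for mapping an arbitrary physical range: split the address into a page-aligned base plus an in-page offset, round the length up to whole pages, map those pages, and return the mapping address plus the offset. The standalone sketch below isolates just that arithmetic; the address and length values are hypothetical and a 4 KB page is assumed.

/*
 * Arithmetic behind psm_map_phys_new(), in user land, with made-up inputs.
 */
#include <stdio.h>

#define	MMU_PAGESHIFT	12
#define	MMU_PAGESIZE	(1UL << MMU_PAGESHIFT)
#define	MMU_PAGEOFFSET	(MMU_PAGESIZE - 1)
#define	mmu_btop(x)	((unsigned long)(x) >> MMU_PAGESHIFT)
#define	mmu_btopr(x)	(((unsigned long)(x) + MMU_PAGEOFFSET) >> MMU_PAGESHIFT)
#define	mmu_ptob(x)	((unsigned long)(x) << MMU_PAGESHIFT)

int
main(void)
{
	unsigned long addr = 0xfee00432UL;	/* hypothetical physical address */
	unsigned long len = 0x1800UL;		/* hypothetical length in bytes */

	unsigned long pgoffset = addr & MMU_PAGEOFFSET;
	unsigned long base = addr - pgoffset;
	unsigned long npages = mmu_btopr(len + pgoffset);

	printf("base    = 0x%lx (pfn 0x%lx)\n", base, mmu_btop(base));
	printf("npages  = %lu (%lu bytes mapped)\n", npages, mmu_ptob(npages));
	printf("caller gets the mapping address + 0x%lx\n", pgoffset);
	return (0);
}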
Example #17
/*
 * Prints any page retire messages to the user, and decides what
 * error code is appropriate for the condition reported.
 */
static int
page_retire_done(page_t *pp, int code)
{
	page_retire_op_t *prop;
	uint64_t	pa = 0;
	int		i;

	if (pp != NULL) {
		pa = mmu_ptob((uint64_t)pp->p_pagenum);
	}

	prop = NULL;
	for (i = 0; page_retire_ops[i].pr_key != PRD_INVALID_KEY; i++) {
		if (page_retire_ops[i].pr_key == code) {
			prop = &page_retire_ops[i];
			break;
		}
	}

#ifdef	DEBUG
	if (page_retire_ops[i].pr_key == PRD_INVALID_KEY) {
		cmn_err(CE_PANIC, "page_retire_done: Invalid opcode %d", code);
	}
#endif

	ASSERT(prop->pr_key == code);

	prop->pr_count++;

	PR_MESSAGE(CE_NOTE, prop->pr_msglvl, prop->pr_message, pa);
	if (pp != NULL) {
		page_settoxic(pp, PR_MSG);
	}

	return (prop->pr_retval);
}
Example #18
static x86pte_t
get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
{
	x86pte_t buf;
	x86pte32_t *pte32 = (x86pte32_t *)&buf;
	size_t len;

	if (htable->ht_flags & HTABLE_VLP) {
		uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes;
		ptr += va2entry(htable, addr) << mmu.pte_size_shift;
		len = mdb_vread(&buf, mmu.pte_size, ptr);
	} else {
		paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
		paddr += va2entry(htable, addr) << mmu.pte_size_shift;
		len = mdb_pread(&buf, mmu.pte_size, paddr);
	}

	if (len != mmu.pte_size)
		return (0);

	if (mmu.pte_size == sizeof (x86pte_t))
		return (buf);
	return (*pte32);
}
Example #19
/*
 * We're done using the mapping area; release virtual space
 */
void
i_cpr_map_destroy(void)
{
	vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
	cpr_vaddr = NULL;
}
Example #20
/*
 * Create the multiboot info structure (mbi) based on the saved mbi.
 * Recalculate values of the pointer type fields in the data
 * structure based on the new starting physical address of the
 * data structure.
 */
static int
fastboot_build_mbi(char *mdep, fastboot_info_t *nk)
{
	mb_module_t	*mbp;
	multiboot_info_t	*mbi;	/* pointer to multiboot structure */
	uintptr_t	start_addr_va;	/* starting VA of mbi */
	uintptr_t	start_addr_pa;	/* starting PA of mbi */
	size_t		offs = 0;	/* offset from the starting address */
	size_t		arglen;		/* length of the command line arg */
	size_t		size;	/* size of the memory reserved for mbi */
	size_t		mdnsz;	/* length of the boot archive name */

	/*
	 * If mdep is not NULL or empty, use the length of mdep + 1
	 * (for NULL terminating) as the length of the new command
	 * line; else use the saved command line length as the
	 * length for the new command line.
	 */
	if (mdep != NULL && strlen(mdep) != 0) {
		arglen = strlen(mdep) + 1;
	} else {
		arglen = saved_cmdline_len;
	}

	/*
	 * Allocate memory for the new multiboot info structure (mbi).
	 * If we have reserved memory for mbi but it's not enough,
	 * free it and reallocate.
	 */
	size = PAGESIZE + P2ROUNDUP(arglen, PAGESIZE);
	if (nk->fi_mbi_size && nk->fi_mbi_size < size) {
		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
		nk->fi_mbi_size = 0;
	}

	if (nk->fi_mbi_size == 0) {
		if ((nk->fi_new_mbi_va =
		    (uintptr_t)contig_alloc(size, &fastboot_below_1G_dma_attr,
		    PAGESIZE, 0)) == NULL) {
			cmn_err(CE_NOTE, fastboot_enomem_msg,
			    (uint64_t)size, "1G");
			return (-1);
		}
		/*
		 * fi_mbi_size must be set after the allocation succeeds
		 * as it's used to determine how much memory to free.
		 */
		nk->fi_mbi_size = size;
	}

	/*
	 * Initialize memory
	 */
	bzero((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);

	/*
	 * Get PA for the new mbi
	 */
	start_addr_va = nk->fi_new_mbi_va;
	start_addr_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
	    (caddr_t)start_addr_va));
	nk->fi_new_mbi_pa = (paddr_t)start_addr_pa;

	/*
	 * Populate the rest of the fields in the data structure
	 */

	/*
	 * Copy from the saved mbi to preserve all non-pointer type fields.
	 */
	mbi = (multiboot_info_t *)start_addr_va;
	bcopy(&saved_mbi, mbi, sizeof (*mbi));

	/*
	 * Recalculate mods_addr.  Set mod_start and mod_end based on
	 * the physical address of the new boot archive.  Set mod_name
	 * to the name of the new boot archive.
	 */
	offs += sizeof (multiboot_info_t);
	mbi->mods_addr = start_addr_pa + offs;
	mbp = (mb_module_t *)(start_addr_va + offs);
	mbp->mod_start = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_dest_pa;
	mbp->mod_end = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_next_pa;

	offs += sizeof (mb_module_t);
	mdnsz = strlen(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE]) + 1;
	bcopy(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
	    (void *)(start_addr_va + offs), mdnsz);
	mbp->mod_name = start_addr_pa + offs;
	mbp->reserved = 0;

	/*
	 * Make sure the offset is 16-byte aligned to avoid unaligned access.
	 */
	offs += mdnsz;
	offs = P2ROUNDUP_TYPED(offs, 16, size_t);

	/*
	 * Recalculate mmap_addr
	 */
	mbi->mmap_addr = start_addr_pa + offs;
	bcopy((void *)(uintptr_t)saved_mmap, (void *)(start_addr_va + offs),
	    saved_mbi.mmap_length);
	offs += saved_mbi.mmap_length;

	/*
	 * Recalculate drives_addr
	 */
	mbi->drives_addr = start_addr_pa + offs;
	bcopy((void *)(uintptr_t)saved_drives, (void *)(start_addr_va + offs),
	    saved_mbi.drives_length);
	offs += saved_mbi.drives_length;

	/*
	 * Recalculate the address of cmdline.  Set cmdline to contain the
	 * new boot argument.
	 */
	mbi->cmdline = start_addr_pa + offs;

	if (mdep != NULL && strlen(mdep) != 0) {
		bcopy(mdep, (void *)(start_addr_va + offs), arglen);
	} else {
		bcopy((void *)saved_cmdline, (void *)(start_addr_va + offs),
		    arglen);
	}

	/* clear fields and flags that are not copied */
	bzero(&mbi->config_table,
	    sizeof (*mbi) - offsetof(multiboot_info_t, config_table));
	mbi->flags &= ~(MB_INFO_CONFIG_TABLE | MB_INFO_BOOT_LOADER_NAME |
	    MB_INFO_APM_TABLE | MB_INFO_VIDEO_INFO);

	return (0);
}
Example #21
/*
 * This function performs the following tasks:
 * - Read the sizes of the new kernel and boot archive.
 * - Allocate memory for the new kernel and boot archive.
 * - Allocate memory for page tables necessary for mapping the memory
 *   allocated for the files.
 * - Read the new kernel and boot archive into memory.
 * - Map in the fast reboot switcher.
 * - Load the fast reboot switcher to FASTBOOT_SWTCH_PA.
 * - Build the new multiboot_info structure
 * - Build page tables for the low 1G of physical memory.
 * - Mark the data structure as valid if all steps have succeeded.
 */
void
fastboot_load_kernel(char *mdep)
{
	void		*buf = NULL;
	int		i;
	fastboot_file_t	*fb;
	uint32_t	dboot_start_offset;
	char		kern_bootpath[OBP_MAXPATHLEN];
	extern uintptr_t postbootkernelbase;
	uintptr_t	saved_kernelbase;
	int		bootpath_len = 0;
	int		is_failsafe = 0;
	int		is_retry = 0;
	uint64_t	end_addr;

	if (!fastreboot_capable)
		return;

	if (newkernel.fi_valid)
		fastboot_free_newkernel(&newkernel);

	saved_kernelbase = postbootkernelbase;

	postbootkernelbase = 0;

	/*
	 * Initialize various HAT related fields in the data structure
	 */
	fastboot_init_fields(&newkernel);

	bzero(kern_bootpath, OBP_MAXPATHLEN);

	/*
	 * Process the boot argument
	 */
	bzero(fastboot_args, OBP_MAXPATHLEN);
	fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args);

	/*
	 * Make sure we get the null character
	 */
	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX],
	    bootpath_len);
	bcopy(kern_bootfile,
	    &fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len],
	    strlen(kern_bootfile) + 1);

	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
	    bootpath_len);

	if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32,
	    (sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 ||
	    bcmp(kern_bootfile, FAILSAFE_BOOTFILE64,
	    (sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) {
		is_failsafe = 1;
	}

load_kernel_retry:
	/*
	 * Read in unix and boot_archive
	 */
	end_addr = DBOOT_ENTRY_ADDRESS;
	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
		struct _buf	*file;
		uintptr_t	va;
		uint64_t	fsize;
		size_t		fsize_roundup, pt_size;
		int		page_index;
		uintptr_t	offset;
		ddi_dma_attr_t dma_attr = fastboot_dma_attr;


		dprintf("fastboot_filename[%d] = %s\n",
		    i, fastboot_filename[i]);

		if ((file = kobj_open_file(fastboot_filename[i])) ==
		    (struct _buf *)-1) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't open %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		if (kobj_get_filesize(file, &fsize) != 0) {
			cmn_err(CE_NOTE,
			    "!Fastboot: Couldn't get filesize for %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t);

		/*
		 * Where the files end in physical memory after being
		 * relocated by the fast boot switcher.
		 */
		end_addr += fsize_roundup;
		if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) {
			cmn_err(CE_NOTE, "!Fastboot: boot archive is too big");
			goto err_out;
		}

		/*
		 * Adjust dma_attr_addr_lo so that the new kernel and boot
		 * archive will not be overridden during relocation.
		 */
		if (end_addr > fastboot_dma_attr.dma_attr_addr_lo ||
		    end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) {

			if (is_retry) {
				/*
				 * If we have already tried and didn't succeed,
				 * just give up.
				 */
				cmn_err(CE_NOTE,
				    "!Fastboot: boot archive is too big");
				goto err_out;
			} else {
				/* Set the flag so we don't keep retrying */
				is_retry++;

				/* Adjust dma_attr_addr_lo */
				fastboot_dma_attr.dma_attr_addr_lo = end_addr;
				fastboot_below_1G_dma_attr.dma_attr_addr_lo =
				    end_addr;

				/*
				 * Free the memory we have already allocated
				 * whose physical addresses might not fit
				 * the new lo and hi constraints.
				 */
				fastboot_free_mem(&newkernel, end_addr);
				goto load_kernel_retry;
			}
		}


		if (!fastboot_contig)
			dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) +
			    (((fsize % PAGESIZE) == 0) ? 0 : 1);

		if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0))
		    == NULL) {
			cmn_err(CE_NOTE, fastboot_enomem_msg, fsize, "64G");
			goto err_out;
		}

		va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t);

		if (kobj_read_file(file, (char *)va, fsize, 0) < 0) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't read %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fb = &newkernel.fi_files[i];
		fb->fb_va = va;
		fb->fb_size = fsize;
		fb->fb_sectcnt = 0;

		pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);

		/*
		 * If we have reserved memory but it's not enough, free it.
		 */
		if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) {
			contig_free((void *)fb->fb_pte_list_va,
			    fb->fb_pte_list_size);
			fb->fb_pte_list_size = 0;
		}

		if (fb->fb_pte_list_size == 0) {
			if ((fb->fb_pte_list_va =
			    (x86pte_t *)contig_alloc(pt_size,
			    &fastboot_below_1G_dma_attr, PAGESIZE, 0))
			    == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)pt_size, "1G");
				goto err_out;
			}
			/*
			 * fb_pte_list_size must be set after the allocation
			 * succeeds as it's used to determine how much memory to
			 * free.
			 */
			fb->fb_pte_list_size = pt_size;
		}

		bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size);

		fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)fb->fb_pte_list_va));

		for (page_index = 0, offset = 0; offset < fb->fb_size;
		    offset += PAGESIZE) {
			uint64_t paddr;

			paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
			    (caddr_t)fb->fb_va + offset));

			ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo);

			/*
			 * Include the pte_bits so we don't have to make
			 * it in assembly.
			 */
			fb->fb_pte_list_va[page_index++] = (x86pte_t)
			    (paddr | pte_bits);
		}

		fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE;

		if (i == FASTBOOT_UNIX) {
			Ehdr	*ehdr = (Ehdr *)va;
			int	j;

			/*
			 * Sanity checks:
			 */
			for (j = 0; j < SELFMAG; j++) {
				if (ehdr->e_ident[j] != ELFMAG[j]) {
					cmn_err(CE_NOTE, "!Fastboot: Bad ELF "
					    "signature");
					goto err_out;
				}
			}

			if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_386) {

				fb->fb_sectcnt = sizeof (fb->fb_sections) /
				    sizeof (fb->fb_sections[0]);

				if (fastboot_elf32_find_loadables((void *)va,
				    fsize, &fb->fb_sections[0],
				    &fb->fb_sectcnt, &dboot_start_offset) < 0) {
					cmn_err(CE_NOTE, "!Fastboot: ELF32 "
					    "program section failure");
					goto err_out;
				}

				if (fb->fb_sectcnt == 0) {
					cmn_err(CE_NOTE, "!Fastboot: No ELF32 "
					    "program sections found");
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE32_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE32,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32));
				}

			} else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_AMD64) {

				if (fastboot_elf64_find_dboot_load_offset(
				    (void *)va, fsize, &dboot_start_offset)
				    != 0) {
					cmn_err(CE_NOTE, "!Fastboot: Couldn't "
					    "find ELF64 dboot entry offset");
					goto err_out;
				}

				if (!is_x86_feature(x86_featureset,
				    X86FSET_64) ||
				    !is_x86_feature(x86_featureset,
				    X86FSET_PAE)) {
					cmn_err(CE_NOTE, "Fastboot: Cannot "
					    "reboot to %s: "
					    "not a 64-bit capable system",
					    kern_bootfile);
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE64_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE64,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64));
				}
			} else {
				cmn_err(CE_NOTE, "!Fastboot: Unknown ELF type");
				goto err_out;
			}

			fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS -
			    dboot_start_offset;

			fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup;
		} else {
			fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa;
			fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup;
		}

		kobj_close_file(file);

	}

	/*
	 * Add the function that will switch us to 32-bit protected mode
	 */
	fb = &newkernel.fi_files[FASTBOOT_SWTCH];
	fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA;
	fb->fb_size = MMU_PAGESIZE;

	hat_devload(kas.a_hat, (caddr_t)fb->fb_va,
	    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
	    PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	/*
	 * Build the new multiboot_info structure
	 */
	if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) {
		goto err_out;
	}

	/*
	 * Build page table for low 1G physical memory. Use big pages.
	 * Allocate 4 (5 for amd64) pages for the page tables.
	 *    1 page for PML4 (amd64)
	 *    1 page for Page-Directory-Pointer Table
	 *    2 pages for Page Directory
	 *    1 page for Page Table.
	 * The page table entry will be rewritten to map the physical
	 * address as we do the copying.
	 */
	if (newkernel.fi_has_pae) {
#ifdef	__amd64
		size_t size = MMU_PAGESIZE * 5;
#else
		size_t size = MMU_PAGESIZE * 4;
#endif	/* __amd64 */

		if (newkernel.fi_pagetable_size && newkernel.fi_pagetable_size
		    < size) {
			contig_free((void *)newkernel.fi_pagetable_va,
			    newkernel.fi_pagetable_size);
			newkernel.fi_pagetable_size = 0;
		}

		if (newkernel.fi_pagetable_size == 0) {
			if ((newkernel.fi_pagetable_va = (uintptr_t)
			    contig_alloc(size, &fastboot_below_1G_dma_attr,
			    MMU_PAGESIZE, 0)) == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)size, "1G");
				goto err_out;
			}
			/*
			 * fi_pagetable_size must be set after the allocation
			 * succeeds as it's used to determine how much memory to
			 * free.
			 */
			newkernel.fi_pagetable_size = size;
		}

		bzero((void *)(newkernel.fi_pagetable_va), size);

		newkernel.fi_pagetable_pa =
		    mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)newkernel.fi_pagetable_va));

		newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa +
		    size - MMU_PAGESIZE;

		newkernel.fi_next_table_va = newkernel.fi_pagetable_va +
		    MMU_PAGESIZE;
		newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa +
		    MMU_PAGESIZE;

		fastboot_build_pagetables(&newkernel);
	}


	/* Generate MD5 checksums */
	fastboot_cksum_generate(&newkernel);

	/* Mark it as valid */
	newkernel.fi_valid = 1;
	newkernel.fi_magic = FASTBOOT_MAGIC;

	postbootkernelbase = saved_kernelbase;
	return;

err_out:
	postbootkernelbase = saved_kernelbase;
	newkernel.fi_valid = 0;
	fastboot_free_newkernel(&newkernel);
}
Example #22
/*ARGSUSED2*/
int
do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
{
	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
	privcmd_mmap_entry_t *umme;
	struct as *as = curproc->p_as;
	struct seg *seg;
	int i, error = 0;

	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
		return (EFAULT);

	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
	    privcmd_mmap_entry_t *, mmc->entry);

	if (mmc->dom == DOMID_SELF) {
		error = ENOTSUP;	/* Too paranoid? */
		goto done;
	}

	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
		caddr_t addr;

		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
			error = EFAULT;
			break;
		}

		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
		    ulong_t, mme->npages);

		if (mme->mfn == MFN_INVALID) {
			error = EINVAL;
			break;
		}

		addr = (caddr_t)mme->va;

		/*
		 * Find the segment we want to mess with, then add
		 * the mfn range to the segment.
		 */
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		if ((seg = as_findseg(as, addr, 0)) == NULL ||
		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
			error = EINVAL;
		else
			error = segmf_add_mfns(seg, addr,
			    mme->mfn, mme->npages, mmc->dom);
		AS_LOCK_EXIT(as, &as->a_lock);

		if (error != 0)
			break;
	}

done:
	DTRACE_XPV1(mmap__end, int, error);

	return (error);
}
Example #23
/*
 * This routine allocates space to save the sensitive kernel pages,
 * i.e. kernel data nucleus, kvalloc and kvseg segments.
 * It's assumed that those segments are the only areas that can be
 * contaminated by memory allocations during statefile dumping.
 * The space allocated here contains:
 * 	A list of descriptors describing the saved sensitive pages.
 * 	The storage area for saving the compressed sensitive kernel pages.
 * Since storage pages are allocated from segkmem, they need to be
 * excluded when saving.
 */
int
i_cpr_save_sensitive_kpages(void)
{
	static const char pages_fmt[] = "\n%s %s allocs\n"
	    "	spages %ld, vpages %ld, diff %ld\n";
	int retry_cnt;
	int error = 0;
	pgcnt_t pages, spages, vpages;
	caddr_t	addr;
	char *str;

	/*
	 * Tag sensitive kpages. Allocate space for storage descriptors
	 * and storage data area based on the resulting bitmaps.
	 * Note: The storage space will be part of the sensitive
	 * segment, so we need to tag kpages here before the storage
	 * is actually allocated just so their space won't be accounted
	 * for. They will not be part of the statefile although those
	 * pages will be claimed by cprboot.
	 */
	cpr_clear_bitmaps();

	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
	pages = spages - vpages;

	str = "i_cpr_save_sensitive_kpages:";
	CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);

	/*
	 * Allocate space to save the clean sensitive kpages
	 */
	for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
		/*
		 * Alloc on first pass or realloc if we are retrying because
		 * of insufficient storage for sensitive pages
		 */
		if (retry_cnt == 0 || error == ENOMEM) {
			if (i_cpr_storage_data_base) {
				kmem_free(i_cpr_storage_data_base,
				    mmu_ptob(i_cpr_storage_data_sz));
				i_cpr_storage_data_base = NULL;
				i_cpr_storage_data_sz = 0;
			}
			addr = i_cpr_storage_data_alloc(pages,
			    &i_cpr_storage_data_sz, retry_cnt);
			if (addr == NULL) {
				CPR_DEBUG(CPR_DEBUG7,
				    "\n%s can't allocate data storage space!\n",
				    str);
				return (ENOMEM);
			}
			i_cpr_storage_data_base = addr;
			i_cpr_storage_data_end =
			    addr + mmu_ptob(i_cpr_storage_data_sz);
		}

		/*
		 * Allocate on first pass, only realloc if retry is because of
		 * insufficient descriptors, but reset contents on each pass
		 * (desc_alloc resets contents as well)
		 */
		if (retry_cnt == 0 || error == -1) {
			error = i_cpr_storage_desc_alloc(
			    &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
			    &i_cpr_storage_desc_end, retry_cnt);
			if (error != 0)
				return (error);
		} else {
			i_cpr_storage_desc_init(i_cpr_storage_desc_base,
			    i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
		}

		/*
		 * We are ready to save the sensitive kpages to storage.
		 * We cannot trust what's tagged in the bitmaps anymore
		 * after storage allocations.  Clear up the bitmaps and
		 * retag the sensitive kpages again.  The storage pages
		 * should be untagged.
		 */
		cpr_clear_bitmaps();

		spages =
		    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
		vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);

		CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
		    spages, vpages, spages - vpages);

		/*
		 * Returns 0 on success, -1 if too few descriptors, and
		 * ENOMEM if not enough space to save sensitive pages
		 */
		CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
		error = i_cpr_save_to_storage();
		if (error == 0) {
			/* Saving to storage succeeded */
			CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
			    sensitive_pages_saved);
			break;
		} else if (error == -1)
			CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
	}
	if (error == -1)
		error = ENOMEM;
	return (error);
}
Example #24
/*
 * Fill in the remaining CPU context and initialize it.
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;

	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;

	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000); /* ring 1 */
	else
		iopl = 0;

#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif

	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}

	/*
	 * No LDT
	 */

	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
#ifdef CRASH_XEN
	vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#else
	vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#endif
	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());

#if defined(__i386)
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();

	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
	vgc->gs_base_kernel = (uintptr_t)cp;
#endif

	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}
Example #25
/*
 * Create a guest virtual cpu context so that the virtual cpu
 * springs into life in the domain just about to call mp_startup()
 *
 * Virtual CPUs must be initialized once in the lifetime of the domain;
 * after that subsequent attempts to start them will fail with X_EEXIST.
 *
 * Thus 'alloc' -really- creates and initializes the virtual
 * CPU context just once. Once the initialisation succeeds, we never
 * free it, nor the regular cpu_t to which it refers.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	kthread_t *tp = cp->cpu_thread;
	vcpu_guest_context_t vgc;

	int err = 1;

	/*
	 * First, augment the incoming cpu structure
	 * - vcpu pointer reference
	 * - pending event storage area
	 * - physical address of GDT
	 */
	cp->cpu_m.mcpu_vcpu_info =
	    &HYPERVISOR_shared_info->vcpu_info[cp->cpu_id];
	cp->cpu_m.mcpu_evt_pend = kmem_zalloc(
	    sizeof (struct xen_evt_data), KM_SLEEP);
	cp->cpu_m.mcpu_gdtpa =
	    mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)cp->cpu_gdt));

	if ((err = xen_gdt_setprot(cp, PROT_READ)) != 0)
		goto done;

	/*
	 * Now set up the vcpu context so that we can start this vcpu
	 * in the kernel at tp->t_pc (mp_startup).  Note that the
	 * thread will thread_exit() shortly after performing the
	 * initialization; in particular, we will *never* take a
	 * privilege transition on this thread.
	 */

	bzero(&vgc, sizeof (vgc));

#ifdef __amd64
	vgc.user_regs.rip = tp->t_pc;
	vgc.user_regs.rsp = tp->t_sp;
	vgc.user_regs.rbp = tp->t_sp - 2 * sizeof (greg_t);
#else
	vgc.user_regs.eip = tp->t_pc;
	vgc.user_regs.esp = tp->t_sp;
	vgc.user_regs.ebp = tp->t_sp - 2 * sizeof (greg_t);
#endif
	/*
	 * XXPV	Fix resume, if Russ didn't already fix it.
	 *
	 * Note that resume unconditionally puts t->t_stk + sizeof (regs)
	 * into kernel_sp via HYPERVISOR_stack_switch. This anticipates
	 * that only lwps take traps that switch to the kernel stack;
	 * part of creating an lwp adjusts the stack by subtracting
	 * sizeof (struct regs) off t_stk.
	 *
	 * The more interesting question is, why do we do all the work
	 * of a fully fledged lwp for a plain thread?  In particular
	 * we don't have to call HYPERVISOR_stack_switch for lwp-less threads
	 * or futz with the LDT.  This should probably all be done with
	 * an lwp context operator to keep pure thread context switch fast.
	 */
	vgc.kernel_sp = (ulong_t)tp->t_stk;

	err = mp_set_cpu_context(&vgc, cp);

done:
	if (err) {
		mach_cpucontext_free(cp, NULL, err);
		return (NULL);
	}
	return (cp);
}
Example #26
void
xen_hvm_init(void)
{
	struct cpuid_regs cp;
	uint32_t xen_signature[4], base;
	char *xen_str;
	struct xen_add_to_physmap xatp;
	xen_capabilities_info_t caps;
	pfn_t pfn;
	uint64_t msrval, val;
	extern int apix_enable;

	if (xen_hvm_inited != 0)
		return;

	xen_hvm_inited = 1;

	/*
	 * Xen's pseudo-cpuid function returns a string representing
	 * the Xen signature in %ebx, %ecx, and %edx.
	 * Loop over the base values, since it may be different if
	 * the hypervisor has hyper-v emulation switched on.
	 *
	 * %eax contains the maximum supported cpuid function.
	 */
	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		cp.cp_eax = base;
		(void) __cpuid_insn(&cp);
		xen_signature[0] = cp.cp_ebx;
		xen_signature[1] = cp.cp_ecx;
		xen_signature[2] = cp.cp_edx;
		xen_signature[3] = 0;
		xen_str = (char *)xen_signature;
		if (strcmp("XenVMMXenVMM", xen_str)  == 0 &&
		    cp.cp_eax >= (base + 2))
			break;
	}
	if (base >= 0x40010000)
		return;

	/*
	 * cpuid function at base + 1 returns the Xen version in %eax.  The
	 * top 16 bits are the major version, the bottom 16 are the minor
	 * version.
	 */
	cp.cp_eax = base + 1;
	(void) __cpuid_insn(&cp);
	xen_major = cp.cp_eax >> 16;
	xen_minor = cp.cp_eax & 0xffff;

	/*
	 * Below version 3.1 we can't do anything special as a HVM domain;
	 * the PV drivers don't work, many hypercalls are not available,
	 * etc.
	 */
	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1))
		return;

	/*
	 * cpuid function at base + 2 returns information about the
	 * hypercall page.  %eax nominally contains the number of pages
	 * with hypercall code, but according to the Xen guys, "I'll
	 * guarantee that remains one forever more, so you can just
	 * allocate a single page and get quite upset if you ever see CPUID
	 * return more than one page."  %ebx contains an MSR we use to ask
	 * Xen to remap each page at a specific pfn.
	 */
	cp.cp_eax = base + 2;
	(void) __cpuid_insn(&cp);

	/*
	 * Let Xen know where we want the hypercall page mapped.  We
	 * already have a page allocated in the .text section to simplify
	 * the wrapper code.
	 */
	pfn = va_to_pfn(&hypercall_page);
	msrval = mmu_ptob(pfn);
	wrmsr(cp.cp_ebx, msrval);

	/* Fill in the xen_info data */
	xen_info = &__xen_info;
	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);

	if (hvm_get_param(HVM_PARAM_STORE_PFN, &val) < 0)
		return;
	/*
	 * The first hypercall worked, so mark hypercalls as working.
	 */
	xen_hvm_features |= XEN_HVM_HYPERCALLS;

	xen_info->store_mfn = (mfn_t)val;
	if (hvm_get_param(HVM_PARAM_STORE_EVTCHN, &val) < 0)
		return;
	xen_info->store_evtchn = (mfn_t)val;

	/* Figure out whether the hypervisor is 32-bit or 64-bit.  */
	if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) {
		((char *)(caps))[sizeof (caps) - 1] = '\0';
		if (strstr(caps, "x86_64") != NULL)
			xen_bits = 64;
		else if (strstr(caps, "x86_32") != NULL)
			xen_bits = 32;
	}

	if (xen_bits < 0)
		return;
#ifdef __amd64
	ASSERT(xen_bits == 64);
#endif

	/*
	 * Allocate space for the shared_info page and tell Xen where it
	 * is.
	 */
	xen_shared_info_frame = va_to_pfn(&hypercall_shared_info_page);
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
		return;

	HYPERVISOR_shared_info = (void *)&hypercall_shared_info_page;

	/*
	 * A working HVM tlb flush hypercall was introduced in Xen 3.3.
	 */
	if (xen_major > 3 || (xen_major == 3 && xen_minor >= 3))
		xen_hvm_features |= XEN_HVM_TLBFLUSH;

	/* FIXME Disable apix for the time being */
	apix_enable = 0;
}
Example #27
File: seg_mf.c  Project: bahamas10/openzfs
/*ARGSUSED*/
static int
segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    enum fault_type type, uint_t prot)
{
	struct segmf_data *data = seg->s_data;
	uint_t hat_flags = HAT_LOAD_NOCONSIST;
	mfn_t mfn;
	x86pte_t pte;
	segmf_map_t *map;
	uint_t idx;


	idx = seg_page(seg, addr);
	map = &data->map[idx];
	ASSERT(map->t_type == SEGMF_MAP_MFN);

	mfn = map->u.m.m_mfn;

	if (type == F_SOFTLOCK) {
		mutex_enter(&freemem_lock);
		data->softlockcnt++;
		mutex_exit(&freemem_lock);
		hat_flags |= HAT_LOAD_LOCK;
	} else
		hat_flags |= HAT_LOAD;

	if (segmf_faultpage_debug > 0) {
		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
		    (void *)addr, data->domid, mfn, prot);
		segmf_faultpage_debug--;
	}

	/*
	 * Ask the HAT to load a throwaway mapping to page zero, then
	 * overwrite it with our foreign domain mapping. It gets removed
	 * later via hat_unload()
	 */
	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
	    PROT_READ | HAT_UNORDERED_OK, hat_flags);

	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
	if (prot & PROT_WRITE)
		pte |= PT_WRITABLE;

	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
		hat_flags = HAT_UNLOAD_UNMAP;

		if (type == F_SOFTLOCK) {
			hat_flags |= HAT_UNLOAD_UNLOCK;
			mutex_enter(&freemem_lock);
			data->softlockcnt--;
			mutex_exit(&freemem_lock);
		}

		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
		return (FC_MAKE_ERR(EFAULT));
	}

	return (0);
}
Example #28
/*
 * Report all hat's that either use PFN as a page table or that map the page.
 */
static int
do_report_maps(pfn_t pfn)
{
	struct hat *hatp;
	struct hat hat;
	htable_t *ht;
	htable_t htable;
	uintptr_t base;
	int h;
	int level;
	int entry;
	x86pte_t pte;
	x86pte_t buf;
	x86pte32_t *pte32 = (x86pte32_t *)&buf;
	physaddr_t paddr;
	size_t len;

	/*
	 * The hats are kept in a list with khat at the head.
	 */
	for (hatp = khat; hatp != NULL; hatp = hat.hat_next) {
		/*
		 * read the hat and its hash table
		 */
		if (mdb_vread(&hat, sizeof (hat), (uintptr_t)hatp) == -1) {
			mdb_warn("Couldn't read struct hat\n");
			return (DCMD_ERR);
		}

		/*
		 * read the htable hashtable
		 */
		paddr = 0;
		for (h = 0; h < hat.hat_num_hash; ++h) {
			if (mdb_vread(&ht, sizeof (htable_t *),
			    (uintptr_t)(hat.hat_ht_hash + h)) == -1) {
				mdb_warn("Couldn't read htable\n");
				return (DCMD_ERR);
			}
			for (; ht != NULL; ht = htable.ht_next) {
				if (mdb_vread(&htable, sizeof (htable_t),
				    (uintptr_t)ht) == -1) {
					mdb_warn("Couldn't read htable\n");
					return (DCMD_ERR);
				}

				/*
				 * only report kernel addresses once
				 */
				if (hatp != khat &&
				    htable.ht_vaddr >= kernelbase)
					continue;

				/*
				 * Is the PFN a pagetable itself?
				 */
				if (htable.ht_pfn == pfn) {
					mdb_printf("Pagetable for "
					    "hat=%p htable=%p\n", hatp, ht);
					continue;
				}

				/*
				 * otherwise, examine page mappings
				 */
				level = htable.ht_level;
				if (level > mmu.max_page_level)
					continue;
				paddr = mmu_ptob((physaddr_t)htable.ht_pfn);
				for (entry = 0;
				    entry < HTABLE_NUM_PTES(&htable);
				    ++entry) {

					base = htable.ht_vaddr + entry *
					    mmu.level_size[level];

					/*
					 * only report kernel addresses once
					 */
					if (hatp != khat &&
					    base >= kernelbase)
						continue;

					len = mdb_pread(&buf, mmu.pte_size,
					    paddr + entry * mmu.pte_size);
					if (len != mmu.pte_size)
						return (DCMD_ERR);
					if (mmu.pte_size == sizeof (x86pte_t))
						pte = buf;
					else
						pte = *pte32;

					if ((pte & PT_VALID) == 0)
						continue;
					if (level == 0 || !(pte & PT_PAGESIZE))
						pte &= PT_PADDR;
					else
						pte &= PT_PADDR_LGPG;
					if (mmu_btop(mdb_ma_to_pa(pte)) != pfn)
						continue;
					mdb_printf("hat=%p maps addr=%p\n",
					    hatp, (caddr_t)base);
				}
			}
		}
	}

done:
	return (DCMD_OK);
}
Example #29
File: vpm.c  Project: GuangmingZang/maczfs
void
vpm_init()
{
	long  npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpmap create : number of freelist "
		"vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2
	 */
	if (vpm_nfreelist & (vpm_nfreelist - 1)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {

		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
				KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);


	/*
	 * Allocate and initialize the vpmap structs.
	 */
	vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk thru a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same. This should work fine
		 * for x64 (this needs to be modified when enabled on sparc).
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}
Example #30
static int
do_ptable_dcmd(pfn_t pfn)
{
	struct hat *hatp;
	struct hat hat;
	htable_t *ht;
	htable_t htable;
	uintptr_t base;
	int h;
	int level;
	int entry;
	uintptr_t pagesize;
	x86pte_t pte;
	x86pte_t buf;
	x86pte32_t *pte32 = (x86pte32_t *)&buf;
	physaddr_t paddr;
	size_t len;

	/*
	 * The hats are kept in a list with khat at the head.
	 */
	for (hatp = khat; hatp != NULL; hatp = hat.hat_next) {
		/*
		 * read the hat and its hash table
		 */
		if (mdb_vread(&hat, sizeof (hat), (uintptr_t)hatp) == -1) {
			mdb_warn("Couldn't read struct hat\n");
			return (DCMD_ERR);
		}

		/*
		 * read the htable hashtable
		 */
		paddr = 0;
		for (h = 0; h < hat.hat_num_hash; ++h) {
			if (mdb_vread(&ht, sizeof (htable_t *),
			    (uintptr_t)(hat.hat_ht_hash + h)) == -1) {
				mdb_warn("Couldn't read htable\n");
				return (DCMD_ERR);
			}
			for (; ht != NULL; ht = htable.ht_next) {
				if (mdb_vread(&htable, sizeof (htable_t),
				    (uintptr_t)ht) == -1) {
					mdb_warn("Couldn't read htable\n");
					return (DCMD_ERR);
				}

				/*
				 * Is this the PFN for this htable
				 */
				if (htable.ht_pfn == pfn)
					goto found_it;
			}
		}
	}

found_it:
	if (htable.ht_pfn == pfn) {
		mdb_printf("htable=%p\n", ht);
		level = htable.ht_level;
		base = htable.ht_vaddr;
		pagesize = mmu.level_size[level];
	} else {
		mdb_printf("Unknown pagetable - assuming level/addr 0");
		level = 0;	/* assume level == 0 for PFN */
		base = 0;
		pagesize = MMU_PAGESIZE;
	}

	paddr = mmu_ptob((physaddr_t)pfn);
	for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
		len = mdb_pread(&buf, mmu.pte_size,
		    paddr + entry * mmu.pte_size);
		if (len != mmu.pte_size)
			return (DCMD_ERR);
		if (mmu.pte_size == sizeof (x86pte_t))
			pte = buf;
		else
			pte = *pte32;

		if (pte == 0)
			continue;

		mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize);
		do_pte_dcmd(level, pte);
	}

done:
	return (DCMD_OK);
}