/*
 * The list of mfn pages is out of date.  Recompute it.
 */
static void
rebuild_mfn_list(void)
{
	int i = 0;
	size_t sz;
	size_t off;
	pfn_t pfn;

	SUSPEND_DEBUG("rebuild_mfn_list\n");

	sz = ((mfn_count * sizeof (mfn_t)) + MMU_PAGEOFFSET) & MMU_PAGEMASK;

	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		size_t j = mmu_btop(off);
		if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
			pfn = hat_getpfnum(kas.a_hat,
			    (caddr_t)&mfn_list_pages[j]);
			mfn_list_pages_page[i++] = pfn_to_mfn(pfn);
		}

		pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list + off);
		mfn_list_pages[j] = pfn_to_mfn(pfn);
	}

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)mfn_list_pages_page);
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
	    = pfn_to_mfn(pfn);
}
Example #2
/**
 * Allocates physical memory that satisfies the given constraints.
 *
 * @param   uPhysHi        The upper physical address limit (inclusive).
 * @param   puPhys         Where to store the physical address of the allocated
 *                         memory. Optional, can be NULL.
 * @param   cb             Size of allocation.
 * @param   uAlignment     Alignment.
 * @param   fContig        Whether the memory must be physically contiguous or
 *                         not.
 *
 * @returns Virtual address of allocated memory block or NULL if allocation
 *          failed.
 */
DECLHIDDEN(void *) rtR0SolMemAlloc(uint64_t uPhysHi, uint64_t *puPhys, size_t cb, uint64_t uAlignment, bool fContig)
{
    if ((cb & PAGEOFFSET) != 0)
        return NULL;

    size_t cPages = (cb + PAGESIZE - 1) >> PAGESHIFT;
    if (!cPages)
        return NULL;

    ddi_dma_attr_t DmaAttr = s_rtR0SolDmaAttr;
    DmaAttr.dma_attr_addr_hi    = uPhysHi;
    DmaAttr.dma_attr_align      = uAlignment;
    if (!fContig)
        DmaAttr.dma_attr_sgllen = cPages > INT_MAX ? INT_MAX - 1 : cPages;
    else
        AssertRelease(DmaAttr.dma_attr_sgllen == 1);

    void *pvMem = contig_alloc(cb, &DmaAttr, PAGESIZE, 1 /* can sleep */);
    if (!pvMem)
    {
        LogRel(("rtR0SolMemAlloc failed. cb=%u Align=%u fContig=%d\n", (unsigned)cb, (unsigned)uAlignment, fContig));
        return NULL;
    }

    pfn_t PageFrameNum = hat_getpfnum(kas.a_hat, (caddr_t)pvMem);
    AssertRelease(PageFrameNum != PFN_INVALID);
    if (puPhys)
        *puPhys = (uint64_t)PageFrameNum << PAGESHIFT;

    return pvMem;
}
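A minimal usage sketch, not from the original sources: allocating one page of physically contiguous memory below 4 GiB and releasing it again. The caller name is invented, and pairing the allocation with contig_free() is an assumption based on the contig_alloc() call above.
/*
 * Hypothetical caller, for illustration only.  Allocates one PAGESIZE page
 * of physically contiguous memory below 4 GiB; releasing it through
 * contig_free() is an assumption matching the contig_alloc() used above.
 */
static int exampleAllocDmaPage(void)
{
    uint64_t PhysAddr = 0;
    void *pvMem = rtR0SolMemAlloc(UINT64_C(0xffffffff) /* uPhysHi */, &PhysAddr,
                                  PAGESIZE /* cb */, PAGESIZE /* uAlignment */,
                                  true /* fContig */);
    if (!pvMem)
        return -1;

    /* ... hand PhysAddr to a device, access the memory through pvMem ... */

    contig_free(pvMem, PAGESIZE);
    return 0;
}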
/*
 * Set up our xenstore page and event channel. Domain 0 needs to allocate a
 * page and event channel; other domains use what we are told.
 */
void
xb_init(void)
{
	int err;

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {

		if (xb_addr != NULL)
			return;

		xb_addr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
		    &xb_cookie);
		xen_info->store_mfn = pfn_to_mfn(hat_getpfnum(kas.a_hat,
		    xb_addr));

		err = xen_alloc_unbound_evtchn(0,
		    (int *)&xen_info->store_evtchn);
		ASSERT(err == 0);
	} else {
		/*
		 * This is harmless on first boot, but needed for resume and
		 * migrate. We use kbm_map_ma() as a shortcut instead of
		 * directly using HYPERVISOR_update_va_mapping().
		 */
		ASSERT(xb_addr != NULL);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
	}

	ASSERT(xen_info->store_evtchn);
}
Example #4
static void
segkmem_xdump_range(void *arg, void *start, size_t size)
{
	struct as *as = arg;
	caddr_t addr = start;
	caddr_t addr_end = addr + size;

	while (addr < addr_end) {
		pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
		if (pfn != PFN_INVALID && pfn <= physmax && pf_is_memory(pfn))
			dump_addpage(as, addr, pfn);
		addr += PAGESIZE;
		dump_timeleft = dump_timeout;
	}
}
/**
 * Returns the physical address for a virtual address.
 *
 * @param pv        The virtual address.
 *
 * @returns The physical address corresponding to @a pv.
 */
static uint64_t rtR0MemObjSolVirtToPhys(void *pv)
{
    struct hat *pHat         = NULL;
    pfn_t       PageFrameNum = 0;
    uintptr_t   uVirtAddr    = (uintptr_t)pv;

    if (SOL_IS_KRNL_ADDR(pv))
        pHat = kas.a_hat;
    else
    {
        proc_t *pProcess = (proc_t *)RTR0ProcHandleSelf();
        AssertRelease(pProcess);
        pHat = pProcess->p_as->a_hat;
    }

    PageFrameNum = hat_getpfnum(pHat, (caddr_t)(uVirtAddr & PAGEMASK));
    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolVirtToPhys failed. pv=%p\n", pv));
    return (((uint64_t)PageFrameNum << PAGE_SHIFT) | (uVirtAddr & PAGE_OFFSET_MASK));
}
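A short usage sketch, illustrative only: translating the address of a kernel buffer to a physical address, e.g. before programming it into a device register. The caller and the cmn_err() diagnostic are assumptions.
/* Illustrative only: log the physical address backing a kernel VA. */
static void exampleShowPhys(void *pv)
{
    /*
     * Works for kernel addresses and for the calling process's user
     * addresses; the page offset of pv is preserved in the result.
     */
    uint64_t PhysAddr = rtR0MemObjSolVirtToPhys(pv);
    cmn_err(CE_CONT, "va=%p pa=0x%llx\n", pv, (unsigned long long)PhysAddr);
}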
Example #6
/*
 * Map address "addr" in address space "as" into a kernel virtual address.
 * The memory is guaranteed to be resident and locked down.
 */
static caddr_t
mapin(struct as *as, caddr_t addr, int writing)
{
	page_t *pp;
	caddr_t kaddr;
	pfn_t pfnum;

	/*
	 * NB: Because of past mistakes, we have bits being returned
	 * by getpfnum that are actually the page type bits of the pte.
	 * When the object we are trying to map is a memory page with
	 * a page structure everything is ok and we can use the optimal
	 * method, ppmapin.  Otherwise, we have to do something special.
	 */
	pfnum = hat_getpfnum(as->a_hat, addr);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		if (pp != NULL) {
			ASSERT(PAGE_LOCKED(pp));
			kaddr = ppmapin(pp, writing ?
				(PROT_READ | PROT_WRITE) : PROT_READ,
				(caddr_t)-1);
			return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
		}
	}

	/*
	 * Oh well, we didn't have a page struct for the object we were
	 * trying to map in; ppmapin doesn't handle devices, but allocating a
	 * heap address allows ppmapout to free virtual space when done.
	 */
	kaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
		writing ? (PROT_READ | PROT_WRITE) : PROT_READ, HAT_LOAD_LOCK);

	return (kaddr + ((uintptr_t)addr & PAGEOFFSET));
}
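For symmetry, a hedged sketch of what the matching unmap path could look like; this mapout() is an illustration, not the driver's actual counterpart, and the is_device flag is an invented way of distinguishing the two cases set up above.
/*
 * Hypothetical mapout(), for illustration only.  Page-backed mappings made
 * with ppmapin() are released with ppmapout(); the device case set up with
 * hat_devload() is unloaded and its heap_arena space freed.
 */
static void
mapout(caddr_t kaddr, int is_device)
{
	kaddr = (caddr_t)((uintptr_t)kaddr & PAGEMASK);
	if (!is_device) {
		ppmapout(kaddr);
	} else {
		hat_unload(kas.a_hat, kaddr, PAGESIZE,
		    HAT_UNLOAD_UNLOCK | HAT_UNLOAD_UNMAP);
		vmem_free(heap_arena, kaddr, PAGESIZE);
	}
}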
int
xen_ldt_setprot(user_desc_t *ldt, size_t lsize, uint_t prot)
{
	int err;
	caddr_t	lva = (caddr_t)ldt;
#if defined(__amd64)
	int pt_bits = PT_VALID;
	pgcnt_t npgs;
	if (prot & PROT_WRITE)
		pt_bits |= PT_WRITABLE;
#endif	/* __amd64 */

	if ((err = as_setprot(&kas, (caddr_t)ldt, lsize, prot)) != 0)
		goto done;

#if defined(__amd64)

	ASSERT(IS_P2ALIGNED(lsize, PAGESIZE));
	npgs = mmu_btop(lsize);
	while (npgs--) {
		if ((err = xen_kpm_page(hat_getpfnum(kas.a_hat, lva),
		    pt_bits)) != 0)
			break;
		lva += PAGESIZE;
	}
#endif	/* __amd64 */

done:
	if (err) {
		cmn_err(CE_WARN, "xen_ldt_setprot(%p, %s) failed: error %d",
		    (void *)lva,
		    (prot & PROT_WRITE) ? "writable" : "read-only", err);
	}

	return (err);
}
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does.  (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them.  But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
Example #9
/*
 * This function performs the following tasks:
 * - Read the sizes of the new kernel and boot archive.
 * - Allocate memory for the new kernel and boot archive.
 * - Allocate memory for page tables necessary for mapping the memory
 *   allocated for the files.
 * - Read the new kernel and boot archive into memory.
 * - Map in the fast reboot switcher.
 * - Load the fast reboot switcher to FASTBOOT_SWTCH_PA.
 * - Build the new multiboot_info structure.
 * - Build page tables for the low 1G of physical memory.
 * - Mark the data structure as valid if all steps have succeeded.
 */
void
fastboot_load_kernel(char *mdep)
{
	void		*buf = NULL;
	int		i;
	fastboot_file_t	*fb;
	uint32_t	dboot_start_offset;
	char		kern_bootpath[OBP_MAXPATHLEN];
	extern uintptr_t postbootkernelbase;
	uintptr_t	saved_kernelbase;
	int		bootpath_len = 0;
	int		is_failsafe = 0;
	int		is_retry = 0;
	uint64_t	end_addr;

	if (!fastreboot_capable)
		return;

	if (newkernel.fi_valid)
		fastboot_free_newkernel(&newkernel);

	saved_kernelbase = postbootkernelbase;

	postbootkernelbase = 0;

	/*
	 * Initialize various HAT related fields in the data structure
	 */
	fastboot_init_fields(&newkernel);

	bzero(kern_bootpath, OBP_MAXPATHLEN);

	/*
	 * Process the boot argument
	 */
	bzero(fastboot_args, OBP_MAXPATHLEN);
	fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args);

	/*
	 * Make sure we get the null character
	 */
	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX],
	    bootpath_len);
	bcopy(kern_bootfile,
	    &fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len],
	    strlen(kern_bootfile) + 1);

	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
	    bootpath_len);

	if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32,
	    (sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 ||
	    bcmp(kern_bootfile, FAILSAFE_BOOTFILE64,
	    (sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) {
		is_failsafe = 1;
	}

load_kernel_retry:
	/*
	 * Read in unix and boot_archive
	 */
	end_addr = DBOOT_ENTRY_ADDRESS;
	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
		struct _buf	*file;
		uintptr_t	va;
		uint64_t	fsize;
		size_t		fsize_roundup, pt_size;
		int		page_index;
		uintptr_t	offset;
		ddi_dma_attr_t dma_attr = fastboot_dma_attr;


		dprintf("fastboot_filename[%d] = %s\n",
		    i, fastboot_filename[i]);

		if ((file = kobj_open_file(fastboot_filename[i])) ==
		    (struct _buf *)-1) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't open %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		if (kobj_get_filesize(file, &fsize) != 0) {
			cmn_err(CE_NOTE,
			    "!Fastboot: Couldn't get filesize for %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t);

		/*
		 * Where the files end in physical memory after being
		 * relocated by the fast boot switcher.
		 */
		end_addr += fsize_roundup;
		if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) {
			cmn_err(CE_NOTE, "!Fastboot: boot archive is too big");
			goto err_out;
		}

		/*
		 * Adjust dma_attr_addr_lo so that the new kernel and boot
	 * archive will not be overwritten during relocation.
		 */
		if (end_addr > fastboot_dma_attr.dma_attr_addr_lo ||
		    end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) {

			if (is_retry) {
				/*
				 * If we have already tried and didn't succeed,
				 * just give up.
				 */
				cmn_err(CE_NOTE,
				    "!Fastboot: boot archive is too big");
				goto err_out;
			} else {
				/* Set the flag so we don't keep retrying */
				is_retry++;

				/* Adjust dma_attr_addr_lo */
				fastboot_dma_attr.dma_attr_addr_lo = end_addr;
				fastboot_below_1G_dma_attr.dma_attr_addr_lo =
				    end_addr;

				/*
				 * Free the memory we have already allocated
				 * whose physical addresses might not fit
				 * the new lo and hi constraints.
				 */
				fastboot_free_mem(&newkernel, end_addr);
				goto load_kernel_retry;
			}
		}


		if (!fastboot_contig)
			dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) +
			    (((fsize % PAGESIZE) == 0) ? 0 : 1);

		if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0))
		    == NULL) {
			cmn_err(CE_NOTE, fastboot_enomem_msg, fsize, "64G");
			goto err_out;
		}

		va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t);

		if (kobj_read_file(file, (char *)va, fsize, 0) < 0) {
			cmn_err(CE_NOTE, "!Fastboot: Couldn't read %s",
			    fastboot_filename[i]);
			goto err_out;
		}

		fb = &newkernel.fi_files[i];
		fb->fb_va = va;
		fb->fb_size = fsize;
		fb->fb_sectcnt = 0;

		pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);

		/*
		 * If we have reserved memory but it is not enough, free it.
		 */
		if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) {
			contig_free((void *)fb->fb_pte_list_va,
			    fb->fb_pte_list_size);
			fb->fb_pte_list_size = 0;
		}

		if (fb->fb_pte_list_size == 0) {
			if ((fb->fb_pte_list_va =
			    (x86pte_t *)contig_alloc(pt_size,
			    &fastboot_below_1G_dma_attr, PAGESIZE, 0))
			    == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)pt_size, "1G");
				goto err_out;
			}
			/*
			 * fb_pte_list_size must be set after the allocation
			 * succeeds as it's used to determine how much memory to
			 * free.
			 */
			fb->fb_pte_list_size = pt_size;
		}

		bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size);

		fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)fb->fb_pte_list_va));

		for (page_index = 0, offset = 0; offset < fb->fb_size;
		    offset += PAGESIZE) {
			uint64_t paddr;

			paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
			    (caddr_t)fb->fb_va + offset));

			ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo);

			/*
			 * Include the pte_bits so we don't have to make
			 * it in assembly.
			 */
			fb->fb_pte_list_va[page_index++] = (x86pte_t)
			    (paddr | pte_bits);
		}

		fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE;

		if (i == FASTBOOT_UNIX) {
			Ehdr	*ehdr = (Ehdr *)va;
			int	j;

			/*
			 * Sanity checks:
			 */
			for (j = 0; j < SELFMAG; j++) {
				if (ehdr->e_ident[j] != ELFMAG[j]) {
					cmn_err(CE_NOTE, "!Fastboot: Bad ELF "
					    "signature");
					goto err_out;
				}
			}

			if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_386) {

				fb->fb_sectcnt = sizeof (fb->fb_sections) /
				    sizeof (fb->fb_sections[0]);

				if (fastboot_elf32_find_loadables((void *)va,
				    fsize, &fb->fb_sections[0],
				    &fb->fb_sectcnt, &dboot_start_offset) < 0) {
					cmn_err(CE_NOTE, "!Fastboot: ELF32 "
					    "program section failure");
					goto err_out;
				}

				if (fb->fb_sectcnt == 0) {
					cmn_err(CE_NOTE, "!Fastboot: No ELF32 "
					    "program sections found");
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE32_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE32,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE32));
				}

			} else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 &&
			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
			    ehdr->e_machine == EM_AMD64) {

				if (fastboot_elf64_find_dboot_load_offset(
				    (void *)va, fsize, &dboot_start_offset)
				    != 0) {
					cmn_err(CE_NOTE, "!Fastboot: Couldn't "
					    "find ELF64 dboot entry offset");
					goto err_out;
				}

				if (!is_x86_feature(x86_featureset,
				    X86FSET_64) ||
				    !is_x86_feature(x86_featureset,
				    X86FSET_PAE)) {
					cmn_err(CE_NOTE, "Fastboot: Cannot "
					    "reboot to %s: "
					    "not a 64-bit capable system",
					    kern_bootfile);
					goto err_out;
				}

				if (is_failsafe) {
					/* Failsafe boot_archive */
					bcopy(BOOTARCHIVE64_FAILSAFE,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64_FAILSAFE));
				} else {
					bcopy(BOOTARCHIVE64,
					    &fastboot_filename
					    [FASTBOOT_NAME_BOOTARCHIVE]
					    [bootpath_len],
					    sizeof (BOOTARCHIVE64));
				}
			} else {
				cmn_err(CE_NOTE, "!Fastboot: Unknown ELF type");
				goto err_out;
			}

			fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS -
			    dboot_start_offset;

			fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup;
		} else {
			fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa;
			fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup;
		}

		kobj_close_file(file);

	}

	/*
	 * Add the function that will switch us to 32-bit protected mode
	 */
	fb = &newkernel.fi_files[FASTBOOT_SWTCH];
	fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA;
	fb->fb_size = MMU_PAGESIZE;

	hat_devload(kas.a_hat, (caddr_t)fb->fb_va,
	    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
	    PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);

	/*
	 * Build the new multiboot_info structure
	 */
	if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) {
		goto err_out;
	}

	/*
	 * Build page table for low 1G physical memory. Use big pages.
	 * Allocate 4 (5 for amd64) pages for the page tables.
	 *    1 page for PML4 (amd64)
	 *    1 page for Page-Directory-Pointer Table
	 *    2 pages for Page Directory
	 *    1 page for Page Table.
	 * The page table entry will be rewritten to map the physical
	 * address as we do the copying.
	 */
	if (newkernel.fi_has_pae) {
#ifdef	__amd64
		size_t size = MMU_PAGESIZE * 5;
#else
		size_t size = MMU_PAGESIZE * 4;
#endif	/* __amd64 */

		if (newkernel.fi_pagetable_size && newkernel.fi_pagetable_size
		    < size) {
			contig_free((void *)newkernel.fi_pagetable_va,
			    newkernel.fi_pagetable_size);
			newkernel.fi_pagetable_size = 0;
		}

		if (newkernel.fi_pagetable_size == 0) {
			if ((newkernel.fi_pagetable_va = (uintptr_t)
			    contig_alloc(size, &fastboot_below_1G_dma_attr,
			    MMU_PAGESIZE, 0)) == NULL) {
				cmn_err(CE_NOTE, fastboot_enomem_msg,
				    (uint64_t)size, "1G");
				goto err_out;
			}
			/*
			 * fi_pagetable_size must be set after the allocation
			 * succeeds as it's used to determine how much memory to
			 * free.
			 */
			newkernel.fi_pagetable_size = size;
		}

		bzero((void *)(newkernel.fi_pagetable_va), size);

		newkernel.fi_pagetable_pa =
		    mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
		    (caddr_t)newkernel.fi_pagetable_va));

		newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa +
		    size - MMU_PAGESIZE;

		newkernel.fi_next_table_va = newkernel.fi_pagetable_va +
		    MMU_PAGESIZE;
		newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa +
		    MMU_PAGESIZE;

		fastboot_build_pagetables(&newkernel);
	}


	/* Generate MD5 checksums */
	fastboot_cksum_generate(&newkernel);

	/* Mark it as valid */
	newkernel.fi_valid = 1;
	newkernel.fi_magic = FASTBOOT_MAGIC;

	postbootkernelbase = saved_kernelbase;
	return;

err_out:
	postbootkernelbase = saved_kernelbase;
	newkernel.fi_valid = 0;
	fastboot_free_newkernel(&newkernel);
}
Example #10
/*
 * Create the multiboot info structure (mbi) based on the saved mbi.
 * Recalculate values of the pointer type fields in the data
 * structure based on the new starting physical address of the
 * data structure.
 */
static int
fastboot_build_mbi(char *mdep, fastboot_info_t *nk)
{
	mb_module_t	*mbp;
	multiboot_info_t	*mbi;	/* pointer to multiboot structure */
	uintptr_t	start_addr_va;	/* starting VA of mbi */
	uintptr_t	start_addr_pa;	/* starting PA of mbi */
	size_t		offs = 0;	/* offset from the starting address */
	size_t		arglen;		/* length of the command line arg */
	size_t		size;	/* size of the memory reserved for mbi */
	size_t		mdnsz;	/* length of the boot archive name */

	/*
	 * If mdep is neither NULL nor empty, use the length of mdep + 1
	 * (for NULL terminating) as the length of the new command
	 * line; else use the saved command line length as the
	 * length for the new command line.
	 */
	if (mdep != NULL && strlen(mdep) != 0) {
		arglen = strlen(mdep) + 1;
	} else {
		arglen = saved_cmdline_len;
	}

	/*
	 * Allocate memory for the new multiboot info structure (mbi).
	 * If we have reserved memory for mbi but it's not enough,
	 * free it and reallocate.
	 */
	size = PAGESIZE + P2ROUNDUP(arglen, PAGESIZE);
	if (nk->fi_mbi_size && nk->fi_mbi_size < size) {
		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
		nk->fi_mbi_size = 0;
	}

	if (nk->fi_mbi_size == 0) {
		if ((nk->fi_new_mbi_va =
		    (uintptr_t)contig_alloc(size, &fastboot_below_1G_dma_attr,
		    PAGESIZE, 0)) == NULL) {
			cmn_err(CE_NOTE, fastboot_enomem_msg,
			    (uint64_t)size, "1G");
			return (-1);
		}
		/*
		 * fi_mbi_size must be set after the allocation succeeds
		 * as it's used to determine how much memory to free.
		 */
		nk->fi_mbi_size = size;
	}

	/*
	 * Initialize memory
	 */
	bzero((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);

	/*
	 * Get PA for the new mbi
	 */
	start_addr_va = nk->fi_new_mbi_va;
	start_addr_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
	    (caddr_t)start_addr_va));
	nk->fi_new_mbi_pa = (paddr_t)start_addr_pa;

	/*
	 * Populate the rest of the fields in the data structure
	 */

	/*
	 * Copy from the saved mbi to preserve all non-pointer type fields.
	 */
	mbi = (multiboot_info_t *)start_addr_va;
	bcopy(&saved_mbi, mbi, sizeof (*mbi));

	/*
	 * Recalculate mods_addr.  Set mod_start and mod_end based on
	 * the physical address of the new boot archive.  Set mod_name
	 * to the name of the new boot archive.
	 */
	offs += sizeof (multiboot_info_t);
	mbi->mods_addr = start_addr_pa + offs;
	mbp = (mb_module_t *)(start_addr_va + offs);
	mbp->mod_start = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_dest_pa;
	mbp->mod_end = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_next_pa;

	offs += sizeof (mb_module_t);
	mdnsz = strlen(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE]) + 1;
	bcopy(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
	    (void *)(start_addr_va + offs), mdnsz);
	mbp->mod_name = start_addr_pa + offs;
	mbp->reserved = 0;

	/*
	 * Make sure the offset is 16-byte aligned to avoid unaligned access.
	 */
	offs += mdnsz;
	offs = P2ROUNDUP_TYPED(offs, 16, size_t);

	/*
	 * Recalculate mmap_addr
	 */
	mbi->mmap_addr = start_addr_pa + offs;
	bcopy((void *)(uintptr_t)saved_mmap, (void *)(start_addr_va + offs),
	    saved_mbi.mmap_length);
	offs += saved_mbi.mmap_length;

	/*
	 * Recalculate drives_addr
	 */
	mbi->drives_addr = start_addr_pa + offs;
	bcopy((void *)(uintptr_t)saved_drives, (void *)(start_addr_va + offs),
	    saved_mbi.drives_length);
	offs += saved_mbi.drives_length;

	/*
	 * Recalculate the address of cmdline.  Set cmdline to contain the
	 * new boot argument.
	 */
	mbi->cmdline = start_addr_pa + offs;

	if (mdep != NULL && strlen(mdep) != 0) {
		bcopy(mdep, (void *)(start_addr_va + offs), arglen);
	} else {
		bcopy((void *)saved_cmdline, (void *)(start_addr_va + offs),
		    arglen);
	}

	/* clear fields and flags that are not copied */
	bzero(&mbi->config_table,
	    sizeof (*mbi) - offsetof(multiboot_info_t, config_table));
	mbi->flags &= ~(MB_INFO_CONFIG_TABLE | MB_INFO_BOOT_LOADER_NAME |
	    MB_INFO_APM_TABLE | MB_INFO_VIDEO_INFO);

	return (0);
}
Example #11
/*
 * Create a guest virtual cpu context so that the virtual cpu
 * springs into life in the domain just about to call mp_startup()
 *
 * Virtual CPUs must be initialized once in the lifetime of the domain;
 * after that subsequent attempts to start them will fail with X_EEXIST.
 *
 * Thus 'alloc' -really- creates and initializes the virtual
 * CPU context just once. Once the initialisation succeeds, we never
 * free it, nor the regular cpu_t to which it refers.
 */
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	kthread_t *tp = cp->cpu_thread;
	vcpu_guest_context_t vgc;

	int err = 1;

	/*
	 * First, augment the incoming cpu structure
	 * - vcpu pointer reference
	 * - pending event storage area
	 * - physical address of GDT
	 */
	cp->cpu_m.mcpu_vcpu_info =
	    &HYPERVISOR_shared_info->vcpu_info[cp->cpu_id];
	cp->cpu_m.mcpu_evt_pend = kmem_zalloc(
	    sizeof (struct xen_evt_data), KM_SLEEP);
	cp->cpu_m.mcpu_gdtpa =
	    mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)cp->cpu_gdt));

	if ((err = xen_gdt_setprot(cp, PROT_READ)) != 0)
		goto done;

	/*
	 * Now set up the vcpu context so that we can start this vcpu
	 * in the kernel at tp->t_pc (mp_startup).  Note that the
	 * thread will thread_exit() shortly after performing the
	 * initialization; in particular, we will *never* take a
	 * privilege transition on this thread.
	 */

	bzero(&vgc, sizeof (vgc));

#ifdef __amd64
	vgc.user_regs.rip = tp->t_pc;
	vgc.user_regs.rsp = tp->t_sp;
	vgc.user_regs.rbp = tp->t_sp - 2 * sizeof (greg_t);
#else
	vgc.user_regs.eip = tp->t_pc;
	vgc.user_regs.esp = tp->t_sp;
	vgc.user_regs.ebp = tp->t_sp - 2 * sizeof (greg_t);
#endif
	/*
	 * XXPV	Fix resume, if Russ didn't already fix it.
	 *
	 * Note that resume unconditionally puts t->t_stk + sizeof (regs)
	 * into kernel_sp via HYPERVISOR_stack_switch. This anticipates
	 * that only lwps take traps that switch to the kernel stack;
	 * part of creating an lwp adjusts the stack by subtracting
	 * sizeof (struct regs) off t_stk.
	 *
	 * The more interesting question is, why do we do all the work
	 * of a fully fledged lwp for a plain thread?  In particular
	 * we don't have to call HYPERVISOR_stack_switch for lwp-less threads
	 * or futz with the LDT.  This should probably all be done with
	 * an lwp context operator to keep pure thread context switch fast.
	 */
	vgc.kernel_sp = (ulong_t)tp->t_stk;

	err = mp_set_cpu_context(&vgc, cp);

done:
	if (err) {
		mach_cpucontext_free(cp, NULL, err);
		return (NULL);
	}
	return (cp);
}
Example #12
/*
 * balloon_free_pages()
 *    free page_cnt pages, using any combination of mfns, pfns, and kva as long
 *    as they refer to the same mapping.  If an array of mfns is passed in, we
 *    assume they were already cleared.  Otherwise, we need to zero the pages
 *    before giving them back to the hypervisor. kva space is not free'd up in
 *    case the caller wants to re-use it.
 */
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	xen_memory_reservation_t memdec;
	mfn_t mfn;
	pfn_t pfn;
	uint_t i;
	long e;


#if DEBUG
	/* make sure kva is page aligned and maps to first pfn */
	if (kva != NULL) {
		ASSERT(((uintptr_t)kva & PAGEOFFSET) == 0);
		if (pfns != NULL) {
			ASSERT(hat_getpfnum(kas.a_hat, kva) == pfns[0]);
		}
	}
#endif

	/* if we have a kva, we can clean all pages with just one bzero */
	if ((kva != NULL) && balloon_zero_memory) {
		bzero(kva, (page_cnt * PAGESIZE));
	}

	/* if we were given a kva and/or a pfn */
	if ((kva != NULL) || (pfns != NULL)) {

		/*
		 * All the current callers only pass 1 page when using kva or
		 * pfns, and use mfns when passing multiple pages.  If that
		 * assumption is changed, the following code will need some
		 * work.  The following ASSERT() guarantees we're respecting
		 * the io locking quota.
		 */
		ASSERT(page_cnt < bln_contig_list_quota);

		/* go through all the pages */
		for (i = 0; i < page_cnt; i++) {

			/* get the next pfn */
			if (pfns == NULL) {
				pfn = hat_getpfnum(kas.a_hat,
				    (kva + (PAGESIZE * i)));
			} else {
				pfn = pfns[i];
			}

			/*
			 * if we didn't already zero this page, do it now. we
			 * need to do this *before* we give back the MFN
			 */
			if ((kva == NULL) && (balloon_zero_memory)) {
				pfnzero(pfn, 0, PAGESIZE);
			}

			/*
			 * unmap the pfn. We don't free up the kva vmem space
			 * so the caller can re-use it. The page must be
			 * unmapped before it is given back to the hypervisor.
			 */
			if (kva != NULL) {
				hat_unload(kas.a_hat, (kva + (PAGESIZE * i)),
				    PAGESIZE, HAT_UNLOAD_UNMAP);
			}

			/* grab the mfn before the pfn is marked as invalid */
			mfn = pfn_to_mfn(pfn);

			/* mark the pfn as invalid */
			reassign_pfn(pfn, MFN_INVALID);

			/*
			 * if we weren't given an array of MFNs, we need to
			 * free them up one at a time. Otherwise, we'll wait
			 * until later and do it in one hypercall
			 */
			if (mfns == NULL) {
				bzero(&memdec, sizeof (memdec));
				/*LINTED: constant in conditional context*/
				set_xen_guest_handle(memdec.extent_start, &mfn);
				memdec.domid = DOMID_SELF;
				memdec.nr_extents = 1;
				e = HYPERVISOR_memory_op(
				    XENMEM_decrease_reservation, &memdec);
				if (e != 1) {
					cmn_err(CE_PANIC, "balloon: unable to "
					    "give a page back to the "
					    "hypervisor.\n");
				}
			}
		}
	}

	/*
	 * if we were passed in MFNs, we haven't free'd them up yet. We can
	 * do it with one call.
	 */
	if (mfns != NULL) {
		bzero(&memdec, sizeof (memdec));
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(memdec.extent_start, mfns);
		memdec.domid = DOMID_SELF;
		memdec.nr_extents = page_cnt;
		e = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &memdec);
		if (e != page_cnt) {
			cmn_err(CE_PANIC, "balloon: unable to give pages back "
			    "to the hypervisor.\n");
		}
	}

	atomic_add_long((ulong_t *)&bln_stats.bln_hv_pages, page_cnt);
	return (page_cnt);
}
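A brief usage sketch, not from the original sources, of the single-page kva case described in the comment above: the page is zeroed if balloon_zero_memory is set, unmapped, and its MFN handed back to the hypervisor, while the caller keeps the kva for later re-use. The caller and its error handling are assumptions.
/*
 * Illustrative only: give the single page mapped at `kva' back to the
 * hypervisor, keeping the virtual address around for later re-use.
 */
static void
example_balloon_release_one(caddr_t kva)
{
	long cnt;

	ASSERT(((uintptr_t)kva & PAGEOFFSET) == 0);
	cnt = balloon_free_pages(1, NULL, kva, NULL);
	ASSERT(cnt == 1);
}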
Example #13
void
sc_create(pci_t *pci_p)
{
	dev_info_t *dip = pci_p->pci_dip;
	sc_t *sc_p;
	uint64_t paddr;

#ifdef lint
	dip = dip;
#endif

	if (!pci_stream_buf_exists)
		return;

	/*
	 * Allocate streaming cache state structure and link it to
	 * the pci state structure.
	 */
	sc_p = (sc_t *)kmem_zalloc(sizeof (sc_t), KM_SLEEP);
	pci_p->pci_sc_p = sc_p;
	sc_p->sc_pci_p = pci_p;

	pci_sc_setup(sc_p);
	sc_p->sc_sync_reg_pa = va_to_pa((char *)sc_p->sc_sync_reg);

	DEBUG3(DBG_ATTACH, dip, "sc_create: ctrl=%x, invl=%x, sync=%x\n",
		sc_p->sc_ctrl_reg, sc_p->sc_invl_reg,
		sc_p->sc_sync_reg);
	DEBUG2(DBG_ATTACH, dip, "sc_create: ctx_invl=%x ctx_match=%x\n",
		sc_p->sc_ctx_invl_reg, sc_p->sc_ctx_match_reg);
	DEBUG3(DBG_ATTACH, dip,
		"sc_create: data_diag=%x, tag_diag=%x, ltag_diag=%x\n",
		sc_p->sc_data_diag_acc, sc_p->sc_tag_diag_acc,
		sc_p->sc_ltag_diag_acc);

	/*
	 * Allocate the flush/sync buffer.  Make sure it's properly
	 * aligned.
	 */
	sc_p->sc_sync_flag_base =
	    vmem_xalloc(static_alloc_arena, PCI_SYNC_FLAG_SIZE,
		PCI_SYNC_FLAG_SIZE, 0, 0, NULL, NULL, VM_SLEEP);
	sc_p->sc_sync_flag_vaddr = (uint64_t *)sc_p->sc_sync_flag_base;
	paddr = (uint64_t)hat_getpfnum(kas.a_hat,
	    (caddr_t)sc_p->sc_sync_flag_vaddr);
	paddr <<= MMU_PAGESHIFT;
	paddr += (uint64_t)
	    ((uintptr_t)sc_p->sc_sync_flag_vaddr & ~MMU_PAGEMASK);
	sc_p->sc_sync_flag_pa = paddr;
	DEBUG2(DBG_ATTACH, dip, "sc_create: sync buffer - vaddr=%x paddr=%x\n",
	    sc_p->sc_sync_flag_vaddr, sc_p->sc_sync_flag_pa);

	/*
	 * Create a mutex to go along with it.  While the mutex is held,
	 * all interrupts should be blocked.  This will prevent driver
	 * interrupt routines from attempting to acquire the mutex while
	 * held by a lower priority interrupt routine.  Note also that
	 * we now block cross calls as well, to prevent issues with
	 * relocation.
	 */
	mutex_init(&sc_p->sc_sync_mutex, NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XCALL_PIL));

	sc_configure(sc_p);
}
Example #14
/*
 * Private ioctl for libkvm to support kvm_physaddr().
 * Given an address space and a VA, compute the PA.
 */
static int
mmioctl_vtop(intptr_t data)
{
#ifdef _SYSCALL32
	mem_vtop32_t vtop32;
#endif
	mem_vtop_t mem_vtop;
	proc_t *p;
	pfn_t pfn = (pfn_t)PFN_INVALID;
	pid_t pid = 0;
	struct as *as;
	struct seg *seg;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
			return (EFAULT);
		mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
		mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;

		if (mem_vtop.m_as != NULL)
			return (EINVAL);
	}
#endif

	if (mem_vtop.m_as == &kas) {
		pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
	} else {
		if (mem_vtop.m_as == NULL) {
			/*
			 * Assume the calling process's address space if the
			 * caller didn't specify one.
			 */
			p = curthread->t_procp;
			if (p == NULL)
				return (EIO);
			mem_vtop.m_as = p->p_as;
		}

		mutex_enter(&pidlock);
		for (p = practive; p != NULL; p = p->p_next) {
			if (p->p_as == mem_vtop.m_as) {
				pid = p->p_pid;
				break;
			}
		}
		mutex_exit(&pidlock);
		if (p == NULL)
			return (EIO);
		p = sprlock(pid);
		if (p == NULL)
			return (EIO);
		as = p->p_as;
		if (as == mem_vtop.m_as) {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
			for (seg = AS_SEGFIRST(as); seg != NULL;
			    seg = AS_SEGNEXT(as, seg))
				if ((uintptr_t)mem_vtop.m_va -
				    (uintptr_t)seg->s_base < seg->s_size)
					break;
			if (seg != NULL)
				pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		sprunlock(p);
	}
	mem_vtop.m_pfn = pfn;
	if (pfn == PFN_INVALID)
		return (EIO);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
			return (EFAULT);
	}
#ifdef _SYSCALL32
	else {
		vtop32.m_pfn = mem_vtop.m_pfn;
		if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
			return (EFAULT);
	}
#endif

	return (0);
}
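From user space this translation is reached through an ioctl; below is a hedged sketch that assumes the MEM_VTOP request and mem_vtop_t layout from <sys/mem.h> and a privileged open of /dev/kmem, roughly the path libkvm's kvm_physaddr() takes. The device node choice and the error handling are assumptions.
/*
 * Illustrative user-space sketch: translate a VA in the calling process
 * (m_as == NULL) to a PFN via the private MEM_VTOP ioctl.
 */
#include <sys/types.h>
#include <sys/mem.h>
#include <fcntl.h>
#include <unistd.h>
#include <stropts.h>
#include <stdio.h>

int
example_vtop(void *va)
{
	mem_vtop_t vtop;
	int fd = open("/dev/kmem", O_RDONLY);	/* assumed device node */

	if (fd < 0)
		return (-1);

	vtop.m_as = NULL;	/* use the calling process's address space */
	vtop.m_va = va;
	if (ioctl(fd, MEM_VTOP, &vtop) != 0) {
		(void) close(fd);
		return (-1);
	}
	(void) printf("va %p -> pfn 0x%lx\n", va, (ulong_t)vtop.m_pfn);
	(void) close(fd);
	return (0);
}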
Example #15
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
			{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above.  Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
			}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				uio->uio_resid -= iov->iov_len;
				uio->uio_loffset += iov->iov_len;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	return (uio->uio_resid == oresid ? error : 0);
}
Example #16
/*
 * Initialize IOAT relative resources.
 */
static int
fipe_ioat_init(void)
{
	char *buf;
	size_t size;

	bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
	mutex_init(&fipe_ioat_ctrl.ioat_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Allocate memory for IOAT memory copy operation.
	 * The allocated memory should be page aligned to achieve better power
	 * savings.
	 * Don't use ddi_dma_mem_alloc here to keep things simple.  This also
	 * makes quiesce easier.
	 */
	size = PAGESIZE;
	buf = kmem_zalloc(size, KM_SLEEP);
	if ((intptr_t)buf & PAGEOFFSET) {
		kmem_free(buf, PAGESIZE);
		size <<= 1;
		buf = kmem_zalloc(size, KM_SLEEP);
	}
	fipe_ioat_ctrl.ioat_buf_size = size;
	fipe_ioat_ctrl.ioat_buf_start = buf;
	buf = (char *)P2ROUNDUP((intptr_t)buf, PAGESIZE);
	fipe_ioat_ctrl.ioat_buf_virtaddr = buf;
	fipe_ioat_ctrl.ioat_buf_physaddr = hat_getpfnum(kas.a_hat, buf);
	fipe_ioat_ctrl.ioat_buf_physaddr <<= PAGESHIFT;

#ifdef	FIPE_IOAT_BUILTIN
	{
		uint64_t bufpa;
		/* IOAT descriptor data structure copied from ioat.h. */
		struct fipe_ioat_cmd_desc {
			uint32_t	dd_size;
			uint32_t	dd_ctrl;
			uint64_t	dd_src_paddr;
			uint64_t	dd_dest_paddr;
			uint64_t	dd_next_desc;
			uint64_t	dd_res4;
			uint64_t	dd_res5;
			uint64_t	dd_res6;
			uint64_t	dd_res7;
		} *desc;

		/*
		 * Build two IOAT command descriptors and chain them into a ring.
		 * Control flags as below:
		 *	0x2: disable source snoop
		 *	0x4: disable destination snoop
		 *	0x0 << 24: memory copy operation
		 * The layout of the command descriptors and memory buffers is
		 * organized for power-saving effect; please don't change it.
		 */
		buf = fipe_ioat_ctrl.ioat_buf_virtaddr;
		bufpa = fipe_ioat_ctrl.ioat_buf_physaddr;
		fipe_ioat_ctrl.ioat_cmd_physaddr = bufpa;

		/* First command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point to second descriptor. */
		desc->dd_next_desc = bufpa + 64;

		/* Second command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf + 64);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point to first descriptor. */
		desc->dd_next_desc = bufpa;
	}
#endif	/* FIPE_IOAT_BUILTIN */

	return (0);
}