예제 #1
0
파일: balloon.c 프로젝트: bahamas10/openzfs
/*
 * balloon_replace_pages()
 *	Try to replace nextents blocks of 2^order pages.  addr_bits specifies
 *	how many bits of address the pages must be within (i.e. 16 would mean
 *	that the pages cannot have an address > 64k).  The constraints are on
 *	what the hypervisor gives us -- we are free to give any pages in
 *	exchange.  The array pp is the pages we are giving away.  The caller
 *	provides storage space for mfns, which hold the new physical pages.
 *
 *	Returns the number of extents that were obtained with the requested
 *	constraints (0 if the constrained request failed outright).  Pages
 *	that could not be replaced that way are refilled with unconstrained
 *	memory; failing to give a page back, or to refill one, panics.
 */
long
balloon_replace_pages(uint_t nextents, page_t **pp, uint_t addr_bits,
    uint_t order, mfn_t *mfns)
{
	xen_memory_reservation_t memres;
	long fallback_cnt;
	long cnt;
	uint_t i, j, page_cnt, extlen;
	long e;
	int locked;

	/*
	 * we shouldn't be allocating constrained pages on a guest. It doesn't
	 * make any sense. They won't be constrained after a migration.
	 */
	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));

	extlen = 1 << order;
	page_cnt = nextents * extlen;

	/* Give back the current pages to the hypervisor */
	for (i = 0; i < page_cnt; i++) {
		cnt = balloon_free_pages(1, NULL, NULL, &pp[i]->p_pagenum);
		if (cnt != 1) {
			cmn_err(CE_PANIC, "balloon: unable to give a page back "
			    "to the hypervisor.\n");
		}
	}

	/*
	 * try to allocate the new pages using addr_bits and order. If we can't
	 * get all of the pages, try to get the remaining pages with no
	 * constraints and, if that was successful, return the number of
	 * constrained pages we did allocate.
	 */
	bzero(&memres, sizeof (memres));
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(memres.extent_start, mfns);
	memres.domid = DOMID_SELF;
	memres.nr_extents = nextents;
	memres.mem_flags = XENMEMF_address_bits(addr_bits);
	memres.extent_order = order;
	cnt = HYPERVISOR_memory_op(XENMEM_increase_reservation, &memres);

	/*
	 * A negative result means the request failed.  Clamp it to zero
	 * before cnt is used as a lock count or loop bound: where long and
	 * uint_t have the same width, "i < cnt" is evaluated unsigned, so a
	 * negative cnt would walk far past the end of pp[] and mfns[].
	 */
	if (cnt < 0) {
		cnt = 0;
	}

	/* assign the new MFNs to the current PFNs */
	locked = balloon_lock_contig_pfnlist(cnt * extlen);
	for (i = 0; i < cnt; i++) {
		for (j = 0; j < extlen; j++) {
			reassign_pfn(pp[i * extlen + j]->p_pagenum,
			    mfns[i] + j);
		}
	}
	if (locked)
		unlock_contig_pfnlist();

	if (cnt != nextents) {
		uint_t offset;

		/*
		 * We couldn't get enough memory to satisfy our requirements.
		 * The above loop will assign the parts of the request that
		 * were successful (this part may be 0).  We need to fill
		 * in the rest.  The bzero below clears out extent_order and
		 * address_bits, so we'll take anything from the hypervisor
		 * to replace the pages we gave away.
		 */
		fallback_cnt = page_cnt - cnt * extlen;
		bzero(&memres, sizeof (memres));
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(memres.extent_start, mfns);
		memres.domid = DOMID_SELF;
		memres.nr_extents = fallback_cnt;
		e = HYPERVISOR_memory_op(XENMEM_increase_reservation, &memres);
		if (e != fallback_cnt) {
			cmn_err(CE_PANIC, "balloon: unable to recover from "
			    "failed increase_reservation.\n");
		}

		/*
		 * We already used pp[0...(cnt * extlen)] before,
		 * so start at the next entry in the pp array.
		 */
		offset = page_cnt - fallback_cnt;
		locked = balloon_lock_contig_pfnlist(fallback_cnt);
		for (i = 0; i < fallback_cnt; i++) {
			reassign_pfn(pp[i + offset]->p_pagenum, mfns[i]);
		}
		if (locked)
			unlock_contig_pfnlist();
	}

	/*
	 * balloon_free_pages increments our counter.  Decrement it here.
	 */
	atomic_add_long((ulong_t *)&bln_stats.bln_hv_pages, -(long)page_cnt);

	/*
	 * return the number of extents we were able to replace. If we got
	 * this far, we know all the pp's are valid.
	 */
	return (cnt);
}
/*
 * Ask the hypervisor to make frame fgmfn of domain domid appear at local
 * frame lpfn.  Returns 0 on success and 1 (after logging a warning) when
 * either the hypercall or the per-frame status reports a failure.
 */
static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
			    unsigned int domid)
{
	xen_ulong_t foreign_idx = fgmfn;
	xen_pfn_t local_gpfn = lpfn;
	int frame_err = 0;
	int rc;
	struct xen_add_to_physmap_range xatp = {
		.domid = DOMID_SELF,
		.foreign_domid = domid,
		.size = 1,
		.space = XENMAPSPACE_gmfn_foreign,
	};

	/* One-element arrays: the request covers exactly one frame. */
	set_xen_guest_handle(xatp.idxs, &foreign_idx);
	set_xen_guest_handle(xatp.gpfns, &local_gpfn);
	set_xen_guest_handle(xatp.errs, &frame_err);

	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
	if (!rc && !frame_err)
		return 0;

	pr_warn("Failed to map pfn to mfn rc:%d:%d pfn:%lx mfn:%lx\n",
		rc, frame_err, lpfn, fgmfn);
	return 1;
}

/*
 * Per-call state handed to remap_pte_fn() through apply_to_page_range().
 */
struct remap_data {
	xen_pfn_t fgmfn; /* foreign domain's gmfn */
	pgprot_t prot;   /* protection bits used when building each pte */
	domid_t  domid;  /* foreign domain that owns fgmfn */
	struct vm_area_struct *vma; /* its vm_mm is the mm whose ptes are set */
	int index;       /* next entry of pages[] to consume */
	struct page **pages; /* local pages that back the mapping */
	/* NOTE(review): not referenced by the code visible here -- confirm use */
	struct xen_remap_mfn_info *info;
};

/*
 * apply_to_page_range() callback: take the next backing page from the
 * remap_data in @data, map the foreign frame onto it and install a pte
 * for it at @addr.  Returns -EFAULT if the foreign mapping fails.
 */
static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
			void *data)
{
	struct remap_data *ctx = data;
	struct page *backing;
	unsigned long local_pfn;
	pte_t new_pte;

	backing = ctx->pages[ctx->index];
	ctx->index++;

	local_pfn = page_to_pfn(backing);
	new_pte = pfn_pte(local_pfn, ctx->prot);

	if (map_foreign_page(local_pfn, ctx->fgmfn, ctx->domid) != 0)
		return -EFAULT;

	set_pte_at(ctx->vma->vm_mm, addr, ptep, new_pte);
	return 0;
}

/*
 * Map frame @mfn of domain @domid into @vma at @addr, backed by the local
 * pages in @pages.  Only single-page requests are supported; anything
 * larger is rejected with -EINVAL.  Otherwise returns whatever
 * apply_to_page_range() returns.
 */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t mfn, int nr,
			       pgprot_t prot, unsigned domid,
			       struct page **pages)
{
	struct remap_data data = {
		.fgmfn = mfn,
		.prot  = prot,
		.domid = domid,
		.vma   = vma,
		.index = 0,
		.pages = pages,
	};

	/* TBD: Batching, current sole caller only does page at a time */
	if (nr > 1)
		return -EINVAL;

	return apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
				   remap_pte_fn, &data);
}
예제 #3
0
파일: setup.c 프로젝트: EvanZheng/linux-2.6
/**
 * xen_memory_setup - Xen hook for machine specific memory setup.
 *
 * Fetches the memory map from the hypervisor, truncates RAM regions to the
 * domain's initial allocation, registers the result in the kernel e820 map
 * and works out how many extra pages exist above that allocation.
 *
 * Returns the string "Xen".
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long extra_pages = 0;
	unsigned long extra_limit;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	/* The initial domain asks for the machine map, others for their own. */
	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/* No map available: fabricate one RAM region from 0 to mem_end. */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);

	e820.nr_map = 0;
	xen_extra_mem_start = mem_end;
	for (i = 0; i < memmap.nr_entries; i++) {
		unsigned long long end = map[i].addr + map[i].size;

		if (map[i].type == E820_RAM) {
			if (map[i].addr < mem_end && end > mem_end) {
				/* Truncate region to max_mem. */
				u64 delta = end - mem_end;

				map[i].size -= delta;
				extra_pages += PFN_DOWN(delta);

				end = mem_end;
			}
		}

		/* Track the highest end address seen across all regions. */
		if (end > xen_extra_mem_start)
			xen_extra_mem_start = end;

		/* If region is non-RAM or below mem_end, add what remains */
		if ((map[i].type != E820_RAM || map[i].addr < mem_end) &&
		    map[i].size > 0)
			e820_add_region(map[i].addr, map[i].size, map[i].type);
	}

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 *
	 * In Dom0, the host E820 information can leave gaps in the
	 * ISA range, which would cause us to release those pages.  To
	 * avoid this, we unconditionally reserve them here.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
		      __pa(xen_start_info->pt_base),
			"XEN START INFO");

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);

	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
	 * factor the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  max_pfn + extra_pages);

	if (extra_limit >= max_pfn)
		extra_pages = extra_limit - max_pfn;
	else
		extra_pages = 0;

	/* Extra memory is only registered for non-initial domains. */
	if (!xen_initial_domain())
		xen_add_extra_mem(extra_pages);

	return "Xen";
}
예제 #4
0
/*
 * Hand every populated page frame in [start, end) back to the hypervisor,
 * one frame per hypercall, and mark each released frame invalid in the
 * p2m.  Returns the number of frames actually released.
 */
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long freed = 0;
	unsigned long cur;
	int rc;

	for (cur = start; cur < end; cur++) {
		unsigned long mfn = pfn_to_mfn(cur);

		/* Only frames with a valid, round-tripping p2m entry qualify. */
		if (mfn != INVALID_P2M_ENTRY && mfn_to_pfn(mfn) == cur) {
			reservation.nr_extents = 1;
			set_xen_guest_handle(reservation.extent_start, &mfn);

			rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						  &reservation);
			WARN(rc != 1, "Failed to release pfn %lx err=%d\n", cur, rc);
			if (rc != 1)
				continue;

			__set_phys_to_machine(cur, INVALID_P2M_ENTRY);
			freed++;
		}
	}
	printk(KERN_INFO "Freeing  %lx-%lx pfn range: %lu pages freed\n",
	       start, end, freed);

	return freed;
}

/*
 * Walk the e820 entries in @list (@map_size of them).  Every stretch of
 * non-RAM regions and gaps is identity mapped, and the part of it that
 * lies below @nr_pages is released back to the hypervisor.
 *
 * Returns the number of pages released.
 */
static unsigned long __init xen_set_identity_and_release(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
	phys_addr_t start = 0;
	unsigned long released = 0;
	unsigned long identity = 0;
	const struct e820entry *entry;
	size_t i;

	/*
	 * Accumulate non-RAM regions and gaps until a RAM region (or the
	 * last entry of the map) is reached, then set the 1:1 map and
	 * release the pages (if available) in that accumulated range.
	 *
	 * The range is widened to whole pages (start rounded down, end
	 * rounded up) so partially covered pages are included in the
	 * 1:1 mapping.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;

		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			/* A RAM entry ends the range where the RAM begins. */
			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn) {
				if (start_pfn < nr_pages)
					released += xen_release_chunk(
						start_pfn, min(end_pfn, nr_pages));

				identity += set_phys_range_identity(
					start_pfn, end_pfn);
			}
			start = end;
		}
	}

	/* Both counters are unsigned long, so both are printed with %lu. */
	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
	printk(KERN_INFO "Set %lu page(s) to 1-1 mapping\n", identity);

	return released;
}
예제 #5
0
/*
 * apply_to_page_range() callback: clear the pte at @addr, invalidate the
 * matching p2m entry and return the machine frame that was mapped there
 * to the hypervisor.  Any result other than one released extent is fatal.
 */
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	struct xen_memory_reservation reservation = {
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	/* Read the frame number before the pte is wiped below. */
	unsigned long frame = pte_mfn(*pte);
	int released;

	set_xen_guest_handle(reservation.extent_start, &frame);

	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);

	released = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					&reservation);
	BUG_ON(released != 1);

	return 0;
}
#endif

/*
 * Allocate @nr_pages pages, give the memory behind each of them back to
 * the hypervisor, and return them in a kmalloc'ed array.
 *
 * Returns the array on success.  On any failure the pages processed so
 * far are appended to the balloon, the array is freed and NULL is
 * returned.  The balloon worker is scheduled in both cases.
 */
struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
	unsigned long vaddr, flags;
	struct page *page, **pagevec;
	int i, ret;

	pagevec = kmalloc(sizeof(*pagevec) * nr_pages, GFP_KERNEL);
	if (pagevec == NULL)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		page = pagevec[i] = alloc_page(GFP_KERNEL);
		if (page == NULL)
			goto err;

		vaddr = (unsigned long)page_address(page);

		scrub_pages(vaddr, 1);

		balloon_lock(flags);

		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			unsigned long gmfn = page_to_pfn(page);
			struct xen_memory_reservation reservation = {
				.nr_extents   = 1,
				.extent_order = 0,
				.domid        = DOMID_SELF
			};
			set_xen_guest_handle(reservation.extent_start, &gmfn);
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						   &reservation);
			/*
			 * Exactly one extent was requested, so only a result
			 * of 1 is success.  A result of 0 (nothing released)
			 * must be treated as failure as well; previously it
			 * slipped through the "ret != 0" check below.
			 */
			ret = (ret == 1) ? 0 : 1;
		} else {
#ifdef CONFIG_XEN
			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
						  dealloc_pte_fn, NULL);
#else
			/* Cannot handle non-auto translate mode. */
			ret = 1;
#endif
		}

		if (ret != 0) {
			/* This page was never emptied: free it normally. */
			balloon_unlock(flags);
			__free_page(page);
			goto err;
		}

		totalram_pages = --current_pages;

		balloon_unlock(flags);
	}

 out:
	schedule_work(&balloon_worker);
#ifdef CONFIG_XEN
	flush_tlb_all();
#endif
	return pagevec;

 err:
	/* Pages [0, i) were already emptied; hand them to the balloon. */
	balloon_lock(flags);
	while (--i >= 0)
		balloon_append(pagevec[i]);
	balloon_unlock(flags);
	kfree(pagevec);
	pagevec = NULL;
	goto out;
}
예제 #6
0
/*
 * Release the whole pages that lie inside the physical address range
 * [start_addr, end_addr) back to the hypervisor, one frame per hypercall,
 * and invalidate their p2m entries.  The range is shrunk inwards to page
 * boundaries first.
 *
 * Returns the number of pages released (0 if the range holds no whole
 * page).
 */
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;

	/* No newline here: the page count is appended with KERN_CONT below. */
	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for(pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	/* len is unsigned long, so it is printed with %lu. */
	printk(KERN_CONT "%lu pages freed\n", len);

	return len;
}

/*
 * Release the pages below @max_pfn that fall in the gaps between the
 * entries of @e820 (and between the last entry and @max_pfn).
 *
 * Returns the total number of pages released.
 */
static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = 0;
	unsigned long released = 0;
	int i;

	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		/* The gap runs from the previous entry's end to this start. */
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		released += xen_release_chunk(last_end, end);
		last_end = e820->map[i].addr + e820->map[i].size;
	}

	/* Anything between the last entry and max_addr is a gap as well. */
	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	/* released is unsigned long, so it is printed with %lu. */
	printk(KERN_INFO "released %lu pages of unused memory\n", released);
	return released;
}
예제 #7
0
/*
 * Give each populated page frame in [start, end) back to the hypervisor
 * with its own hypercall and invalidate its p2m entry.  Returns how many
 * frames were released.
 */
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long nr_freed = 0;
	unsigned long pfn = start;
	int rc;

	while (pfn < end) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Skip frames that are not populated to begin with. */
		if (mfn != INVALID_P2M_ENTRY && mfn_to_pfn(mfn) == pfn) {
			reservation.nr_extents = 1;
			set_xen_guest_handle(reservation.extent_start, &mfn);

			rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						  &reservation);
			WARN(rc != 1, "Failed to release pfn %lx err=%d\n", pfn, rc);
			if (rc == 1) {
				__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
				nr_freed++;
			}
		}
		pfn++;
	}
	printk(KERN_INFO "Freeing  %lx-%lx pfn range: %lu pages freed\n",
	       start, end, nr_freed);

	return nr_freed;
}

/*
 * Walk the e820 entries in @list (@map_size of them).  Every stretch of
 * non-RAM regions and gaps is identity mapped, and the part of it that
 * lies below @nr_pages is released back to the hypervisor.
 *
 * Returns the number of pages released.
 */
static unsigned long __init xen_set_identity_and_release(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
	phys_addr_t start = 0;
	unsigned long released = 0;
	unsigned long identity = 0;
	const struct e820entry *entry;
	size_t i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * release the pages (if available) in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;

		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			/* A RAM entry ends the range where the RAM begins. */
			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn) {
				if (start_pfn < nr_pages)
					released += xen_release_chunk(
						start_pfn, min(end_pfn, nr_pages));

				identity += set_phys_range_identity(
					start_pfn, end_pfn);
			}
			start = end;
		}
	}

	/* Both counters are unsigned long, so both are printed with %lu. */
	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
	printk(KERN_INFO "Set %lu page(s) to 1-1 mapping\n", identity);

	return released;
}
예제 #8
0
/*
 * setup_arch - architecture boot-time setup.
 *
 * Built either for a native kernel or for a Xen guest, selected by
 * CONFIG_XEN; the two variants are interleaved with #ifdef throughout.
 * The function records boot parameters, sizes and maps memory, reserves
 * boot memory regions, initialises paging and (under Xen) the
 * physical-to-machine table, then registers resources and the console.
 *
 * @cmdline_p is passed on to parse_cmdline_early().
 */
void __init setup_arch(char **cmdline_p)
{
	unsigned long kernel_end;

#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	struct e820entry *machine_e820;
	struct xen_memory_map memmap;
#endif

#ifdef CONFIG_XEN
	/* Register a call for panic conditions. */
	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);

 	ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); 
	kernel_end = 0;		/* dummy */
 	screen_info = SCREEN_INFO;

	if (xen_start_info->flags & SIF_INITDOMAIN) {
		/* This is drawn from a dump from vgacon:startup in
		 * standard Linux. */
		screen_info.orig_video_mode = 3;
		screen_info.orig_video_isVGA = 1;
		screen_info.orig_video_lines = 25;
		screen_info.orig_video_cols = 80;
		screen_info.orig_video_ega_bx = 3;
		screen_info.orig_video_points = 16;
	} else
		screen_info.orig_video_isVGA = 0;

	edid_info = EDID_INFO;
	saved_video_mode = SAVED_VIDEO_MODE;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);


#endif

	setup_xen_features();

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_writable_pagetables);

	ARCH_SETUP
#else
 	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
 	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	saved_video_mode = SAVED_VIDEO_MODE;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
#endif	/* !CONFIG_XEN */
	setup_memory_region();
	copy_edd();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	/* Record the kernel image boundaries in init_mm. */
	init_mm.start_code = (unsigned long) &_text;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

#ifndef CONFIG_XEN
	code_resource.start = virt_to_phys(&_text);
	code_resource.end = virt_to_phys(&_etext)-1;
	data_resource.start = virt_to_phys(&_etext);
	data_resource.end = virt_to_phys(&_edata)-1;
#endif

	parse_cmdline_early(cmdline_p);

	early_identify_cpu(&boot_cpu_data);

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	end_pfn = e820_end_of_ram();
	num_physpages = end_pfn;		/* for pfn_valid */

	check_efer();

#ifndef CONFIG_XEN
	discover_ebda();
#endif

	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));

#ifdef CONFIG_ACPI_NUMA
	/*
	 * Parse SRAT to discover nodes.
	 */
	acpi_numa_init();
#endif

#ifdef CONFIG_NUMA
	numa_initmem_init(0, end_pfn); 
#else
	contig_initmem_init(0, end_pfn);
#endif

	/* Reserve direct mapping */
	reserve_bootmem_generic(table_start << PAGE_SHIFT, 
				(table_end - table_start) << PAGE_SHIFT);

	/* reserve kernel */
	kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
	reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);

#ifdef CONFIG_XEN
	/* reserve physmap, start info and initial page tables */
	reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT)-kernel_end);
#else
	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem_generic(0, PAGE_SIZE);

	/* reserve ebda region */
	if (ebda_addr)
		reserve_bootmem_generic(ebda_addr, ebda_size);
#endif

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);

	/* Reserve SMP trampoline */
	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
#endif

#ifdef CONFIG_ACPI_SLEEP
       /*
        * Reserve low memory region for sleep support.
        */
       acpi_reserve_bootmem();
#endif
#ifdef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
	/* Xen build: accept the initrd only if it ends within RAM. */
	if (xen_start_info->mod_start) {
		if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
			/*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
			initrd_below_start_ok = 1;
		} else {
			printk(KERN_ERR "initrd extends beyond end of memory "
				"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
				(unsigned long)(INITRD_START + INITRD_SIZE),
				(unsigned long)(end_pfn << PAGE_SHIFT));
			initrd_start = 0;
		}
	}
#endif
#else	/* CONFIG_XEN */
#ifdef CONFIG_BLK_DEV_INITRD
	/* Native build: additionally reserve the initrd in bootmem. */
	if (LOADER_TYPE && INITRD_START) {
		if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
			reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
			initrd_start =
				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
			initrd_end = initrd_start+INITRD_SIZE;
		}
		else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    (unsigned long)(INITRD_START + INITRD_SIZE),
			    (unsigned long)(end_pfn << PAGE_SHIFT));
			initrd_start = 0;
		}
	}
#endif
#endif	/* !CONFIG_XEN */
#ifdef CONFIG_KEXEC
	if (crashk_res.start != crashk_res.end) {
		reserve_bootmem(crashk_res.start,
			crashk_res.end - crashk_res.start + 1);
	}
#endif

	paging_init();
#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif
#ifdef CONFIG_XEN
	{
		int i, j, k, fpp;
		unsigned long va;

		/* 'Initial mapping' of initrd must be destroyed. */
		for (va = xen_start_info->mod_start;
		     va < (xen_start_info->mod_start+xen_start_info->mod_len);
		     va += PAGE_SIZE) {
			HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
		}

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Make sure we have a large enough P->M table. */
			phys_to_machine_mapping = alloc_bootmem(
				end_pfn * sizeof(unsigned long));
			/* Fill with all-ones, then copy in the entries Xen supplied. */
			memset(phys_to_machine_mapping, ~0,
			       end_pfn * sizeof(unsigned long));
			memcpy(phys_to_machine_mapping,
			       (unsigned long *)xen_start_info->mfn_list,
			       xen_start_info->nr_pages * sizeof(unsigned long));
			free_bootmem(
				__pa(xen_start_info->mfn_list),
				PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
						sizeof(unsigned long))));

			/* Destroyed 'initial mapping' of old p2m table. */
			for (va = xen_start_info->mfn_list;
			     va < (xen_start_info->mfn_list +
				   (xen_start_info->nr_pages*sizeof(unsigned long)));
			     va += PAGE_SIZE) {
				HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
			}

			/*
			 * Initialise the list of the frames that specify the
			 * list of frames that make up the p2m table. Used by
                         * save/restore.
			 */
			pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
			HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
				virt_to_mfn(pfn_to_mfn_frame_list_list);

			/*
			 * fpp is the number of unsigned long entries per page.
			 * i steps through the p2m one page worth of entries at
			 * a time, j is the slot within frame list k, and a new
			 * frame list page is started each time j wraps.
			 */
			fpp = PAGE_SIZE/sizeof(unsigned long);
			for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) {
				if ((j % fpp) == 0) {
					k++;
					BUG_ON(k>=fpp);
					pfn_to_mfn_frame_list[k] =
						alloc_bootmem(PAGE_SIZE);
					pfn_to_mfn_frame_list_list[k] =
						virt_to_mfn(pfn_to_mfn_frame_list[k]);
					j=0;
				}
				pfn_to_mfn_frame_list[k][j] =
					virt_to_mfn(&phys_to_machine_mapping[i]);
			}
			HYPERVISOR_shared_info->arch.max_pfn = end_pfn;
		}

	}

	if (xen_start_info->flags & SIF_INITDOMAIN)
		dmi_scan_machine();

	/* ACPI is turned off for every domain except the initial one. */
	if ( ! (xen_start_info->flags & SIF_INITDOMAIN))
	{
		acpi_disabled = 1;
#ifdef  CONFIG_ACPI
		acpi_ht = 0;
#endif
	}
#endif

#ifndef CONFIG_XEN
	check_ioapic();
#endif

	zap_low_mappings(0);

	/*
	 * set this early, so we dont allocate cpu0
	 * if MADT list doesnt list BSP first
	 * mpparse.c/MP_processor_info() allocates logical cpu numbers.
	 */
	cpu_set(0, cpu_present_map);
#ifdef CONFIG_ACPI
	/*
	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
	 * Call this early for SRAT node setup.
	 */
	acpi_boot_table_init();

	/*
	 * Read APIC and some other early information from ACPI tables.
	 */
	acpi_boot_init();
#endif

	init_cpu_to_node();

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		get_smp_config();
#ifndef CONFIG_XEN
	init_apic_mappings();
#endif
#endif
#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
	prefill_possible_map();
#endif

	/*
	 * Request address space for all standard RAM and ROM resources
	 * and also for regions reported as reserved by the e820.
	 */
#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	probe_roms();
	if (xen_start_info->flags & SIF_INITDOMAIN) {
		machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);

		memmap.nr_entries = E820MAX;
		set_xen_guest_handle(memmap.buffer, machine_e820);

		/* The hypercall is issued inside BUG_ON(); non-zero is fatal. */
		BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));

		e820_reserve_resources(machine_e820, memmap.nr_entries);
	}
#elif !defined(CONFIG_XEN)
	probe_roms();
	e820_reserve_resources(e820.map, e820.nr_map);
#endif

	request_resource(&iomem_resource, &video_ram_resource);

	{
	unsigned i;
	/* request I/O space for devices used on all i[345]86 PCs */
	for (i = 0; i < STANDARD_IO_RESOURCES; i++)
		request_resource(&ioport_resource, &standard_io_resources[i]);
	}

#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	if (xen_start_info->flags & SIF_INITDOMAIN) {
		/* Last use of the machine map fetched above; free it afterwards. */
		e820_setup_gap(machine_e820, memmap.nr_entries);
		free_bootmem(__pa(machine_e820), PAGE_SIZE);
	}
#elif !defined(CONFIG_XEN)
	e820_setup_gap(e820.map, e820.nr_map);
#endif

#ifdef CONFIG_GART_IOMMU
	iommu_hole_init();
#endif

#ifdef CONFIG_XEN
	{
		struct physdev_set_iopl set_iopl;

		set_iopl.iopl = 1;
		HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);

		if (xen_start_info->flags & SIF_INITDOMAIN) {
			if (!(xen_start_info->flags & SIF_PRIVILEGED))
				panic("Xen granted us console access "
				      "but not privileged status");
		       
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
			conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
			conswitchp = &dummy_con;
#endif
#endif
		} else {
			extern int console_use_vt;
			console_use_vt = 0;
		}
	}
#else	/* CONFIG_XEN */

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif

#endif /* !CONFIG_XEN */
}
예제 #9
0
/*
 * Set up the early boot allocator state: locate the scratch area that
 * follows the stack, record the optional boot module, describe memory as
 * a single pseudo-physical range and build the derived memory lists.
 * In the initial domain the machine memory map is fetched as well and
 * handed to build_pcimemlists().
 */
static void
init_mem_alloc(void)
{
	int	stack_marker;	/* only its address is used, to find the stack */
	paddr_t	scratch_start;
	xen_memory_map_t machine_map;

	DBG_MSG("Entered init_mem_alloc()\n");

	/*
	 * Free memory starts right after the stack.  At least 512KB of
	 * scratch space is available there, rounded up to 2MB alignment,
	 * which should be enough for the page tables we need to build.
	 * The nucleus memory is allocated last and lies outside the
	 * addressable range; we switch to new page tables before the
	 * kernel is unpacked.
	 */
	scratch_start = RNDUP((paddr_t)(uintptr_t)&stack_marker, MMU_PAGESIZE);
	DBG(scratch_start);
	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
	DBG(scratch_end);

	/*
	 * Be paranoid and keep a gap between the hypervisor's data and
	 * ours: back off 500KB from the end rather than the full 512KB.
	 */
	next_avail_addr = scratch_end - 500 * 1024;
	DBG(next_avail_addr);

	/*
	 * The domain builder hands us zero or one module.
	 */
	DBG(xen_info->mod_len);
	if (xen_info->mod_len > 0) {
		DBG(xen_info->mod_start);
		modules[0].bm_addr = xen_info->mod_start;
		modules[0].bm_size = xen_info->mod_len;
		bi->bi_module_cnt = 1;
		bi->bi_modules = (native_ptr_t)modules;
	} else {
		bi->bi_module_cnt = 0;
		bi->bi_modules = NULL;
	}
	DBG(bi->bi_module_cnt);
	DBG(bi->bi_modules);

	DBG(xen_info->mfn_list);
	DBG(xen_info->nr_pages);
	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
	DBG(max_mem);

	/*
	 * Addresses are pseudo-physical, so one memlist entry covers it all.
	 */
	memlists[0].addr = 0;
	DBG(memlists[0].addr);
	memlists[0].size = max_mem;
	DBG(memlists[0].size);
	memlists_used = 1;
	DBG(memlists_used);

	/* Complete the physinstall list. */
	sort_physinstall();

	/* Build the BIOS reserved memlists. */
	build_rsvdmemlists();

	/* Only the initial domain builds the PCI memory list. */
	if (!DOMAIN_IS_INITDOMAIN(xen_info))
		return;

	machine_map.nr_entries = MAXMAPS;
	/*LINTED: constant in conditional context*/
	set_xen_guest_handle(machine_map.buffer, map_buffer);
	if (HYPERVISOR_memory_op(XENMEM_machine_memory_map,
	    &machine_map) != 0)
		dboot_panic("getting XENMEM_machine_memory_map failed");
	build_pcimemlists(map_buffer, machine_map.nr_entries);
}
예제 #10
0
파일: balloon.c 프로젝트: OpenXT/xc-windows
//
// Release the first Requested entries of Balloon.PfnArray to the
// hypervisor.
//
// The PFNs are first recorded in Balloon.PfnsBalloonedOut; only as many
// as could be recorded are offered to the hypervisor, and any that the
// hypervisor did not take are removed from the range set again.
//
// *pReleased receives the number of pages actually released. The return
// value is TRUE when the release rate fell below MIN_PAGES_PER_S.
//
static BOOLEAN
BalloonReleasePfnArray(
    IN  ULONG                   Requested,
    OUT PULONG                  pReleased
    )
{
    xen_memory_reservation_t    reservation;
    LARGE_INTEGER               Start;
    LARGE_INTEGER               End;
    ULONGLONG                   TimeDelta;
    BOOLEAN                     Slow;
    ULONG                       Index;
    ULONG                       Registered;
    ULONG                       Released;

    XM_ASSERT(Requested <= BALLOON_PFN_ARRAY_SIZE);

    KeQuerySystemTime(&Start);

    Released = 0;

    if (Requested == 0)
        goto done;

    // A zero PFN in the part of the array being released is a bug.
    for (Index = 0; Index < Requested; Index++) {
        if (Balloon.PfnArray[Index] == 0) {
            TraceError(("%s: PFN[%d] == 0\n", __FUNCTION__, Index));
            XM_BUG();
        }
    }

    Registered = RangeSetAddItems(&(Balloon.PfnsBalloonedOut), 
                                  &(Balloon.PfnArray[0]),
                                  Requested);
    if (Registered < Requested) {
        TraceError(("%s: failed to register %d page(s)\n", __FUNCTION__,
                    Requested - Registered));
        if (Registered == 0)
            goto done;
    }

    SET_XEN_GUEST_HANDLE(reservation.extent_start, Balloon.PfnArray);
    reservation.extent_order = 0;
    reservation.mem_flags = 0;   // unused
    reservation.domid = DOMID_SELF;
    reservation.nr_extents = Registered;

    Released = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
    if (Released < Registered) {
        TraceWarning(("%s: partial release (%d < %d)\n", __FUNCTION__, Released, Registered));

        // This should not fail as we're removing ranges we just added
        RangeSetRemoveItems(&(Balloon.PfnsBalloonedOut),
                            &(Balloon.PfnArray[Released]),
                            Registered - Released);
    } else if (Released > Registered) {
        XM_BUG();
    }

    // Clear the array slots whose pages have been released.
    RtlZeroMemory(Balloon.PfnArray, Released * sizeof (PFN_NUMBER));

done:
    RangeSetDropRseCache(&(Balloon.PfnsBalloonedOut));

    TraceVerbose(("%s: %d page(s)\n", __FUNCTION__, Released));

    KeQuerySystemTime(&End);
    // The divisor turns the system time difference into the value that is
    // reported as milliseconds below.
    TimeDelta = (End.QuadPart - Start.QuadPart) / 10000ull;

    Slow = FALSE;
    if (TimeDelta != 0) {
        ULONGLONG   Rate;

        // Widen before multiplying so the product is computed in 64 bits.
        Rate = ((ULONGLONG)Released * 1000) / TimeDelta;
        if (Rate < MIN_PAGES_PER_S) {
            // %d consumes a 32-bit argument, so narrow TimeDelta explicitly.
            TraceWarning(("%s: ran for more than %dms\n", __FUNCTION__, (ULONG)TimeDelta));
            Slow = TRUE;
        }
    }

    *pReleased = Released;
    return Slow;
}
예제 #11
0
/*
 * Ask Xen to repopulate up to nr_pages ballooned-out pages and give every
 * page that was actually backed to the page allocator again.
 *
 * The request is capped at ARRAY_SIZE(frame_list).  Returns the negative
 * hypercall result on failure, 0 when exactly nr_pages (after capping)
 * were populated, and 1 otherwise.
 */
static int increase_reservation(unsigned long nr_pages)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long  pfn, n, flags;
	struct page   *pg;
	long           populated;

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	spin_lock_irqsave(&balloon_lock, flags);

	/* Offer Xen the PFNs of the first nr_pages pages on the balloon list. */
	for (n = 0, pg = balloon_first_page(); n < nr_pages;
	     n++, pg = balloon_next_page(pg)) {
		BUG_ON(pg == NULL);
		frame_list[n] = page_to_pfn(pg);
	}

	reservation.nr_extents = nr_pages;
	set_xen_guest_handle(reservation.extent_start, frame_list);
	populated = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);

	if (populated >= 0) {
		for (n = 0; n < populated; n++) {
			pg = balloon_retrieve();
			BUG_ON(pg == NULL);

			/* The page must not already have a valid P2M entry. */
			pfn = page_to_pfn(pg);
			BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
			       phys_to_machine_mapping_valid(pfn));

			set_phys_to_machine(pfn, frame_list[n]);

			/* Low-memory pages get their kernel mapping restored. */
#ifdef CONFIG_PVM
			if (!xen_hvm_domain() && pfn < max_low_pfn) {
				int err;

				err = HYPERVISOR_update_va_mapping(
					(unsigned long)__va(pfn << PAGE_SHIFT),
					mfn_pte(frame_list[n], PAGE_KERNEL),
					0);
				BUG_ON(err);
			}
#endif
			/* Hand the page back to the allocator. */
			ClearPageReserved(pg);
			init_page_count(pg);
			__free_page(pg);
		}

		balloon_stats.current_pages += populated;

		/*
		 * totalram_pages grew by more than this call added:
		 * recompute current_pages from totalram_pages and the bias.
		 */
		if (old_totalram_pages + populated < totalram_pages) {
			printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n", old_totalram_pages*4, totalram_pages*4);
			balloon_stats.current_pages = totalram_pages + totalram_bias;
			printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n", totalram_pages*4, balloon_stats.current_pages*4);
		}
		old_totalram_pages = totalram_pages;
	}

	spin_unlock_irqrestore(&balloon_lock, flags);

	if (populated < 0)
		return populated;
	return populated != nr_pages;
}

/*
 * Allocate up to nr_pages pages from the kernel, scrub and unmap them,
 * add them to the balloon and return their machine frames to Xen.
 *
 * The request is capped at ARRAY_SIZE(frame_list).  Returns 1 when page
 * allocation failed part-way (the caller should back off), 0 otherwise.
 * Xen refusing any of the frames is fatal (BUG_ON).
 */
static int decrease_reservation(unsigned long nr_pages)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long  pfn, n, flags;
	struct page   *pg;
	int            out_of_pages = 0;
	int            rc;

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (n = 0; n < nr_pages; n++) {
		pg = alloc_page(GFP_BALLOON);
		if (pg == NULL) {
			/* Release only what was obtained so far. */
			out_of_pages = 1;
			nr_pages = n;
			break;
		}

		pfn = page_to_pfn(pg);
		frame_list[n] = pfn_to_mfn(pfn);

		scrub_page(pg);

		if (xen_hvm_domain() || PageHighMem(pg))
			continue;

		/* Clear the kernel mapping of this page. */
		rc = HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			__pte_ma(0), 0);
		BUG_ON(rc);
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
#ifdef CONFIG_PVM
	kmap_flush_unused();
	flush_tlb_all();
#endif
	spin_lock_irqsave(&balloon_lock, flags);

	/* Mappings are gone: invalidate each P2M entry and queue the page. */
	for (n = 0; n < nr_pages; n++) {
		pfn = mfn_to_pfn(frame_list[n]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	reservation.nr_extents = nr_pages;
	set_xen_guest_handle(reservation.extent_start, frame_list);
	rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(rc != nr_pages);

	balloon_stats.current_pages -= nr_pages;

	/*
	 * totalram_pages dropped by less than this call removed:
	 * recompute current_pages from totalram_pages and the bias.
	 */
	if (old_totalram_pages < totalram_pages + nr_pages) {
		printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n", old_totalram_pages*4, totalram_pages*4);
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n", totalram_pages*4, balloon_stats.current_pages*4);
	}
	old_totalram_pages = totalram_pages;

	spin_unlock_irqrestore(&balloon_lock, flags);

	return out_of_pages;
}

/*
 * Work handler that moves the reservation towards current_target().
 *
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 *
 * While the handler runs, "control/uvp/Balloon_flag" is "1" in xenstore and
 * is reset to "0" before returning.  If the target was not reached, the
 * balloon timer is re-armed for one second later and the current size (in
 * KB) is written to "memory/target".
 */
static void balloon_process(struct work_struct *work)
{
	int need_sleep = 0;
	long credit;
	long total_increase = 0;
	/* Room for any 64-bit unsigned value in decimal plus the NUL. */
	char buffer[24];

	mutex_lock(&balloon_mutex);
	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB,totalram_bias=%luKB\n", totalram_pages*4, balloon_stats.current_pages*4, totalram_bias*4);

	if (totalram_pages > old_totalram_pages) {
		/*
		 * totalram_pages grew since the last run: reduce the bias by
		 * the growth modulo GB2PAGE (only when the bias is larger)
		 * and resynchronise current_pages.
		 */
		total_increase = (totalram_pages - old_totalram_pages) % GB2PAGE;
		if (totalram_bias > total_increase)
			totalram_bias = totalram_bias - total_increase;
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		old_totalram_pages = totalram_pages;
	}
	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB, totalram_bias=%luKB,total_increase=%ld\n", totalram_pages*4, balloon_stats.current_pages*4, totalram_bias*4, total_increase*4);
	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "1");

	/* Inflate or deflate until on target or a step asks us to back off. */
	do {
		credit = current_target() - balloon_stats.current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != balloon_stats.current_pages) {
		mod_timer(&balloon_timer, jiffies + HZ);
		/* Bounded formatting: never write past the end of buffer. */
		snprintf(buffer, sizeof(buffer), "%lu",
			 balloon_stats.current_pages << (PAGE_SHIFT - 10));
		xenbus_write(XBT_NIL, "memory", "target", buffer);
	}
	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "0");
	mutex_unlock(&balloon_mutex);
}
예제 #12
0
/*
 * Early Xen-specific start-up: detect hypervisor features, locate the
 * machine-to-physical and physical-to-machine tables, enable the needed
 * VM assists, reserve the memory Xen handed over, switch to the real
 * shared_info page and map the low ISA area through the fixmap.
 */
void __init xen_start_kernel(void)
{
	unsigned int i;
	struct xen_machphys_mapping mapping;
	unsigned long machine_to_phys_nr_ents;
#ifdef CONFIG_X86_32
	struct xen_platform_parameters pp;
	extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
	unsigned long addr;
#endif

	xen_setup_features();

	/*
	 * Ask Xen where the machine-to-physical table lives and how many
	 * entries it has; if the query fails, fall back to
	 * MACH2PHYS_NR_ENTRIES.
	 */
	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
		machine_to_phys_nr_ents = mapping.max_mfn + 1;
	} else
		machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
	/* Raise machine_to_phys_order until 2^order covers every entry. */
	while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
		machine_to_phys_order++;

	/* Without auto-translation the P2M table is the start-info mfn_list. */
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		phys_to_machine_mapping =
			(unsigned long *)xen_start_info->mfn_list;

	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_writable_pagetables));

	/*
	 * Reserve everything from the page-aligned end of the kernel image
	 * up to the end of the nr_pt_frames page-table frames at pt_base.
	 */
	reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
		      __pa(xen_start_info->pt_base)
		      + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
		      "Xen provided");

#ifdef CONFIG_X86_32
	WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
				     VMASST_TYPE_4gb_segments));

	/* Adopt the page directory Xen built for us. */
	init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;

	/* If Xen reports its virtual start address, keep clear of it. */
	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
		hypervisor_virt_start = pp.virt_start;
		reserve_top_address(0UL - pp.virt_start);
	}

	/* hypervisor_virt_start must have a pte index of zero. */
	BUG_ON(pte_index(hypervisor_virt_start));

	/* Do an early initialization of the fixmap area */
	make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables);
	addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
	set_pmd(pmd_offset(pud_offset(swapper_pg_dir + pgd_index(addr),
				      addr),
			   addr),
		__pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
#else
	check_efer();
	xen_init_pt();
#endif

	/*
	 * Build-time check: each fixmap slot listed below must have the same
	 * pmd index as FIX_EARLYCON_MEM_BASE.  __FIXADDR_TOP and pmd_index
	 * are defined locally around the check and removed again afterwards.
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
			!= pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
	FIX_BUG_ON(SHARED_INFO);
	FIX_BUG_ON(ISAMAP_BEGIN);
	FIX_BUG_ON(ISAMAP_END);
#undef pmd_index
#undef __FIXADDR_TOP

	/* Switch to the real shared_info page, and clear the dummy page. */
	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
	memset(empty_zero_page, 0, sizeof(empty_zero_page));

	setup_vcpu_info(0);

	/*
	 * Set up mapping of lowest 1MB of physical memory.  The initial
	 * domain maps the frames at i * PAGE_SIZE; any other domain gets a
	 * read-only mapping of empty_zero_page in every slot.
	 */
	for (i = 0; i < NR_FIX_ISAMAPS; i++)
		if (is_initial_xendomain())
			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
		else
			__set_fixmap(FIX_ISAMAP_BEGIN - i,
				     virt_to_machine(empty_zero_page),
				     PAGE_KERNEL_RO);

}
예제 #13
0
파일: grant-table.c 프로젝트: mbgg/linux
/*
 * gnttab_map - make grant-table frames start_idx..end_idx usable.
 *
 * HVM and auto-translated guests add each frame to their physmap with
 * XENMEM_add_to_physmap, walking from end_idx down to start_idx.  Other
 * guests obtain the frame list for frames 0..end_idx with
 * GNTTABOP_setup_table and pass it to gnttab_interface->map_frames().
 *
 * Returns 0 on success, the add_to_physmap error code, -ENOMEM if the
 * frame array cannot be allocated, -ENOSYS if GNTTABOP_setup_table is not
 * implemented, or the result of map_frames().  Any other setup_table
 * failure is fatal (BUG_ON).
 */
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	unsigned long start_gpfn = 0;
	xen_pfn_t *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap)) {
		struct xen_add_to_physmap xatp;
		unsigned int i = end_idx;
		rc = 0;

		if (xen_hvm_domain())
			start_gpfn = xen_hvm_resume_frames >> PAGE_SHIFT;
		/*
		 * Loop backwards, so that the first hypercall has the largest
		 * index, ensuring that the table will grow only once.
		 */
		do {
			xatp.domid = DOMID_SELF;
			xatp.idx = i;
			xatp.space = XENMAPSPACE_grant_table;
			if (xen_hvm_domain())
				xatp.gpfn = start_gpfn + i;
			else
				xatp.gpfn = pvh_get_grant_pfn(i);

			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
			if (rc != 0) {
				printk(KERN_WARNING
						"grant table add_to_physmap failed, err=%d\n", rc);
				break;
			}
		} while (i-- > start_idx);

		return rc;
	}

	/*
	 * No need for kzalloc as it is initialized in following hypercall
	 * GNTTABOP_setup_table.  Size each element from the pointer's own
	 * type so the buffer always matches xen_pfn_t.
	 */
	frames = kmalloc(nr_gframes * sizeof(*frames), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom        = DOMID_SELF;
	setup.nr_frames  = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	BUG_ON(rc || setup.status);

	rc = gnttab_interface->map_frames(frames, nr_gframes);

	kfree(frames);

	return rc;
}
예제 #14
0
/*
 * Top level routine to direct suspend/resume of a domain.
 *
 * Suspends the PV devices and xenbus, quiesces the other CPUs, issues the
 * suspend hypercall, and, once that returns, re-registers the shared_info
 * page and resumes grant tables, event channels, xenbus and the PV
 * devices in turn.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state.  The PV devices
	 * are all children of the xpvd nexus device.  So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL, NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	/* Interrupts stay off until intr_restore(flags) after the resume. */
	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what the return value is, we do a
	 * full resume of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	/* Undo the vcpu 0 binding and preemption disable taken above. */
	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
예제 #15
0
/*
 * One-time set-up of Xen HVM guest support: detect the hypervisor through
 * cpuid, install the hypercall page, fill in xen_info, determine the
 * hypervisor word size and register the shared_info page.
 *
 * The function returns early, leaving the remaining steps undone, as soon
 * as any step fails; xen_hvm_features records which capabilities were
 * confirmed.  xen_hvm_inited is set before detection starts, so a later
 * call returns immediately even if this one bailed out.
 */
void
xen_hvm_init(void)
{
	struct cpuid_regs cp;
	uint32_t xen_signature[4], base;
	char *xen_str;
	struct xen_add_to_physmap xatp;
	xen_capabilities_info_t caps;
	pfn_t pfn;
	uint64_t msrval, val;
	extern int apix_enable;

	if (xen_hvm_inited != 0)
		return;

	xen_hvm_inited = 1;

	/*
	 * Xen's pseudo-cpuid function returns a string representing
	 * the Xen signature in %ebx, %ecx, and %edx.
	 * Loop over the base values, since it may be different if
	 * the hypervisor has hyper-v emulation switched on.
	 *
	 * %eax contains the maximum supported cpuid function.
	 */
	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		cp.cp_eax = base;
		(void) __cpuid_insn(&cp);
		xen_signature[0] = cp.cp_ebx;
		xen_signature[1] = cp.cp_ecx;
		xen_signature[2] = cp.cp_edx;
		/* Fourth word is zero so the 12 signature bytes form a C string. */
		xen_signature[3] = 0;
		xen_str = (char *)xen_signature;
		if (strcmp("XenVMMXenVMM", xen_str)  == 0 &&
		    cp.cp_eax >= (base + 2))
			break;
	}
	/* No base carried the Xen signature: not running on Xen. */
	if (base >= 0x40010000)
		return;

	/*
	 * cpuid function at base + 1 returns the Xen version in %eax.  The
	 * top 16 bits are the major version, the bottom 16 are the minor
	 * version.
	 */
	cp.cp_eax = base + 1;
	(void) __cpuid_insn(&cp);
	xen_major = cp.cp_eax >> 16;
	xen_minor = cp.cp_eax & 0xffff;

	/*
	 * Below version 3.1 we can't do anything special as a HVM domain;
	 * the PV drivers don't work, many hypercalls are not available,
	 * etc.
	 */
	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1))
		return;

	/*
	 * cpuid function at base + 2 returns information about the
	 * hypercall page.  %eax nominally contains the number of pages
	 * with hypercall code, but according to the Xen guys, "I'll
	 * guarantee that remains one forever more, so you can just
	 * allocate a single page and get quite upset if you ever see CPUID
	 * return more than one page."  %ebx contains an MSR we use to ask
	 * Xen to remap each page at a specific pfn.
	 */
	cp.cp_eax = base + 2;
	(void) __cpuid_insn(&cp);

	/*
	 * Let Xen know where we want the hypercall page mapped.  We
	 * already have a page allocated in the .text section to simplify
	 * the wrapper code.
	 */
	pfn = va_to_pfn(&hypercall_page);
	msrval = mmu_ptob(pfn);
	wrmsr(cp.cp_ebx, msrval);

	/* Fill in the xen_info data */
	xen_info = &__xen_info;
	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);

	if (hvm_get_param(HVM_PARAM_STORE_PFN, &val) < 0)
		return;
	/*
	 * The first hypercall worked, so mark hypercalls as working.
	 */
	xen_hvm_features |= XEN_HVM_HYPERCALLS;

	xen_info->store_mfn = (mfn_t)val;
	if (hvm_get_param(HVM_PARAM_STORE_EVTCHN, &val) < 0)
		return;
	/* NOTE(review): event channel value is cast via mfn_t -- confirm intended type. */
	xen_info->store_evtchn = (mfn_t)val;

	/* Figure out whether the hypervisor is 32-bit or 64-bit.  */
	if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) {
		/* Force termination before searching the capability string. */
		((char *)(caps))[sizeof (caps) - 1] = '\0';
		if (strstr(caps, "x86_64") != NULL)
			xen_bits = 64;
		else if (strstr(caps, "x86_32") != NULL)
			xen_bits = 32;
	}

	/* presumably xen_bits starts out negative when unset -- verify at its definition */
	if (xen_bits < 0)
		return;
#ifdef __amd64
	ASSERT(xen_bits == 64);
#endif

	/*
	 * Allocate space for the shared_info page and tell Xen where it
	 * is.
	 */
	xen_shared_info_frame = va_to_pfn(&hypercall_shared_info_page);
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
		return;

	HYPERVISOR_shared_info = (void *)&hypercall_shared_info_page;

	/*
	 * A working HVM tlb flush hypercall was introduced in Xen 3.3.
	 */
	if (xen_major > 3 || (xen_major == 3 && xen_minor >= 3))
		xen_hvm_features |= XEN_HVM_TLBFLUSH;

	/* FIXME Disable apix for the time being */
	apix_enable = 0;
}
예제 #16
0
파일: setup.c 프로젝트: AllenWeb/linux
/*
 * Release (release == true) or populate (release == false) the PFNs in
 * [start, end), one page per hypercall, updating the P2M as it goes.
 *
 * When releasing, PFNs without a consistent P2M/M2P pair are skipped; when
 * populating, PFNs that already have a P2M entry are skipped.  Processing
 * stops at the first failing hypercall or P2M update.  Returns the number
 * of pages handled successfully.
 */
static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	const unsigned int op = release ? XENMEM_decrease_reservation
					: XENMEM_populate_physmap;
	const char *verb = release ? "release" : "populate";
	unsigned long handled = 0;
	unsigned long pfn;
	int rc;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long cur_mfn = pfn_to_mfn(pfn);
		unsigned long frame;

		if (release) {
			/* Only release PFNs that really exist. */
			if (!(cur_mfn != INVALID_P2M_ENTRY &&
			      mfn_to_pfn(cur_mfn) == pfn))
				continue;
			frame = cur_mfn;
		} else {
			/* Only populate PFNs that are still empty. */
			if (cur_mfn != INVALID_P2M_ENTRY)
				continue;
			frame = pfn;
		}

		reservation.nr_extents = 1;
		set_xen_guest_handle(reservation.extent_start, &frame);

		rc = HYPERVISOR_memory_op(op, &reservation);
		WARN(rc != 1, "Failed to %s pfn %lx err=%d\n",
		     verb, pfn, rc);
		if (rc != 1)
			break;

		if (early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
			handled++;
			continue;
		}

		/*
		 * The P2M could not be updated.  A freshly populated frame
		 * is given straight back to Xen before giving up.
		 */
		if (!release) {
			set_xen_guest_handle(reservation.extent_start, &frame);
			reservation.nr_extents = 1;
			rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						  &reservation);
		}
		break;
	}

	if (handled)
		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
		       release ? "Freeing" : "Populating",
		       start, end, handled,
		       release ? "freed" : "added");

	return handled;
}

/*
 * Give the pages backing PFNs [start, end) back to Xen.
 * Returns the number of pages released.
 */
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	unsigned long freed = xen_do_chunk(start, end, true);

	return freed;
}

/*
 * Spend up to credits_left pages populating E820_RAM entries that extend
 * beyond max_pfn.
 *
 * list/map_size describe the E820 map.  *last_pfn is both input (where a
 * previous call stopped, 0 if none) and output (one past the last PFN
 * populated by the most recent chunk).  Returns the total number of pages
 * populated.
 */
static unsigned long __init xen_populate_chunk(
	const struct e820entry *list, size_t map_size,
	unsigned long max_pfn, unsigned long *last_pfn,
	unsigned long credits_left)
{
	unsigned long populated = 0;
	unsigned int idx;

	for (idx = 0; idx < map_size; idx++) {
		const struct e820entry *entry = &list[idx];
		unsigned long first_pfn, end_pfn, want, got;
		long room;

		/* Nothing left to spend. */
		if (credits_left == 0)
			break;

		if (entry->type != E820_RAM)
			continue;

		/* We only care about E820 after the xen_start_info->nr_pages */
		end_pfn = PFN_UP(entry->addr + entry->size);
		if (end_pfn <= max_pfn)
			continue;

		/*
		 * An entry straddling max_pfn is filled from max_pfn on.
		 * An entry entirely above it is filled from its start, or
		 * from *last_pfn when that lies within reach of the entry
		 * (last_pfn MUST be within E820_RAM regions).
		 */
		first_pfn = PFN_DOWN(entry->addr);
		if (first_pfn <= max_pfn)
			first_pfn = max_pfn;
		else if (*last_pfn && end_pfn >= *last_pfn)
			first_pfn = *last_pfn;

		/* If we had filled this E820_RAM entry, go to the next one. */
		room = end_pfn - first_pfn;
		if (room <= 0)
			continue;

		want = credits_left;
		if (want > room)
			want = room;

		got = xen_do_chunk(first_pfn, first_pfn + want, false);
		populated += got;
		credits_left -= got;
		*last_pfn = (first_pfn + got);
	}
	return populated;
}

/*
 * Turn PFNs [start_pfn, end_pfn) into an identity (1:1) region.
 *
 * PFNs of the range up to max_pfn_mapped get their kernel mapping
 * rewritten to point at the identical machine frame, the part of the
 * range below nr_pages is released to Xen, and the whole range is marked
 * identity in the P2M.  *released and *identity are incremented by the
 * respective page counts.
 */
static void __init xen_set_identity_and_release_chunk(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long *released, unsigned long *identity)
{
	unsigned long pfn = start_pfn;

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	while (pfn <= max_pfn_mapped && pfn < end_pfn) {
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
		pfn++;
	}

	if (start_pfn < nr_pages) {
		unsigned long release_end = min(end_pfn, nr_pages);

		*released += xen_release_chunk(start_pfn, release_end);
	}

	*identity += set_phys_range_identity(start_pfn, end_pfn);
}
예제 #17
0
파일: setup.c 프로젝트: 0-T-0/ps4-linux
/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	unsigned long max_pfn, pfn_s, n_pfns;
	phys_addr_t mem_end, addr, size, chunk_size;
	u32 type;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	int i;
	int op;

	xen_parse_512gb();
	max_pfn = xen_get_pages_limit();
	max_pfn = min(max_pfn, xen_start_info->nr_pages);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, xen_e820_map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		xen_e820_map[0].addr = 0ULL;
		xen_e820_map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		xen_e820_map[0].size += 8ULL << 20;
		xen_e820_map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);
	xen_e820_map_entries = memmap.nr_entries;

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable();

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
			  &xen_e820_map_entries);

	max_pages = xen_get_max_pages();

	/* How many extra pages do we need due to remapping? */
	max_pages += xen_count_remap_pages(max_pfn);

	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
	 * factor the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * Make sure we have no memory above max_pages, as this area
	 * isn't handled by the p2m management.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			   extra_pages, max_pages - max_pfn);
	i = 0;
	addr = xen_e820_map[0].addr;
	size = xen_e820_map[0].size;
	while (i < xen_e820_map_entries) {
		bool discard = false;

		chunk_size = size;
		type = xen_e820_map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				chunk_size = min(size, mem_end - addr);
			} else if (extra_pages) {
				chunk_size = min(size, PFN_PHYS(extra_pages));
				pfn_s = PFN_UP(addr);
				n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
				extra_pages -= n_pfns;
				xen_add_extra_mem(pfn_s, n_pfns);
				xen_max_p2m_pfn = pfn_s + n_pfns;
			} else
				discard = true;
		}

		if (!discard)
			xen_align_and_add_e820_region(addr, chunk_size, type);

		addr += chunk_size;
		size -= chunk_size;
		if (size == 0) {
			i++;
			if (i < xen_e820_map_entries) {
				addr = xen_e820_map[i].addr;
				size = xen_e820_map[i].size;
			}
		}
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
	 */
	set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	/*
	 * Check whether the kernel itself conflicts with the target E820 map.
	 * Failing now is better than running into weird problems later due
	 * to relocating (and even reusing) pages with kernel text or data.
	 */
	if (xen_is_e820_reserved(__pa_symbol(_text),
			__pa_symbol(__bss_stop) - __pa_symbol(_text))) {
		xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
		BUG();
	}

	/*
	 * Check for a conflict of the hypervisor supplied page tables with
	 * the target E820 map.
	 */
	xen_pt_check_e820();

	xen_reserve_xen_mfnlist();

	/* Check for a conflict of the initrd with the target E820 map. */
	if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
				 boot_params.hdr.ramdisk_size)) {
		phys_addr_t new_area, start, size;

		new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
		if (!new_area) {
			xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
			BUG();
		}

		start = boot_params.hdr.ramdisk_image;
		size = boot_params.hdr.ramdisk_size;
		xen_phys_memcpy(new_area, start, size);
		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
			start, start + size, new_area, new_area + size);
		memblock_free(start, size);
		boot_params.hdr.ramdisk_image = new_area;
		boot_params.ext_ramdisk_image = new_area >> 32;
	}
예제 #18
0
/*
 * Release (release == true) or populate (release == false) the PFNs in
 * [start, end), one page per hypercall, updating the P2M as it goes.
 *
 * When releasing, PFNs without a consistent P2M/M2P pair are skipped.
 * When populating, PFNs that already have a P2M entry are skipped unless
 * the physmap is auto-translated.  Processing stops at the first failing
 * hypercall or P2M update.  Returns the number of pages handled.
 */
static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	int auto_xlat = xen_feature(XENFEAT_auto_translated_physmap);
	const unsigned int op = release ? XENMEM_decrease_reservation
					: XENMEM_populate_physmap;
	const char *verb = release ? "release" : "populate";
	unsigned long handled = 0;
	unsigned long pfn;
	int rc;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long cur_mfn = pfn_to_mfn(pfn);
		unsigned long frame;

		if (release) {
			/* Only release PFNs that really exist. */
			if (!(cur_mfn != INVALID_P2M_ENTRY &&
			      mfn_to_pfn(cur_mfn) == pfn))
				continue;
			frame = cur_mfn;
		} else {
			/* Already-present PFNs are skipped on a non-translated physmap. */
			if (!auto_xlat && cur_mfn != INVALID_P2M_ENTRY)
				continue;
			frame = pfn;
		}

		reservation.nr_extents = 1;
		set_xen_guest_handle(reservation.extent_start, &frame);

		rc = HYPERVISOR_memory_op(op, &reservation);
		WARN(rc != 1, "Failed to %s pfn %lx err=%d\n",
		     verb, pfn, rc);
		if (rc != 1)
			break;

		if (early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
			handled++;
			continue;
		}

		/*
		 * The P2M could not be updated.  A freshly populated frame
		 * is given straight back to Xen before giving up.
		 */
		if (!release) {
			set_xen_guest_handle(reservation.extent_start, &frame);
			reservation.nr_extents = 1;
			rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						  &reservation);
		}
		break;
	}

	if (handled)
		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
		       release ? "Freeing" : "Populating",
		       start, end, handled,
		       release ? "freed" : "added");

	return handled;
}

/*
 * Give the pages backing PFNs [start, end) back to Xen and return how
 * many were released.  With an auto-translated physmap no hypercall is
 * made and the full range size is reported.
 */
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		return xen_do_chunk(start, end, true);

	/*
	 * Xen already ballooned out the E820 non RAM regions for us
	 * and set them up properly in EPT.
	 */
	return end - start;
}

/*
 * Spend up to credits_left pages populating E820_RAM entries that extend
 * beyond max_pfn.
 *
 * list/map_size describe the E820 map.  Only whole pages inside an entry
 * are considered (start rounded up, end rounded down).  *last_pfn is set
 * to one past the last PFN populated by the most recent chunk.  The walk
 * stops when the credits run out or when a chunk could not be populated
 * completely.  Returns the total number of pages populated.
 */
static unsigned long __init xen_populate_chunk(
	const struct e820entry *list, size_t map_size,
	unsigned long max_pfn, unsigned long *last_pfn,
	unsigned long credits_left)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;
	unsigned long dest_pfn;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;
		unsigned long pfns;
		long capacity;

		if (credits_left == 0)
			break;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after the xen_start_info->nr_pages */
		if (e_pfn <= max_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);
		/* If the E820 falls within the nr_pages, we want to start
		 * at the nr_pages PFN.
		 * If that would mean going past the E820 entry, skip it
		 */
		if (s_pfn <= max_pfn) {
			capacity = e_pfn - max_pfn;
			dest_pfn = max_pfn;
		} else {
			capacity = e_pfn - s_pfn;
			dest_pfn = s_pfn;
		}

		/*
		 * An entry that holds no whole page gives a zero or negative
		 * capacity.  Skip it: a negative value would otherwise be
		 * promoted to a huge unsigned number in the comparison below
		 * and PFNs outside the entry would be populated.
		 */
		if (capacity <= 0)
			continue;

		if (credits_left < capacity)
			capacity = credits_left;

		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
		done += pfns;
		*last_pfn = (dest_pfn + pfns);
		/* A short chunk means populating failed; stop here. */
		if (pfns < capacity)
			break;
		credits_left -= pfns;
	}
	return done;
}

/*
 * Turn PFNs [start_pfn, end_pfn) into an identity (1:1) region.
 *
 * Unless the physmap is auto-translated, the kernel mappings of the PFNs
 * of the range up to max_pfn_mapped are rewritten first.  Then the part
 * of the range below nr_pages is released and the whole range is marked
 * identity in the P2M.  *released and *identity are incremented by the
 * respective page counts.
 */
static void __init xen_set_identity_and_release_chunk(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long *released, unsigned long *identity)
{
	unsigned long pfn = start_pfn;

	/*
	 * If the PFNs are currently mapped, clear the mappings
	 * (except for the ISA region which must be 1:1 mapped) to
	 * release the refcounts (in Xen) on the original frames.
	 *
	 * PVH E820 matches the hypervisor's P2M, so no mapping is touched
	 * there; only *released and *identity are accounted for below.
	 */
	while (!xen_feature(XENFEAT_auto_translated_physmap) &&
	       pfn <= max_pfn_mapped && pfn < end_pfn) {
		pte_t pte;

		if (pfn < PFN_UP(ISA_END_ADDRESS))
			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
		else
			pte = __pte_ma(0);

		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
		pfn++;
	}

	if (start_pfn < nr_pages) {
		unsigned long release_end = min(end_pfn, nr_pages);

		*released += xen_release_chunk(start_pfn, release_end);
	}

	*identity += set_phys_range_identity(start_pfn, end_pfn);
}
예제 #19
0
/*
 * Marshal a XENMEM_* memory_op request on behalf of a privcmd caller.
 *
 * The argument structure for cmd is brought into op_arg with
 * import_buffer(); where the operation carries a guest handle to an
 * array, that array is brought in with import_handle().  A DTrace "start"
 * probe fires for the operations that have one, the hypercall is issued
 * if nothing failed so far, all import/export descriptors are passed to
 * export_buffer(), and the matching "end" probe fires.
 *
 * Returns -X_EFAULT if the argument structure cannot be imported,
 * -X_EINVAL for an unrecognized cmd, otherwise the import_handle() error
 * or the hypercall result as left in 'error' after export_buffer().
 */
static int
privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
{
	int error = 0;
	import_export_t op_ie, sub_ie, gpfn_ie, mfn_ie;
	union {
		domid_t domid;
		struct xen_memory_reservation resv;
		struct xen_machphys_mfn_list xmml;
		struct xen_add_to_physmap xatp;
		struct xen_memory_map mm;
		struct xen_foreign_memory_map fmm;
	} op_arg;

	/* gpfn_ie and mfn_ie are never imported below; they stay null_ie. */
	op_ie = sub_ie = gpfn_ie = mfn_ie = null_ie;

	switch (cmd) {
	case XENMEM_increase_reservation:
	case XENMEM_decrease_reservation:
	case XENMEM_populate_physmap: {
		ulong_t *taddr;

		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.resv),
		    IE_IMPEXP) != 0)
			return (-X_EFAULT);

		/* The extent array holds nr_extents ulong_t entries. */
		error = import_handle(&sub_ie, &op_arg.resv.extent_start,
		    (op_arg.resv.nr_extents * sizeof (ulong_t)), IE_IMPEXP);

		/*
		 * taddr is only used for the DTrace probes: the raw handle
		 * address if the import faulted, the imported kernel
		 * address otherwise.
		 */
		if (error == -X_EFAULT)
			/*LINTED: constant in conditional context*/
			get_xen_guest_handle(taddr, op_arg.resv.extent_start);
		else
			taddr = sub_ie.ie_kaddr;

		switch (cmd) {
		case XENMEM_increase_reservation:
			DTRACE_XPV4(increase__reservation__start,
			    domid_t, op_arg.resv.domid,
			    ulong_t, op_arg.resv.nr_extents,
			    uint_t, op_arg.resv.extent_order,
			    ulong_t *, taddr);
			break;
		case XENMEM_decrease_reservation:
			DTRACE_XPV4(decrease__reservation__start,
			    domid_t, op_arg.resv.domid,
			    ulong_t, op_arg.resv.nr_extents,
			    uint_t, op_arg.resv.extent_order,
			    ulong_t *, taddr);
			break;
		case XENMEM_populate_physmap:
			DTRACE_XPV3(populate__physmap__start,
			    domid_t, op_arg.resv.domid,
			    ulong_t, op_arg.resv.nr_extents,
			    ulong_t *, taddr);
			break;
		}

		break;
	}

	/* Nothing is imported for this operation. */
	case XENMEM_maximum_ram_page:
		break;

	/* These operations take just a domid. */
	case XENMEM_current_reservation:
	case XENMEM_maximum_reservation:
	case XENMEM_maximum_gpfn:
		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
		    IE_IMPEXP) != 0)
			return (-X_EFAULT);
		break;

	case XENMEM_machphys_mfn_list: {
		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xmml),
		    IE_IMPEXP) != 0)
			return (-X_EFAULT);

		error = import_handle(&sub_ie, &op_arg.xmml.extent_start,
		    (op_arg.xmml.max_extents * sizeof (ulong_t)), IE_IMPEXP);
		break;
	}

	case XENMEM_add_to_physmap:
		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xatp),
		    IE_IMPEXP) != 0)
			return (-X_EFAULT);
		DTRACE_XPV4(add__to__physmap__start, domid_t,
		    op_arg.xatp.domid, uint_t, op_arg.xatp.space, ulong_t,
		    op_arg.xatp.idx, ulong_t, op_arg.xatp.gpfn);
		break;

	case XENMEM_memory_map:
	case XENMEM_machine_memory_map: {
		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.mm),
		    IE_EXPORT) != 0)
			return (-X_EFAULT);

		/*
		 * XXPV: ugh. e820entry is packed, but not in the kernel, since
		 * we remove all attributes; seems like this is a nice way to
		 * break mysteriously.
		 *
		 * The literal 20 below is presumably the packed size of one
		 * e820 entry -- TODO confirm against the Xen ABI.
		 */
		error = import_handle(&sub_ie, &op_arg.mm.buffer,
		    (op_arg.mm.nr_entries * 20), IE_IMPEXP);
		break;
	}

	case XENMEM_set_memory_map: {
		struct xen_memory_map *taddr;
		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.fmm),
		    IE_IMPORT) != 0)
			return (-X_EFAULT);

		/*
		 * As above.
		 */
		error = import_handle(&sub_ie, &op_arg.fmm.map.buffer,
		    (op_arg.fmm.map.nr_entries * 20), IE_IMPEXP);

		/* taddr is only used for the DTrace probe, as above. */
		if (error == -X_EFAULT)
			/*LINTED: constant in conditional context*/
			get_xen_guest_handle(taddr, op_arg.fmm.map.buffer);
		else
			taddr = sub_ie.ie_kaddr;
		DTRACE_XPV3(set__memory__map__start, domid_t,
		    op_arg.fmm.domid, int, op_arg.fmm.map.nr_entries,
		    struct xen_memory_map *, taddr);
		break;
	}

	default:
#ifdef DEBUG
		printf("unrecognized HYPERVISOR_memory_op %d\n", cmd);
#endif
		return (-X_EINVAL);
	}

	/* Only issue the hypercall if every import above succeeded. */
	if (error == 0)
		error = HYPERVISOR_memory_op(cmd,
		    (arg == NULL) ? NULL: &op_arg);

	/* Each export_buffer() call is given 'error' by address. */
	export_buffer(&op_ie, &error);
	export_buffer(&sub_ie, &error);
	export_buffer(&gpfn_ie, &error);
	export_buffer(&mfn_ie, &error);

	/* Fire the "end" probe matching the "start" probe above, if any. */
	switch (cmd) {
	case XENMEM_increase_reservation:
		DTRACE_XPV1(increase__reservation__end, int, error);
		break;
	case XENMEM_decrease_reservation:
		DTRACE_XPV1(decrease__reservation__end, int, error);
		break;
	case XENMEM_populate_physmap:
		DTRACE_XPV1(populate__physmap__end, int, error);
		break;
	case XENMEM_add_to_physmap:
		DTRACE_XPV1(add__to__physmap__end, int, error);
		break;
	case XENMEM_set_memory_map:
		DTRACE_XPV1(set__memory__map__end, int, error);
		break;
	}
	return (error);
}
예제 #20
0
/**
 * xen_memory_setup - Hook for machine specific memory setup.
 *
 * Fetches the memory map from Xen (the machine memory map when running as
 * the initial domain, the domain's own memory map otherwise), sanitizes it,
 * works out how many pages beyond the initial allocation may be used as
 * "extra" memory, and registers every resulting region with the kernel's
 * E820 map.  Finally the ISA range and the mfn_list..pt_base range from
 * xen_start_info are reserved.
 *
 * Return: the constant string "Xen".
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long last_pfn = 0;
	unsigned long extra_pages = 0;
	unsigned long populated;
	int i;
	int op;

	/* The initial allocation is capped at MAX_DOMAIN_PAGES. */
	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/*
		 * The hypervisor does not implement the call: synthesize a
		 * single RAM entry covering the initial allocation.  This
		 * fallback is not permitted for the initial domain.
		 */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	/* Any other failure is fatal. */
	BUG_ON(rc);

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable(map, memmap.nr_entries);

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	/* Anything Xen allows beyond the initial allocation is extra memory. */
	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Set P2M for all non-RAM pages and E820 gaps to be identity
	 * type PFNs.  Any RAM pages that would be made inaccessible by
	 * this are first released.
	 */
	xen_released_pages = xen_set_identity_and_release(
		map, memmap.nr_entries, max_pfn);

	/*
	 * Populate back the non-RAM pages and E820 gaps that had been
	 * released.
	 */
	populated = xen_populate_chunk(map, memmap.nr_entries,
			max_pfn, &last_pfn, xen_released_pages);

	/* Only the pages that could not be populated back count as extra. */
	xen_released_pages -= populated;
	extra_pages += xen_released_pages;

	/* Repopulation may have pushed the last used PFN past max_pfn. */
	if (last_pfn > max_pfn) {
		max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);
		mem_end = PFN_PHYS(max_pfn);
	}
	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
	 * factor the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);
	/*
	 * Walk the map.  A single entry may be consumed in several passes:
	 * each pass handles 'size' bytes from the front of map[i], and the
	 * index only advances once the entry has been used up.
	 */
	i = 0;
	while (i < memmap.nr_entries) {
		u64 addr = map[i].addr;
		u64 size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				/* Part covered by the initial allocation. */
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				/* Beyond mem_end: hand out extra memory. */
				size = min(size, (u64)extra_pages * PAGE_SIZE);
				extra_pages -= size / PAGE_SIZE;
				xen_add_extra_mem(addr, size);
			} else
				/* No extra budget left for this RAM. */
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 * We tried to make the memblock_reserve more selective so
	 * that it would be clear what region is reserved. Sadly we ran
	 * into the problem wherein on a 64-bit hypervisor with a 32-bit
	 * initial domain, the pt_base has the cr3 value which is not
	 * necessarily where the pagetable starts! As Jan put it: "
	 * Actually, the adjustment turns out to be correct: The page
	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
	 * "first L2", "first L3", so the offset to the page table base is
	 * indeed 2. When reading xen/include/public/xen.h's comment
	 * very strictly, this is not a violation (since there nothing is said
	 * that the first thing in the page table space is pointed to by
	 * pt_base; I admit that this seems to be implied though, namely
	 * do I think that it is implied that the page table space is the
	 * range [pt_base, pt_base + nt_pt_frames), whereas that
	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
	 * which - without a priori knowledge - the kernel would have
	 * difficulty to figure out)." - so let's just fall back to the
	 * easy way and reserve the whole region.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}
예제 #21
0
/*
 * xen_memory_setup - build the kernel E820 map from the map Xen provides.
 *
 * Queries Xen for the memory map (machine map for the initial domain,
 * the domain's own map otherwise), sanitizes it, computes how many pages
 * beyond the initial allocation can be used as extra memory, and adds all
 * resulting regions to the E820 map.
 *
 * Returns the constant string "Xen".
 */
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/*
		 * Call not implemented by the hypervisor: fall back to a
		 * single RAM entry.  Not allowed for the initial domain.
		 */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* Add 8MB of slack on top of the initial allocation. */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);

	/* Sanitize the Xen-supplied map in place; nr_entries is updated. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Let xen_set_identity_and_release() process the map up to max_pfn.
	 * The page count it returns is recorded in xen_released_pages and
	 * added to the extra-memory budget.
	 */
	xen_released_pages = xen_set_identity_and_release(
		map, memmap.nr_entries, max_pfn);
	extra_pages += xen_released_pages;

	/*
	 * Clamp the extra memory to EXTRA_MEM_RATIO times the base size,
	 * where the base size is the smaller of max_pfn and
	 * PFN_DOWN(MAXMEM).
	 */
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);

	/*
	 * Walk the map.  An entry may be consumed in several passes: each
	 * pass handles 'size' bytes from the front of map[i], and the index
	 * only advances once the entry is used up.
	 */
	i = 0;
	while (i < memmap.nr_entries) {
		u64 addr = map[i].addr;
		u64 size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				/* Part covered by the initial allocation. */
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				/* Beyond mem_end: use the extra budget. */
				size = min(size, (u64)extra_pages * PAGE_SIZE);
				extra_pages -= size / PAGE_SIZE;
				xen_add_extra_mem(addr, size);
			} else
				/* No extra budget left for this RAM. */
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * Unconditionally mark the ISA range
	 * [ISA_START_ADDRESS, ISA_END_ADDRESS) as reserved.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve the range that starts at xen_start_info->mfn_list and
	 * runs up to xen_start_info->pt_base.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}
예제 #22
0
/**
 * xen_memory_setup - Hook for machine specific memory setup.
 *
 * Fetches the memory map from Xen (machine map for the initial domain, the
 * domain's own map otherwise), trims RAM entries to the initial allocation,
 * registers the regions with the kernel E820 map, computes and registers
 * the amount of extra (balloon) memory, and finally sets up identity P2M
 * entries from the unmodified copy of the map.
 *
 * Return: the constant string "Xen".
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;
	static struct e820entry map_raw[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long extra_pages = 0;
	unsigned long extra_limit;
	unsigned long identity_pages = 0;
	int i;
	int op;

	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/*
		 * Call not implemented by the hypervisor: fall back to a
		 * single RAM entry.  Not allowed for the initial domain.
		 */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	BUG_ON(rc);

	/* Keep an untouched copy; the loop below edits map[] in place. */
	memcpy(map_raw, map, sizeof(map));
	e820.nr_map = 0;
	xen_extra_mem_start = mem_end;
	for (i = 0; i < memmap.nr_entries; i++) {
		unsigned long long end;

		/*
		 * Guard against non-page aligned E820 entries.
		 * NOTE(review): if a RAM entry is smaller than the unaligned
		 * tail being trimmed, this subtraction wraps - confirm such
		 * entries cannot occur.
		 */
		if (map[i].type == E820_RAM)
			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;

		end = map[i].addr + map[i].size;
		if (map[i].type == E820_RAM && end > mem_end) {
			/* RAM off the end - may be partially included */
			u64 delta = min(map[i].size, end - mem_end);

			map[i].size -= delta;
			end -= delta;

			extra_pages += PFN_DOWN(delta);
			/*
			 * Set RAM below 4GB that is not for us to be unusable.
			 * This prevents "System RAM" address space from being
			 * used as potential resource for I/O address (happens
			 * when 'allocate_resource' is called).
			 */
			if (delta &&
				(xen_initial_domain() && end < 0x100000000ULL))
				e820_add_region(end, delta, E820_UNUSABLE);
		}

		/* Track the end of the highest region that is kept. */
		if (map[i].size > 0 && end > xen_extra_mem_start)
			xen_extra_mem_start = end;

		/* Add region if any remains */
		if (map[i].size > 0)
			e820_add_region(map[i].addr, map[i].size, map[i].type);
	}
	/*
	 * Align the balloon area so that max_low_pfn does not get set
	 * to be at the _end_ of the PCI gap at the far end (fee01000).
	 * Note that xen_extra_mem_start gets set in the loop above to be
	 * past the last E820 region.
	 */
	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
		xen_extra_mem_start = (1ULL<<32);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 *
	 * In Dom0, the host E820 information can leave gaps in the
	 * ISA range, which would cause us to release those pages.  To
	 * avoid this, we unconditionally reserve them here.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
		      __pa(xen_start_info->pt_base),
			"XEN START INFO");

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	/* Never plan for more pages than Xen says the domain may have. */
	extra_limit = xen_get_max_pages();
	if (max_pfn + extra_pages > extra_limit) {
		if (extra_limit > max_pfn)
			extra_pages = extra_limit - max_pfn;
		else
			extra_pages = 0;
	}

	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);

	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
	 * factor the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  max_pfn + extra_pages);

	if (extra_limit >= max_pfn)
		extra_pages = extra_limit - max_pfn;
	else
		extra_pages = 0;

	xen_add_extra_mem(extra_pages);

	/*
	 * Set P2M for all non-RAM pages and E820 gaps to be identity
	 * type PFNs. We supply it with the non-sanitized version
	 * of the E820.
	 */
	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
	/* identity_pages is unsigned long, so print it with %lu. */
	printk(KERN_INFO "Set %lu page(s) to 1-1 mapping.\n", identity_pages);
	return "Xen";
}
예제 #23
0
/*
 * increase_reservation - ask Xen to back ballooned-out pages again.
 *
 * Takes up to nr_pages pages (capped at ARRAY_SIZE(frame_list)) from the
 * balloon list, asks the hypervisor to populate them, and on success hands
 * every page back to the page allocator.
 *
 * If Xen populates fewer frames than requested, the partial allocation is
 * undone and hard_limit is updated so that the target is re-evaluated.
 *
 * Returns 0 normally (including the hard-limit case) and non-zero if the
 * populate hypercall itself failed, which tells the caller to back off.
 */
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long  pfn, i, flags;
	struct page   *page;
	long           rc;
	int            need_sleep = 0;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	balloon_lock(flags);

	/* Collect the PFNs of the first nr_pages ballooned pages. */
	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents   = nr_pages;
	rc = HYPERVISOR_memory_op(
		XENMEM_populate_physmap, &reservation);
	if (rc < 0) {
		/*
		 * The hypercall failed outright.  This must be tested before
		 * comparing against the unsigned nr_pages: a negative rc
		 * would otherwise be promoted to a huge unsigned value and
		 * the frames would wrongly be treated as populated.
		 */
		need_sleep = 1;
		goto out;
	}
	if ((unsigned long)rc < nr_pages) {
		/* We hit the Xen hard limit: reprobe. */
		if (rc > 0) {
			int ret;

			/* Give back the frames that were populated. */
			set_xen_guest_handle(reservation.extent_start,
					     frame_list);
			reservation.nr_extents   = rc;
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					&reservation);
			BUG_ON(ret != rc);
		}
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN
		xen_machphys_update(frame_list[i], pfn);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif
		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

 out:
	balloon_unlock(flags);

	return need_sleep;
}

/*
 * decrease_reservation - hand pages back to Xen.
 *
 * Allocates up to nr_pages pages (capped at ARRAY_SIZE(frame_list)),
 * scrubs them, removes their kernel mappings, invalidates their P2M
 * entries, queues them on the balloon list and releases the underlying
 * machine frames with a single hypercall.
 *
 * Returns non-zero when page allocation ran dry before nr_pages were
 * collected, 0 otherwise.
 */
static int decrease_reservation(unsigned long nr_pages)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long  flags, idx, pfn;
	struct page   *pg;
	void          *vaddr;
	int            starved = 0;
	int            ret;

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (idx = 0; idx < nr_pages; idx++) {
		pg = alloc_page(GFP_BALLOON);
		if (pg == NULL) {
			/* Settle for the pages gathered so far. */
			nr_pages = idx;
			starved = 1;
			break;
		}

		pfn = page_to_pfn(pg);
		frame_list[idx] = pfn_to_mfn(pfn);

		if (PageHighMem(pg)) {
#ifdef CONFIG_XEN_SCRUB_PAGES
			/* Highmem needs a temporary mapping to be scrubbed. */
			vaddr = kmap(pg);
			scrub_pages(vaddr, 1);
			kunmap(pg);
#endif
		} else {
			vaddr = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(vaddr, 1);
#ifdef CONFIG_XEN
			/* Tear down the kernel mapping of the page. */
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)vaddr, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* Nothing maps the pages any more: drop P2M and balloon them. */
	for (idx = 0; idx < nr_pages; idx++) {
		pfn = mfn_to_pfn(frame_list[idx]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	reservation.nr_extents   = nr_pages;
	set_xen_guest_handle(reservation.extent_start, frame_list);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	return starved;
}

/*
 * balloon_process - worker that moves current_pages towards the target.
 *
 * The balloon mutex keeps multiple worker instances from conflicting.
 * Updates of the target counts (protected by the balloon lock) and changes
 * to the Xen hard limit may still race with us, but we recover from those
 * on a later pass.
 */
static void balloon_process(void *unused)
{
	long delta;
	int back_off = 0;

	down(&balloon_mutex);

	for (;;) {
		delta = current_target() - current_pages;
		if (delta > 0)
			back_off = (increase_reservation(delta) != 0);
		else if (delta < 0)
			back_off = (decrease_reservation(-delta) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
		/* Stop once the target is met or a helper asked to back off. */
		if (delta == 0 || back_off)
			break;
	}

	/* Target still not reached: retry from the timer in one second. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	up(&balloon_mutex);
}
예제 #24
0
/*
 * xen_map_device_mmio - map the MMIO resources of a device via Xen.
 * @resources: array of resources to consider
 * @count:     number of entries in @resources
 *
 * For every non-empty IORESOURCE_MEM entry, issues one
 * XENMEM_add_to_physmap_range hypercall (space XENMAPSPACE_dev_mmio)
 * covering the resource in XEN_PAGE_SIZE units, using identical guest
 * frame numbers and indexes (1:1 mapping).
 *
 * Returns 0 on success.  On allocation failure (-ENOMEM) or a failing
 * hypercall, xen_unmap_device_mmio() is called for the resources that
 * precede the failing one and the error is returned.
 */
static int xen_map_device_mmio(const struct resource *resources,
			       unsigned int count)
{
	unsigned int i, j, nr;
	int rc = 0;
	const struct resource *r;
	xen_pfn_t *gpfns;
	xen_ulong_t *idxs;
	int *errs;

	for (i = 0; i < count; i++) {
		struct xen_add_to_physmap_range xatp = {
			.domid = DOMID_SELF,
			.space = XENMAPSPACE_dev_mmio
		};

		r = &resources[i];
		nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE);
		if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0))
			continue;

		/*
		 * kcalloc() zeroes the arrays like kzalloc() did, but also
		 * fails cleanly if nr * element size would overflow.
		 */
		gpfns = kcalloc(nr, sizeof(xen_pfn_t), GFP_KERNEL);
		idxs = kcalloc(nr, sizeof(xen_ulong_t), GFP_KERNEL);
		errs = kcalloc(nr, sizeof(int), GFP_KERNEL);
		if (!gpfns || !idxs || !errs) {
			/* kfree(NULL) is a no-op, so free all three blindly. */
			kfree(gpfns);
			kfree(idxs);
			kfree(errs);
			rc = -ENOMEM;
			goto unmap;
		}

		for (j = 0; j < nr; j++) {
			/*
			 * The regions are always mapped 1:1 to DOM0 and this is
			 * fine because the memory map for DOM0 is the same as
			 * the host (except for the RAM).
			 */
			gpfns[j] = XEN_PFN_DOWN(r->start) + j;
			idxs[j] = XEN_PFN_DOWN(r->start) + j;
		}

		xatp.size = nr;

		set_xen_guest_handle(xatp.gpfns, gpfns);
		set_xen_guest_handle(xatp.idxs, idxs);
		set_xen_guest_handle(xatp.errs, errs);

		rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
		/* The arrays are only needed for the duration of the call. */
		kfree(gpfns);
		kfree(idxs);
		kfree(errs);
		if (rc)
			goto unmap;
	}

	return rc;

unmap:
	/* Undo the resources handled before the failing index i. */
	xen_unmap_device_mmio(resources, i);
	return rc;
}
예제 #25
0
/*
 * xen_guest_init - detect Xen from the device tree and set up guest state.
 *
 * Looks for a "xen,xen" compatible node, extracts the version suffix from
 * its "compatible" property, records the grant-table frame and the event
 * IRQ, registers a shared-info page with the hypervisor, allocates the
 * per-CPU vcpu_info area and initializes grant tables (plus xenbus when
 * not the initial domain).
 *
 * Returns 0 when Xen is absent or setup succeeded, -ENOMEM when an
 * allocation fails.
 */
static int __init xen_guest_init(void)
{
	static struct shared_info *shared_info_page = 0;
	const char *xen_prefix = "xen,xen-";
	const char *version = NULL;
	const char *compat = NULL;
	struct xen_add_to_physmap xatp;
	struct device_node *node;
	struct resource res;
	int compat_len;

	node = of_find_compatible_node(NULL, NULL, "xen,xen");
	if (node == NULL) {
		pr_debug("No Xen support\n");
		return 0;
	}

	/* The version follows the "xen,xen-" prefix of the property. */
	compat = of_get_property(node, "compatible", &compat_len);
	if (strlen(xen_prefix) + 3 < compat_len &&
	    strncmp(xen_prefix, compat, strlen(xen_prefix)) == 0)
		version = compat + strlen(xen_prefix);
	if (!version) {
		pr_debug("Xen version not found\n");
		return 0;
	}

	if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res) != 0)
		return 0;

	xen_hvm_resume_frames = res.start >> PAGE_SHIFT;
	xen_events_irq = irq_of_parse_and_map(node, 0);
	pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n",
			version, xen_events_irq, xen_hvm_resume_frames);
	xen_domain_type = XEN_HVM_DOMAIN;

	xen_setup_features();
	/* Mirror the dom0 feature bit into the start_info flags. */
	if (xen_feature(XENFEAT_dom0)) {
		xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
	} else {
		xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
	}

	/* Allocate the shared-info page once; it is kept in a static. */
	if (shared_info_page == NULL) {
		shared_info_page = (struct shared_info *)
			get_zeroed_page(GFP_KERNEL);
		if (shared_info_page == NULL) {
			pr_err("not enough memory\n");
			return -ENOMEM;
		}
	}

	/* Ask Xen to place its shared_info at the page we allocated. */
	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.idx = 0;
	xatp.domid = DOMID_SELF;
	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		BUG();

	HYPERVISOR_shared_info = shared_info_page;

	/*
	 * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
	 * page; it is used in the event channel upcall and in some pvclock
	 * related functions.
	 * The shared info contains exactly 1 CPU (the boot CPU). The guest
	 * is required to use VCPUOP_register_vcpu_info to place vcpu info
	 * for secondary CPUs as they are brought up.
	 * For uniformity VCPUOP_register_vcpu_info is used even on cpu0.
	 */
	xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
				       sizeof(struct vcpu_info));
	if (!xen_vcpu_info)
		return -ENOMEM;

	gnttab_init();
	if (!xen_initial_domain())
		xenbus_probe(NULL);

	return 0;
}
예제 #26
0
파일: balloon.c 프로젝트: bahamas10/openzfs
/*
 * balloon_free_pages()
 *    Free page_cnt pages, using any combination of mfns, pfns, and kva as long
 *    as they refer to the same mapping.  If an array of mfns is passed in, we
 *    assume they were already cleared.  Otherwise, we need to zero the pages
 *    before giving them back to the hypervisor.  kva space is not freed up in
 *    case the caller wants to re-use it.
 *
 *    page_cnt	number of pages to give back
 *    mfns	optional array of machine frame numbers; when non-NULL all of
 *		them are released with a single hypercall
 *    kva	optional kernel virtual address of the first page; when
 *		non-NULL the pages are zeroed through it (if
 *		balloon_zero_memory is set) and unmapped
 *    pfns	optional array of page frame numbers; when NULL the pfns are
 *		looked up from kva
 *
 *    Returns page_cnt.  Panics if the hypervisor does not accept every page.
 *    bln_stats.bln_hv_pages is incremented by page_cnt.
 */
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	xen_memory_reservation_t memdec;
	mfn_t mfn;
	pfn_t pfn;
	uint_t i;
	long e;


#if DEBUG
	/* make sure kva is page aligned and maps to first pfn */
	if (kva != NULL) {
		ASSERT(((uintptr_t)kva & PAGEOFFSET) == 0);
		if (pfns != NULL) {
			ASSERT(hat_getpfnum(kas.a_hat, kva) == pfns[0]);
		}
	}
#endif

	/* if we have a kva, we can clean all pages with just one bzero */
	if ((kva != NULL) && balloon_zero_memory) {
		bzero(kva, (page_cnt * PAGESIZE));
	}

	/* if we were given a kva and/or a pfn */
	if ((kva != NULL) || (pfns != NULL)) {

		/*
		 * All the current callers only pass 1 page when using kva or
		 * pfns, and use mfns when passing multiple pages.  If that
		 * assumption is changed, the following code will need some
		 * work.  The following ASSERT() guarantees we're respecting
		 * the io locking quota.
		 */
		ASSERT(page_cnt < bln_contig_list_quota);

		/* go through all the pages */
		for (i = 0; i < page_cnt; i++) {

			/* get the next pfn, from the kva mapping if needed */
			if (pfns == NULL) {
				pfn = hat_getpfnum(kas.a_hat,
				    (kva + (PAGESIZE * i)));
			} else {
				pfn = pfns[i];
			}

			/*
			 * if we didn't already zero this page, do it now. we
			 * need to do this *before* we give back the MFN
			 */
			if ((kva == NULL) && (balloon_zero_memory)) {
				pfnzero(pfn, 0, PAGESIZE);
			}

			/*
			 * unmap the pfn. We don't free up the kva vmem space
			 * so the caller can re-use it. The page must be
			 * unmapped before it is given back to the hypervisor.
			 */
			if (kva != NULL) {
				hat_unload(kas.a_hat, (kva + (PAGESIZE * i)),
				    PAGESIZE, HAT_UNLOAD_UNMAP);
			}

			/* grab the mfn before the pfn is marked as invalid */
			mfn = pfn_to_mfn(pfn);

			/* mark the pfn as invalid */
			reassign_pfn(pfn, MFN_INVALID);

			/*
			 * if we weren't given an array of MFNs, we need to
			 * free them up one at a time. Otherwise, we'll wait
			 * until later and do it in one hypercall
			 */
			if (mfns == NULL) {
				bzero(&memdec, sizeof (memdec));
				/*LINTED: constant in conditional context*/
				set_xen_guest_handle(memdec.extent_start, &mfn);
				memdec.domid = DOMID_SELF;
				memdec.nr_extents = 1;
				e = HYPERVISOR_memory_op(
				    XENMEM_decrease_reservation, &memdec);
				/* exactly one extent must have been released */
				if (e != 1) {
					cmn_err(CE_PANIC, "balloon: unable to "
					    "give a page back to the "
					    "hypervisor.\n");
				}
			}
		}
	}

	/*
	 * if we were passed in MFNs, we haven't freed them up yet. We can
	 * do it with one call.
	 */
	if (mfns != NULL) {
		bzero(&memdec, sizeof (memdec));
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(memdec.extent_start, mfns);
		memdec.domid = DOMID_SELF;
		memdec.nr_extents = page_cnt;
		e = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &memdec);
		/* every requested extent must have been released */
		if (e != page_cnt) {
			cmn_err(CE_PANIC, "balloon: unable to give pages back "
			    "to the hypervisor.\n");
		}
	}

	/* account for the pages now held by the hypervisor */
	atomic_add_long((ulong_t *)&bln_stats.bln_hv_pages, page_cnt);
	return (page_cnt);
}
예제 #27
0
파일: setup.c 프로젝트: F4uzan/skernel_u0
/**
 * xen_memory_setup - Hook for machine specific memory setup.
 *
 * Fetches the memory map from Xen (machine map for the initial domain, the
 * domain's own map otherwise), sanitizes it, computes how many pages beyond
 * the initial allocation can be used as extra memory, and adds all
 * resulting regions to the kernel E820 map.
 *
 * Return: the constant string "Xen".
 **/
char * __init xen_memory_setup(void)
{
	static struct e820entry map[E820MAX] __initdata;

	unsigned long max_pfn = xen_start_info->nr_pages;
	unsigned long long mem_end;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	int i;
	int op;

	/* The initial allocation is capped at MAX_DOMAIN_PAGES. */
	max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = E820MAX;
	set_xen_guest_handle(memmap.buffer, map);

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		/*
		 * Call not implemented by the hypervisor: fall back to a
		 * single RAM entry.  Not allowed for the initial domain.
		 */
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		map[0].addr = 0ULL;
		map[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		map[0].size += 8ULL << 20;
		map[0].type = E820_RAM;
		rc = 0;
	}
	/* Any other failure is fatal. */
	BUG_ON(rc);

	/* Make sure the Xen-supplied memory map is well-ordered. */
	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

	/* Anything Xen allows beyond the initial allocation is extra memory. */
	max_pages = xen_get_max_pages();
	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Set P2M for all non-RAM pages and E820 gaps to be identity
	 * type PFNs.  Any RAM pages that would be made inaccessible by
	 * this are first released.
	 */
	xen_released_pages = xen_set_identity_and_release(
		map, memmap.nr_entries, max_pfn);
	extra_pages += xen_released_pages;

	/*
	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
	 * factor the base size.  On non-highmem systems, the base
	 * size is the full initial memory allocation; on highmem it
	 * is limited to the max size of lowmem, so that it doesn't
	 * get completely filled.
	 *
	 * In principle there could be a problem in lowmem systems if
	 * the initial memory is also very large with respect to
	 * lowmem, but we won't try to deal with that here.
	 */
	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			  extra_pages);

	/*
	 * Walk the map.  An entry may be consumed in several passes: each
	 * pass handles 'size' bytes from the front of map[i], and the index
	 * only advances once the entry is used up.
	 */
	i = 0;
	while (i < memmap.nr_entries) {
		u64 addr = map[i].addr;
		u64 size = map[i].size;
		u32 type = map[i].type;

		if (type == E820_RAM) {
			if (addr < mem_end) {
				/* Part covered by the initial allocation. */
				size = min(size, mem_end - addr);
			} else if (extra_pages) {
				/* Beyond mem_end: use the extra budget. */
				size = min(size, (u64)extra_pages * PAGE_SIZE);
				extra_pages -= size / PAGE_SIZE;
				xen_add_extra_mem(addr, size);
			} else
				/* No extra budget left for this RAM. */
				type = E820_UNUSABLE;
		}

		xen_align_and_add_e820_region(addr, size, type);

		map[i].addr += size;
		map[i].size -= size;
		if (map[i].size == 0)
			i++;
	}

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
			E820_RESERVED);

	/*
	 * Reserve Xen bits:
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);

	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

	return "Xen";
}
예제 #28
0
/*
 * privcmd_ioctl - ioctl entry point of the privcmd device.
 *
 * IOCTL_PRIVCMD_HYPERCALL: forwards the requested hypercall.  A
 * non-negative result is stored in hcall->retval and 0 is returned; a
 * negative result is translated with xen_translate_error() and retval is
 * set to 0.
 *
 * IOCTL_PRIVCMD_MMAPBATCH: maps mmap->num foreign frames of domain
 * mmap->dom into the privcmd mapping that starts at mmap->addr, in chunks
 * of at most UINT16_MAX frames.  Per-frame errors are copied out to
 * mmap->err and recorded in the umap->err bitset.  Returns EINVAL when the
 * request does not exactly match an existing privcmd mapping.
 *
 * Any other command returns ENOSYS.
 */
static int
privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
	      int mode, struct thread *td)
{
	int error, i;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL: {
		struct ioctl_privcmd_hypercall *hcall;

		hcall = (struct ioctl_privcmd_hypercall *)arg;
#ifdef __amd64__
		/*
		 * The hypervisor page table walker will refuse to access
		 * user-space pages if SMAP is enabled, so temporarily disable
		 * it while performing the hypercall.
		 */
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			stac();
#endif
		error = privcmd_hypercall(hcall->op, hcall->arg[0],
		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
#ifdef __amd64__
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			clac();
#endif
		if (error >= 0) {
			/* Success: hand the hypercall result to the caller. */
			hcall->retval = error;
			error = 0;
		} else {
			error = xen_translate_error(error);
			hcall->retval = 0;
		}
		break;
	}
	case IOCTL_PRIVCMD_MMAPBATCH: {
		struct ioctl_privcmd_mmapbatch *mmap;
		vm_map_t map;
		vm_map_entry_t entry;
		vm_object_t mem;
		vm_pindex_t pindex;
		vm_prot_t prot;
		boolean_t wired;
		struct xen_add_to_physmap_range add;
		xen_ulong_t *idxs;
		xen_pfn_t *gpfns;
		int *errs, index;
		struct privcmd_map *umap;
		uint16_t num;

		mmap = (struct ioctl_privcmd_mmapbatch *)arg;

		/* Reject empty requests and unaligned addresses. */
		if ((mmap->num == 0) ||
		    ((mmap->addr & PAGE_MASK) != 0)) {
			error = EINVAL;
			break;
		}

		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_map_lookup(&map, mmap->addr, VM_PROT_NONE, &entry,
		    &mem, &pindex, &prot, &wired);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
			break;
		}
		/* The request must cover the map entry exactly. */
		if ((entry->start != mmap->addr) ||
		    (entry->end != mmap->addr + (mmap->num * PAGE_SIZE))) {
			vm_map_lookup_done(map, entry);
			error = EINVAL;
			break;
		}
		vm_map_lookup_done(map, entry);
		/* Only objects created by this driver are acceptable. */
		if ((mem->type != OBJT_MGTDEVICE) ||
		    (mem->un_pager.devp.ops != &privcmd_pg_ops)) {
			error = EINVAL;
			break;
		}
		umap = mem->handle;

		add.domid = DOMID_SELF;
		add.space = XENMAPSPACE_gmfn_foreign;
		add.foreign_domid = mmap->dom;

		/*
		 * The 'size' field in the xen_add_to_physmap_range only
		 * allows for UINT16_MAX mappings in a single hypercall.
		 */
		num = MIN(mmap->num, UINT16_MAX);

		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);

		set_xen_guest_handle(add.idxs, idxs);
		set_xen_guest_handle(add.gpfns, gpfns);
		set_xen_guest_handle(add.errs, errs);

		/* Allocate a bitset to store broken page mappings. */
		umap->err = BITSET_ALLOC(mmap->num, M_PRIVCMD,
		    M_WAITOK | M_ZERO);

		/* Process the request in chunks of at most UINT16_MAX. */
		for (index = 0; index < mmap->num; index += num) {
			num = MIN(mmap->num - index, UINT16_MAX);
			add.size = num;

			error = copyin(&mmap->arr[index], idxs,
			    sizeof(idxs[0]) * num);
			if (error != 0)
				goto mmap_out;

			/* Guest frames are consecutive from phys_base_addr. */
			for (i = 0; i < num; i++)
				gpfns[i] = atop(umap->phys_base_addr +
				    (i + index) * PAGE_SIZE);

			bzero(errs, sizeof(*errs) * num);

			error = HYPERVISOR_memory_op(
			    XENMEM_add_to_physmap_range, &add);
			if (error != 0) {
				error = xen_translate_error(error);
				goto mmap_out;
			}

			for (i = 0; i < num; i++) {
				if (errs[i] != 0) {
					errs[i] = xen_translate_error(errs[i]);

					/* Mark the page as invalid. */
					BIT_SET(mmap->num, index + i,
					    umap->err);
				}
			}

			error = copyout(errs, &mmap->err[index],
			    sizeof(errs[0]) * num);
			if (error != 0)
				goto mmap_out;
		}

		umap->mapped = true;

mmap_out:
		free(idxs, M_PRIVCMD);
		free(gpfns, M_PRIVCMD);
		free(errs, M_PRIVCMD);
		if (!umap->mapped) {
			free(umap->err, M_PRIVCMD);
			/*
			 * umap outlives this call; do not leave a dangling
			 * pointer that a later free could act on again.
			 */
			umap->err = NULL;
		}

		break;
	}

	default:
		error = ENOSYS;
		break;
	}

	return (error);
}