int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map,
			   unsigned int count)
{
	unsigned int i;
	int rc = 0;

	for (i = 0; i < count && rc == 0; ++i, ++map) {
		pte_t pte;

		if (!(map->flags & GNTMAP_host_map) ||
		    !(map->flags & GNTMAP_application_map))
			continue;

#ifdef CONFIG_X86
		pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT |
				_PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY |
				_PAGE_NX | _PAGE_SPECIAL) &
			       __supported_pte_mask);
#else
#error Architecture not yet supported.
#endif
		if (!(map->flags & GNTMAP_readonly))
			pte = pte_mkwrite(pte);

		if (map->flags & GNTMAP_contains_pte) {
			mmu_update_t u;

			u.ptr = map->host_addr;
			u.val = __pte_val(pte);
			rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
		} else
			rc = HYPERVISOR_update_va_mapping(map->host_addr,
							  pte, 0);
	}

	return rc;
}
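/*
 * A minimal usage sketch (illustrative, not from the original file):
 * batch-map a single grant with GNTTABOP_map_grant_ref, then mirror the
 * mapping into the application-visible PTE via gnttab_post_map_adjust().
 * The domid, gref and host_addr values are hypothetical placeholders.
 */
static int example_map_and_adjust(domid_t domid, grant_ref_t gref,
				  unsigned long host_addr)
{
	struct gnttab_map_grant_ref op;
	int rc;

	gnttab_set_map_op(&op, host_addr,
			  GNTMAP_host_map | GNTMAP_application_map,
			  gref, domid);
	rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (rc || op.status != GNTST_okay)
		return rc ?: -EINVAL;

	/* Fix up the user-visible PTE for the freshly mapped grant. */
	return gnttab_post_map_adjust(&op, 1);
}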
#ifdef CONFIG_XEN
static int __init
#else
int __devinit
#endif
gnttab_init(void)
{
	int i;
	unsigned int max_nr_glist_frames, nr_glist_frames;
	unsigned int nr_init_grefs;

	if (!is_running_on_xen())
		return -ENODEV;

	nr_grant_frames = 1;
	boot_max_nr_grant_frames = __max_nr_grant_frames();

	/* Determine the maximum number of frames required for the
	 * grant reference free list on the current hypervisor.
	 */
	max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);

	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
			      GFP_KERNEL);
	if (gnttab_list == NULL)
		return -ENOMEM;

	nr_glist_frames = nr_freelist_frames(nr_grant_frames);
	for (i = 0; i < nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
		if (gnttab_list[i] == NULL)
			goto ini_nomem;
	}

	if (gnttab_resume() < 0)
		return -ENODEV;

	nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;

	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
		gnttab_entry(i) = i + 1;

	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
	gnttab_free_head = NR_RESERVED_ENTRIES;

#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL)
	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
	    xen_feature(XENFEAT_gnttab_map_avail_bits)) {
#ifdef CONFIG_X86
		GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0)))
				      >> _PAGE_BIT_UNUSED1)
				     << _GNTMAP_guest_avail0;
#else
#error Architecture not yet supported.
#endif
	}
#endif

	return 0;

 ini_nomem:
	for (i--; i >= 0; i--)
		free_page((unsigned long)gnttab_list[i]);
	kfree(gnttab_list);
	return -ENOMEM;
}
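/*
 * A short usage sketch (illustrative, not part of the original file):
 * once gnttab_init() has built the grant free list, a driver can hand a
 * frame to a peer domain and later revoke it. Error handling is elided
 * and the peer domid is a placeholder.
 */
static int example_grant_page(domid_t peer, unsigned long frame)
{
	/* Allocates a free grant entry and points it at 'frame'. */
	return gnttab_grant_foreign_access(peer, frame, 0 /* writable */);
}

static void example_ungrant_page(grant_ref_t ref)
{
	/* Revokes the grant and returns 'ref' to the free list. */
	gnttab_end_foreign_access(ref, 0 /* readonly */, 0UL /* no page */);
}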
void xen_arch_pre_suspend(void)
{
	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
	xen_start_info->console.domU.mfn =
		mfn_to_pfn(xen_start_info->console.domU.mfn);

	BUG_ON(!irqs_disabled());

	HYPERVISOR_shared_info = &xen_dummy_shared_info;
	if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
					 __pte_ma(0), 0))
		BUG();
}
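/*
 * For context, a simplified sketch of where the pre-suspend hook fits
 * (an assumption about the call site, which varies by tree): the store
 * and console MFNs are converted to PFNs above so the toolstack can
 * rewrite them, then the suspend hypercall is issued with the frame of
 * the start_info page.
 */
static void example_suspend_flow(void)
{
	xen_arch_pre_suspend();
	/* Returns 0 on resume, non-zero if the suspend was cancelled. */
	HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
}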
#ifdef CONFIG_XEN
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != 1);
	return 0;
}
#endif

struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
	unsigned long vaddr, flags;
	struct page *page, **pagevec;
	int i, ret;

	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
	if (pagevec == NULL)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		page = pagevec[i] = alloc_page(GFP_KERNEL);
		if (page == NULL)
			goto err;

		vaddr = (unsigned long)page_address(page);
		scrub_pages(vaddr, 1);

		balloon_lock(flags);
		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			unsigned long gmfn = page_to_pfn(page);
			struct xen_memory_reservation reservation = {
				.nr_extents   = 1,
				.extent_order = 0,
				.domid        = DOMID_SELF
			};
			set_xen_guest_handle(reservation.extent_start, &gmfn);
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						   &reservation);
			if (ret == 1)
				ret = 0; /* success */
		} else {
#ifdef CONFIG_XEN
			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
						  dealloc_pte_fn, NULL);
#else
			/* Cannot handle non-auto translate mode. */
			ret = 1;
#endif
		}

		if (ret != 0) {
			balloon_unlock(flags);
			__free_page(page);
			goto err;
		}

		totalram_pages = --current_pages;

		balloon_unlock(flags);
	}

 out:
	schedule_work(&balloon_worker);
#ifdef CONFIG_XEN
	flush_tlb_all();
#endif
	return pagevec;

 err:
	balloon_lock(flags);
	while (--i >= 0)
		balloon_append(pagevec[i]);
	balloon_unlock(flags);
	kfree(pagevec);
	pagevec = NULL;
	goto out;
}
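/*
 * A hypothetical caller (illustrative only): backend drivers such as
 * netback use a pagevec of ballooned-out pages as mapping targets for
 * foreign frames, releasing it later with the driver's matching
 * free_empty_pages_and_pagevec() helper.
 */
static struct page **example_get_mapping_pages(int nr)
{
	struct page **pages = alloc_empty_pages_and_pagevec(nr);

	if (pages == NULL)
		printk(KERN_WARNING
		       "out of empty pages for foreign mappings\n");
	return pages;
}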
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	balloon_lock(flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		int ret;
		/* We hit the Xen hard limit: reprobe. */
		set_xen_guest_handle(reservation.extent_start, frame_list);
		reservation.nr_extents = rc;
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		BUG_ON(ret != rc);
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN
		xen_machphys_update(frame_list[i], pfn);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

 out:
	balloon_unlock(flags);

	return 0;
}

static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	down(&balloon_mutex);

	do {
		credit = current_target() - current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	up(&balloon_mutex);
}
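/*
 * A minimal sketch of the producer side (assumed to match the classic
 * driver's interface): the xenstore watch parses "memory/target" and
 * stores it where current_target() can see it, then kicks the worker.
 * 'target_pages' is assumed to be the variable backing current_target().
 */
static void example_set_new_target(unsigned long target)
{
	/* No need for a lock: these are not read-modify-write updates. */
	hard_limit   = ~0UL;
	target_pages = target;
	schedule_work(&balloon_worker);
}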
static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long len = 0;
	int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
	unsigned long pfn;
	int ret;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long frame;
		unsigned long mfn = pfn_to_mfn(pfn);

		if (release) {
			/* Make sure pfn exists to start with */
			if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
				continue;
			frame = mfn;
		} else {
			if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
				continue;
			frame = pfn;
		}
		set_xen_guest_handle(reservation.extent_start, &frame);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation :
						     XENMEM_populate_physmap,
					   &reservation);
		WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
		     release ? "release" : "populate", pfn, ret);

		if (ret == 1) {
			if (!early_set_phys_to_machine(pfn,
					release ? INVALID_P2M_ENTRY : frame)) {
				if (release)
					break;
				set_xen_guest_handle(reservation.extent_start,
						     &frame);
				reservation.nr_extents = 1;
				ret = HYPERVISOR_memory_op(
						XENMEM_decrease_reservation,
						&reservation);
				break;
			}
			len++;
		} else
			break;
	}
	if (len)
		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
		       release ? "Freeing" : "Populating",
		       start, end, len,
		       release ? "freed" : "added");
	return len;
}

static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	/*
	 * Xen already ballooned out the E820 non RAM regions for us
	 * and set them up properly in EPT.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return end - start;

	return xen_do_chunk(start, end, true);
}

static unsigned long __init xen_populate_chunk(
	const struct e820entry *list, size_t map_size,
	unsigned long max_pfn, unsigned long *last_pfn,
	unsigned long credits_left)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;
	unsigned long dest_pfn;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;
		unsigned long pfns;
		long capacity;

		if (credits_left <= 0)
			break;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after the xen_start_info->nr_pages */
		if (e_pfn <= max_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);
		/* If the E820 falls within the nr_pages, we want to start
		 * at the nr_pages PFN.
		 * If that would mean going past the E820 entry, skip it
		 */
		if (s_pfn <= max_pfn) {
			capacity = e_pfn - max_pfn;
			dest_pfn = max_pfn;
		} else {
			capacity = e_pfn - s_pfn;
			dest_pfn = s_pfn;
		}

		if (credits_left < capacity)
			capacity = credits_left;

		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
		done += pfns;
		*last_pfn = (dest_pfn + pfns);
		if (pfns < capacity)
			break;
		credits_left -= pfns;
	}
	return done;
}

static void __init xen_set_identity_and_release_chunk(
	unsigned long start_pfn, unsigned long end_pfn,
	unsigned long nr_pages, unsigned long *released,
	unsigned long *identity)
{
	unsigned long pfn;

	/*
	 * If the PFNs are currently mapped, clear the mappings
	 * (except for the ISA region which must be 1:1 mapped) to
	 * release the refcounts (in Xen) on the original frames.
	 */

	/*
	 * PVH E820 matches the hypervisor's P2M which means we need to
	 * account for the proper values of *released and *identity.
	 */
	for (pfn = start_pfn;
	     !xen_feature(XENFEAT_auto_translated_physmap) &&
		     pfn <= max_pfn_mapped && pfn < end_pfn;
	     pfn++) {
		pte_t pte = __pte_ma(0);

		if (pfn < PFN_UP(ISA_END_ADDRESS))
			pte = mfn_pte(pfn, PAGE_KERNEL_IO);

		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
	}

	if (start_pfn < nr_pages)
		*released += xen_release_chunk(
			start_pfn, min(end_pfn, nr_pages));

	*identity += set_phys_range_identity(start_pfn, end_pfn);
}
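/*
 * A simplified sketch of how the chunk helpers above are typically
 * driven (an assumption, not the verbatim upstream caller): walk the
 * E820 map and, for every non-RAM region, mark it identity-mapped and
 * release the RAM frames backing it.
 */
static unsigned long __init example_walk_e820(const struct e820entry *list,
					      size_t map_size,
					      unsigned long nr_pages,
					      unsigned long *released)
{
	unsigned long identity = 0;
	size_t i;

	for (i = 0; i < map_size; i++) {
		unsigned long s = PFN_UP(list[i].addr);
		unsigned long e = PFN_DOWN(list[i].addr + list[i].size);

		/* Identity-map and release only the non-RAM regions. */
		if (list[i].type != E820_RAM && s < e)
			xen_set_identity_and_release_chunk(
				s, e, nr_pages, released, &identity);
	}
	return identity;
}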
void __init setup_arch(char **cmdline_p)
{
	unsigned long kernel_end;

#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	struct e820entry *machine_e820;
	struct xen_memory_map memmap;
#endif

#ifdef CONFIG_XEN
	/* Register a call for panic conditions. */
	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);

	ROOT_DEV = MKDEV(RAMDISK_MAJOR, 0);
	kernel_end = 0;		/* dummy */
	screen_info = SCREEN_INFO;

	if (xen_start_info->flags & SIF_INITDOMAIN) {
		/* This is drawn from a dump from vgacon:startup in
		 * standard Linux. */
		screen_info.orig_video_mode = 3;
		screen_info.orig_video_isVGA = 1;
		screen_info.orig_video_lines = 25;
		screen_info.orig_video_cols = 80;
		screen_info.orig_video_ega_bx = 3;
		screen_info.orig_video_points = 16;
	} else
		screen_info.orig_video_isVGA = 0;

	edid_info = EDID_INFO;
	saved_video_mode = SAVED_VIDEO_MODE;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif

	setup_xen_features();

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_writable_pagetables);

	ARCH_SETUP
#else
	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	saved_video_mode = SAVED_VIDEO_MODE;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
#endif	/* !CONFIG_XEN */

	setup_memory_region();
	copy_edd();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) &_text;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

#ifndef CONFIG_XEN
	code_resource.start = virt_to_phys(&_text);
	code_resource.end = virt_to_phys(&_etext)-1;
	data_resource.start = virt_to_phys(&_etext);
	data_resource.end = virt_to_phys(&_edata)-1;
#endif

	parse_cmdline_early(cmdline_p);

	early_identify_cpu(&boot_cpu_data);

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	end_pfn = e820_end_of_ram();
	num_physpages = end_pfn;		/* for pfn_valid */

	check_efer();

#ifndef CONFIG_XEN
	discover_ebda();
#endif

	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));

#ifdef CONFIG_ACPI_NUMA
	/*
	 * Parse SRAT to discover nodes.
	 */
	acpi_numa_init();
#endif

#ifdef CONFIG_NUMA
	numa_initmem_init(0, end_pfn);
#else
	contig_initmem_init(0, end_pfn);
#endif

	/* Reserve direct mapping */
	reserve_bootmem_generic(table_start << PAGE_SHIFT,
				(table_end - table_start) << PAGE_SHIFT);

	/* reserve kernel */
	kernel_end = round_up(__pa_symbol(&_end), PAGE_SIZE);
	reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);

#ifdef CONFIG_XEN
	/* reserve physmap, start info and initial page tables */
	reserve_bootmem(kernel_end, (table_start << PAGE_SHIFT) - kernel_end);
#else
	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem_generic(0, PAGE_SIZE);

	/* reserve ebda region */
	if (ebda_addr)
		reserve_bootmem_generic(ebda_addr, ebda_size);
#endif

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);

	/* Reserve SMP trampoline */
	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
#endif

#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif

#ifdef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
	if (xen_start_info->mod_start) {
		if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
			/*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
			initrd_below_start_ok = 1;
		} else {
			printk(KERN_ERR "initrd extends beyond end of memory "
				"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
				(unsigned long)(INITRD_START + INITRD_SIZE),
				(unsigned long)(end_pfn << PAGE_SHIFT));
			initrd_start = 0;
		}
	}
#endif
#else	/* CONFIG_XEN */
#ifdef CONFIG_BLK_DEV_INITRD
	if (LOADER_TYPE && INITRD_START) {
		if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
			reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
			initrd_start =
				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
			initrd_end = initrd_start+INITRD_SIZE;
		} else {
			printk(KERN_ERR "initrd extends beyond end of memory "
				"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
				(unsigned long)(INITRD_START + INITRD_SIZE),
				(unsigned long)(end_pfn << PAGE_SHIFT));
			initrd_start = 0;
		}
	}
#endif
#endif	/* !CONFIG_XEN */

#ifdef CONFIG_KEXEC
	if (crashk_res.start != crashk_res.end) {
		reserve_bootmem(crashk_res.start,
			crashk_res.end - crashk_res.start + 1);
	}
#endif

	paging_init();

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif

#ifdef CONFIG_XEN
	{
		int i, j, k, fpp;
		unsigned long va;

		/* 'Initial mapping' of initrd must be destroyed. */
		for (va = xen_start_info->mod_start;
		     va < (xen_start_info->mod_start+xen_start_info->mod_len);
		     va += PAGE_SIZE) {
			HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
		}

		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Make sure we have a large enough P->M table. */
			phys_to_machine_mapping = alloc_bootmem(
				end_pfn * sizeof(unsigned long));
			memset(phys_to_machine_mapping, ~0,
			       end_pfn * sizeof(unsigned long));
			memcpy(phys_to_machine_mapping,
			       (unsigned long *)xen_start_info->mfn_list,
			       xen_start_info->nr_pages *
			       sizeof(unsigned long));
			free_bootmem(
				__pa(xen_start_info->mfn_list),
				PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
						sizeof(unsigned long))));

			/* Destroyed 'initial mapping' of old p2m table. */
			for (va = xen_start_info->mfn_list;
			     va < (xen_start_info->mfn_list +
				   (xen_start_info->nr_pages *
				    sizeof(unsigned long)));
			     va += PAGE_SIZE) {
				HYPERVISOR_update_va_mapping(va,
							     __pte_ma(0), 0);
			}

			/*
			 * Initialise the list of the frames that specify the
			 * list of frames that make up the p2m table. Used by
			 * save/restore.
			 */
			pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE);
			HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
				virt_to_mfn(pfn_to_mfn_frame_list_list);

			fpp = PAGE_SIZE/sizeof(unsigned long);
			for (i = 0, j = 0, k = -1; i < end_pfn; i += fpp, j++) {
				if ((j % fpp) == 0) {
					k++;
					BUG_ON(k >= fpp);
					pfn_to_mfn_frame_list[k] =
						alloc_bootmem(PAGE_SIZE);
					pfn_to_mfn_frame_list_list[k] =
						virt_to_mfn(pfn_to_mfn_frame_list[k]);
					j = 0;
				}
				pfn_to_mfn_frame_list[k][j] =
					virt_to_mfn(&phys_to_machine_mapping[i]);
			}
			HYPERVISOR_shared_info->arch.max_pfn = end_pfn;
		}
	}

	if (xen_start_info->flags & SIF_INITDOMAIN)
		dmi_scan_machine();

	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		acpi_disabled = 1;
#ifdef CONFIG_ACPI
		acpi_ht = 0;
#endif
	}
#endif

#ifndef CONFIG_XEN
	check_ioapic();
#endif

	zap_low_mappings(0);

	/*
	 * set this early, so we don't allocate cpu0
	 * if the MADT list doesn't list the BSP first
	 * mpparse.c/MP_processor_info() allocates logical cpu numbers.
	 */
	cpu_set(0, cpu_present_map);

#ifdef CONFIG_ACPI
	/*
	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
	 * Call this early for SRAT node setup.
	 */
	acpi_boot_table_init();

	/*
	 * Read APIC and some other early information from ACPI tables.
	 */
	acpi_boot_init();
#endif

	init_cpu_to_node();

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		get_smp_config();
#ifndef CONFIG_XEN
	init_apic_mappings();
#endif
#endif

#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
	prefill_possible_map();
#endif

	/*
	 * Request address space for all standard RAM and ROM resources
	 * and also for regions reported as reserved by the e820.
	 */
#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	probe_roms();
	if (xen_start_info->flags & SIF_INITDOMAIN) {
		machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);

		memmap.nr_entries = E820MAX;
		set_xen_guest_handle(memmap.buffer, machine_e820);

		BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map,
					    &memmap));

		e820_reserve_resources(machine_e820, memmap.nr_entries);
	}
#elif !defined(CONFIG_XEN)
	probe_roms();
	e820_reserve_resources(e820.map, e820.nr_map);
#endif

	request_resource(&iomem_resource, &video_ram_resource);

	{
		unsigned i;
		/* request I/O space for devices used on all i[345]86 PCs */
		for (i = 0; i < STANDARD_IO_RESOURCES; i++)
			request_resource(&ioport_resource,
					 &standard_io_resources[i]);
	}

#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
	if (xen_start_info->flags & SIF_INITDOMAIN) {
		e820_setup_gap(machine_e820, memmap.nr_entries);
		free_bootmem(__pa(machine_e820), PAGE_SIZE);
	}
#elif !defined(CONFIG_XEN)
	e820_setup_gap(e820.map, e820.nr_map);
#endif

#ifdef CONFIG_GART_IOMMU
	iommu_hole_init();
#endif

#ifdef CONFIG_XEN
	{
		struct physdev_set_iopl set_iopl;

		set_iopl.iopl = 1;
		HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);

		if (xen_start_info->flags & SIF_INITDOMAIN) {
			if (!(xen_start_info->flags & SIF_PRIVILEGED))
				panic("Xen granted us console access "
				      "but not privileged status");

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
			conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
			conswitchp = &dummy_con;
#endif
#endif
		} else {
			extern int console_use_vt;
			console_use_vt = 0;
		}
	}
#else	/* CONFIG_XEN */

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif

#endif /* !CONFIG_XEN */
}
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	spin_lock_irqsave(&balloon_lock, flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < 0)
		goto out;

	for (i = 0; i < rc; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		set_phys_to_machine(pfn, frame_list[i]);

		/* Link back into the page tables if not highmem. */
#ifdef CONFIG_PVM
		if (!xen_hvm_domain() && pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				mfn_pte(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		init_page_count(page);
		__free_page(page);
	}

	balloon_stats.current_pages += rc;
	if (old_totalram_pages + rc < totalram_pages) {
		printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n",
		       old_totalram_pages*4, totalram_pages*4);
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n",
		       totalram_pages*4, balloon_stats.current_pages*4);
	}
	old_totalram_pages = totalram_pages;

 out:
	spin_unlock_irqrestore(&balloon_lock, flags);

	return rc < 0 ? rc : rc != nr_pages;
}

static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		scrub_page(page);

		if (!xen_hvm_domain() && !PageHighMem(page)) {
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				__pte_ma(0), 0);
			BUG_ON(ret);
		}
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
#ifdef CONFIG_PVM
	kmap_flush_unused();
	flush_tlb_all();
#endif

	spin_lock_irqsave(&balloon_lock, flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	balloon_stats.current_pages -= nr_pages;
	if (old_totalram_pages < totalram_pages + nr_pages) {
		printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n",
		       old_totalram_pages*4, totalram_pages*4);
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n",
		       totalram_pages*4, balloon_stats.current_pages*4);
	}
	old_totalram_pages = totalram_pages;

	spin_unlock_irqrestore(&balloon_lock, flags);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(struct work_struct *work)
{
	int need_sleep = 0;
	long credit;
	long total_increase = 0;
	char buffer[16];

	mutex_lock(&balloon_mutex);

	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB, totalram_bias=%luKB\n",
	       totalram_pages*4, balloon_stats.current_pages*4,
	       totalram_bias*4);

	if (totalram_pages > old_totalram_pages) {
		/* TODO: we only know that totalram_pages will increase. */
		total_increase = (totalram_pages - old_totalram_pages) % GB2PAGE;
		if (totalram_bias > total_increase)
			totalram_bias = totalram_bias - total_increase;
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		old_totalram_pages = totalram_pages;
	}

	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB, totalram_bias=%luKB, total_increase=%ld\n",
	       totalram_pages*4, balloon_stats.current_pages*4,
	       totalram_bias*4, total_increase*4);

	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "1");
	do {
		credit = current_target() - balloon_stats.current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != balloon_stats.current_pages) {
		mod_timer(&balloon_timer, jiffies + HZ);
		sprintf(buffer, "%lu",
			balloon_stats.current_pages << (PAGE_SHIFT-10));
		xenbus_write(XBT_NIL, "memory", "target", buffer);
	}
	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "0");

	mutex_unlock(&balloon_mutex);
}
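/*
 * A sketch of the consumer of the "memory/target" key (assumed to match
 * this driver's xenstore watch handler; 'balloon_set_new_target' is a
 * hypothetical helper that updates the value behind current_target() and
 * kicks balloon_process): read the target in KiB, convert it to pages.
 */
static void example_watch_target(struct xenbus_watch *watch,
				 const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
	if (err != 1)
		return;		/* path may not exist yet during boot */

	/* xenstore publishes KiB; the balloon works in pages. */
	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}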