int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
			    struct gnttab_map_grant_ref *kmap_ops,
			    struct page **pages, unsigned int count)
{
	int i, ret = 0;
	pte_t *pte;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (kmap_ops) {
		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
						kmap_ops, count);
		if (ret)
			goto out;
	}

	for (i = 0; i < count; i++) {
		unsigned long mfn, pfn;

		/* Do not add to override if the map failed. */
		if (map_ops[i].status)
			continue;

		if (map_ops[i].flags & GNTMAP_contains_pte) {
			pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
				(map_ops[i].host_addr & ~PAGE_MASK));
			mfn = pte_mfn(*pte);
		} else {
			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
		}
		pfn = page_to_pfn(pages[i]);

		WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");

		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
			ret = -ENOMEM;
			goto out;
		}
	}

out:
	return ret;
}
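/*
 * A minimal sketch of how a caller is assumed to pair the batched map
 * hypercall with the p2m fixup above. The gnttab_map_refs() name and the
 * exact error handling are illustrative assumptions, not taken from this
 * excerpt.
 */
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
		    struct gnttab_map_grant_ref *kmap_ops,
		    struct page **pages, unsigned int count)
{
	int ret;

	/* Issue the actual grant-table map for the requested frames. */
	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
	if (ret)
		return ret;

	/* Record the foreign MFNs in the p2m so mfn_to_pfn() stays coherent. */
	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
}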
int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
			      struct gnttab_map_grant_ref *kmap_ops,
			      struct page **pages, unsigned int count)
{
	int i, ret = 0;
	bool lazy = false;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (kmap_ops && !in_interrupt() &&
	    paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
		arch_enter_lazy_mmu_mode();
		lazy = true;
	}

	for (i = 0; i < count; i++) {
		unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
		unsigned long pfn = page_to_pfn(pages[i]);

		if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
			ret = -EINVAL;
			goto out;
		}

		set_page_private(pages[i], INVALID_P2M_ENTRY);
		WARN_ON(!PagePrivate(pages[i]));
		ClearPagePrivate(pages[i]);
		set_phys_to_machine(pfn, pages[i]->index);

		if (kmap_ops)
			ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
		if (ret)
			goto out;
	}

out:
	if (lazy)
		arch_leave_lazy_mmu_mode();

	return ret;
}
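/*
 * For reference: the FOREIGN_FRAME_BIT test above relies on the p2m entry
 * encoding from the Xen page headers, which is assumed to look roughly like
 * this (illustrative reconstruction, not copied from this excerpt).
 */
#define INVALID_P2M_ENTRY	(~0UL)				/* pfn has no machine frame */
#define FOREIGN_FRAME_BIT	(1UL << (BITS_PER_LONG - 1))
#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)	/* mark a p2m entry as foreign */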
/*
 * Must not be called with IRQs off. This should only be used on the
 * slow path.
 *
 * Copy a foreign granted page to local memory.
 */
int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
{
	struct gnttab_unmap_and_replace unmap;
	mmu_update_t mmu;
	struct page *page;
	struct page *new_page;
	void *new_addr;
	void *addr;
	paddr_t pfn;
	maddr_t mfn;
	maddr_t new_mfn;
	int err;

	page = *pagep;
	if (!get_page_unless_zero(page))
		return -ENOENT;

	err = -ENOMEM;
	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!new_page)
		goto out;

	new_addr = page_address(new_page);
	addr = page_address(page);
	copy_page(new_addr, addr);

	pfn = page_to_pfn(page);
	mfn = pfn_to_mfn(pfn);
	new_mfn = virt_to_mfn(new_addr);

	write_seqlock_bh(&gnttab_dma_lock);

	/* Make seq visible before checking page_mapped. */
	smp_mb();

	/* Has the page been DMA-mapped? */
	if (unlikely(page_mapped(page))) {
		write_sequnlock_bh(&gnttab_dma_lock);
		put_page(new_page);
		err = -EBUSY;
		goto out;
	}

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		set_phys_to_machine(pfn, new_mfn);

	gnttab_set_replace_op(&unmap, (unsigned long)addr,
			      (unsigned long)new_addr, ref);

	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
					&unmap, 1);
	BUG_ON(err);
	BUG_ON(unmap.status != GNTST_okay);

	write_sequnlock_bh(&gnttab_dma_lock);

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);

		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
		mmu.val = pfn;
		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
		BUG_ON(err);
	}

	new_page->mapping = page->mapping;
	new_page->index = page->index;
	set_bit(PG_foreign, &new_page->flags);
	if (PageReserved(page))
		SetPageReserved(new_page);
	*pagep = new_page;

	SetPageForeign(page, gnttab_page_free);
	page->mapping = NULL;

out:
	put_page(page);
	return err;
}
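/*
 * The write side of gnttab_dma_lock above pairs with a read side in the
 * DMA-mapping path. A hypothetical reader would look roughly like this,
 * retrying its address lookup if a copy/replace raced with it; the
 * gnttab_dma_translate() name is an illustration, not taken from this
 * excerpt.
 */
static maddr_t gnttab_dma_translate(struct page *page)
{
	unsigned int seq;
	maddr_t maddr;

	do {
		seq = read_seqbegin(&gnttab_dma_lock);
		/* Re-read: the machine address changes under unmap_and_replace. */
		maddr = (maddr_t)pfn_to_mfn(page_to_pfn(page)) << PAGE_SHIFT;
	} while (read_seqretry(&gnttab_dma_lock, seq));

	return maddr;
}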
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	balloon_lock(flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		int ret;

		/* We hit the Xen hard limit: reprobe. */
		set_xen_guest_handle(reservation.extent_start, frame_list);
		reservation.nr_extents = rc;
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		BUG_ON(ret != rc);
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN
		xen_machphys_update(frame_list[i], pfn);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

out:
	balloon_unlock(flags);

	return 0;
}

static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	down(&balloon_mutex);

	do {
		credit = current_target() - current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	up(&balloon_mutex);
}
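/*
 * balloon_timer above is assumed to simply kick the worker again so that
 * balloon_process() reruns from process context. A minimal sketch under
 * that assumption; the balloon_worker name and the pre-2.6.20 three-argument
 * DECLARE_WORK() form are assumptions of this sketch, not taken from this
 * excerpt.
 */
static DECLARE_WORK(balloon_worker, balloon_process, NULL);

static void balloon_alarm(unsigned long unused)
{
	/* Defer back to the workqueue; timer context cannot sleep. */
	schedule_work(&balloon_worker);
}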
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;

	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for (pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_CONT "%ld pages freed\n", len);

	return len;
}

static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = 0;
	unsigned long released = 0;
	int i;

	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		released += xen_release_chunk(last_end, end);
		last_end = e820->map[i].addr + e820->map[i].size;
	}

	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	printk(KERN_INFO "released %ld pages of unused memory\n", released);
	return released;
}
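/*
 * xen_release_chunk() rounds inwards so that only whole pages lying entirely
 * inside the [start_addr, end_addr) hole are released; for a hole smaller
 * than one page the rounded range collapses and the early "end <= start"
 * return triggers. The macros are assumed to carry their usual
 * <linux/pfn.h> meaning:
 */
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)	/* round address up to a pfn */
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)			/* round address down to a pfn */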
/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if (pfn >= PFN_UP(__pa(high_memory - 1)))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

	mfn_save = virt_to_mfn(buf);

	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}
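/*
 * The chunked remap list built above stores its bookkeeping in the pages
 * being remapped. xen_remap_buf is assumed to be a page-sized, page-aligned
 * window along these lines (illustrative reconstruction; REMAP_SIZE leaves
 * room for the three header fields in one page of unsigned longs):
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
	unsigned long next_area_mfn;	/* MFN of the next buffer in the list */
	unsigned long target_pfn;	/* PFN this chunk will be remapped to */
	unsigned long size;		/* number of valid entries in mfns[] */
	unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;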
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
			  int i, struct netrx_pending_operations *npo,
			  struct page *page, unsigned long size,
			  unsigned long offset)
{
	mmu_update_t *mmu;
	gnttab_transfer_t *gop;
	gnttab_copy_t *copy_gop;
	multicall_entry_t *mcl;
	netif_rx_request_t *req;
	unsigned long old_mfn, new_mfn;

	old_mfn = virt_to_mfn(page_address(page));

	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
	if (netif->copying_receiver) {
		/* The fragment needs to be copied rather than flipped. */
		meta->copy = 1;
		copy_gop = npo->copy + npo->copy_prod++;
		copy_gop->flags = GNTCOPY_dest_gref;
		if (PageForeign(page)) {
			struct pending_tx_info *src_pend =
				&pending_tx_info[page->index];
			copy_gop->source.domid = src_pend->netif->domid;
			copy_gop->source.u.ref = src_pend->req.gref;
			copy_gop->flags |= GNTCOPY_source_gref;
		} else {
			copy_gop->source.domid = DOMID_SELF;
			copy_gop->source.u.gmfn = old_mfn;
		}
		copy_gop->source.offset = offset;
		copy_gop->dest.domid = netif->domid;
		copy_gop->dest.offset = 0;
		copy_gop->dest.u.ref = req->gref;
		copy_gop->len = size;
	} else {
		meta->copy = 0;
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			new_mfn = alloc_mfn();

			/*
			 * Set the new P2M table entry before
			 * reassigning the old data page. Heed the
			 * comment in pgtable-2level.h:pte_page(). :-)
			 */
			set_phys_to_machine(page_to_pfn(page), new_mfn);

			mcl = npo->mcl + npo->mcl_prod++;
			MULTI_update_va_mapping(mcl,
					(unsigned long)page_address(page),
					pfn_pte_ma(new_mfn, PAGE_KERNEL),
					0);

			mmu = npo->mmu + npo->mmu_prod++;
			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
				   MMU_MACHPHYS_UPDATE;
			mmu->val = page_to_pfn(page);
		}

		gop = npo->trans + npo->trans_prod++;
		gop->mfn = old_mfn;
		gop->domid = netif->domid;
		gop->ref = req->gref;
	}
	return req->id;
}
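/*
 * The npo batching structure used above is not shown in this excerpt; from
 * the fields referenced here it is assumed to collect one array plus a
 * producer index per pending operation type, roughly:
 */
struct netrx_pending_operations {
	unsigned copy_prod, mcl_prod, mmu_prod, trans_prod;
	gnttab_copy_t *copy;		/* grant copies for copying receivers */
	multicall_entry_t *mcl;		/* update_va_mapping multicalls */
	mmu_update_t *mmu;		/* machphys updates for flipped pages */
	gnttab_transfer_t *trans;	/* page transfers for flipping receivers */
};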
int m2p_remove_override(struct page *page, bool clear_pte)
{
	unsigned long flags;
	unsigned long mfn;
	unsigned long pfn;
	unsigned long uninitialized_var(address);
	unsigned level;
	pte_t *ptep = NULL;
	int ret = 0;

	pfn = page_to_pfn(page);
	mfn = get_phys_to_machine(pfn);
	if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
		return -EINVAL;

	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);

		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
			 "m2p_remove_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_del(&page->lru);
	spin_unlock_irqrestore(&m2p_override_lock, flags);
	WARN_ON(!PagePrivate(page));
	ClearPagePrivate(page);

	if (clear_pte) {
		struct gnttab_map_grant_ref *map_op =
			(struct gnttab_map_grant_ref *) page->index;
		set_phys_to_machine(pfn, map_op->dev_bus_addr);
		if (!PageHighMem(page)) {
			struct multicall_space mcs;
			struct gnttab_unmap_grant_ref *unmap_op;

			/*
			 * It might be that we queued all the m2p grant table
			 * hypercalls in a multicall, then m2p_remove_override
			 * gets called before the multicall has actually been
			 * issued. In this case the handle is going to be -1
			 * because it hasn't been modified yet.
			 */
			if (map_op->handle == -1)
				xen_mc_flush();
			/*
			 * Now if map_op->handle is negative it means that the
			 * hypercall actually returned an error.
			 */
			if (map_op->handle == GNTST_general_error) {
				printk(KERN_WARNING "m2p_remove_override: "
					"pfn %lx mfn %lx, failed to modify kernel mappings",
					pfn, mfn);
				return -1;
			}

			mcs = xen_mc_entry(
				sizeof(struct gnttab_unmap_grant_ref));
			unmap_op = mcs.args;
			unmap_op->host_addr = map_op->host_addr;
			unmap_op->handle = map_op->handle;
			unmap_op->dev_bus_addr = 0;

			MULTI_grant_table_op(mcs.mc,
					GNTTABOP_unmap_grant_ref, unmap_op, 1);

			xen_mc_issue(PARAVIRT_LAZY_MMU);

			set_pte_at(&init_mm, address, ptep,
				   pfn_pte(pfn, PAGE_KERNEL));
			__flush_tlb_single(address);
			map_op->host_addr = 0;
		}
	} else
		set_phys_to_machine(pfn, page->index);

	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
	 * somewhere in this domain, even before being added to the
	 * m2p_override (see comment above in m2p_add_override).
	 * If there are no other entries in the m2p_override corresponding
	 * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
	 * the original pfn (the one shared by the frontend): the backend
	 * cannot do any IO on this page anymore because it has been
	 * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
	 * the original pfn causes mfn_to_pfn(mfn) to return the frontend
	 * pfn again. */
	mfn &= ~FOREIGN_FRAME_BIT;
	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
	if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
	    m2p_find_override(mfn) == NULL)
		set_phys_to_machine(pfn, mfn);

	return 0;
}
/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page,
		     struct gnttab_map_grant_ref *kmap_op)
{
	unsigned long flags;
	unsigned long pfn;
	unsigned long uninitialized_var(address);
	unsigned level;
	pte_t *ptep = NULL;
	int ret = 0;

	pfn = page_to_pfn(page);
	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);
		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
			 "m2p_add_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}
	WARN_ON(PagePrivate(page));
	SetPagePrivate(page);
	set_page_private(page, mfn);
	page->index = pfn_to_mfn(pfn);

	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
		return -ENOMEM;

	if (kmap_op != NULL) {
		if (!PageHighMem(page)) {
			struct multicall_space mcs =
				xen_mc_entry(sizeof(*kmap_op));

			MULTI_grant_table_op(mcs.mc,
					GNTTABOP_map_grant_ref, kmap_op, 1);

			xen_mc_issue(PARAVIRT_LAZY_MMU);
		}
		/* let's use dev_bus_addr to record the old mfn instead */
		kmap_op->dev_bus_addr = page->index;
		page->index = (unsigned long) kmap_op;
	}
	spin_lock_irqsave(&m2p_override_lock, flags);
	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	/* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
	 * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
	 * pfn so that the following mfn_to_pfn(mfn) calls will return the
	 * pfn from the m2p_override (the backend pfn) instead.
	 * We need to do this because the pages shared by the frontend
	 * (xen-blkfront) can be already locked (lock_page, called by
	 * do_read_cache_page); when the userspace backend tries to use them
	 * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
	 * do_blockdev_direct_IO is going to try to lock the same pages
	 * again resulting in a deadlock.
	 * As a side effect get_user_pages_fast might not be safe on the
	 * frontend pages while they are being shared with the backend,
	 * because mfn_to_pfn (that ends up being called by GUPF) will
	 * return the backend pfn rather than the frontend pfn. */
	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
	if (ret == 0 && get_phys_to_machine(pfn) == mfn)
		set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));

	return 0;
}
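/*
 * m2p_find_override(), used by the m2p_remove_override() variants in this
 * section, is not shown in the excerpt. It is assumed to be a straightforward
 * walk of the hash bucket that m2p_add_override() inserts into, roughly:
 */
static struct page *m2p_find_override(unsigned long mfn)
{
	unsigned long flags;
	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
	struct page *p, *ret = NULL;

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_for_each_entry(p, bucket, lru) {
		/* page_private() holds the overridden MFN, set in m2p_add_override(). */
		if (page_private(p) == mfn) {
			ret = p;
			break;
		}
	}
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	return ret;
}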
void __init adjust_boot_vcpu_info(void)
{
	unsigned long lpfn, rpfn, lmfn, rmfn;
	pte_t *lpte, *rpte;
	unsigned int level;
	mmu_update_t mmu[2];

	/*
	 * setup_vcpu_info() cannot be used more than once for a given (v)CPU,
	 * hence we must swap the underlying MFNs of the two pages holding old
	 * and new vcpu_info of the boot CPU.
	 *
	 * Do *not* use __get_cpu_var() or percpu_{write,...}() here, as the per-
	 * CPU segment didn't get reloaded yet. Using percpu_read(), as in
	 * arch_use_lazy_mmu_mode(), though undesirable, is safe except for the
	 * accesses to variables that were updated in setup_percpu_areas().
	 */
	lpte = lookup_address((unsigned long)&per_cpu_var(vcpu_info)
			      + (__per_cpu_load - __per_cpu_start),
			      &level);
	rpte = lookup_address((unsigned long)&per_cpu(vcpu_info, 0), &level);
	BUG_ON(!lpte || !(pte_flags(*lpte) & _PAGE_PRESENT));
	BUG_ON(!rpte || !(pte_flags(*rpte) & _PAGE_PRESENT));
	lmfn = __pte_mfn(*lpte);
	rmfn = __pte_mfn(*rpte);

	if (lmfn == rmfn)
		return;

	lpfn = mfn_to_local_pfn(lmfn);
	rpfn = mfn_to_local_pfn(rmfn);

	printk(KERN_INFO
	       "Swapping MFNs for PFN %lx and %lx (MFN %lx and %lx)\n",
	       lpfn, rpfn, lmfn, rmfn);

	xen_l1_entry_update(lpte, pfn_pte_ma(rmfn, pte_pgprot(*lpte)));
	xen_l1_entry_update(rpte, pfn_pte_ma(lmfn, pte_pgprot(*rpte)));
#ifdef CONFIG_X86_64
	if (HYPERVISOR_update_va_mapping((unsigned long)__va(lpfn<<PAGE_SHIFT),
					 pfn_pte_ma(rmfn, PAGE_KERNEL_RO), 0))
		BUG();
#endif
	if (HYPERVISOR_update_va_mapping((unsigned long)__va(rpfn<<PAGE_SHIFT),
					 pfn_pte_ma(lmfn, PAGE_KERNEL),
					 UVMF_TLB_FLUSH))
		BUG();

	set_phys_to_machine(lpfn, rmfn);
	set_phys_to_machine(rpfn, lmfn);

	mmu[0].ptr = ((uint64_t)lmfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	mmu[0].val = rpfn;
	mmu[1].ptr = ((uint64_t)rmfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	mmu[1].val = lpfn;
	if (HYPERVISOR_mmu_update(mmu, 2, NULL, DOMID_SELF))
		BUG();

	/*
	 * Copy over all contents of the page just replaced, except for the
	 * vcpu_info itself, as it may have got updated after having been
	 * copied from __per_cpu_load[].
	 */
	memcpy(__va(rpfn << PAGE_SHIFT),
	       __va(lpfn << PAGE_SHIFT),
	       (unsigned long)&per_cpu_var(vcpu_info) & (PAGE_SIZE - 1));
	level = (unsigned long)(&per_cpu_var(vcpu_info) + 1) & (PAGE_SIZE - 1);
	if (level)
		memcpy(__va(rpfn << PAGE_SHIFT) + level,
		       __va(lpfn << PAGE_SHIFT) + level,
		       PAGE_SIZE - level);
}
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	spin_lock_irqsave(&balloon_lock, flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < 0)
		goto out;

	for (i = 0; i < rc; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		set_phys_to_machine(pfn, frame_list[i]);

		/* Link back into the page tables if not highmem. */
#ifdef CONFIG_PVM
		if (!xen_hvm_domain() && pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				mfn_pte(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		init_page_count(page);
		__free_page(page);
	}

	balloon_stats.current_pages += rc;
	if (old_totalram_pages + rc < totalram_pages) {
		printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n",
		       old_totalram_pages*4, totalram_pages*4);
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n",
		       totalram_pages*4, balloon_stats.current_pages*4);
	}
	old_totalram_pages = totalram_pages;

out:
	spin_unlock_irqrestore(&balloon_lock, flags);

	return rc < 0 ? rc : rc != nr_pages;
}

static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		scrub_page(page);

		if (!xen_hvm_domain() && !PageHighMem(page)) {
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				__pte_ma(0), 0);
			BUG_ON(ret);
		}
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
#ifdef CONFIG_PVM
	kmap_flush_unused();
	flush_tlb_all();
#endif

	spin_lock_irqsave(&balloon_lock, flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	balloon_stats.current_pages -= nr_pages;
	if (old_totalram_pages < totalram_pages + nr_pages) {
		printk(KERN_INFO "old_totalram=%luKB, totalram_pages=%luKB\n",
		       old_totalram_pages*4, totalram_pages*4);
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		printk(KERN_INFO "when ballooning, the mem online! totalram=%luKB, current=%luKB\n",
		       totalram_pages*4, balloon_stats.current_pages*4);
	}
	old_totalram_pages = totalram_pages;

	spin_unlock_irqrestore(&balloon_lock, flags);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(struct work_struct *work)
{
	int need_sleep = 0;
	long credit;
	long total_increase = 0;
	char buffer[16];

	mutex_lock(&balloon_mutex);

	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB,totalram_bias=%luKB\n",
	       totalram_pages*4, balloon_stats.current_pages*4, totalram_bias*4);

	if (totalram_pages > old_totalram_pages) {
		/* TODO: we only know that totalram_pages will increase. */
		total_increase = (totalram_pages - old_totalram_pages) % GB2PAGE;
		if (totalram_bias > total_increase)
			totalram_bias = totalram_bias - total_increase;
		balloon_stats.current_pages = totalram_pages + totalram_bias;
		old_totalram_pages = totalram_pages;
	}

	printk(KERN_INFO "totalram_pages=%luKB, current_pages=%luKB, totalram_bias=%luKB,total_increase=%ld\n",
	       totalram_pages*4, balloon_stats.current_pages*4,
	       totalram_bias*4, total_increase*4);

	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "1");

	do {
		credit = current_target() - balloon_stats.current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != balloon_stats.current_pages) {
		mod_timer(&balloon_timer, jiffies + HZ);
		sprintf(buffer, "%lu",
			balloon_stats.current_pages << (PAGE_SHIFT - 10));
		xenbus_write(XBT_NIL, "memory", "target", buffer);
	}

	xenbus_write(XBT_NIL, "control/uvp", "Balloon_flag", "0");

	mutex_unlock(&balloon_mutex);
}
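/*
 * current_target() is not shown in this excerpt. In the mainline balloon
 * driver it clamps the requested target to what the driver can actually
 * reach with the pages it tracks; a sketch under the assumption that this
 * modified driver keeps the same balloon_stats fields:
 */
static unsigned long current_target(void)
{
	unsigned long target = balloon_stats.target_pages;

	/* Never aim beyond what is currently populated plus the balloon lists. */
	target = min(target,
		     balloon_stats.current_pages +
		     balloon_stats.balloon_low +
		     balloon_stats.balloon_high);

	return target;
}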
static int m2p_remove_override(struct page *page,
			       struct gnttab_map_grant_ref *kmap_op,
			       unsigned long mfn)
{
	unsigned long flags;
	unsigned long pfn;
	unsigned long uninitialized_var(address);
	unsigned level;
	pte_t *ptep = NULL;

	pfn = page_to_pfn(page);

	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);

		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
			 "m2p_remove_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_del(&page->lru);
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	if (kmap_op != NULL) {
		if (!PageHighMem(page)) {
			struct multicall_space mcs;
			struct gnttab_unmap_and_replace *unmap_op;
			struct page *scratch_page = get_balloon_scratch_page();
			unsigned long scratch_page_address = (unsigned long)
				__va(page_to_pfn(scratch_page) << PAGE_SHIFT);

			/*
			 * It might be that we queued all the m2p grant table
			 * hypercalls in a multicall, then m2p_remove_override
			 * gets called before the multicall has actually been
			 * issued. In this case the handle is going to be -1
			 * because it hasn't been modified yet.
			 */
			if (kmap_op->handle == -1)
				xen_mc_flush();
			/*
			 * Now if kmap_op->handle is negative it means that the
			 * hypercall actually returned an error.
			 */
			if (kmap_op->handle == GNTST_general_error) {
				pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
					pfn, mfn);
				put_balloon_scratch_page();
				return -1;
			}

			xen_mc_batch();

			mcs = __xen_mc_entry(
				sizeof(struct gnttab_unmap_and_replace));
			unmap_op = mcs.args;
			unmap_op->host_addr = kmap_op->host_addr;
			unmap_op->new_addr = scratch_page_address;
			unmap_op->handle = kmap_op->handle;

			MULTI_grant_table_op(mcs.mc,
					GNTTABOP_unmap_and_replace, unmap_op, 1);

			mcs = __xen_mc_entry(0);
			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
					pfn_pte(page_to_pfn(scratch_page),
						PAGE_KERNEL_RO), 0);

			xen_mc_issue(PARAVIRT_LAZY_MMU);

			kmap_op->host_addr = 0;
			put_balloon_scratch_page();
		}
	}

	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
	 * somewhere in this domain, even before being added to the
	 * m2p_override (see comment above in m2p_add_override).
	 * If there are no other entries in the m2p_override corresponding
	 * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
	 * the original pfn (the one shared by the frontend): the backend
	 * cannot do any IO on this page anymore because it has been
	 * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
	 * the original pfn causes mfn_to_pfn(mfn) to return the frontend
	 * pfn again. */
	mfn &= ~FOREIGN_FRAME_BIT;
	pfn = mfn_to_pfn_no_overrides(mfn);
	if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
	    m2p_find_override(mfn) == NULL)
		set_phys_to_machine(pfn, mfn);

	return 0;
}
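/*
 * get_balloon_scratch_page()/put_balloon_scratch_page() above are assumed to
 * hand out a per-CPU scratch frame that unmapped grants can be replaced
 * with; a minimal sketch under that assumption (the per-CPU variable name
 * is part of the assumption).
 */
static DEFINE_PER_CPU(struct page *, balloon_scratch_page);

static struct page *get_balloon_scratch_page(void)
{
	/* get_cpu_var() disables preemption until the matching put. */
	struct page *ret = get_cpu_var(balloon_scratch_page);

	BUG_ON(ret == NULL);
	return ret;
}

static void put_balloon_scratch_page(void)
{
	put_cpu_var(balloon_scratch_page);
}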