static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; unsigned long frames[pages]; int f; /* * A GDT can be up to 64k in size, which corresponds to 8192 * 8-byte entries, or 16 4k pages. */ BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { int level; pte_t *ptep; unsigned long pfn, mfn; void *virt; /* * The GDT is per-cpu and is in the percpu data area. * That can be virtually mapped, so we need to do a * page-walk to get the underlying MFN for the * hypercall. The page can also be in the kernel's * linear range, so we need to make that mapping read-only too. */ ptep = lookup_address(va, &level); BUG_ON(ptep == NULL); pfn = pte_pfn(*ptep); mfn = pfn_to_mfn(pfn); virt = __va(PFN_PHYS(pfn)); frames[f] = mfn; make_lowmem_page_readonly((void *)va); make_lowmem_page_readonly(virt); } if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) BUG(); }
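Every snippet in this collection relies on pfn_to_mfn() to translate a guest pseudo-physical frame number into the machine frame number that the hypervisor expects. As a rough mental model only (the real Linux implementation layers a multi-level p2m tree, identity ranges and foreign-frame tagging on top of this), the translation is an indexed lookup in a guest-maintained phys-to-machine table. The sketch below assumes a simple flat array in the Mini-OS style; the demo_* names are illustrative, not the kernel's API.

/* Minimal sketch of a flat p2m lookup, assuming one table entry per PFN.
 * INVALID_P2M_ENTRY marks frames that are currently ballooned out. */
#define INVALID_P2M_ENTRY (~0UL)

extern unsigned long *phys_to_machine_mapping;   /* one MFN per PFN */
extern unsigned long p2m_size;                   /* number of tracked PFNs */

static inline unsigned long demo_pfn_to_mfn(unsigned long pfn)
{
    if (pfn >= p2m_size)
        return INVALID_P2M_ENTRY;
    return phys_to_machine_mapping[pfn];
}

static inline int demo_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
    if (pfn >= p2m_size)
        return 0;                                /* caller must grow the table */
    phys_to_machine_mapping[pfn] = mfn;
    return 1;
}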
void clear_highpage(struct page *page) { void *kaddr; if (likely(xen_feature(XENFEAT_highmem_assist)) && PageHighMem(page)) { struct mmuext_op meo; meo.cmd = MMUEXT_CLEAR_PAGE; meo.arg1.mfn = pfn_to_mfn(page_to_pfn(page)); if (HYPERVISOR_mmuext_op(&meo, 1, NULL, DOMID_SELF) == 0) return; } kaddr = kmap_atomic(page, KM_USER0); clear_page(kaddr); kunmap_atomic(kaddr, KM_USER0); }
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) { int i, ret = 0; pte_t *pte; if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; if (kmap_ops) { ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, kmap_ops, count); if (ret) goto out; } for (i = 0; i < count; i++) { unsigned long mfn, pfn; /* Do not add to override if the map failed. */ if (map_ops[i].status) continue; if (map_ops[i].flags & GNTMAP_contains_pte) { pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + (map_ops[i].host_addr & ~PAGE_MASK)); mfn = pte_mfn(*pte); } else { mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } pfn = page_to_pfn(pages[i]); WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { ret = -ENOMEM; goto out; } } out: return ret; }
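FOREIGN_FRAME() above tags the MFN stored in the p2m so later lookups can tell that the frame is owned by another domain (the grant mapper here, and m2p_add_override further down, both rely on it). A sketch of that encoding follows; the usual x86 definition sets the top bit of the entry, but treat the exact bit position here as an assumption.

/* Illustrative foreign-frame tagging: the top bit of a p2m entry marks
 * an MFN that belongs to a foreign domain. */
#define DEMO_FOREIGN_FRAME_BIT  (1UL << (sizeof(unsigned long) * 8 - 1))
#define DEMO_FOREIGN_FRAME(m)   ((m) | DEMO_FOREIGN_FRAME_BIT)

static inline int demo_entry_is_foreign(unsigned long entry)
{
    return (entry & DEMO_FOREIGN_FRAME_BIT) != 0;
}

static inline unsigned long demo_entry_to_mfn(unsigned long entry)
{
    return entry & ~DEMO_FOREIGN_FRAME_BIT;       /* strip the tag */
}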
/* Release a pagetable's pages back as normal RW */ static void xen_pgd_unpin(pgd_t *pgd) { struct mmuext_op *op; struct multicall_space mcs; xen_mc_batch(); mcs = __xen_mc_entry(sizeof(*op)); op = mcs.args; op->cmd = MMUEXT_UNPIN_TABLE; op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); pgd_walk(pgd, unpin_page, TASK_SIZE); xen_mc_issue(0); }
int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to) { unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE; unsigned long i; pages = realloc(pages, new_allocated * sizeof(*pages)); pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns)); pages_moved2pfns = realloc(pages_moved2pfns, new_allocated * sizeof(*pages_moved2pfns)); for (i = allocated; i < new_allocated; i++) { /* Exchange old page of PFN i with a newly allocated page. */ xen_pfn_t old_mfn = dom->p2m_host[i]; xen_pfn_t new_pfn; xen_pfn_t new_mfn; pages[i] = alloc_page(); memset((void*) pages[i], 0, PAGE_SIZE); new_pfn = PHYS_PFN(to_phys(pages[i])); pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn); /* * If the PFN of the newly allocated page (new_pfn) is less than the currently * requested PFN (i), then look for the relevant PFN/MFN pair. In this * situation dom->p2m_host[new_pfn] no longer contains the proper MFN * because the original page with new_pfn was moved earlier * to a different location. */ for (; new_pfn < i; new_pfn = pages_moved2pfns[new_pfn]); /* Store destination PFN of currently requested page. */ pages_moved2pfns[i] = new_pfn; /* Put old page at new PFN */ dom->p2m_host[new_pfn] = old_mfn; /* Put new page at PFN i */ dom->p2m_host[i] = new_mfn; } allocated = new_allocated; return 0; }
int gnttable_init(void) { int dom0_id = 0; unsigned long vaddr; unsigned long mfn; vaddr = alloc_pages(0); mfn = pfn_to_mfn(virt_to_pfn(vaddr)); // printf("[gnttab_test_dom1] set grant table entry %d\n", GNTTAB_REF_NUM); // printf("[gnttab_test_dom1] vaddr = 0x%lx, mfn = 0x%lx\n", vaddr, mfn); gnttab_grant_foreign_access_ref(GNTTAB_REF_NUM, dom0_id, mfn, 1); shared_ring = (shared_ring_t *) vaddr; shared_ring->start = 0; shared_ring->end = 0; return 0; }
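The page granted above is consumed by a peer domain that maps it via the grant reference and treats it as a small byte ring bounded by the start and end fields. Only those two fields appear in the snippet, so the layout and index discipline below are assumptions for illustration (shared_ring_t and GNTTAB_REF_NUM come from the code; the buffer size and demo_ring_put() helper do not).

/* Hypothetical layout of the granted page: a single-producer,
 * single-consumer byte ring filling the rest of the 4k page. */
#define DEMO_RING_BUF_SIZE (4096 - 2 * sizeof(unsigned int))

typedef struct demo_shared_ring {
    unsigned int start;                  /* consumer index */
    unsigned int end;                    /* producer index */
    char buf[DEMO_RING_BUF_SIZE];        /* payload bytes */
} demo_shared_ring_t;

/* Producer side: queue as many bytes as fit, return the count queued. */
static unsigned int demo_ring_put(demo_shared_ring_t *r,
                                  const char *data, unsigned int len)
{
    unsigned int n = 0;

    while (n < len && (r->end + 1) % DEMO_RING_BUF_SIZE != r->start) {
        r->buf[r->end] = data[n++];
        r->end = (r->end + 1) % DEMO_RING_BUF_SIZE;
    }
    return n;
}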
void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn) { unsigned long start_address, end_address; unsigned long pfn_to_map, pt_pfn = *start_pfn; static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; pgentry_t *tab = (pgentry_t *)start_info.pt_base, page; unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); unsigned long offset; int count = 0; pfn_to_map = (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES; if (*max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START)) { printk("WARNING: Mini-OS trying to use Xen virtual space. " "Truncating memory from %luMB to ", ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20); *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE); printk("%luMB\n", ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20); }
static void __init xen_add_extra_mem(u64 start, u64 size) { unsigned long pfn; int i; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { /* Add new region. */ if (xen_extra_mem[i].size == 0) { xen_extra_mem[i].start = start; xen_extra_mem[i].size = size; break; } /* Append to existing region. */ if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) { xen_extra_mem[i].size += size; break; } } if (i == XEN_EXTRA_MEM_MAX_REGIONS) printk(KERN_WARNING "Warning: not enough extra memory regions\n"); memblock_reserve(start, size); if (xen_feature(XENFEAT_auto_translated_physmap)) return; xen_max_p2m_pfn = PFN_DOWN(start + size); for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) continue; WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", pfn, mfn); __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } }
static unsigned long __init xen_release_chunk(unsigned long start, unsigned long end) { struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; unsigned long len = 0; unsigned long pfn; int ret; for(pfn = start; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); /* Make sure pfn exists to start with */ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) continue; set_xen_guest_handle(reservation.extent_start, &mfn); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); if (ret == 1) { __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", start, end, len); return len; } static unsigned long __init xen_set_identity_and_release( const struct e820entry *list, size_t map_size, unsigned long nr_pages) { phys_addr_t start = 0; unsigned long released = 0; unsigned long identity = 0; const struct e820entry *entry; int i; /* * Combine non-RAM regions and gaps until a RAM region (or the * end of the map) is reached, then set the 1:1 map and * release the pages (if available) in those non-RAM regions. * * The combined non-RAM regions are rounded to a whole number * of pages so any partial pages are accessible via the 1:1 * mapping. This is needed for some BIOSes that put (for * example) the DMI tables in a reserved region that begins on * a non-page boundary. */ for (i = 0, entry = list; i < map_size; i++, entry++) { phys_addr_t end = entry->addr + entry->size; if (entry->type == E820_RAM || i == map_size - 1) { unsigned long start_pfn = PFN_DOWN(start); unsigned long end_pfn = PFN_UP(end); if (entry->type == E820_RAM) end_pfn = PFN_UP(entry->addr); if (start_pfn < end_pfn) { if (start_pfn < nr_pages) released += xen_release_chunk( start_pfn, min(end_pfn, nr_pages)); identity += set_phys_range_identity( start_pfn, end_pfn); } start = end; } } printk(KERN_INFO "Released %lu pages of unused memory\n", released); printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); return released; }
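The chunk boundaries above come from the PFN_DOWN()/PFN_UP() rounding helpers, which is what keeps a partial page at the edge of a reserved region (the DMI-table case mentioned in the comment) reachable through the 1:1 mapping. For reference, the macros amount to the following; this sketch assumes the usual 4k page size and mirrors the standard Linux definitions.

/* Page-frame rounding helpers, shown only to make the start_pfn/end_pfn
 * arithmetic above easier to follow. */
#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)

#define DEMO_PFN_DOWN(x) ((x) >> DEMO_PAGE_SHIFT)                         /* round down */
#define DEMO_PFN_UP(x)   (((x) + DEMO_PAGE_SIZE - 1) >> DEMO_PAGE_SHIFT)  /* round up   */
#define DEMO_PFN_PHYS(x) ((unsigned long long)(x) << DEMO_PAGE_SHIFT)

/* Example: a reserved region starting at 0x9fc00 gives
 * DEMO_PFN_DOWN(0x9fc00) == 0x9f, so the partially used page at
 * 0x9f000 stays inside the 1:1 range set up above. */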
void free_pmd_page(unsigned long addr) { struct ptrpmd *newstruct = NULL; struct ptrpmd *temp_head = NULL; int i = 0; int counter = 0; newstruct = (struct ptrpmd *)kmalloc(sizeof(struct ptrpmd), GFP_KERNEL); newstruct -> content = addr; spin_lock(&pmd_cache_lock); newstruct -> next = pmd_head; pmd_head = newstruct; temp_head = pmd_head; /*free node */ if(pmd_used_counter) pmd_used_counter--; pmd_free_counter++; if(pmd_used_counter) { //if((pmd_free_counter/pmd_used_counter>=3) && ((pmd_used_counter + pmd_free_counter) >= 1800)) //if((pmd_used_counter/pmd_free_counter < 8) && ((pmd_used_counter + pmd_free_counter) >= 600)) //if((pmd_used_counter/pmd_free_counter < 1) && (pmd_used_counter >= 42)) //if((pmd_free_counter/pmd_used_counter >= 4) && (pmd_used_counter >= 80)) //if((pmd_free_counter/pmd_used_counter >= 6) && ((pgd_used_counter + pgd_free_counter) >= 230)) //if((pmd_used_counter/pmd_free_counter < 2) && ((pgd_used_counter + pgd_free_counter) >= 80)) if((pmd_free_counter/pmd_used_counter > 1) && ((pmd_used_counter + pmd_free_counter) >= 40)) //if((pmd_free_counter/pmd_used_counter >= 5) && ((pmd_used_counter + pmd_free_counter) >= 200)) { //counter = pmd_free_counter * 3 / 10; counter = 0; for(i=0;i<counter;i++) { pmd_head = pmd_head->next; } pmd_free_counter -= counter; } } spin_unlock(&pmd_cache_lock); if(counter != 0) { struct ptrpmd * newstructarray = NULL; struct ptrpmd * newstructarray_head = NULL; int rc = 1; newstructarray = (struct ptrpmd *)kmalloc(sizeof(struct ptrpmd) * counter, GFP_KERNEL); newstructarray_head = newstructarray; for (i=0;i<counter;i++) { newstruct = temp_head; temp_head = temp_head->next; newstructarray[i].content = pfn_to_mfn(PFN_DOWN(__pa(newstruct->content))); kfree(newstruct); } //hypercall newstructarray rc = HYPERVISOR_pmd_op(newstructarray, counter); //if (rc == 0) //printk("pmd cache free success\n"); //else //printk("pmd cache free error\n"); //free page to the buddy system newstructarray = newstructarray_head; for(i=0;i<counter;i++) { free_page(newstructarray[i].content); } //free newstructarray kfree(newstructarray); } return; }
static int map_data_for_request(struct vscsifrnt_info *info, struct scsi_cmnd *sc, struct vscsiif_request *ring_req, struct vscsifrnt_shadow *shadow) { grant_ref_t gref_head; struct page *page; int err, ref, ref_cnt = 0; int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE); unsigned int i, off, len, bytes; unsigned int data_len = scsi_bufflen(sc); unsigned int data_grants = 0, seg_grants = 0; struct scatterlist *sg; unsigned long mfn; struct scsiif_request_segment *seg; ring_req->nr_segments = 0; if (sc->sc_data_direction == DMA_NONE || !data_len) return 0; scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) data_grants += PFN_UP(sg->offset + sg->length); if (data_grants > VSCSIIF_SG_TABLESIZE) { if (data_grants > info->host->sg_tablesize) { shost_printk(KERN_ERR, info->host, KBUILD_MODNAME "Unable to map request_buffer for command!\n"); return -E2BIG; } seg_grants = vscsiif_grants_sg(data_grants); shadow->sg = kcalloc(data_grants, sizeof(struct scsiif_request_segment), GFP_ATOMIC); if (!shadow->sg) return -ENOMEM; } seg = shadow->sg ? : ring_req->seg; err = gnttab_alloc_grant_references(seg_grants + data_grants, &gref_head); if (err) { kfree(shadow->sg); shost_printk(KERN_ERR, info->host, KBUILD_MODNAME "gnttab_alloc_grant_references() error\n"); return -ENOMEM; } if (seg_grants) { page = virt_to_page(seg); off = (unsigned long)seg & ~PAGE_MASK; len = sizeof(struct scsiif_request_segment) * data_grants; while (len > 0) { bytes = min_t(unsigned int, len, PAGE_SIZE - off); ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); mfn = pfn_to_mfn(page_to_pfn(page)); gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, mfn, 1); shadow->gref[ref_cnt] = ref; ring_req->seg[ref_cnt].gref = ref; ring_req->seg[ref_cnt].offset = (uint16_t)off; ring_req->seg[ref_cnt].length = (uint16_t)bytes; page++; len -= bytes; off = 0; ref_cnt++; } BUG_ON(seg_grants < ref_cnt); seg_grants = ref_cnt; } scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) { page = sg_page(sg); off = sg->offset; len = sg->length; while (len > 0 && data_len > 0) { /* * sg sends a scatterlist that is larger than * the data_len it wants transferred for certain * IO sizes. */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); bytes = min(bytes, data_len); ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); mfn = pfn_to_mfn(page_to_pfn(page)); gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id, mfn, grant_ro); shadow->gref[ref_cnt] = ref; seg->gref = ref; seg->offset = (uint16_t)off; seg->length = (uint16_t)bytes; page++; seg++; len -= bytes; data_len -= bytes; off = 0; ref_cnt++; } }
static unsigned long __init xen_do_chunk(unsigned long start, unsigned long end, bool release) { struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; unsigned long len = 0; int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); unsigned long pfn; int ret; for (pfn = start; pfn < end; pfn++) { unsigned long frame; unsigned long mfn = pfn_to_mfn(pfn); if (release) { /* Make sure pfn exists to start with */ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) continue; frame = mfn; } else { if (!xlated_phys && mfn != INVALID_P2M_ENTRY) continue; frame = pfn; } set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, &reservation); WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", release ? "release" : "populate", pfn, ret); if (ret == 1) { if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { if (release) break; set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); break; } len++; } else break; } if (len) printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", release ? "Freeing" : "Populating", start, end, len, release ? "freed" : "added"); return len; } static unsigned long __init xen_release_chunk(unsigned long start, unsigned long end) { /* * Xen already ballooned out the E820 non RAM regions for us * and set them up properly in EPT. */ if (xen_feature(XENFEAT_auto_translated_physmap)) return end - start; return xen_do_chunk(start, end, true); } static unsigned long __init xen_populate_chunk( const struct e820entry *list, size_t map_size, unsigned long max_pfn, unsigned long *last_pfn, unsigned long credits_left) { const struct e820entry *entry; unsigned int i; unsigned long done = 0; unsigned long dest_pfn; for (i = 0, entry = list; i < map_size; i++, entry++) { unsigned long s_pfn; unsigned long e_pfn; unsigned long pfns; long capacity; if (credits_left <= 0) break; if (entry->type != E820_RAM) continue; e_pfn = PFN_DOWN(entry->addr + entry->size); /* We only care about E820 after the xen_start_info->nr_pages */ if (e_pfn <= max_pfn) continue; s_pfn = PFN_UP(entry->addr); /* If the E820 falls within the nr_pages, we want to start * at the nr_pages PFN. * If that would mean going past the E820 entry, skip it */ if (s_pfn <= max_pfn) { capacity = e_pfn - max_pfn; dest_pfn = max_pfn; } else { capacity = e_pfn - s_pfn; dest_pfn = s_pfn; } if (credits_left < capacity) capacity = credits_left; pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); done += pfns; *last_pfn = (dest_pfn + pfns); if (pfns < capacity) break; credits_left -= pfns; } return done; } static void __init xen_set_identity_and_release_chunk( unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long *released, unsigned long *identity) { unsigned long pfn; /* * If the PFNs are currently mapped, clear the mappings * (except for the ISA region which must be 1:1 mapped) to * release the refcounts (in Xen) on the original frames. */ /* * PVH E820 matches the hypervisor's P2M which means we need to * account for the proper values of *release and *identity. 
*/ for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) && pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { pte_t pte = __pte_ma(0); if (pfn < PFN_UP(ISA_END_ADDRESS)) pte = mfn_pte(pfn, PAGE_KERNEL_IO); (void)HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); } if (start_pfn < nr_pages) *released += xen_release_chunk( start_pfn, min(end_pfn, nr_pages)); *identity += set_phys_range_identity(start_pfn, end_pfn); }
/* * Top level routine to direct suspend/resume of a domain. */ void xen_suspend_domain(void) { extern void rtcsync(void); extern hrtime_t hres_last_tick; mfn_t start_info_mfn; ulong_t flags; pfn_t pfn; int i; /* * Check that we are happy to suspend on this hypervisor. */ if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) { cpr_err(CE_WARN, "Cannot suspend on this hypervisor " "version: v%lu.%lu%s, need at least version v3.0.4 or " "-xvm based hypervisor", XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver)); return; } /* * XXPV - Are we definitely OK to suspend by the time we've connected * the handler? */ cpr_err(CE_NOTE, "Domain suspending for save/migrate"); SUSPEND_DEBUG("xen_suspend_domain\n"); /* * suspend interrupts and devices * XXPV - we use suspend/resume for both save/restore domains (like sun * cpr) and for migration. Would be nice to know the difference if * possible. For save/restore where down time may be a long time, we * may want to do more of the things that cpr does. (i.e. notify user * processes, shrink memory footprint for faster restore, etc.) */ xen_suspend_devices(); SUSPEND_DEBUG("xenbus_suspend\n"); xenbus_suspend(); pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info); start_info_mfn = pfn_to_mfn(pfn); /* * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe * wrt xenbus being suspended here? */ mutex_enter(&cpu_lock); /* * Suspend must be done on vcpu 0, as no context for other CPUs is * saved. * * XXPV - add to taskq API ? */ thread_affinity_set(curthread, 0); kpreempt_disable(); SUSPEND_DEBUG("xen_start_migrate\n"); xen_start_migrate(); if (ncpus > 1) suspend_cpus(); /* * We can grab the ec_lock as it's a spinlock with a high SPL. Hence * any holder would have dropped it to get through suspend_cpus(). */ mutex_enter(&ec_lock); /* * From here on in, we can't take locks. */ SUSPEND_DEBUG("ec_suspend\n"); ec_suspend(); SUSPEND_DEBUG("gnttab_suspend\n"); gnttab_suspend(); flags = intr_clear(); xpv_time_suspend(); /* * Currently, the hypervisor incorrectly fails to bring back * powered-down VCPUs. Thus we need to record any powered-down VCPUs * to prevent any attempts to operate on them. But we have to do this * *after* the very first time we do ec_suspend(). */ for (i = 1; i < ncpus; i++) { if (cpu[i] == NULL) continue; if (cpu_get_state(cpu[i]) == P_POWEROFF) CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i); } /* * The dom0 save/migrate code doesn't automatically translate * these into PFNs, but expects them to be, so we do it here. * We don't use mfn_to_pfn() because so many OS services have * been disabled at this point. */ xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn]; xen_info->console.domU.mfn = mfn_to_pfn_mapping[xen_info->console.domU.mfn]; if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) { prom_printf("xen_suspend_domain(): " "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n"); (void) HYPERVISOR_shutdown(SHUTDOWN_crash); } if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info, 0, UVMF_INVLPG)) { prom_printf("xen_suspend_domain(): " "HYPERVISOR_update_va_mapping() failed\n"); (void) HYPERVISOR_shutdown(SHUTDOWN_crash); } SUSPEND_DEBUG("HYPERVISOR_suspend\n"); /* * At this point we suspend and sometime later resume. */ if (HYPERVISOR_suspend(start_info_mfn)) { prom_printf("xen_suspend_domain(): " "HYPERVISOR_suspend() failed\n"); (void) HYPERVISOR_shutdown(SHUTDOWN_crash); } /* * Point HYPERVISOR_shared_info to its new value. 
*/ if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info, xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE, UVMF_INVLPG)) (void) HYPERVISOR_shutdown(SHUTDOWN_crash); if (xen_info->nr_pages != mfn_count) { prom_printf("xen_suspend_domain(): number of pages" " changed, was 0x%lx, now 0x%lx\n", mfn_count, xen_info->nr_pages); (void) HYPERVISOR_shutdown(SHUTDOWN_crash); } xpv_time_resume(); cached_max_mfn = 0; SUSPEND_DEBUG("gnttab_resume\n"); gnttab_resume(); /* XXPV: add a note that this must be lockless. */ SUSPEND_DEBUG("ec_resume\n"); ec_resume(); intr_restore(flags); if (ncpus > 1) resume_cpus(); mutex_exit(&ec_lock); xen_end_migrate(); mutex_exit(&cpu_lock); /* * Now we can take locks again. */ /* * Force the tick value used for tv_nsec in hres_tick() to be up to * date. rtcsync() will reset the hrestime value appropriately. */ hres_last_tick = xpv_gethrtime(); /* * XXPV: we need to have resumed the CPUs since this takes locks, but * can remote CPUs see bad state? Presumably yes. Should probably nest * taking of todlock inside of cpu_lock, or vice versa, then provide an * unlocked version. Probably need to call clkinitf to reset cpu freq * and re-calibrate if we migrated to a different speed cpu. Also need * to make a (re)init_cpu_info call to update processor info structs * and device tree info. That remains to be written at the moment. */ rtcsync(); rebuild_mfn_list(); SUSPEND_DEBUG("xenbus_resume\n"); xenbus_resume(); SUSPEND_DEBUG("xenbus_resume_devices\n"); xen_resume_devices(); thread_affinity_clear(curthread); kpreempt_enable(); SUSPEND_DEBUG("finished xen_suspend_domain\n"); /* * We have restarted our suspended domain, update the hypervisor * details. NB: This must be done at the end of this function, * since we need the domain to be completely resumed before * these functions will work correctly. */ xen_set_version(XENVER_CURRENT_IDX); /* * We can check and report a warning, but we don't stop the * process. */ if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s " "but need at least version v3.0.4", XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver)); cmn_err(CE_NOTE, "domain restore/migrate completed"); }
static void xennet_alloc_rx_buffers(struct net_device *dev) { unsigned short id; struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; struct page *page; int i, batch_target, notify; RING_IDX req_prod = np->rx.req_prod_pvt; grant_ref_t ref; unsigned long pfn; void *vaddr; struct xen_netif_rx_request *req; if (unlikely(!netif_carrier_ok(dev))) return; /* * Allocate skbuffs greedily, even though we batch updates to the * receive ring. This creates a less bursty demand on the memory * allocator, so should reduce the chance of failed allocation requests * both for ourself and for other kernel subsystems. */ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto no_skb; page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); if (!page) { kfree_skb(skb); no_skb: /* Any skbuffs queued for refill? Force them out. */ if (i != 0) goto refill; /* Could not allocate any skbuffs. Try again later. */ mod_timer(&np->rx_refill_timer, jiffies + (HZ/10)); break; } skb_shinfo(skb)->frags[0].page = page; skb_shinfo(skb)->nr_frags = 1; __skb_queue_tail(&np->rx_batch, skb); } /* Is the batch large enough to be worthwhile? */ if (i < (np->rx_target/2)) { if (req_prod > np->rx.sring->req_prod) goto push; return; } /* Adjust our fill target if we risked running out of buffers. */ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && ((np->rx_target *= 2) > np->rx_max_target)) np->rx_target = np->rx_max_target; refill: for (i = 0; ; i++) { skb = __skb_dequeue(&np->rx_batch); if (skb == NULL) break; skb->dev = dev; id = xennet_rxidx(req_prod + i); BUG_ON(np->rx_skbs[id]); np->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&np->gref_rx_head); BUG_ON((signed short)ref < 0); np->grant_rx_ref[id] = ref; pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); vaddr = page_address(skb_shinfo(skb)->frags[0].page); req = RING_GET_REQUEST(&np->rx, req_prod + i); gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, pfn_to_mfn(pfn), 0); req->id = id; req->gref = ref; } wmb(); /* barrier so backend seens requests */ /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); if (notify) notify_remote_via_irq(np->netdev->irq); }
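xennet_rxidx(), used above to pick a slot for each request id, is not part of this excerpt; it simply masks the ring index into the fixed-size rx_skbs[]/grant_rx_ref[] arrays. The sketch below reproduces that idea from memory, so take the name and the ring-size constant as approximations rather than the driver's exact definitions.

/* Sketch of the rx slot helper assumed by the refill loop above: with a
 * power-of-two ring, a request index maps to an array slot by masking. */
#define DEMO_NET_RX_RING_SIZE 256    /* power of two; the real value is derived
                                      * from the shared-ring geometry */

static inline unsigned short demo_xennet_rxidx(unsigned int req_idx)
{
    return req_idx & (DEMO_NET_RX_RING_SIZE - 1);
}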
static unsigned long __init xen_release_chunk(phys_addr_t start_addr, phys_addr_t end_addr) { struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; unsigned long start, end; unsigned long len = 0; unsigned long pfn; int ret; start = PFN_UP(start_addr); end = PFN_DOWN(end_addr); if (end <= start) return 0; printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", start, end); for(pfn = start; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); /* Make sure pfn exists to start with */ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) continue; set_xen_guest_handle(reservation.extent_start, &mfn); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", start, end, ret); if (ret == 1) { __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } printk(KERN_CONT "%ld pages freed\n", len); return len; } static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, const struct e820map *e820) { phys_addr_t max_addr = PFN_PHYS(max_pfn); phys_addr_t last_end = ISA_END_ADDRESS; unsigned long released = 0; int i; /* Free any unused memory above the low 1Mbyte. */ for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { phys_addr_t end = e820->map[i].addr; end = min(max_addr, end); if (last_end < end) released += xen_release_chunk(last_end, end); last_end = max(last_end, e820->map[i].addr + e820->map[i].size); } if (last_end < max_addr) released += xen_release_chunk(last_end, max_addr); printk(KERN_INFO "released %ld pages of unused memory\n", released); return released; }
void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags) { struct xc_dom_image *dom; int rc; domid_t domid = DOMID_SELF; xen_pfn_t pfn; xc_interface *xc_handle; unsigned long i; void *seg; xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page); char features[] = ""; struct mmu_update *m2p_updates; unsigned long nr_m2p_updates; DEBUG("booting with cmdline %s\n", cmdline); xc_handle = xc_interface_open(0,0,0); dom = xc_dom_allocate(xc_handle, cmdline, features); dom->allocate = kexec_allocate; /* We are using guest owned memory, therefore no limits. */ xc_dom_kernel_max_size(dom, 0); xc_dom_ramdisk_max_size(dom, 0); dom->kernel_blob = kernel; dom->kernel_size = kernel_size; dom->ramdisk_blob = module; dom->ramdisk_size = module_size; dom->flags = flags; dom->console_evtchn = start_info.console.domU.evtchn; dom->xenstore_evtchn = start_info.store_evtchn; tpm_hash2pcr(dom, cmdline); if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) { grub_printf("xc_dom_boot_xen_init returned %d\n", rc); errnum = ERR_BOOT_FAILURE; goto out; } if ( (rc = xc_dom_parse_image(dom)) != 0 ) { grub_printf("xc_dom_parse_image returned %d\n", rc); errnum = ERR_BOOT_FAILURE; goto out; } #ifdef __i386__ if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) { grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type); errnum = ERR_EXEC_FORMAT; goto out; } #endif #ifdef __x86_64__ if (strcmp(dom->guest_type, "xen-3.0-x86_64")) { grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type); errnum = ERR_EXEC_FORMAT; goto out; } #endif /* equivalent of xc_dom_mem_init */ dom->arch_hooks = xc_dom_find_arch_hooks(xc_handle, dom->guest_type); dom->total_pages = start_info.nr_pages; /* equivalent of arch_setup_meminit */ /* setup initial p2m */ dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages); /* Start with our current P2M */ for (i = 0; i < dom->total_pages; i++) dom->p2m_host[i] = pfn_to_mfn(i); if ( (rc = xc_dom_build_image(dom)) != 0 ) { grub_printf("xc_dom_build_image returned %d\n", rc); errnum = ERR_BOOT_FAILURE; goto out; } /* copy hypercall page */ /* TODO: domctl instead, but requires privileges */ if (dom->parms.virt_hypercall != -1) { pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base); memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE); } /* Equivalent of xc_dom_boot_image */ dom->shared_info_mfn = PHYS_PFN(start_info.shared_info); if (!xc_dom_compat_check(dom)) { grub_printf("xc_dom_compat_check failed\n"); errnum = ERR_EXEC_FORMAT; goto out; } /* Move current console, xenstore and boot MFNs to the allocated place */ do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn); do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn); DEBUG("virt base at %llx\n", dom->parms.virt_base); DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn); _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn); DEBUG("_boot_target %lx\n", _boot_target); do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base), virt_to_mfn(&_boot_page)); /* Make sure the bootstrap page table does not RW-map any of our current * page table frames */ kexec_allocate(dom, dom->virt_pgtab_end); if ( (rc = xc_dom_update_guest_p2m(dom))) { grub_printf("xc_dom_update_guest_p2m returned %d\n", rc); errnum = ERR_BOOT_FAILURE; goto out; } if ( dom->arch_hooks->setup_pgtables ) if ( (rc = dom->arch_hooks->setup_pgtables(dom))) { grub_printf("setup_pgtables returned %d\n", rc); errnum = ERR_BOOT_FAILURE; goto out; } /* start info 
page */ #undef start_info if ( dom->arch_hooks->start_info ) dom->arch_hooks->start_info(dom); #define start_info (start_info_union.start_info) xc_dom_log_memory_footprint(dom); /* Unmap libxc's projection of the boot page table */ seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg); munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart); /* Unmap day0 pages to avoid having a r/w mapping of the future page table */ for (pfn = 0; pfn < allocated; pfn++) munmap((void*) pages[pfn], PAGE_SIZE); /* Pin the boot page table base */ if ( (rc = pin_table(dom->xch, #ifdef __i386__ MMUEXT_PIN_L3_TABLE, #endif #ifdef __x86_64__ MMUEXT_PIN_L4_TABLE, #endif xc_dom_p2m_host(dom, dom->pgtables_seg.pfn), dom->guest_domid)) != 0 ) { grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom, dom->pgtables_seg.pfn), rc); errnum = ERR_BOOT_FAILURE; goto out_remap; } /* We populate the Mini-OS page table here so that boot.S can just call * update_va_mapping to project itself there. */ need_pgt(_boot_target); DEBUG("day0 pages %lx\n", allocated); DEBUG("boot target page %lx\n", _boot_target); DEBUG("boot page %p\n", &_boot_page); DEBUG("boot page mfn %lx\n", boot_page_mfn); _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT; DEBUG("boot page entry %llx\n", _boot_page_entry); _boot_oldpdmfn = virt_to_mfn(start_info.pt_base); DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn); DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart); _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)]; DEBUG("boot pd mfn %lx\n", _boot_pdmfn); _boot_stack = _boot_target + PAGE_SIZE; DEBUG("boot stack %lx\n", _boot_stack); _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn); DEBUG("boot start info %lx\n", _boot_start_info); _boot_start = dom->parms.virt_entry; DEBUG("boot start %lx\n", _boot_start); /* Keep only useful entries */ for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++) if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) nr_m2p_updates++; m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates); for (i = pfn = 0; pfn < start_info.nr_pages; pfn++) if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) { m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE; m2p_updates[i].val = pfn; i++; } for (i = 0; i < blk_nb; i++) shutdown_blkfront(blk_dev[i]); if (net_dev) shutdown_netfront(net_dev); if (kbd_dev) shutdown_kbdfront(kbd_dev); stop_kernel(); /* Update M2P */ if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) { xprintk("Could not update M2P\n"); ASSERT(0); } xprintk("go!\n"); /* Jump to trampoline boot page */ _boot(); ASSERT(0); out_remap: for (pfn = 0; pfn < allocated; pfn++) do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT); out: xc_dom_release(dom); for (pfn = 0; pfn < allocated; pfn++) free_page((void*)pages[pfn]); free(pages); free(pages_mfns); pages = NULL; pages_mfns = NULL; allocated = 0; xc_interface_close(xc_handle ); }
/* * Fill in the remaining CPU context and initialize it. */ static int mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp) { uint_t vec, iopl; vgc->flags = VGCF_IN_KERNEL; /* * fpu_ctx we leave as zero; on first fault we'll store * sse_initial into it anyway. */ #if defined(__amd64) vgc->user_regs.cs = KCS_SEL | SEL_KPL; /* force to ring 3 */ #else vgc->user_regs.cs = KCS_SEL; #endif vgc->user_regs.ds = KDS_SEL; vgc->user_regs.es = KDS_SEL; vgc->user_regs.ss = KDS_SEL; vgc->kernel_ss = KDS_SEL; /* * Allow I/O privilege level for Dom0 kernel. */ if (DOMAIN_IS_INITDOMAIN(xen_info)) iopl = (PS_IOPL & 0x1000); /* ring 1 */ else iopl = 0; #if defined(__amd64) vgc->user_regs.fs = 0; vgc->user_regs.gs = 0; vgc->user_regs.rflags = F_OFF | iopl; #elif defined(__i386) vgc->user_regs.fs = KFS_SEL; vgc->user_regs.gs = KGS_SEL; vgc->user_regs.eflags = F_OFF | iopl; vgc->event_callback_cs = vgc->user_regs.cs; vgc->failsafe_callback_cs = vgc->user_regs.cs; #endif /* * Initialize the trap_info_t from the IDT */ #if !defined(__lint) ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0])); #endif for (vec = 0; vec < NIDT; vec++) { trap_info_t *ti = &vgc->trap_ctxt[vec]; if (xen_idt_to_trap_info(vec, &cp->cpu_m.mcpu_idt[vec], ti) == 0) { ti->cs = KCS_SEL; ti->vector = vec; } } /* * No LDT */ /* * (We assert in various places that the GDT is (a) aligned on a * page boundary and (b) one page long, so this really should fit..) */ #ifdef CRASH_XEN vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa)); #else vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa)); #endif vgc->gdt_ents = NGDT; vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0()); #if defined(__i386) if (mmu.pae_hat) vgc->ctrlreg[3] = xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn)); else #endif vgc->ctrlreg[3] = pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn)); vgc->ctrlreg[4] = getcr4(); vgc->event_callback_eip = (uintptr_t)xen_callback; vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback; vgc->flags |= VGCF_failsafe_disables_events; #if defined(__amd64) /* * XXPV should this be moved to init_cpu_syscall? */ vgc->syscall_callback_eip = (uintptr_t)sys_syscall; vgc->flags |= VGCF_syscall_disables_events; ASSERT(vgc->user_regs.gs == 0); vgc->gs_base_kernel = (uintptr_t)cp; #endif return (xen_vcpu_initialize(cp->cpu_id, vgc)); }
/* * Helper function to update the p2m and m2p tables and kernel mapping. */ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) { struct mmu_update update = { .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, .val = pfn }; /* Update p2m */ if (!set_phys_to_machine(pfn, mfn)) { WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n", pfn, mfn); BUG(); } /* Update m2p */ if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) { WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n", mfn, pfn); BUG(); } /* Update kernel mapping, but not for highmem. */ if (pfn >= PFN_UP(__pa(high_memory - 1))) return; if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), mfn_pte(mfn, PAGE_KERNEL), 0)) { WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n", mfn, pfn); BUG(); } } /* * This function updates the p2m and m2p tables with an identity map from * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the * original allocation at remap_pfn. The information needed for remapping is * saved in the memory itself to avoid the need for allocating buffers. The * complete remap information is contained in a list of MFNs each containing * up to REMAP_SIZE MFNs and the start target PFN for doing the remap. * This enables us to preserve the original mfn sequence while doing the * remapping at a time when the memory management is capable of allocating * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and * its callers. */ static void __init xen_do_set_identity_and_remap_chunk( unsigned long start_pfn, unsigned long size, unsigned long remap_pfn) { unsigned long buf = (unsigned long)&xen_remap_buf; unsigned long mfn_save, mfn; unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_end_pfn = start_pfn + size; unsigned long left = size; unsigned int i, chunk; WARN_ON(size == 0); BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); mfn_save = virt_to_mfn(buf); for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn; ident_pfn_iter < ident_end_pfn; ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) { chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE; /* Map first pfn to xen_remap_buf */ mfn = pfn_to_mfn(ident_pfn_iter); set_pte_mfn(buf, mfn, PAGE_KERNEL); /* Save mapping information in page */ xen_remap_buf.next_area_mfn = xen_remap_mfn; xen_remap_buf.target_pfn = remap_pfn_iter; xen_remap_buf.size = chunk; for (i = 0; i < chunk; i++) xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i); /* Put remap buf into list. */ xen_remap_mfn = mfn; /* Set identity map */ set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk); left -= chunk; } /* Restore old xen_remap_buf mapping */ set_pte_mfn(buf, mfn_save, PAGE_KERNEL); }
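The remap bookkeeping above stores everything it needs inside the first page of each chunk: the MFN of the previously queued record (the list is built LIFO via xen_remap_mfn), the first target PFN, and the number of MFNs being preserved. The structure itself is not shown in this excerpt; judging only by the fields referenced (next_area_mfn, target_pfn, size, mfns[]), it presumably looks roughly like the sketch below, with REMAP_SIZE chosen so one record fits in a page.

/* Approximate shape of the on-page remap record used above; the real
 * definition and the exact REMAP_SIZE arithmetic live elsewhere in the
 * setup code. */
#define DEMO_REMAP_SIZE ((4096 - 3 * sizeof(unsigned long)) / sizeof(unsigned long))

struct demo_xen_remap_buf {
    unsigned long next_area_mfn;           /* MFN of the previously saved record */
    unsigned long target_pfn;              /* first PFN this run remaps to */
    unsigned long size;                    /* number of valid entries in mfns[] */
    unsigned long mfns[DEMO_REMAP_SIZE];   /* original MFNs, in order */
};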
/* * balloon_free_pages() * free page_cnt pages, using any combination of mfns, pfns, and kva as long * as they refer to the same mapping. If an array of mfns is passed in, we * assume they were already cleared. Otherwise, we need to zero the pages * before giving them back to the hypervisor. kva space is not free'd up in * case the caller wants to re-use it. */ long balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns) { xen_memory_reservation_t memdec; mfn_t mfn; pfn_t pfn; uint_t i; long e; #if DEBUG /* make sure kva is page aligned and maps to first pfn */ if (kva != NULL) { ASSERT(((uintptr_t)kva & PAGEOFFSET) == 0); if (pfns != NULL) { ASSERT(hat_getpfnum(kas.a_hat, kva) == pfns[0]); } } #endif /* if we have a kva, we can clean all pages with just one bzero */ if ((kva != NULL) && balloon_zero_memory) { bzero(kva, (page_cnt * PAGESIZE)); } /* if we were given a kva and/or a pfn */ if ((kva != NULL) || (pfns != NULL)) { /* * All the current callers only pass 1 page when using kva or * pfns, and use mfns when passing multiple pages. If that * assumption is changed, the following code will need some * work. The following ASSERT() guarantees we're respecting * the io locking quota. */ ASSERT(page_cnt < bln_contig_list_quota); /* go through all the pages */ for (i = 0; i < page_cnt; i++) { /* get the next pfn */ if (pfns == NULL) { pfn = hat_getpfnum(kas.a_hat, (kva + (PAGESIZE * i))); } else { pfn = pfns[i]; } /* * if we didn't already zero this page, do it now. we * need to do this *before* we give back the MFN */ if ((kva == NULL) && (balloon_zero_memory)) { pfnzero(pfn, 0, PAGESIZE); } /* * unmap the pfn. We don't free up the kva vmem space * so the caller can re-use it. The page must be * unmapped before it is given back to the hypervisor. */ if (kva != NULL) { hat_unload(kas.a_hat, (kva + (PAGESIZE * i)), PAGESIZE, HAT_UNLOAD_UNMAP); } /* grab the mfn before the pfn is marked as invalid */ mfn = pfn_to_mfn(pfn); /* mark the pfn as invalid */ reassign_pfn(pfn, MFN_INVALID); /* * if we weren't given an array of MFNs, we need to * free them up one at a time. Otherwise, we'll wait * until later and do it in one hypercall */ if (mfns == NULL) { bzero(&memdec, sizeof (memdec)); /*LINTED: constant in conditional context*/ set_xen_guest_handle(memdec.extent_start, &mfn); memdec.domid = DOMID_SELF; memdec.nr_extents = 1; e = HYPERVISOR_memory_op( XENMEM_decrease_reservation, &memdec); if (e != 1) { cmn_err(CE_PANIC, "balloon: unable to " "give a page back to the " "hypervisor.\n"); } } } } /* * if we were passed in MFNs, we haven't free'd them up yet. We can * do it with one call. */ if (mfns != NULL) { bzero(&memdec, sizeof (memdec)); /*LINTED: constant in conditional context*/ set_xen_guest_handle(memdec.extent_start, mfns); memdec.domid = DOMID_SELF; memdec.nr_extents = page_cnt; e = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &memdec); if (e != page_cnt) { cmn_err(CE_PANIC, "balloon: unable to give pages back " "to the hypervisor.\n"); } } atomic_add_long((ulong_t *)&bln_stats.bln_hv_pages, page_cnt); return (page_cnt); }
/* * This function is called when we want to decrease the memory reservation * of our domain. Allocate the memory and make a hypervisor call to give * it back. */ static spgcnt_t balloon_dec_reservation(ulong_t debit) { int i, locked; long rv; ulong_t request; page_t *pp; bzero(mfn_frames, sizeof (mfn_frames)); bzero(pfn_frames, sizeof (pfn_frames)); if (debit > FRAME_ARRAY_SIZE) { debit = FRAME_ARRAY_SIZE; } request = debit; /* * Don't bother if there isn't a safe amount of kmem left. */ if (kmem_avail() < balloon_minkmem) { kmem_reap(); if (kmem_avail() < balloon_minkmem) return (0); } if (page_resv(request, KM_NOSLEEP) == 0) { return (0); } xen_block_migrate(); for (i = 0; i < debit; i++) { pp = page_get_high_mfn(new_high_mfn); new_high_mfn = 0; if (pp == NULL) { /* * Call kmem_reap(), then try once more, * but only if there is a safe amount of * kmem left. */ kmem_reap(); if (kmem_avail() < balloon_minkmem || (pp = page_get_high_mfn(0)) == NULL) { debit = i; break; } } ASSERT(PAGE_EXCL(pp)); ASSERT(!hat_page_is_mapped(pp)); balloon_page_add(pp); pfn_frames[i] = pp->p_pagenum; mfn_frames[i] = pfn_to_mfn(pp->p_pagenum); } if (debit == 0) { xen_allow_migrate(); page_unresv(request); return (0); } /* * We zero all the pages before we start reassigning them in order to * minimize the time spent holding the lock on the contig pfn list. */ if (balloon_zero_memory) { for (i = 0; i < debit; i++) { pfnzero(pfn_frames[i], 0, PAGESIZE); } } /* * Remove all mappings for the pfns from the system */ locked = balloon_lock_contig_pfnlist(debit); for (i = 0; i < debit; i++) { reassign_pfn(pfn_frames[i], MFN_INVALID); } if (locked) unlock_contig_pfnlist(); rv = balloon_free_pages(debit, mfn_frames, NULL, NULL); if (rv < 0) { cmn_err(CE_WARN, "Attempt to return pages to the hypervisor " "failed - up to %lu pages lost (error = %ld)", debit, rv); rv = 0; } else if (rv != debit) { panic("Unexpected return value (%ld) from decrease reservation " "hypervisor call", rv); } xen_allow_migrate(); if (debit != request) page_unresv(request - debit); return (rv); }
/* Add an MFN override for a particular page */ int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long pfn; unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } WARN_ON(PagePrivate(page)); SetPagePrivate(page); set_page_private(page, mfn); page->index = pfn_to_mfn(pfn); if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) return -ENOMEM; if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs = xen_mc_entry(sizeof(*kmap_op)); MULTI_grant_table_op(mcs.mc, GNTTABOP_map_grant_ref, kmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); } /* let's use dev_bus_addr to record the old mfn instead */ kmap_op->dev_bus_addr = page->index; page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other * pfn so that the following mfn_to_pfn(mfn) calls will return the * pfn from the m2p_override (the backend pfn) instead. * We need to do this because the pages shared by the frontend * (xen-blkfront) can be already locked (lock_page, called by * do_read_cache_page); when the userspace backend tries to use them * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so * do_blockdev_direct_IO is going to try to lock the same pages * again resulting in a deadlock. * As a side effect get_user_pages_fast might not be safe on the * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); if (ret == 0 && get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; }
static unsigned long __init xen_do_chunk(unsigned long start, unsigned long end, bool release) { struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; unsigned long len = 0; unsigned long pfn; int ret; for (pfn = start; pfn < end; pfn++) { unsigned long frame; unsigned long mfn = pfn_to_mfn(pfn); if (release) { /* Make sure pfn exists to start with */ if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) continue; frame = mfn; } else { if (mfn != INVALID_P2M_ENTRY) continue; frame = pfn; } set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, &reservation); WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", release ? "release" : "populate", pfn, ret); if (ret == 1) { if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { if (release) break; set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); break; } len++; } else break; } if (len) printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", release ? "Freeing" : "Populating", start, end, len, release ? "freed" : "added"); return len; } static unsigned long __init xen_release_chunk(unsigned long start, unsigned long end) { return xen_do_chunk(start, end, true); } static unsigned long __init xen_populate_chunk( const struct e820entry *list, size_t map_size, unsigned long max_pfn, unsigned long *last_pfn, unsigned long credits_left) { const struct e820entry *entry; unsigned int i; unsigned long done = 0; unsigned long dest_pfn; for (i = 0, entry = list; i < map_size; i++, entry++) { unsigned long s_pfn; unsigned long e_pfn; unsigned long pfns; long capacity; if (credits_left <= 0) break; if (entry->type != E820_RAM) continue; e_pfn = PFN_DOWN(entry->addr + entry->size); /* We only care about E820 after the xen_start_info->nr_pages */ if (e_pfn <= max_pfn) continue; s_pfn = PFN_UP(entry->addr); /* If the E820 falls within the nr_pages, we want to start * at the nr_pages PFN. * If that would mean going past the E820 entry, skip it */ if (s_pfn <= max_pfn) { capacity = e_pfn - max_pfn; dest_pfn = max_pfn; } else { capacity = e_pfn - s_pfn; dest_pfn = s_pfn; } if (credits_left < capacity) capacity = credits_left; pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); done += pfns; *last_pfn = (dest_pfn + pfns); if (pfns < capacity) break; credits_left -= pfns; } return done; } static void __init xen_set_identity_and_release_chunk( unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long *released, unsigned long *identity) { unsigned long pfn; /* * If the PFNs are currently mapped, the VA mapping also needs * to be updated to be 1:1. */ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) (void)HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), mfn_pte(pfn, PAGE_KERNEL_IO), 0); if (start_pfn < nr_pages) *released += xen_release_chunk( start_pfn, min(end_pfn, nr_pages)); *identity += set_phys_range_identity(start_pfn, end_pfn); }
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, struct xen_netif_tx_request *tx) { struct netfront_info *np = netdev_priv(dev); char *data = skb->data; unsigned long mfn; RING_IDX prod = np->tx.req_prod_pvt; int frags = skb_shinfo(skb)->nr_frags; unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); unsigned int id; grant_ref_t ref; int i; /* While the header overlaps a page boundary (including being larger than a page), split it it into page-sized chunks. */ while (len > PAGE_SIZE - offset) { tx->size = PAGE_SIZE - offset; tx->flags |= XEN_NETTXF_more_data; len -= tx->size; data += tx->size; offset = 0; id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; tx->flags = 0; } /* Grant backend access to each skb fragment page. */ for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; struct page *page = skb_frag_page(frag); len = skb_frag_size(frag); offset = frag->page_offset; /* Data must not cross a page boundary. */ BUG_ON(len + offset > PAGE_SIZE<<compound_order(page)); /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; while (len > 0) { unsigned long bytes; BUG_ON(offset >= PAGE_SIZE); bytes = PAGE_SIZE - offset; if (bytes > len) bytes = len; tx->flags |= XEN_NETTXF_more_data; id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); mfn = pfn_to_mfn(page_to_pfn(page)); gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = bytes; tx->flags = 0; offset += bytes; len -= bytes; /* Next frame */ if (offset == PAGE_SIZE && len) { BUG_ON(!PageCompound(page)); page++; offset = 0; } } } np->tx.req_prod_pvt = prod; }
/* * Must not be called with IRQs off. This should only be used on the * slow path. * * Copy a foreign granted page to local memory. */ int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep) { struct gnttab_unmap_and_replace unmap; mmu_update_t mmu; struct page *page; struct page *new_page; void *new_addr; void *addr; paddr_t pfn; maddr_t mfn; maddr_t new_mfn; int err; page = *pagep; if (!get_page_unless_zero(page)) return -ENOENT; err = -ENOMEM; new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); if (!new_page) goto out; new_addr = page_address(new_page); addr = page_address(page); copy_page(new_addr, addr); pfn = page_to_pfn(page); mfn = pfn_to_mfn(pfn); new_mfn = virt_to_mfn(new_addr); write_seqlock_bh(&gnttab_dma_lock); /* Make seq visible before checking page_mapped. */ smp_mb(); /* Has the page been DMA-mapped? */ if (unlikely(page_mapped(page))) { write_sequnlock_bh(&gnttab_dma_lock); put_page(new_page); err = -EBUSY; goto out; } if (!xen_feature(XENFEAT_auto_translated_physmap)) set_phys_to_machine(pfn, new_mfn); gnttab_set_replace_op(&unmap, (unsigned long)addr, (unsigned long)new_addr, ref); err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace, &unmap, 1); BUG_ON(err); BUG_ON(unmap.status != GNTST_okay); write_sequnlock_bh(&gnttab_dma_lock); if (!xen_feature(XENFEAT_auto_translated_physmap)) { set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY); mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu.val = pfn; err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF); BUG_ON(err); } new_page->mapping = page->mapping; new_page->index = page->index; set_bit(PG_foreign, &new_page->flags); if (PageReserved(page)) SetPageReserved(new_page); *pagep = new_page; SetPageForeign(page, gnttab_page_free); page->mapping = NULL; out: put_page(page); return err; }
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, struct xen_netif_tx_request *tx) { struct netfront_info *np = netdev_priv(dev); char *data = skb->data; unsigned long mfn; RING_IDX prod = np->tx.req_prod_pvt; int frags = skb_shinfo(skb)->nr_frags; unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); unsigned int id; grant_ref_t ref; int i; /* While the header overlaps a page boundary (including being larger than a page), split it it into page-sized chunks. */ while (len > PAGE_SIZE - offset) { tx->size = PAGE_SIZE - offset; tx->flags |= NETTXF_more_data; len -= tx->size; data += tx->size; offset = 0; id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; tx->flags = 0; } /* Grant backend access to each skb fragment page. */ for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; tx->flags |= NETTXF_more_data; id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); mfn = pfn_to_mfn(page_to_pfn(frag->page)); gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = frag->page_offset; tx->size = frag->size; tx->flags = 0; } np->tx.req_prod_pvt = prod; }
static int increase_reservation(unsigned long nr_pages)
{
    unsigned long pfn, i, flags;
    struct page *page;
    long rc;
    struct xen_memory_reservation reservation = {
        .address_bits = 0,
        .extent_order = 0,
        .domid        = DOMID_SELF
    };

    if (nr_pages > ARRAY_SIZE(frame_list))
        nr_pages = ARRAY_SIZE(frame_list);

    balloon_lock(flags);

    page = balloon_first_page();
    for (i = 0; i < nr_pages; i++) {
        BUG_ON(page == NULL);
        frame_list[i] = page_to_pfn(page);
        page = balloon_next_page(page);
    }

    set_xen_guest_handle(reservation.extent_start, frame_list);
    reservation.nr_extents = nr_pages;
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
    if (rc < nr_pages) {
        int ret;
        /* We hit the Xen hard limit: reprobe. */
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = rc;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                   &reservation);
        BUG_ON(ret != rc);
        hard_limit = current_pages + rc - driver_pages;
        goto out;
    }

    for (i = 0; i < nr_pages; i++) {
        page = balloon_retrieve();
        BUG_ON(page == NULL);

        pfn = page_to_pfn(page);
        BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
               phys_to_machine_mapping_valid(pfn));

        /* Update P->M and M->P tables. */
        set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN
        xen_machphys_update(frame_list[i], pfn);

        /* Link back into the page tables if not highmem. */
        if (pfn < max_low_pfn) {
            int ret;
            ret = HYPERVISOR_update_va_mapping(
                (unsigned long)__va(pfn << PAGE_SHIFT),
                pfn_pte_ma(frame_list[i], PAGE_KERNEL),
                0);
            BUG_ON(ret);
        }
#endif

        /* Relinquish the page back to the allocator. */
        ClearPageReserved(page);
        set_page_count(page, 1);
        __free_page(page);
    }

    current_pages += nr_pages;
    totalram_pages = current_pages;

out:
    balloon_unlock(flags);

    return 0;
}

static int decrease_reservation(unsigned long nr_pages)
{
    unsigned long pfn, i, flags;
    struct page *page;
    void *v;
    int need_sleep = 0;
    int ret;
    struct xen_memory_reservation reservation = {
        .address_bits = 0,
        .extent_order = 0,
        .domid        = DOMID_SELF
    };

    if (nr_pages > ARRAY_SIZE(frame_list))
        nr_pages = ARRAY_SIZE(frame_list);

    for (i = 0; i < nr_pages; i++) {
        if ((page = alloc_page(GFP_BALLOON)) == NULL) {
            nr_pages = i;
            need_sleep = 1;
            break;
        }

        pfn = page_to_pfn(page);
        frame_list[i] = pfn_to_mfn(pfn);

        if (!PageHighMem(page)) {
            v = phys_to_virt(pfn << PAGE_SHIFT);
            scrub_pages(v, 1);
#ifdef CONFIG_XEN
            ret = HYPERVISOR_update_va_mapping(
                (unsigned long)v, __pte_ma(0), 0);
            BUG_ON(ret);
#endif
        }
#ifdef CONFIG_XEN_SCRUB_PAGES
        else {
            v = kmap(page);
            scrub_pages(v, 1);
            kunmap(page);
        }
#endif
    }

#ifdef CONFIG_XEN
    /* Ensure that ballooned highmem pages don't have kmaps. */
    kmap_flush_unused();
    flush_tlb_all();
#endif

    balloon_lock(flags);

    /* No more mappings: invalidate P2M and add to balloon. */
    for (i = 0; i < nr_pages; i++) {
        pfn = mfn_to_pfn(frame_list[i]);
        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        balloon_append(pfn_to_page(pfn));
    }

    set_xen_guest_handle(reservation.extent_start, frame_list);
    reservation.nr_extents = nr_pages;
    ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
    BUG_ON(ret != nr_pages);

    current_pages -= nr_pages;
    totalram_pages = current_pages;

    balloon_unlock(flags);

    return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
    int need_sleep = 0;
    long credit;

    down(&balloon_mutex);

    do {
        credit = current_target() - current_pages;
        if (credit > 0)
            need_sleep = (increase_reservation(credit) != 0);
        if (credit < 0)
            need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
        if (need_resched())
            schedule();
#endif
    } while ((credit != 0) && !need_sleep);

    /* Schedule more work if there is some still to be done. */
    if (current_target() != current_pages)
        mod_timer(&balloon_timer, jiffies + HZ);

    up(&balloon_mutex);
}
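The worker above drives everything from a single signed credit value: a positive credit means the domain is below its target and pages are reclaimed from Xen, a negative credit means pages are given back. The fragment below restates only that decision rule, using the same symbols as balloon_process(); it is a sketch of the control flow and not a replacement for the worker (it ignores the need_sleep handling, the mutex, and the retry timer).

/* Sketch of the balloon worker's decision rule, using the symbols from
 * balloon_process() above; return values are deliberately ignored here. */
static void balloon_step(void)
{
    long credit = current_target() - current_pages;

    if (credit > 0)
        increase_reservation(credit);   /* grow: reclaim pages from Xen */
    else if (credit < 0)
        decrease_reservation(-credit);  /* shrink: give pages back to Xen */
}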
/*
 * Make pt_pfn a new 'level' page table frame and hook it into the page
 * table at offset in previous level MFN (prev_l_mfn).  pt_pfn is a guest
 * PFN.
 */
static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
                         unsigned long offset, unsigned long level)
{
    pgentry_t *tab = (pgentry_t *)start_info.pt_base;
    unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
    pgentry_t prot_e, prot_t;
    mmu_update_t mmu_updates[1];
    int rc;

    prot_e = prot_t = 0;
    DEBUG("Allocating new L%lu pt frame for pfn=%lx, "
          "prev_l_mfn=%lx, offset=%lx",
          level, *pt_pfn, prev_l_mfn, offset);

    /*
     * We need to clear the page, otherwise we might fail to map it as a
     * page table page.
     */
    memset((void *)pt_page, 0, PAGE_SIZE);

    switch ( level )
    {
    case L1_FRAME:
        prot_e = L1_PROT;
        prot_t = L2_PROT;
        break;
    case L2_FRAME:
        prot_e = L2_PROT;
        prot_t = L3_PROT;
        break;
#if defined(__x86_64__)
    case L3_FRAME:
        prot_e = L3_PROT;
        prot_t = L4_PROT;
        break;
#endif
    default:
        printk("new_pt_frame() called with invalid level number %lu\n",
               level);
        do_exit();
        break;
    }

    /* Make PFN a page table page */
#if defined(__x86_64__)
    tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
#endif
    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);

    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
        sizeof(pgentry_t) * l1_table_offset(pt_page);
    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
        (prot_e & ~_PAGE_RW);

    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
    {
        printk("ERROR: PTE for new page table page could not be updated\n");
        printk("       mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    /* Hook the new page table page into the hierarchy */
    mmu_updates[0].ptr =
        ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;

    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
    {
        printk("ERROR: mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    *pt_pfn += 1;
}
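Both hypercalls above use the same encoding for an mmu_update_t entry: ptr is the machine address of the page-table slot to write (the containing table's MFN shifted by PAGE_SHIFT, plus the byte offset of the entry) and val is the new PTE value (the target MFN shifted by PAGE_SHIFT, OR'ed with protection bits). A small helper capturing that convention might look like the sketch below; the helper name is hypothetical.

/* Hypothetical helper: build one mmu_update_t entry the way new_pt_frame()
 * does, from the containing table's MFN, the entry index, and the MFN and
 * protection bits of the new entry. */
static void fill_mmu_update(mmu_update_t *u, unsigned long table_mfn,
                            unsigned long index, unsigned long entry_mfn,
                            pgentry_t prot)
{
    u->ptr = ((pgentry_t)table_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * index;
    u->val = ((pgentry_t)entry_mfn << PAGE_SHIFT) | prot;
}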
void free_pte_page(struct page *pte)
{
    struct ptrpte_t *newstruct = NULL;
    struct ptrpte_t *temp_head = NULL;
    int i = 0;
    int counter = 0;

    /* Wrap the freed PTE page in a list node and push it onto the cache. */
    newstruct = (struct ptrpte_t *)kmalloc(sizeof(struct ptrpte_t), GFP_KERNEL);
    newstruct->content = pte;

    spin_lock(&pte_cache_lock);
    newstruct->next = pte_head;
    pte_head = newstruct;
    temp_head = pte_head;

    /* Account for the node moving from the used to the free pool. */
    if (pte_used_counter)
        pte_used_counter--;
    pte_free_counter++;

    /*
     * If the free pool has grown to more than four times the used pool and
     * the cache holds at least 320 entries overall, trim 30% of the free
     * nodes.  The trimmed nodes stay reachable through temp_head; pte_head
     * is simply advanced past them while the lock is held.
     */
    if (pte_used_counter) {
        if ((pte_free_counter / pte_used_counter > 4) &&
            ((pte_used_counter + pte_free_counter) >= 320)) {
            printk("pte free counter is %ld\n", pte_free_counter);
            printk("pte used counter is %ld\n", pte_used_counter);

            counter = pte_free_counter * 3 / 10;
            for (i = 0; i < counter; i++)
                pte_head = pte_head->next;
            pte_free_counter -= counter;
        }
    }
    spin_unlock(&pte_cache_lock);

    if (counter != 0) {
        struct ptrpte *newstructarray = NULL;
        struct ptrpte *newstructarray_head = NULL;
        int rc = 1;

        /* Collect the MFNs of the trimmed pages and free the list nodes. */
        newstructarray = (struct ptrpte *)kmalloc(sizeof(struct ptrpte) * counter,
                                                  GFP_KERNEL);
        newstructarray_head = newstructarray;
        for (i = 0; i < counter; i++) {
            newstruct = temp_head;
            temp_head = temp_head->next;
            newstructarray[i].content =
                pfn_to_mfn(page_to_pfn(newstruct->content));
            kfree(newstruct);
        }

        /* Tell the hypervisor these frames are no longer cached PTE pages. */
        rc = HYPERVISOR_pte_op(newstructarray, counter);

        /* Return the trimmed pages to the buddy allocator. */
        newstructarray = newstructarray_head;
        for (i = 0; i < counter; i++)
            __free_page(pfn_to_page(mfn_to_pfn(newstructarray[i].content)));

        kfree(newstructarray);
    }

    return;
}
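The excerpt does not show the definitions of struct ptrpte_t (the in-kernel list node) or struct ptrpte (the array element passed to HYPERVISOR_pte_op). A minimal layout consistent with how the fields are used above would be the following sketch; the real definitions may well carry additional fields.

/* Assumed layouts, inferred only from the usage in free_pte_page() above. */
struct ptrpte_t {
    struct page *content;       /* cached PTE page */
    struct ptrpte_t *next;      /* singly linked free list */
};

struct ptrpte {
    unsigned long content;      /* MFN handed to HYPERVISOR_pte_op() */
};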