Example #1
static void xen_load_gdt(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long frames[pages];
	int f;

	/*
	 * A GDT can be up to 64k in size, which corresponds to 8192
	 * 8-byte entries, or 16 4k pages.
	 */

	BUG_ON(size > 65536);
	BUG_ON(va & ~PAGE_MASK);

	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
		int level;
		pte_t *ptep;
		unsigned long pfn, mfn;
		void *virt;

		/*
		 * The GDT is per-cpu and is in the percpu data area.
		 * That can be virtually mapped, so we need to do a
		 * page-walk to get the underlying MFN for the
		 * hypercall.  The page can also be in the kernel's
		 * linear range, so we need to make that mapping RO too.
		 */
		ptep = lookup_address(va, &level);
		BUG_ON(ptep == NULL);

		pfn = pte_pfn(*ptep);
		mfn = pfn_to_mfn(pfn);
		virt = __va(PFN_PHYS(pfn));

		frames[f] = mfn;

		make_lowmem_page_readonly((void *)va);
		make_lowmem_page_readonly(virt);
	}

	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
		BUG();
}
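A note on the helper every example on this page calls: for Linux PV guests, pfn_to_mfn() is essentially a lookup in the guest's phys-to-machine (p2m) table. The sketch below is simplified from arch/x86/include/asm/xen/page.h; exact details (INVALID_P2M_ENTRY and identity-frame handling) vary by kernel version.

static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
	/* Auto-translated guests never see real machine frames. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return pfn;

	/* Look up the p2m table and strip the foreign-frame marker bit. */
	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
}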
Example #3
void clear_highpage(struct page *page)
{
	void *kaddr;

	if (likely(xen_feature(XENFEAT_highmem_assist))
	    && PageHighMem(page)) {
		struct mmuext_op meo;

		meo.cmd = MMUEXT_CLEAR_PAGE;
		meo.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
		if (HYPERVISOR_mmuext_op(&meo, 1, NULL, DOMID_SELF) == 0)
			return;
	}

	kaddr = kmap_atomic(page, KM_USER0);
	clear_page(kaddr);
	kunmap_atomic(kaddr, KM_USER0);
}
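The same try-the-hypercall-then-fall-back pattern extends to page copies. Below is a hedged sketch modeled on the companion copy_highpage() from the same out-of-tree Xen patches, assuming MMUEXT_COPY_PAGE (destination MFN in arg1.mfn, source MFN in arg2.src_mfn):

void copy_highpage(struct page *to, struct page *from)
{
	void *vfrom, *vto;

	if (likely(xen_feature(XENFEAT_highmem_assist))
	    && (PageHighMem(from) || PageHighMem(to))) {
		struct mmuext_op meo;

		meo.cmd = MMUEXT_COPY_PAGE;
		meo.arg1.mfn = pfn_to_mfn(page_to_pfn(to));
		meo.arg2.src_mfn = pfn_to_mfn(page_to_pfn(from));
		if (HYPERVISOR_mmuext_op(&meo, 1, NULL, DOMID_SELF) == 0)
			return;
	}

	vfrom = kmap_atomic(from, KM_USER0);
	vto = kmap_atomic(to, KM_USER1);
	copy_page(vto, vfrom);
	kunmap_atomic(vfrom, KM_USER0);
	kunmap_atomic(vto, KM_USER1);
}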
Example #4
File: p2m.c Project: DenisLug/mptcp
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
			    struct gnttab_map_grant_ref *kmap_ops,
			    struct page **pages, unsigned int count)
{
	int i, ret = 0;
	pte_t *pte;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (kmap_ops) {
		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
						kmap_ops, count);
		if (ret)
			goto out;
	}

	for (i = 0; i < count; i++) {
		unsigned long mfn, pfn;

		/* Do not add to override if the map failed. */
		if (map_ops[i].status)
			continue;

		if (map_ops[i].flags & GNTMAP_contains_pte) {
			pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
				(map_ops[i].host_addr & ~PAGE_MASK));
			mfn = pte_mfn(*pte);
		} else {
			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
		}
		pfn = page_to_pfn(pages[i]);

		WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");

		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
			ret = -ENOMEM;
			goto out;
		}
	}

out:
	return ret;
}
Example #5
/* Release a pagetable's pages back as normal RW */
static void xen_pgd_unpin(pgd_t *pgd)
{
	struct mmuext_op *op;
	struct multicall_space mcs;

	xen_mc_batch();

	mcs = __xen_mc_entry(sizeof(*op));

	op = mcs.args;
	op->cmd = MMUEXT_UNPIN_TABLE;
	op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));

	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

	pgd_walk(pgd, unpin_page, TASK_SIZE);

	xen_mc_issue(0);
}
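For contrast, the pinning side batches its hypercall the same way but must make the tables read-only before Xen will pin them. A simplified sketch mirroring the unpin above (the real xen_pgd_pin() of this era also flushes kmaps on 32-bit; pin_page is assumed to be the read-only-making counterpart of unpin_page):

static void xen_pgd_pin(pgd_t *pgd)
{
	struct mmuext_op *op;
	struct multicall_space mcs;

	xen_mc_batch();

	/* Make every page-table page read-only first. */
	pgd_walk(pgd, pin_page, TASK_SIZE);

	mcs = __xen_mc_entry(sizeof(*op));
	op = mcs.args;
	op->cmd = MMUEXT_PIN_L3_TABLE;	/* MMUEXT_PIN_L4_TABLE on 64-bit */
	op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));

	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

	xen_mc_issue(0);
}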
Example #6
int kexec_allocate(struct xc_dom_image *dom, xen_vaddr_t up_to)
{
    unsigned long new_allocated = (up_to - dom->parms.virt_base) / PAGE_SIZE;
    unsigned long i;

    pages = realloc(pages, new_allocated * sizeof(*pages));
    pages_mfns = realloc(pages_mfns, new_allocated * sizeof(*pages_mfns));
    pages_moved2pfns = realloc(pages_moved2pfns, new_allocated * sizeof(*pages_moved2pfns));
    for (i = allocated; i < new_allocated; i++) {
        /* Exchange old page of PFN i with a newly allocated page.  */
        xen_pfn_t old_mfn = dom->p2m_host[i];
        xen_pfn_t new_pfn;
        xen_pfn_t new_mfn;

        pages[i] = alloc_page();
        memset((void*) pages[i], 0, PAGE_SIZE);
        new_pfn = PHYS_PFN(to_phys(pages[i]));
        pages_mfns[i] = new_mfn = pfn_to_mfn(new_pfn);

	/*
	 * If the PFN of the newly allocated page (new_pfn) is less than the
	 * currently requested PFN (i), look for the relevant PFN/MFN pair. In this
	 * situation dom->p2m_host[new_pfn] no longer contains proper MFN
	 * because original page with new_pfn was moved earlier
	 * to different location.
	 */
	for (; new_pfn < i; new_pfn = pages_moved2pfns[new_pfn]);

	/* Store destination PFN of currently requested page. */
	pages_moved2pfns[i] = new_pfn;

        /* Put old page at new PFN */
        dom->p2m_host[new_pfn] = old_mfn;

        /* Put new page at PFN i */
        dom->p2m_host[i] = new_mfn;
    }

    allocated = new_allocated;

    return 0;
}
Example #7
int gnttable_init(void)
{
	int				dom0_id = 0;
	unsigned long	vaddr;
	unsigned long	mfn;
	
	vaddr = alloc_pages(0);
	mfn   = pfn_to_mfn(virt_to_pfn(vaddr));

//	printf("[gnttab_test_dom1] set grant table entry %d\n", GNTTAB_REF_NUM);
//	printf("[gnttab_test_dom1] vaddr = 0x%lx, mfn = 0x%lx\n", vaddr, mfn);
	gnttab_grant_foreign_access_ref(GNTTAB_REF_NUM, dom0_id, mfn, 1);

	shared_ring = (shared_ring_t *) vaddr;
	
	shared_ring->start = 0;
	shared_ring->end = 0;
	
	return 0;
}
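The virt_to_pfn()/virt_to_mfn() helpers used here are thin wrappers. In the Xen headers for Linux and Mini-OS they amount to the following, assuming a directly mapped virtual address:

#define virt_to_pfn(v)	(PFN_DOWN(__pa(v)))		/* virtual -> pseudo-physical frame */
#define virt_to_mfn(v)	(pfn_to_mfn(virt_to_pfn(v)))	/* pseudo-physical -> machine frame */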
Example #8
void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long start_address, end_address;
    unsigned long pfn_to_map, pt_pfn = *start_pfn;
    static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
    unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
    unsigned long offset;
    int count = 0;

    pfn_to_map = (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;

    if (*max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START))
    {
        printk("WARNING: Mini-OS trying to use Xen virtual space. "
               "Truncating memory from %dMB to ",
               ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
        printk("%dMB\n",
               ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20);
    }
Example #9
File: setup.c Project: mbgg/linux
static void __init xen_add_extra_mem(u64 start, u64 size)
{
	unsigned long pfn;
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
		if (xen_extra_mem[i].size == 0) {
			xen_extra_mem[i].start = start;
			xen_extra_mem[i].size  = size;
			break;
		}
		/* Append to existing region. */
		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
			xen_extra_mem[i].size += size;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

	memblock_reserve(start, size);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	xen_max_p2m_pfn = PFN_DOWN(start + size);
	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
			continue;
		WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
			pfn, mfn);

		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
}
Example #10
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	for(pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
		if (ret == 1) {
			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_INFO "Freeing  %lx-%lx pfn range: %lu pages freed\n",
	       start, end, len);

	return len;
}

static unsigned long __init xen_set_identity_and_release(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
	phys_addr_t start = 0;
	unsigned long released = 0;
	unsigned long identity = 0;
	const struct e820entry *entry;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * release the pages (if available) in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;

		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn) {
				if (start_pfn < nr_pages)
					released += xen_release_chunk(
						start_pfn, min(end_pfn, nr_pages));

				identity += set_phys_range_identity(
					start_pfn, end_pfn);
			}
			start = end;
		}
	}

	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);

	return released;
}
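The rounding that the loop above depends on comes from the standard helpers in include/linux/pfn.h: PFN_DOWN() truncates an address to its frame number and PFN_UP() rounds up, which is what keeps partial pages at region boundaries covered by the 1:1 map.

#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)	((phys_addr_t)(x) << PAGE_SHIFT)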
Example #11
void free_pmd_page(unsigned long addr)
{
    struct ptrpmd *newstruct = NULL;
    struct ptrpmd *temp_head = NULL;
    int i = 0;
    int counter = 0;
    
    newstruct = (struct ptrpmd *)kmalloc(sizeof(struct ptrpmd), GFP_KERNEL);
    newstruct -> content = addr;

    spin_lock(&pmd_cache_lock);
    newstruct -> next = pmd_head;
    pmd_head = newstruct;
    temp_head = pmd_head;
    
    /*free node */
    if(pmd_used_counter)
    	pmd_used_counter--;
    pmd_free_counter++;
   
    if(pmd_used_counter) 
    {
	//if((pmd_free_counter/pmd_used_counter>=3) && ((pmd_used_counter + pmd_free_counter) >= 1800))
    	//if((pmd_used_counter/pmd_free_counter < 8) && ((pmd_used_counter + pmd_free_counter) >= 600))
    	//if((pmd_used_counter/pmd_free_counter < 1) && (pmd_used_counter >= 42))
    	//if((pmd_free_counter/pmd_used_counter >= 4) && (pmd_used_counter >= 80))
    	//if((pmd_free_counter/pmd_used_counter >= 6) && ((pgd_used_counter + pgd_free_counter) >= 230))
    	//if((pmd_used_counter/pmd_free_counter < 2) && ((pgd_used_counter + pgd_free_counter) >= 80))
    	if((pmd_free_counter/pmd_used_counter > 1) && ((pmd_used_counter + pmd_free_counter) >= 40))
    	//if((pmd_free_counter/pmd_used_counter >= 5) && ((pmd_used_counter + pmd_free_counter) >= 200))
    	{
        	//counter = pmd_free_counter * 3 / 10;
        	counter = 0;
       		for(i=0;i<counter;i++)
		{
	    		pmd_head = pmd_head->next;
		}
        	pmd_free_counter -= counter;
    	}
    }
    spin_unlock(&pmd_cache_lock);

    if(counter != 0)
    {
    	struct ptrpmd * newstructarray = NULL;
    	struct ptrpmd * newstructarray_head = NULL;
    	int rc = 1;
    	newstructarray = (struct ptrpmd *)kmalloc(sizeof(struct ptrpmd) * counter, GFP_KERNEL);
    	newstructarray_head = newstructarray;
        for (i=0;i<counter;i++)
        {
	    newstruct = temp_head;
	    temp_head = temp_head->next;
	    newstructarray[i].content = pfn_to_mfn(PFN_DOWN(__pa(newstruct->content)));
	    kfree(newstruct);
        }
	//hypercall newstructarray
	rc = HYPERVISOR_pmd_op(newstructarray, counter);
        //if (rc == 0)
 	    //printk("pmd cache free success\n");
	//else 
 	    //printk("pmd cache free error\n");
	    
	//free page to the buddy system
        newstructarray = newstructarray_head;
	for(i=0;i<counter;i++)
	{
	    free_page(newstructarray[i].content);
	}

	//free newstructarray
	kfree(newstructarray);
    }
    
    return;
}
Example #12
static int map_data_for_request(struct vscsifrnt_info *info,
				struct scsi_cmnd *sc,
				struct vscsiif_request *ring_req,
				struct vscsifrnt_shadow *shadow)
{
	grant_ref_t gref_head;
	struct page *page;
	int err, ref, ref_cnt = 0;
	int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE);
	unsigned int i, off, len, bytes;
	unsigned int data_len = scsi_bufflen(sc);
	unsigned int data_grants = 0, seg_grants = 0;
	struct scatterlist *sg;
	unsigned long mfn;
	struct scsiif_request_segment *seg;

	ring_req->nr_segments = 0;
	if (sc->sc_data_direction == DMA_NONE || !data_len)
		return 0;

	scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
		data_grants += PFN_UP(sg->offset + sg->length);

	if (data_grants > VSCSIIF_SG_TABLESIZE) {
		if (data_grants > info->host->sg_tablesize) {
			shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
			     "Unable to map request_buffer for command!\n");
			return -E2BIG;
		}
		seg_grants = vscsiif_grants_sg(data_grants);
		shadow->sg = kcalloc(data_grants,
			sizeof(struct scsiif_request_segment), GFP_ATOMIC);
		if (!shadow->sg)
			return -ENOMEM;
	}
	seg = shadow->sg ? : ring_req->seg;

	err = gnttab_alloc_grant_references(seg_grants + data_grants,
					    &gref_head);
	if (err) {
		kfree(shadow->sg);
		shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
			     "gnttab_alloc_grant_references() error\n");
		return -ENOMEM;
	}

	if (seg_grants) {
		page = virt_to_page(seg);
		off = (unsigned long)seg & ~PAGE_MASK;
		len = sizeof(struct scsiif_request_segment) * data_grants;
		while (len > 0) {
			bytes = min_t(unsigned int, len, PAGE_SIZE - off);

			ref = gnttab_claim_grant_reference(&gref_head);
			BUG_ON(ref == -ENOSPC);

			mfn = pfn_to_mfn(page_to_pfn(page));
			gnttab_grant_foreign_access_ref(ref,
				info->dev->otherend_id, mfn, 1);
			shadow->gref[ref_cnt] = ref;
			ring_req->seg[ref_cnt].gref   = ref;
			ring_req->seg[ref_cnt].offset = (uint16_t)off;
			ring_req->seg[ref_cnt].length = (uint16_t)bytes;

			page++;
			len -= bytes;
			off = 0;
			ref_cnt++;
		}
		BUG_ON(seg_grants < ref_cnt);
		seg_grants = ref_cnt;
	}

	scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) {
		page = sg_page(sg);
		off = sg->offset;
		len = sg->length;

		while (len > 0 && data_len > 0) {
			/*
			 * sg sends a scatterlist that is larger than
			 * the data_len it wants transferred for certain
			 * IO sizes.
			 */
			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
			bytes = min(bytes, data_len);

			ref = gnttab_claim_grant_reference(&gref_head);
			BUG_ON(ref == -ENOSPC);

			mfn = pfn_to_mfn(page_to_pfn(page));
			gnttab_grant_foreign_access_ref(ref,
				info->dev->otherend_id, mfn, grant_ro);

			shadow->gref[ref_cnt] = ref;
			seg->gref   = ref;
			seg->offset = (uint16_t)off;
			seg->length = (uint16_t)bytes;

			page++;
			seg++;
			len -= bytes;
			data_len -= bytes;
			off = 0;
			ref_cnt++;
		}
	}
Example #13
static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long len = 0;
	int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
	unsigned long pfn;
	int ret;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long frame;
		unsigned long mfn = pfn_to_mfn(pfn);

		if (release) {
			/* Make sure pfn exists to start with */
			if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
				continue;
			frame = mfn;
		} else {
			if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
				continue;
			frame = pfn;
		}
		set_xen_guest_handle(reservation.extent_start, &frame);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
					   &reservation);
		WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
		     release ? "release" : "populate", pfn, ret);

		if (ret == 1) {
			if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
				if (release)
					break;
				set_xen_guest_handle(reservation.extent_start, &frame);
				reservation.nr_extents = 1;
				ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
							   &reservation);
				break;
			}
			len++;
		} else
			break;
	}
	if (len)
		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
		       release ? "Freeing" : "Populating",
		       start, end, len,
		       release ? "freed" : "added");

	return len;
}

static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	/*
	 * Xen already ballooned out the E820 non RAM regions for us
	 * and set them up properly in EPT.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return end - start;

	return xen_do_chunk(start, end, true);
}

static unsigned long __init xen_populate_chunk(
	const struct e820entry *list, size_t map_size,
	unsigned long max_pfn, unsigned long *last_pfn,
	unsigned long credits_left)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;
	unsigned long dest_pfn;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;
		unsigned long pfns;
		long capacity;

		if (credits_left <= 0)
			break;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after the xen_start_info->nr_pages */
		if (e_pfn <= max_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);
		/* If the E820 falls within the nr_pages, we want to start
		 * at the nr_pages PFN.
		 * If that would mean going past the E820 entry, skip it
		 */
		if (s_pfn <= max_pfn) {
			capacity = e_pfn - max_pfn;
			dest_pfn = max_pfn;
		} else {
			capacity = e_pfn - s_pfn;
			dest_pfn = s_pfn;
		}

		if (credits_left < capacity)
			capacity = credits_left;

		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
		done += pfns;
		*last_pfn = (dest_pfn + pfns);
		if (pfns < capacity)
			break;
		credits_left -= pfns;
	}
	return done;
}

static void __init xen_set_identity_and_release_chunk(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long *released, unsigned long *identity)
{
	unsigned long pfn;

	/*
	 * If the PFNs are currently mapped, clear the mappings
	 * (except for the ISA region which must be 1:1 mapped) to
	 * release the refcounts (in Xen) on the original frames.
	 */

	/*
	 * PVH E820 matches the hypervisor's P2M which means we need to
	 * account for the proper values of *release and *identity.
	 */
	for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
	     pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
		pte_t pte = __pte_ma(0);

		if (pfn < PFN_UP(ISA_END_ADDRESS))
			pte = mfn_pte(pfn, PAGE_KERNEL_IO);

		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
	}

	if (start_pfn < nr_pages)
		*released += xen_release_chunk(
			start_pfn, min(end_pfn, nr_pages));

	*identity += set_phys_range_identity(start_pfn, end_pfn);
}
Example #14
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does.  (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs.  Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them.  But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();

	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
Example #15
static void xennet_alloc_rx_buffers(struct net_device *dev)
{
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct sk_buff *skb;
	struct page *page;
	int i, batch_target, notify;
	RING_IDX req_prod = np->rx.req_prod_pvt;
	grant_ref_t ref;
	unsigned long pfn;
	void *vaddr;
	struct xen_netif_rx_request *req;

	if (unlikely(!netif_carrier_ok(dev)))
		return;

	/*
	 * Allocate skbuffs greedily, even though we batch updates to the
	 * receive ring. This creates a less bursty demand on the memory
	 * allocator, so should reduce the chance of failed allocation requests
	 * both for ourself and for other kernel subsystems.
	 */
	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
		skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
					 GFP_ATOMIC | __GFP_NOWARN);
		if (unlikely(!skb))
			goto no_skb;

		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page) {
			kfree_skb(skb);
no_skb:
			/* Any skbuffs queued for refill? Force them out. */
			if (i != 0)
				goto refill;
			/* Could not allocate any skbuffs. Try again later. */
			mod_timer(&np->rx_refill_timer,
				  jiffies + (HZ/10));
			break;
		}

		skb_shinfo(skb)->frags[0].page = page;
		skb_shinfo(skb)->nr_frags = 1;
		__skb_queue_tail(&np->rx_batch, skb);
	}

	/* Is the batch large enough to be worthwhile? */
	if (i < (np->rx_target/2)) {
		if (req_prod > np->rx.sring->req_prod)
			goto push;
		return;
	}

	/* Adjust our fill target if we risked running out of buffers. */
	if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
	    ((np->rx_target *= 2) > np->rx_max_target))
		np->rx_target = np->rx_max_target;

 refill:
	for (i = 0; ; i++) {
		skb = __skb_dequeue(&np->rx_batch);
		if (skb == NULL)
			break;

		skb->dev = dev;

		id = xennet_rxidx(req_prod + i);

		BUG_ON(np->rx_skbs[id]);
		np->rx_skbs[id] = skb;

		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
		BUG_ON((signed short)ref < 0);
		np->grant_rx_ref[id] = ref;

		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
		vaddr = page_address(skb_shinfo(skb)->frags[0].page);

		req = RING_GET_REQUEST(&np->rx, req_prod + i);
		gnttab_grant_foreign_access_ref(ref,
						np->xbdev->otherend_id,
						pfn_to_mfn(pfn),
						0);

		req->id = id;
		req->gref = ref;
	}

	wmb();		/* barrier so backend sees requests */

	np->rx.req_prod_pvt = req_prod + i;
 push:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);
}
Example #16
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
					      phys_addr_t end_addr)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long start, end;
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	start = PFN_UP(start_addr);
	end = PFN_DOWN(end_addr);

	if (end <= start)
		return 0;

	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
	       start, end);
	for(pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
		     start, end, ret);
		if (ret == 1) {
			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_CONT "%ld pages freed\n", len);

	return len;
}

static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
						     const struct e820map *e820)
{
	phys_addr_t max_addr = PFN_PHYS(max_pfn);
	phys_addr_t last_end = ISA_END_ADDRESS;
	unsigned long released = 0;
	int i;

	/* Free any unused memory above the low 1Mbyte. */
	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
		phys_addr_t end = e820->map[i].addr;
		end = min(max_addr, end);

		if (last_end < end)
			released += xen_release_chunk(last_end, end);
		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
	}

	if (last_end < max_addr)
		released += xen_release_chunk(last_end, max_addr);

	printk(KERN_INFO "released %ld pages of unused memory\n", released);
	return released;
}
Example #17
void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags)
{
    struct xc_dom_image *dom;
    int rc;
    domid_t domid = DOMID_SELF;
    xen_pfn_t pfn;
    xc_interface *xc_handle;
    unsigned long i;
    void *seg;
    xen_pfn_t boot_page_mfn = virt_to_mfn(&_boot_page);
    char features[] = "";
    struct mmu_update *m2p_updates;
    unsigned long nr_m2p_updates;

    DEBUG("booting with cmdline %s\n", cmdline);
    xc_handle = xc_interface_open(0,0,0);

    dom = xc_dom_allocate(xc_handle, cmdline, features);
    dom->allocate = kexec_allocate;

    /* We are using guest owned memory, therefore no limits. */
    xc_dom_kernel_max_size(dom, 0);
    xc_dom_ramdisk_max_size(dom, 0);

    dom->kernel_blob = kernel;
    dom->kernel_size = kernel_size;

    dom->ramdisk_blob = module;
    dom->ramdisk_size = module_size;

    dom->flags = flags;
    dom->console_evtchn = start_info.console.domU.evtchn;
    dom->xenstore_evtchn = start_info.store_evtchn;

    tpm_hash2pcr(dom, cmdline);

    if ( (rc = xc_dom_boot_xen_init(dom, xc_handle, domid)) != 0 ) {
        grub_printf("xc_dom_boot_xen_init returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }
    if ( (rc = xc_dom_parse_image(dom)) != 0 ) {
        grub_printf("xc_dom_parse_image returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

#ifdef __i386__
    if (strcmp(dom->guest_type, "xen-3.0-x86_32p")) {
        grub_printf("can only boot x86 32 PAE kernels, not %s\n", dom->guest_type);
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }
#endif
#ifdef __x86_64__
    if (strcmp(dom->guest_type, "xen-3.0-x86_64")) {
        grub_printf("can only boot x86 64 kernels, not %s\n", dom->guest_type);
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }
#endif

    /* equivalent of xc_dom_mem_init */
    dom->arch_hooks = xc_dom_find_arch_hooks(xc_handle, dom->guest_type);
    dom->total_pages = start_info.nr_pages;

    /* equivalent of arch_setup_meminit */

    /* setup initial p2m */
    dom->p2m_host = malloc(sizeof(*dom->p2m_host) * dom->total_pages);

    /* Start with our current P2M */
    for (i = 0; i < dom->total_pages; i++)
        dom->p2m_host[i] = pfn_to_mfn(i);

    if ( (rc = xc_dom_build_image(dom)) != 0 ) {
        grub_printf("xc_dom_build_image returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

    /* copy hypercall page */
    /* TODO: domctl instead, but requires privileges */
    if (dom->parms.virt_hypercall != -1) {
        pfn = PHYS_PFN(dom->parms.virt_hypercall - dom->parms.virt_base);
        memcpy((void *) pages[pfn], hypercall_page, PAGE_SIZE);
    }

    /* Equivalent of xc_dom_boot_image */
    dom->shared_info_mfn = PHYS_PFN(start_info.shared_info);

    if (!xc_dom_compat_check(dom)) {
        grub_printf("xc_dom_compat_check failed\n");
        errnum = ERR_EXEC_FORMAT;
        goto out;
    }

    /* Move current console, xenstore and boot MFNs to the allocated place */
    do_exchange(dom, dom->console_pfn, start_info.console.domU.mfn);
    do_exchange(dom, dom->xenstore_pfn, start_info.store_mfn);
    DEBUG("virt base at %llx\n", dom->parms.virt_base);
    DEBUG("bootstack_pfn %lx\n", dom->bootstack_pfn);
    _boot_target = dom->parms.virt_base + PFN_PHYS(dom->bootstack_pfn);
    DEBUG("_boot_target %lx\n", _boot_target);
    do_exchange(dom, PHYS_PFN(_boot_target - dom->parms.virt_base),
            virt_to_mfn(&_boot_page));

    /* Make sure the bootstrap page table does not RW-map any of our current
     * page table frames */
    kexec_allocate(dom, dom->virt_pgtab_end);

    if ( (rc = xc_dom_update_guest_p2m(dom))) {
        grub_printf("xc_dom_update_guest_p2m returned %d\n", rc);
        errnum = ERR_BOOT_FAILURE;
        goto out;
    }

    if ( dom->arch_hooks->setup_pgtables )
        if ( (rc = dom->arch_hooks->setup_pgtables(dom))) {
            grub_printf("setup_pgtables returned %d\n", rc);
            errnum = ERR_BOOT_FAILURE;
            goto out;
        }

    /* start info page */
#undef start_info
    if ( dom->arch_hooks->start_info )
        dom->arch_hooks->start_info(dom);
#define start_info (start_info_union.start_info)

    xc_dom_log_memory_footprint(dom);

    /* Unmap libxc's projection of the boot page table */
    seg = xc_dom_seg_to_ptr(dom, &dom->pgtables_seg);
    munmap(seg, dom->pgtables_seg.vend - dom->pgtables_seg.vstart);

    /* Unmap day0 pages to avoid having a r/w mapping of the future page table */
    for (pfn = 0; pfn < allocated; pfn++)
        munmap((void*) pages[pfn], PAGE_SIZE);

    /* Pin the boot page table base */
    if ( (rc = pin_table(dom->xch,
#ifdef __i386__
                MMUEXT_PIN_L3_TABLE,
#endif
#ifdef __x86_64__
                MMUEXT_PIN_L4_TABLE,
#endif
                xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
                dom->guest_domid)) != 0 ) {
        grub_printf("pin_table(%lx) returned %d\n", xc_dom_p2m_host(dom,
                    dom->pgtables_seg.pfn), rc);
        errnum = ERR_BOOT_FAILURE;
        goto out_remap;
    }

    /* We populate the Mini-OS page table here so that boot.S can just call
     * update_va_mapping to project itself there.  */
    need_pgt(_boot_target);
    DEBUG("day0 pages %lx\n", allocated);
    DEBUG("boot target page %lx\n", _boot_target);
    DEBUG("boot page %p\n", &_boot_page);
    DEBUG("boot page mfn %lx\n", boot_page_mfn);
    _boot_page_entry = PFN_PHYS(boot_page_mfn) | L1_PROT;
    DEBUG("boot page entry %llx\n", _boot_page_entry);
    _boot_oldpdmfn = virt_to_mfn(start_info.pt_base);
    DEBUG("boot old pd mfn %lx\n", _boot_oldpdmfn);
    DEBUG("boot pd virt %lx\n", dom->pgtables_seg.vstart);
    _boot_pdmfn = dom->p2m_host[PHYS_PFN(dom->pgtables_seg.vstart - dom->parms.virt_base)];
    DEBUG("boot pd mfn %lx\n", _boot_pdmfn);
    _boot_stack = _boot_target + PAGE_SIZE;
    DEBUG("boot stack %lx\n", _boot_stack);
    _boot_start_info = dom->parms.virt_base + PFN_PHYS(dom->start_info_pfn);
    DEBUG("boot start info %lx\n", _boot_start_info);
    _boot_start = dom->parms.virt_entry;
    DEBUG("boot start %lx\n", _boot_start);

    /* Keep only useful entries */
    for (nr_m2p_updates = pfn = 0; pfn < start_info.nr_pages; pfn++)
        if (dom->p2m_host[pfn] != pfn_to_mfn(pfn))
            nr_m2p_updates++;

    m2p_updates = malloc(sizeof(*m2p_updates) * nr_m2p_updates);
    for (i = pfn = 0; pfn < start_info.nr_pages; pfn++)
        if (dom->p2m_host[pfn] != pfn_to_mfn(pfn)) {
            m2p_updates[i].ptr = PFN_PHYS(dom->p2m_host[pfn]) | MMU_MACHPHYS_UPDATE;
            m2p_updates[i].val = pfn;
            i++;
        }

    for (i = 0; i < blk_nb; i++)
        shutdown_blkfront(blk_dev[i]);
    if (net_dev)
        shutdown_netfront(net_dev);
    if (kbd_dev)
        shutdown_kbdfront(kbd_dev);
    stop_kernel();

    /* Update M2P */
    if ((rc = HYPERVISOR_mmu_update(m2p_updates, nr_m2p_updates, NULL, DOMID_SELF)) < 0) {
        xprintk("Could not update M2P\n");
        ASSERT(0);
    }

    xprintk("go!\n");

    /* Jump to trampoline boot page */
    _boot();

    ASSERT(0);

out_remap:
    for (pfn = 0; pfn < allocated; pfn++)
        do_map_frames(pages[pfn], &pages_mfns[pfn], 1, 0, 0, DOMID_SELF, 0, L1_PROT);
out:
    xc_dom_release(dom);
    for (pfn = 0; pfn < allocated; pfn++)
        free_page((void*)pages[pfn]);
    free(pages);
    free(pages_mfns);
    pages = NULL;
    pages_mfns = NULL;
    allocated = 0;
    xc_interface_close(xc_handle);
}
Example #18
/*
 * Fill in the remaining CPU context and initialize it.
 */
static int
mp_set_cpu_context(vcpu_guest_context_t *vgc, cpu_t *cp)
{
	uint_t vec, iopl;

	vgc->flags = VGCF_IN_KERNEL;

	/*
	 * fpu_ctx we leave as zero; on first fault we'll store
	 * sse_initial into it anyway.
	 */

#if defined(__amd64)
	vgc->user_regs.cs = KCS_SEL | SEL_KPL;	/* force to ring 3 */
#else
	vgc->user_regs.cs = KCS_SEL;
#endif
	vgc->user_regs.ds = KDS_SEL;
	vgc->user_regs.es = KDS_SEL;
	vgc->user_regs.ss = KDS_SEL;
	vgc->kernel_ss = KDS_SEL;

	/*
	 * Allow I/O privilege level for Dom0 kernel.
	 */
	if (DOMAIN_IS_INITDOMAIN(xen_info))
		iopl = (PS_IOPL & 0x1000); /* ring 1 */
	else
		iopl = 0;

#if defined(__amd64)
	vgc->user_regs.fs = 0;
	vgc->user_regs.gs = 0;
	vgc->user_regs.rflags = F_OFF | iopl;
#elif defined(__i386)
	vgc->user_regs.fs = KFS_SEL;
	vgc->user_regs.gs = KGS_SEL;
	vgc->user_regs.eflags = F_OFF | iopl;
	vgc->event_callback_cs = vgc->user_regs.cs;
	vgc->failsafe_callback_cs = vgc->user_regs.cs;
#endif

	/*
	 * Initialize the trap_info_t from the IDT
	 */
#if !defined(__lint)
	ASSERT(NIDT == sizeof (vgc->trap_ctxt) / sizeof (vgc->trap_ctxt[0]));
#endif
	for (vec = 0; vec < NIDT; vec++) {
		trap_info_t *ti = &vgc->trap_ctxt[vec];

		if (xen_idt_to_trap_info(vec,
		    &cp->cpu_m.mcpu_idt[vec], ti) == 0) {
			ti->cs = KCS_SEL;
			ti->vector = vec;
		}
	}

	/*
	 * No LDT
	 */

	/*
	 * (We assert in various places that the GDT is (a) aligned on a
	 * page boundary and (b) one page long, so this really should fit..)
	 */
#ifdef CRASH_XEN
	vgc->gdt_frames[0] = pa_to_ma(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#else
	vgc->gdt_frames[0] = pfn_to_mfn(mmu_btop(cp->cpu_m.mcpu_gdtpa));
#endif
	vgc->gdt_ents = NGDT;

	vgc->ctrlreg[0] = CR0_ENABLE_FPU_FLAGS(getcr0());

#if defined(__i386)
	if (mmu.pae_hat)
		vgc->ctrlreg[3] =
		    xen_pfn_to_cr3(pfn_to_mfn(kas.a_hat->hat_htable->ht_pfn));
	else
#endif
		vgc->ctrlreg[3] =
		    pa_to_ma(mmu_ptob(kas.a_hat->hat_htable->ht_pfn));

	vgc->ctrlreg[4] = getcr4();

	vgc->event_callback_eip = (uintptr_t)xen_callback;
	vgc->failsafe_callback_eip = (uintptr_t)xen_failsafe_callback;
	vgc->flags |= VGCF_failsafe_disables_events;

#if defined(__amd64)
	/*
	 * XXPV should this be moved to init_cpu_syscall?
	 */
	vgc->syscall_callback_eip = (uintptr_t)sys_syscall;
	vgc->flags |= VGCF_syscall_disables_events;

	ASSERT(vgc->user_regs.gs == 0);
	vgc->gs_base_kernel = (uintptr_t)cp;
#endif

	return (xen_vcpu_initialize(cp->cpu_id, vgc));
}
Example #19
/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if (pfn >= PFN_UP(__pa(high_memory - 1)))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		      mfn, pfn);
		BUG();
	}
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

	mfn_save = virt_to_mfn(buf);

	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}
Example #20
/*
 * balloon_free_pages()
 *    free page_cnt pages, using any combination of mfns, pfns, and kva as long
 *    as they refer to the same mapping.  If an array of mfns is passed in, we
 *    assume they were already cleared.  Otherwise, we need to zero the pages
 *    before giving them back to the hypervisor. kva space is not free'd up in
 *    case the caller wants to re-use it.
 */
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	xen_memory_reservation_t memdec;
	mfn_t mfn;
	pfn_t pfn;
	uint_t i;
	long e;


#if DEBUG
	/* make sure kva is page aligned and maps to first pfn */
	if (kva != NULL) {
		ASSERT(((uintptr_t)kva & PAGEOFFSET) == 0);
		if (pfns != NULL) {
			ASSERT(hat_getpfnum(kas.a_hat, kva) == pfns[0]);
		}
	}
#endif

	/* if we have a kva, we can clean all pages with just one bzero */
	if ((kva != NULL) && balloon_zero_memory) {
		bzero(kva, (page_cnt * PAGESIZE));
	}

	/* if we were given a kva and/or a pfn */
	if ((kva != NULL) || (pfns != NULL)) {

		/*
		 * All the current callers only pass 1 page when using kva or
		 * pfns, and use mfns when passing multiple pages.  If that
		 * assumption is changed, the following code will need some
		 * work.  The following ASSERT() guarantees we're respecting
		 * the io locking quota.
		 */
		ASSERT(page_cnt < bln_contig_list_quota);

		/* go through all the pages */
		for (i = 0; i < page_cnt; i++) {

			/* get the next pfn */
			if (pfns == NULL) {
				pfn = hat_getpfnum(kas.a_hat,
				    (kva + (PAGESIZE * i)));
			} else {
				pfn = pfns[i];
			}

			/*
			 * if we didn't already zero this page, do it now. we
			 * need to do this *before* we give back the MFN
			 */
			if ((kva == NULL) && (balloon_zero_memory)) {
				pfnzero(pfn, 0, PAGESIZE);
			}

			/*
			 * unmap the pfn. We don't free up the kva vmem space
			 * so the caller can re-use it. The page must be
			 * unmapped before it is given back to the hypervisor.
			 */
			if (kva != NULL) {
				hat_unload(kas.a_hat, (kva + (PAGESIZE * i)),
				    PAGESIZE, HAT_UNLOAD_UNMAP);
			}

			/* grab the mfn before the pfn is marked as invalid */
			mfn = pfn_to_mfn(pfn);

			/* mark the pfn as invalid */
			reassign_pfn(pfn, MFN_INVALID);

			/*
			 * if we weren't given an array of MFNs, we need to
			 * free them up one at a time. Otherwise, we'll wait
			 * until later and do it in one hypercall
			 */
			if (mfns == NULL) {
				bzero(&memdec, sizeof (memdec));
				/*LINTED: constant in conditional context*/
				set_xen_guest_handle(memdec.extent_start, &mfn);
				memdec.domid = DOMID_SELF;
				memdec.nr_extents = 1;
				e = HYPERVISOR_memory_op(
				    XENMEM_decrease_reservation, &memdec);
				if (e != 1) {
					cmn_err(CE_PANIC, "balloon: unable to "
					    "give a page back to the "
					    "hypervisor.\n");
				}
			}
		}
	}

	/*
	 * if we were passed in MFNs, we haven't free'd them up yet. We can
	 * do it with one call.
	 */
	if (mfns != NULL) {
		bzero(&memdec, sizeof (memdec));
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(memdec.extent_start, mfns);
		memdec.domid = DOMID_SELF;
		memdec.nr_extents = page_cnt;
		e = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &memdec);
		if (e != page_cnt) {
			cmn_err(CE_PANIC, "balloon: unable to give pages back "
			    "to the hypervisor.\n");
		}
	}

	atomic_add_long((ulong_t *)&bln_stats.bln_hv_pages, page_cnt);
	return (page_cnt);
}
Example #21
/*
 * This function is called when we want to decrease the memory reservation
 * of our domain.  Allocate the memory and make a hypervisor call to give
 * it back.
 */
static spgcnt_t
balloon_dec_reservation(ulong_t debit)
{
	int	i, locked;
	long	rv;
	ulong_t	request;
	page_t	*pp;

	bzero(mfn_frames, sizeof (mfn_frames));
	bzero(pfn_frames, sizeof (pfn_frames));

	if (debit > FRAME_ARRAY_SIZE) {
		debit = FRAME_ARRAY_SIZE;
	}
	request = debit;

	/*
	 * Don't bother if there isn't a safe amount of kmem left.
	 */
	if (kmem_avail() < balloon_minkmem) {
		kmem_reap();
		if (kmem_avail() < balloon_minkmem)
			return (0);
	}

	if (page_resv(request, KM_NOSLEEP) == 0) {
		return (0);
	}
	xen_block_migrate();
	for (i = 0; i < debit; i++) {
		pp = page_get_high_mfn(new_high_mfn);
		new_high_mfn = 0;
		if (pp == NULL) {
			/*
			 * Call kmem_reap(), then try once more,
			 * but only if there is a safe amount of
			 * kmem left.
			 */
			kmem_reap();
			if (kmem_avail() < balloon_minkmem ||
			    (pp = page_get_high_mfn(0)) == NULL) {
				debit = i;
				break;
			}
		}
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!hat_page_is_mapped(pp));

		balloon_page_add(pp);
		pfn_frames[i] = pp->p_pagenum;
		mfn_frames[i] = pfn_to_mfn(pp->p_pagenum);
	}
	if (debit == 0) {
		xen_allow_migrate();
		page_unresv(request);
		return (0);
	}

	/*
	 * We zero all the pages before we start reassigning them in order to
	 * minimize the time spent holding the lock on the contig pfn list.
	 */
	if (balloon_zero_memory) {
		for (i = 0; i < debit; i++) {
			pfnzero(pfn_frames[i], 0, PAGESIZE);
		}
	}

	/*
	 * Remove all mappings for the pfns from the system
	 */
	locked = balloon_lock_contig_pfnlist(debit);
	for (i = 0; i < debit; i++) {
		reassign_pfn(pfn_frames[i], MFN_INVALID);
	}
	if (locked)
		unlock_contig_pfnlist();

	rv = balloon_free_pages(debit, mfn_frames, NULL, NULL);

	if (rv < 0) {
		cmn_err(CE_WARN, "Attempt to return pages to the hypervisor "
		    "failed - up to %lu pages lost (error = %ld)", debit, rv);
		rv = 0;
	} else if (rv != debit) {
		panic("Unexpected return value (%ld) from decrease reservation "
		    "hypervisor call", rv);
	}

	xen_allow_migrate();
	if (debit != request)
		page_unresv(request - debit);
	return (rv);
}
Example #22
/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page,
		struct gnttab_map_grant_ref *kmap_op)
{
	unsigned long flags;
	unsigned long pfn;
	unsigned long uninitialized_var(address);
	unsigned level;
	pte_t *ptep = NULL;
	int ret = 0;

	pfn = page_to_pfn(page);
	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);
		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
					"m2p_add_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}
	WARN_ON(PagePrivate(page));
	SetPagePrivate(page);
	set_page_private(page, mfn);
	page->index = pfn_to_mfn(pfn);

	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
		return -ENOMEM;

	if (kmap_op != NULL) {
		if (!PageHighMem(page)) {
			struct multicall_space mcs =
				xen_mc_entry(sizeof(*kmap_op));

			MULTI_grant_table_op(mcs.mc,
					GNTTABOP_map_grant_ref, kmap_op, 1);

			xen_mc_issue(PARAVIRT_LAZY_MMU);
		}
		/* let's use dev_bus_addr to record the old mfn instead */
		kmap_op->dev_bus_addr = page->index;
		page->index = (unsigned long) kmap_op;
	}
	spin_lock_irqsave(&m2p_override_lock, flags);
	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	/* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
	 * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
	 * pfn so that the following mfn_to_pfn(mfn) calls will return the
	 * pfn from the m2p_override (the backend pfn) instead.
	 * We need to do this because the pages shared by the frontend
	 * (xen-blkfront) can be already locked (lock_page, called by
	 * do_read_cache_page); when the userspace backend tries to use them
	 * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
	 * do_blockdev_direct_IO is going to try to lock the same pages
	 * again resulting in a deadlock.
	 * As a side effect get_user_pages_fast might not be safe on the
	 * frontend pages while they are being shared with the backend,
	 * because mfn_to_pfn (that ends up being called by GUPF) will
	 * return the backend pfn rather than the frontend pfn. */
	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
	if (ret == 0 && get_phys_to_machine(pfn) == mfn)
		set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));

	return 0;
}
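To see what the long comment above buys, here is a hedged, simplified sketch of the lookup side: an mfn_to_pfn() that falls back to the override list whenever the m2p answer does not map back to the same MFN. Loosely modeled on arch/x86/include/asm/xen/page.h of the same era; bounds checks omitted.

static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
	unsigned long pfn;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return mfn;

	pfn = machine_to_phys_mapping[mfn];

	/*
	 * Foreign MFN: the p2m does not map back to this MFN, so consult
	 * the override list populated by m2p_add_override() above.
	 */
	if (get_phys_to_machine(pfn) != mfn)
		pfn = m2p_find_override_pfn(mfn, ~0);

	return pfn;
}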
Example #23
static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long frame;
		unsigned long mfn = pfn_to_mfn(pfn);

		if (release) {
			/* Make sure pfn exists to start with */
			if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
				continue;
			frame = mfn;
		} else {
			if (mfn != INVALID_P2M_ENTRY)
				continue;
			frame = pfn;
		}
		set_xen_guest_handle(reservation.extent_start, &frame);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap,
					   &reservation);
		WARN(ret != 1, "Failed to %s pfn %lx err=%d\n",
		     release ? "release" : "populate", pfn, ret);

		if (ret == 1) {
			if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) {
				if (release)
					break;
				set_xen_guest_handle(reservation.extent_start, &frame);
				reservation.nr_extents = 1;
				ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
							   &reservation);
				break;
			}
			len++;
		} else
			break;
	}
	if (len)
		printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n",
		       release ? "Freeing" : "Populating",
		       start, end, len,
		       release ? "freed" : "added");

	return len;
}

static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	return xen_do_chunk(start, end, true);
}

static unsigned long __init xen_populate_chunk(
	const struct e820entry *list, size_t map_size,
	unsigned long max_pfn, unsigned long *last_pfn,
	unsigned long credits_left)
{
	const struct e820entry *entry;
	unsigned int i;
	unsigned long done = 0;
	unsigned long dest_pfn;

	for (i = 0, entry = list; i < map_size; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;
		unsigned long pfns;
		long capacity;

		if (credits_left <= 0)
			break;

		if (entry->type != E820_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after the xen_start_info->nr_pages */
		if (e_pfn <= max_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);
		/* If the E820 falls within the nr_pages, we want to start
		 * at the nr_pages PFN.
		 * If that would mean going past the E820 entry, skip it
		 */
		if (s_pfn <= max_pfn) {
			capacity = e_pfn - max_pfn;
			dest_pfn = max_pfn;
		} else {
			capacity = e_pfn - s_pfn;
			dest_pfn = s_pfn;
		}

		if (credits_left < capacity)
			capacity = credits_left;

		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
		done += pfns;
		*last_pfn = (dest_pfn + pfns);
		if (pfns < capacity)
			break;
		credits_left -= pfns;
	}
	return done;
}

static void __init xen_set_identity_and_release_chunk(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long *released, unsigned long *identity)
{
	unsigned long pfn;

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);

	if (start_pfn < nr_pages)
		*released += xen_release_chunk(
			start_pfn, min(end_pfn, nr_pages));

	*identity += set_phys_range_identity(start_pfn, end_pfn);
}
Example #24
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
			      struct xen_netif_tx_request *tx)
{
	struct netfront_info *np = netdev_priv(dev);
	char *data = skb->data;
	unsigned long mfn;
	RING_IDX prod = np->tx.req_prod_pvt;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	unsigned int id;
	grant_ref_t ref;
	int i;

	/* While the header overlaps a page boundary (including being
	   larger than a page), split it into page-sized chunks. */
	while (len > PAGE_SIZE - offset) {
		tx->size = PAGE_SIZE - offset;
		tx->flags |= XEN_NETTXF_more_data;
		len -= tx->size;
		data += tx->size;
		offset = 0;

		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
		np->tx_skbs[id].skb = skb_get(skb);
		tx = RING_GET_REQUEST(&np->tx, prod++);
		tx->id = id;
		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
		BUG_ON((signed short)ref < 0);

		mfn = virt_to_mfn(data);
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						mfn, GNTMAP_readonly);

		tx->gref = np->grant_tx_ref[id] = ref;
		tx->offset = offset;
		tx->size = len;
		tx->flags = 0;
	}

	/* Grant backend access to each skb fragment page. */
	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
		struct page *page = skb_frag_page(frag);

		len = skb_frag_size(frag);
		offset = frag->page_offset;

		/* Data must not cross a page boundary. */
		BUG_ON(len + offset > PAGE_SIZE<<compound_order(page));

		/* Skip unused frames from start of page */
		page += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;

		while (len > 0) {
			unsigned long bytes;

			BUG_ON(offset >= PAGE_SIZE);

			bytes = PAGE_SIZE - offset;
			if (bytes > len)
				bytes = len;

			tx->flags |= XEN_NETTXF_more_data;

			id = get_id_from_freelist(&np->tx_skb_freelist,
						  np->tx_skbs);
			np->tx_skbs[id].skb = skb_get(skb);
			tx = RING_GET_REQUEST(&np->tx, prod++);
			tx->id = id;
			ref = gnttab_claim_grant_reference(&np->gref_tx_head);
			BUG_ON((signed short)ref < 0);

			mfn = pfn_to_mfn(page_to_pfn(page));
			gnttab_grant_foreign_access_ref(ref,
							np->xbdev->otherend_id,
							mfn, GNTMAP_readonly);

			tx->gref = np->grant_tx_ref[id] = ref;
			tx->offset = offset;
			tx->size = bytes;
			tx->flags = 0;

			offset += bytes;
			len -= bytes;

			/* Next frame */
			if (offset == PAGE_SIZE && len) {
				BUG_ON(!PageCompound(page));
				page++;
				offset = 0;
			}
		}
	}

	np->tx.req_prod_pvt = prod;
}
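
Each chunk above repeats the same grant pattern: reserve an id from the skb free list, take a ring slot, claim a grant reference, and publish the page's machine frame to the backend. A minimal sketch of the granting step alone, assuming otherend_id was obtained via xenbus (the helper name is hypothetical):

static grant_ref_t grant_page_ro(grant_ref_t *gref_head, domid_t otherend_id,
				 struct page *page)
{
	grant_ref_t ref = gnttab_claim_grant_reference(gref_head);

	BUG_ON((signed short)ref < 0);
	/* The grant table wants a machine frame number, hence pfn_to_mfn(). */
	gnttab_grant_foreign_access_ref(ref, otherend_id,
					pfn_to_mfn(page_to_pfn(page)),
					GNTMAP_readonly);
	return ref;
}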
Ejemplo n.º 25
0
/*
 * Must not be called with IRQs off.  This should only be used on the
 * slow path.
 *
 * Copy a foreign granted page to local memory.
 */
int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
{
	struct gnttab_unmap_and_replace unmap;
	mmu_update_t mmu;
	struct page *page;
	struct page *new_page;
	void *new_addr;
	void *addr;
	paddr_t pfn;
	maddr_t mfn;
	maddr_t new_mfn;
	int err;

	page = *pagep;
	if (!get_page_unless_zero(page))
		return -ENOENT;

	err = -ENOMEM;
	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!new_page)
		goto out;

	new_addr = page_address(new_page);
	addr = page_address(page);
	copy_page(new_addr, addr);

	pfn = page_to_pfn(page);
	mfn = pfn_to_mfn(pfn);
	new_mfn = virt_to_mfn(new_addr);

	write_seqlock_bh(&gnttab_dma_lock);

	/* Make seq visible before checking page_mapped. */
	smp_mb();

	/* Has the page been DMA-mapped? */
	if (unlikely(page_mapped(page))) {
		write_sequnlock_bh(&gnttab_dma_lock);
		put_page(new_page);
		err = -EBUSY;
		goto out;
	}

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		set_phys_to_machine(pfn, new_mfn);

	gnttab_set_replace_op(&unmap, (unsigned long)addr,
			      (unsigned long)new_addr, ref);

	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
					&unmap, 1);
	BUG_ON(err);
	BUG_ON(unmap.status != GNTST_okay);

	write_sequnlock_bh(&gnttab_dma_lock);

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);

		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
		mmu.val = pfn;
		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
		BUG_ON(err);
	}

	new_page->mapping = page->mapping;
	new_page->index = page->index;
	set_bit(PG_foreign, &new_page->flags);
	if (PageReserved(page))
		SetPageReserved(new_page);
	*pagep = new_page;

	SetPageForeign(page, gnttab_page_free);
	page->mapping = NULL;

out:
	put_page(page);
	return err;
}
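
The delicate part of the copy is keeping both translation tables coherent: the guest-side P2M array is updated with set_phys_to_machine(), while the hypervisor-side M2P table needs an explicit MMU_MACHPHYS_UPDATE hypercall. A minimal sketch of just that pairing, assuming a PV guest (the helper name is hypothetical):

static void repoint_pfn(unsigned long pfn, unsigned long new_mfn)
{
	mmu_update_t mmu;

	/* Guest-side P->M entry. */
	set_phys_to_machine(pfn, new_mfn);

	/* Hypervisor-side M->P entry. */
	mmu.ptr = ((u64)new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	mmu.val = pfn;
	BUG_ON(HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF));
}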
Ejemplo n.º 26
0
static unsigned long __init xen_release_chunk(unsigned long start,
					      unsigned long end)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	unsigned long len = 0;
	unsigned long pfn;
	int ret;

	for (pfn = start; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		set_xen_guest_handle(reservation.extent_start, &mfn);
		reservation.nr_extents = 1;

		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
		if (ret == 1) {
			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
			len++;
		}
	}
	printk(KERN_INFO "Freeing  %lx-%lx pfn range: %lu pages freed\n",
	       start, end, len);

	return len;
}
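
The check at the top of the loop is the canonical populated-PFN test: a PFN is backed by a real machine frame only if the P2M and M2P translations round-trip, since ballooned-out or identity-mapped ranges fail one of the two. As a stand-alone predicate (hypothetical helper name):

static bool pfn_is_populated(unsigned long pfn)
{
	unsigned long mfn = pfn_to_mfn(pfn);

	/* Both directions must agree for the frame to really be there. */
	return mfn != INVALID_P2M_ENTRY && mfn_to_pfn(mfn) == pfn;
}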

static unsigned long __init xen_set_identity_and_release(
	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
	phys_addr_t start = 0;
	unsigned long released = 0;
	unsigned long identity = 0;
	const struct e820entry *entry;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then set the 1:1 map and
	 * release the pages (if available) in those non-RAM regions.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping.  This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0, entry = list; i < map_size; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;

		if (entry->type == E820_RAM || i == map_size - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

			if (entry->type == E820_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn) {
				if (start_pfn < nr_pages)
					released += xen_release_chunk(
						start_pfn, min(end_pfn, nr_pages));

				identity += set_phys_range_identity(
					start_pfn, end_pfn);
			}
			start = end;
		}
	}

	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);

	return released;
}
Ejemplo n.º 27
0
static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
			      struct xen_netif_tx_request *tx)
{
	struct netfront_info *np = netdev_priv(dev);
	char *data = skb->data;
	unsigned long mfn;
	RING_IDX prod = np->tx.req_prod_pvt;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	unsigned int id;
	grant_ref_t ref;
	int i;

	/* While the header overlaps a page boundary (including being
	   larger than a page), split it into page-sized chunks. */
	while (len > PAGE_SIZE - offset) {
		tx->size = PAGE_SIZE - offset;
		tx->flags |= NETTXF_more_data;
		len -= tx->size;
		data += tx->size;
		offset = 0;

		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
		np->tx_skbs[id].skb = skb_get(skb);
		tx = RING_GET_REQUEST(&np->tx, prod++);
		tx->id = id;
		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
		BUG_ON((signed short)ref < 0);

		mfn = virt_to_mfn(data);
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						mfn, GNTMAP_readonly);

		tx->gref = np->grant_tx_ref[id] = ref;
		tx->offset = offset;
		tx->size = len;
		tx->flags = 0;
	}

	/* Grant backend access to each skb fragment page. */
	for (i = 0; i < frags; i++) {
		skb_frag_t *frag = skb_shinfo(skb)->frags + i;

		tx->flags |= NETTXF_more_data;

		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
		np->tx_skbs[id].skb = skb_get(skb);
		tx = RING_GET_REQUEST(&np->tx, prod++);
		tx->id = id;
		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
		BUG_ON((signed short)ref < 0);

		mfn = pfn_to_mfn(page_to_pfn(frag->page));
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						mfn, GNTMAP_readonly);

		tx->gref = np->grant_tx_ref[id] = ref;
		tx->offset = frag->page_offset;
		tx->size = frag->size;
		tx->flags = 0;
	}

	np->tx.req_prod_pvt = prod;
}
Ejemplo n.º 28
0
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long  pfn, i, flags;
	struct page   *page;
	long           rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	balloon_lock(flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents   = nr_pages;
	rc = HYPERVISOR_memory_op(
		XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		int ret;
		/* We hit the Xen hard limit: reprobe. */
		set_xen_guest_handle(reservation.extent_start, frame_list);
		reservation.nr_extents   = rc;
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
				&reservation);
		BUG_ON(ret != rc);
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		set_phys_to_machine(pfn, frame_list[i]);

#ifdef CONFIG_XEN
		xen_machphys_update(frame_list[i], pfn);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}
#endif
		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

 out:
	balloon_unlock(flags);

	return 0;
}

static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long  pfn, i, flags;
	struct page   *page;
	void          *v;
	int            need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents   = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	down(&balloon_mutex);

	do {
		credit = current_target() - current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	up(&balloon_mutex);
}
Ejemplo n.º 29
0
/*
 * Make pt_pfn a new 'level' page table frame and hook it into the page
 * table at offset in previous level MFN (prev_l_mfn). pt_pfn is a guest
 * PFN.
 */
static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, 
                         unsigned long offset, unsigned long level)
{   
    pgentry_t *tab = (pgentry_t *)start_info.pt_base;
    unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
    pgentry_t prot_e, prot_t;
    mmu_update_t mmu_updates[1];
    int rc;
    
    prot_e = prot_t = 0;
    DEBUG("Allocating new L%d pt frame for pfn=%lx, "
          "prev_l_mfn=%lx, offset=%lx", 
          level, *pt_pfn, prev_l_mfn, offset);

    /* We need to clear the page, otherwise we might fail to map it
       as a page table page */
    memset((void*) pt_page, 0, PAGE_SIZE);  
 
    switch ( level )
    {
    case L1_FRAME:
        prot_e = L1_PROT;
        prot_t = L2_PROT;
        break;
    case L2_FRAME:
        prot_e = L2_PROT;
        prot_t = L3_PROT;
        break;
#if defined(__x86_64__)
    case L3_FRAME:
        prot_e = L3_PROT;
        prot_t = L4_PROT;
        break;
#endif
    default:
        printk("new_pt_frame() called with invalid level number %d\n", level);
        do_exit();
        break;
    }

    /* Make PFN a page table page */
#if defined(__x86_64__)
    tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
#endif
    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);

    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + 
        sizeof(pgentry_t) * l1_table_offset(pt_page);
    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | 
        (prot_e & ~_PAGE_RW);
    
    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
    {
        printk("ERROR: PTE for new page table page could not be updated\n");
        printk("       mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    /* Hook the new page table page into the hierarchy */
    mmu_updates[0].ptr =
        ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;

    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 ) 
    {
        printk("ERROR: mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    *pt_pfn += 1;
}
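
Both hypercalls in new_pt_frame() build their mmu_update_t the same way: ptr is the machine address of the PTE slot (the table's MFN shifted up, plus the slot offset in bytes), and val is the new entry (the target frame's MFN shifted up, OR'ed with protection bits). A minimal sketch of that encoding (hypothetical helper, mini-os types assumed):

static inline void fill_mmu_update(mmu_update_t *u, unsigned long table_mfn,
                                   unsigned long slot, unsigned long target_pfn,
                                   pgentry_t prot)
{
    /* Machine address of the slot'th entry in the table frame. */
    u->ptr = ((pgentry_t)table_mfn << PAGE_SHIFT) + slot * sizeof(pgentry_t);
    /* New entry: the target frame's MFN plus protection flags. */
    u->val = ((pgentry_t)pfn_to_mfn(target_pfn) << PAGE_SHIFT) | prot;
}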
Ejemplo n.º 30
0
void free_pte_page(struct page *pte)
{
    struct ptrpte_t *newstruct;
    struct ptrpte_t *temp_head;
    int i;
    int counter = 0;

    newstruct = kmalloc(sizeof(*newstruct), GFP_KERNEL);
    if (!newstruct) {
        /* No memory to track the page in the cache; free it directly. */
        __free_page(pte);
        return;
    }
    newstruct->content = pte;

    spin_lock(&pte_cache_lock);

    /* Push the freed PTE page on the front of the cache list. */
    newstruct->next = pte_head;
    pte_head = newstruct;
    temp_head = pte_head;

    if (pte_used_counter)
        pte_used_counter--;
    pte_free_counter++;

    /*
     * Once the cache holds far more free than in-use entries (and is
     * reasonably large overall), detach 30% of the cached pages from
     * the front of the list so they can be released in one batch.
     */
    if (pte_used_counter &&
        (pte_free_counter / pte_used_counter > 4) &&
        (pte_used_counter + pte_free_counter >= 320)) {
        printk("pte free counter is %ld\n", pte_free_counter);
        printk("pte used counter is %ld\n", pte_used_counter);
        counter = pte_free_counter * 3 / 10;
        for (i = 0; i < counter; i++)
            pte_head = pte_head->next;
        pte_free_counter -= counter;
    }

    spin_unlock(&pte_cache_lock);

    if (counter != 0) {
        struct ptrpte *newstructarray;
        int rc;

        newstructarray = kmalloc(sizeof(*newstructarray) * counter,
                                 GFP_KERNEL);
        if (!newstructarray) {
            /* Cannot batch the hypercall; put the nodes back in the cache. */
            struct ptrpte_t *tail = temp_head;

            for (i = 0; i < counter - 1; i++)
                tail = tail->next;
            spin_lock(&pte_cache_lock);
            tail->next = pte_head;
            pte_head = temp_head;
            pte_free_counter += counter;
            spin_unlock(&pte_cache_lock);
            return;
        }

        /* Record each detached page's MFN for the hypercall. */
        for (i = 0; i < counter; i++) {
            newstruct = temp_head;
            temp_head = temp_head->next;
            newstructarray[i].content =
                pfn_to_mfn(page_to_pfn(newstruct->content));
            kfree(newstruct);
        }

        /* Tell the hypervisor these frames are no longer PTE pages. */
        rc = HYPERVISOR_pte_op(newstructarray, counter);
        if (rc)
            printk("pte cache free error\n");

        /* Return the pages to the buddy allocator. */
        for (i = 0; i < counter; i++)
            __free_page(pfn_to_page(mfn_to_pfn(newstructarray[i].content)));

        kfree(newstructarray);
    }
}
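
Note the asymmetry in the conversions above: pages enter the hypercall array as MFNs via pfn_to_mfn(page_to_pfn(page)) and return to the buddy allocator via the inverse, pfn_to_page(mfn_to_pfn(mfn)). The round trip only works while the P2M entry is left intact, which this cache relies on. Expressed as a pair of sketch helpers (hypothetical names, not part of the source):

static inline unsigned long cache_page_to_mfn(struct page *page)
{
    return pfn_to_mfn(page_to_pfn(page));
}

static inline struct page *cache_mfn_to_page(unsigned long mfn)
{
    return pfn_to_page(mfn_to_pfn(mfn));
}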