int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } page = mfn_to_page(mfn); #ifdef CONFIG_X86 /* If gmfn is shared, just drop the guest reference (which may or may not * free the page) */ if(p2m_is_shared(p2mt)) { put_page_and_type(page); guest_physmap_remove_page(d, gmfn, mfn, 0); return 1; } #endif /* CONFIG_X86 */ if ( unlikely(!get_page(page, d)) ) { gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); return 1; }
/* Atomically look up a GFN and take a reference count on the backing page. */ struct page_info *get_page_from_gfn_p2m( struct domain *d, struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q) { struct page_info *page = NULL; p2m_access_t _a; p2m_type_t _t; mfn_t mfn; /* Allow t or a to be NULL */ t = t ?: &_t; a = a ?: &_a; if ( likely(!p2m_locked_by_me(p2m)) ) { /* Fast path: look up and get out */ p2m_read_lock(p2m); mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0); if ( (p2m_is_ram(*t) || p2m_is_grant(*t)) && mfn_valid(mfn) && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) ) { page = mfn_to_page(mfn); if ( !get_page(page, d) /* Page could be shared */ && !get_page(page, dom_cow) ) page = NULL; } p2m_read_unlock(p2m); if ( page ) return page; /* Error path: not a suitable GFN at all */ if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) ) return NULL; } /* Slow path: take the write lock and do fixups */ mfn = get_gfn_type_access(p2m, gfn, t, a, q, NULL); if ( p2m_is_ram(*t) && mfn_valid(mfn) ) { page = mfn_to_page(mfn); if ( !get_page(page, d) ) page = NULL; } put_gfn(d, gfn); return page; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return INVALID_GFN; } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; return INVALID_GFN; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(mfn_x(top_mfn))); top_map = map_domain_page(mfn_x(top_mfn)); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return INVALID_GFN; } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return INVALID_GFN; } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; return INVALID_GFN; }
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); put_gfn(d, gmfn); /* If the page hasn't yet been paged out, there is an * actual page that needs to be released. */ if ( p2mt == p2m_ram_paging_out ) { ASSERT(mfn_valid(mfn)); page = mfn_to_page(mfn); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); } p2m_mem_paging_drop_page(d, gmfn, p2mt); return 1; } if ( p2mt == p2m_mmio_direct ) { clear_mmio_p2m_entry(d, gmfn, _mfn(mfn)); put_gfn(d, gmfn); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } #ifdef CONFIG_X86 if ( p2m_is_shared(p2mt) ) { /* Unshare the page, bail out on error. We unshare because * we might be the only one using this shared page, and we * need to trigger proper cleanup. Once done, this is * like any other page. */ if ( mem_sharing_unshare_page(d, gmfn, 0) ) { put_gfn(d, gmfn); (void)mem_sharing_notify_enomem(d, gmfn, 0); return 0; } /* Maybe the mfn changed */ mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt)); ASSERT(!p2m_is_shared(p2mt)); } #endif /* CONFIG_X86 */ page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); /* * With the lack of an IOMMU on some platforms, domains with DMA-capable * device must retrieve the same pfn when the hypercall populate_physmap * is called. * * For this purpose (and to match populate_physmap() behavior), the page * is kept allocated. */ if ( !is_domain_direct_mapped(d) && test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); put_gfn(d, gmfn); return 1; }
static int hvmemul_do_io( int is_mmio, paddr_t addr, unsigned long *reps, int size, paddr_t ram_gpa, int dir, int df, void *p_data) { struct vcpu *curr = current; struct hvm_vcpu_io *vio; ioreq_t p = { .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO, .addr = addr, .size = size, .dir = dir, .df = df, .data = ram_gpa, .data_is_ptr = (p_data == NULL), }; unsigned long ram_gfn = paddr_to_pfn(ram_gpa); p2m_type_t p2mt; struct page_info *ram_page; int rc; /* Check for paged out page */ ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { if ( ram_page ) put_page(ram_page); p2m_mem_paging_populate(curr->domain, ram_gfn); return X86EMUL_RETRY; } if ( p2m_is_shared(p2mt) ) { if ( ram_page ) put_page(ram_page); return X86EMUL_RETRY; } /* * Weird-sized accesses have undefined behaviour: we discard writes * and read all-ones. */ if ( unlikely((size > sizeof(long)) || (size & (size - 1))) ) { gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size); ASSERT(p_data != NULL); /* cannot happen with a REP prefix */ if ( dir == IOREQ_READ ) memset(p_data, ~0, size); if ( ram_page ) put_page(ram_page); return X86EMUL_UNHANDLEABLE; } if ( !p.data_is_ptr && (dir == IOREQ_WRITE) ) { memcpy(&p.data, p_data, size); p_data = NULL; } vio = &curr->arch.hvm_vcpu.hvm_io; if ( is_mmio && !p.data_is_ptr ) { /* Part of a multi-cycle read or write? */ if ( dir == IOREQ_WRITE ) { paddr_t pa = vio->mmio_large_write_pa; unsigned int bytes = vio->mmio_large_write_bytes; if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) { if ( ram_page ) put_page(ram_page); return X86EMUL_OKAY; } } else { paddr_t pa = vio->mmio_large_read_pa; unsigned int bytes = vio->mmio_large_read_bytes; if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) { memcpy(p_data, &vio->mmio_large_read[addr - pa], size); if ( ram_page ) put_page(ram_page); return X86EMUL_OKAY; } } } switch ( vio->io_state ) { case HVMIO_none: break; case HVMIO_completed: vio->io_state = HVMIO_none; if ( p_data == NULL ) { if ( ram_page ) put_page(ram_page); return X86EMUL_UNHANDLEABLE; } goto finish_access; case HVMIO_dispatched: /* May have to wait for previous cycle of a multi-write to complete. */ if ( is_mmio && !p.data_is_ptr && (dir == IOREQ_WRITE) && (addr == (vio->mmio_large_write_pa + vio->mmio_large_write_bytes)) ) { if ( ram_page ) put_page(ram_page); return X86EMUL_RETRY; } default: if ( ram_page ) put_page(ram_page); return X86EMUL_UNHANDLEABLE; } if ( hvm_io_pending(curr) ) { gdprintk(XENLOG_WARNING, "WARNING: io already pending?\n"); if ( ram_page ) put_page(ram_page); return X86EMUL_UNHANDLEABLE; } vio->io_state = (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; vio->io_size = size; /* * When retrying a repeated string instruction, force exit to guest after * completion of the retried iteration to allow handling of interrupts. */ if ( vio->mmio_retrying ) *reps = 1; p.count = *reps; if ( dir == IOREQ_WRITE ) hvmtrace_io_assist(is_mmio, &p); if ( is_mmio ) { rc = hvm_mmio_intercept(&p); if ( rc == X86EMUL_UNHANDLEABLE ) rc = hvm_buffered_io_intercept(&p); } else { rc = hvm_portio_intercept(&p); } switch ( rc ) { case X86EMUL_OKAY: case X86EMUL_RETRY: *reps = p.count; p.state = STATE_IORESP_READY; if ( !vio->mmio_retry ) { hvm_io_assist(&p); vio->io_state = HVMIO_none; } else /* Defer hvm_io_assist() invocation to hvm_do_resume(). */ vio->io_state = HVMIO_handle_mmio_awaiting_completion; break; case X86EMUL_UNHANDLEABLE: /* If there is no backing DM, just ignore accesses */ if ( !hvm_has_dm(curr->domain) ) { rc = X86EMUL_OKAY; vio->io_state = HVMIO_none; } else { rc = X86EMUL_RETRY; if ( !hvm_send_assist_req(&p) ) vio->io_state = HVMIO_none; else if ( p_data == NULL ) rc = X86EMUL_OKAY; } break; default: BUG(); } if ( rc != X86EMUL_OKAY ) { if ( ram_page ) put_page(ram_page); return rc; } finish_access: if ( dir == IOREQ_READ ) hvmtrace_io_assist(is_mmio, &p); if ( p_data != NULL ) memcpy(p_data, &vio->io_data, size); if ( is_mmio && !p.data_is_ptr ) { /* Part of a multi-cycle read or write? */ if ( dir == IOREQ_WRITE ) { paddr_t pa = vio->mmio_large_write_pa; unsigned int bytes = vio->mmio_large_write_bytes; if ( bytes == 0 ) pa = vio->mmio_large_write_pa = addr; if ( addr == (pa + bytes) ) vio->mmio_large_write_bytes += size; } else { paddr_t pa = vio->mmio_large_read_pa; unsigned int bytes = vio->mmio_large_read_bytes; if ( bytes == 0 ) pa = vio->mmio_large_read_pa = addr; if ( (addr == (pa + bytes)) && ((bytes + size) <= sizeof(vio->mmio_large_read)) ) { memcpy(&vio->mmio_large_read[bytes], p_data, size); vio->mmio_large_read_bytes += size; } } } if ( ram_page ) put_page(ram_page); return X86EMUL_OKAY; } int hvmemul_do_pio( unsigned long port, unsigned long *reps, int size, paddr_t ram_gpa, int dir, int df, void *p_data) { return hvmemul_do_io(0, port, reps, size, ram_gpa, dir, df, p_data); } static int hvmemul_do_mmio( paddr_t gpa, unsigned long *reps, int size, paddr_t ram_gpa, int dir, int df, void *p_data) { return hvmemul_do_io(1, gpa, reps, size, ram_gpa, dir, df, p_data); } /* * Convert addr from linear to physical form, valid over the range * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to * the valid computed range. It is always >0 when X86EMUL_OKAY is returned. * @pfec indicates the access checks to be performed during page-table walks. */ static int hvmemul_linear_to_phys( unsigned long addr, paddr_t *paddr, unsigned int bytes_per_rep, unsigned long *reps, uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt) { struct vcpu *curr = current; unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK; int reverse; /* * Clip repetitions to a sensible maximum. This avoids extensive looping in * this function while still amortising the cost of I/O trap-and-emulate. */ *reps = min_t(unsigned long, *reps, 4096); /* With no paging it's easy: linear == physical. */ if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) ) { *paddr = addr; return X86EMUL_OKAY; } /* Reverse mode if this is a backwards multi-iteration string operation. */ reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1); if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) ) { /* Do page-straddling first iteration forwards via recursion. */ paddr_t _paddr; unsigned long one_rep = 1; int rc = hvmemul_linear_to_phys( addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt); if ( rc != X86EMUL_OKAY ) return rc; pfn = _paddr >> PAGE_SHIFT; } else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
static int hvmemul_do_io( int is_mmio, paddr_t addr, unsigned long *reps, int size, paddr_t ram_gpa, int dir, int df, void *p_data) { paddr_t value = ram_gpa; int value_is_ptr = (p_data == NULL); struct vcpu *curr = current; struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain); ioreq_t *p = get_ioreq(curr); unsigned long ram_gfn = paddr_to_pfn(ram_gpa); p2m_type_t p2mt; mfn_t ram_mfn; int rc; /* Check for paged out page */ ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0); if ( p2m_is_paging(p2mt) ) { p2m_mem_paging_populate(p2m, ram_gfn); return X86EMUL_RETRY; } if ( p2m_is_shared(p2mt) ) return X86EMUL_RETRY; /* * Weird-sized accesses have undefined behaviour: we discard writes * and read all-ones. */ if ( unlikely((size > sizeof(long)) || (size & (size - 1))) ) { gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size); ASSERT(p_data != NULL); /* cannot happen with a REP prefix */ if ( dir == IOREQ_READ ) memset(p_data, ~0, size); return X86EMUL_UNHANDLEABLE; } if ( (p_data != NULL) && (dir == IOREQ_WRITE) ) { memcpy(&value, p_data, size); p_data = NULL; } if ( is_mmio && !value_is_ptr ) { /* Part of a multi-cycle read or write? */ if ( dir == IOREQ_WRITE ) { paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa; unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes; if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) return X86EMUL_OKAY; } else { paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa; unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes; if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) ) { memcpy(p_data, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa], size); return X86EMUL_OKAY; } } } switch ( curr->arch.hvm_vcpu.io_state ) { case HVMIO_none: break; case HVMIO_completed: curr->arch.hvm_vcpu.io_state = HVMIO_none; if ( p_data == NULL ) return X86EMUL_UNHANDLEABLE; goto finish_access; case HVMIO_dispatched: /* May have to wait for previous cycle of a multi-write to complete. */ if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) && (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa + curr->arch.hvm_vcpu.mmio_large_write_bytes)) ) return X86EMUL_RETRY; default: return X86EMUL_UNHANDLEABLE; } if ( p->state != STATE_IOREQ_NONE ) { gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n", p->state); return X86EMUL_UNHANDLEABLE; } curr->arch.hvm_vcpu.io_state = (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; curr->arch.hvm_vcpu.io_size = size; p->dir = dir; p->data_is_ptr = value_is_ptr; p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; p->size = size; p->addr = addr; p->count = *reps; p->df = df; p->data = value; hvmtrace_io_assist(is_mmio, p); if ( is_mmio ) { rc = hvm_mmio_intercept(p); if ( rc == X86EMUL_UNHANDLEABLE ) rc = hvm_buffered_io_intercept(p); } else { rc = hvm_portio_intercept(p); } switch ( rc ) { case X86EMUL_OKAY: case X86EMUL_RETRY: *reps = p->count; p->state = STATE_IORESP_READY; hvm_io_assist(); curr->arch.hvm_vcpu.io_state = HVMIO_none; break; case X86EMUL_UNHANDLEABLE: rc = X86EMUL_RETRY; if ( !hvm_send_assist_req(curr) ) curr->arch.hvm_vcpu.io_state = HVMIO_none; else if ( p_data == NULL ) rc = X86EMUL_OKAY; break; default: BUG(); } if ( rc != X86EMUL_OKAY ) return rc; finish_access: if ( p_data != NULL ) memcpy(p_data, &curr->arch.hvm_vcpu.io_data, size); if ( is_mmio && !value_is_ptr ) { /* Part of a multi-cycle read or write? */ if ( dir == IOREQ_WRITE ) { paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa; unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes; if ( bytes == 0 ) pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr; if ( addr == (pa + bytes) ) curr->arch.hvm_vcpu.mmio_large_write_bytes += size; } else { paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa; unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes; if ( bytes == 0 ) pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr; if ( (addr == (pa + bytes)) && ((bytes + size) < sizeof(curr->arch.hvm_vcpu.mmio_large_read)) ) { memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa], p_data, size); curr->arch.hvm_vcpu.mmio_large_read_bytes += size; } } } return X86EMUL_OKAY; }
static mfn_t p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; l2_pgentry_t *l2e; l1_pgentry_t *l1e; unsigned long l1e_flags; p2m_type_t l1t; ASSERT(paging_mode_translate(p2m->domain)); /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ *t = p2m_mmio_dm; /* Not implemented except with EPT */ *a = p2m_access_rwx; if ( gfn > p2m->max_mapped_pfn ) /* This pfn is higher than the highest the p2m map currently holds */ return _mfn(INVALID_MFN); /* Use the fast path with the linear mapping if we can */ if ( p2m == p2m_get_hostp2m(current->domain) ) return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q, page_order); mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); #if CONFIG_PAGING_LEVELS >= 4 { l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); l4e += l4_table_offset(addr); if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) { unmap_domain_page(l4e); return _mfn(INVALID_MFN); } mfn = _mfn(l4e_get_pfn(*l4e)); unmap_domain_page(l4e); } #endif { l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); #if CONFIG_PAGING_LEVELS == 3 /* On PAE hosts the p2m has eight l3 entries, not four (see * shadow_set_p2m_entry()) so we can't use l3_table_offset. * Instead, just count the number of l3es from zero. It's safe * to do this because we already checked that the gfn is within * the bounds of the p2m. */ l3e += (addr >> L3_PAGETABLE_SHIFT); #else l3e += l3_table_offset(addr); #endif pod_retry_l3: if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) { if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); } else *t = p2m_populate_on_demand; } unmap_domain_page(l3e); return _mfn(INVALID_MFN); } else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) ) { mfn = _mfn(l3e_get_pfn(*l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); *t = p2m_flags_to_type(l3e_get_flags(*l3e)); unmap_domain_page(l3e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_1G; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l3e_get_pfn(*l3e)); unmap_domain_page(l3e); } l2e = map_domain_page(mfn_x(mfn)); l2e += l2_table_offset(addr); pod_retry_l2: if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) { /* PoD: Try to populate a 2-meg chunk */ if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; } else *t = p2m_populate_on_demand; } unmap_domain_page(l2e); return _mfn(INVALID_MFN); } else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) { mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); *t = p2m_flags_to_type(l2e_get_flags(*l2e)); unmap_domain_page(l2e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_2M; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); l1e = map_domain_page(mfn_x(mfn)); l1e += l1_table_offset(addr); pod_retry_l1: l1e_flags = l1e_get_flags(*l1e); l1t = p2m_flags_to_type(l1e_flags); if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) ) { /* PoD: Try to populate */ if ( l1t == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; } else *t = p2m_populate_on_demand; } unmap_domain_page(l1e); return _mfn(INVALID_MFN); } mfn = _mfn(l1e_get_pfn(*l1e)); *t = l1t; unmap_domain_page(l1e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t)); if ( page_order ) *page_order = PAGE_ORDER_4K; return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN); }
/* Read the current domain's p2m table (through the linear mapping). */ static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn = _mfn(INVALID_MFN); p2m_type_t p2mt = p2m_mmio_dm; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ l1_pgentry_t l1e = l1e_empty(), *p2m_entry; l2_pgentry_t l2e = l2e_empty(); int ret; #if CONFIG_PAGING_LEVELS >= 4 l3_pgentry_t l3e = l3e_empty(); #endif ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); #if CONFIG_PAGING_LEVELS >= 4 /* * Read & process L3 */ p2m_entry = (l1_pgentry_t *) &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START) + l3_linear_offset(addr)]; pod_retry_l3: ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e)); if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) { if ( (l3e_get_flags(l3e) & _PAGE_PSE) && (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) ) { /* The read has succeeded, so we know that mapping exists */ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; p2mt = p2m_invalid; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } goto pod_retry_l2; } if ( l3e_get_flags(l3e) & _PAGE_PSE ) { p2mt = p2m_flags_to_type(l3e_get_flags(l3e)); ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt)); if (p2m_is_valid(p2mt) ) mfn = _mfn(l3e_get_pfn(l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); else p2mt = p2m_mmio_dm; if ( page_order ) *page_order = PAGE_ORDER_1G; goto out; } #endif /* * Read & process L2 */ p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)]; pod_retry_l2: ret = __copy_from_user(&l2e, p2m_entry, sizeof(l2e)); if ( ret != 0 || !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) { if( (l2e_get_flags(l2e) & _PAGE_PSE) && ( p2m_flags_to_type(l2e_get_flags(l2e)) == p2m_populate_on_demand ) ) { /* The read has succeeded, so we know that the mapping * exits at this point. */ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; /* Allocate failed. */ p2mt = p2m_invalid; printk("%s: Allocate failed!\n", __func__); goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } goto pod_retry_l1; } if (l2e_get_flags(l2e) & _PAGE_PSE) { p2mt = p2m_flags_to_type(l2e_get_flags(l2e)); ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt)); if ( p2m_is_valid(p2mt) ) mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr)); else p2mt = p2m_mmio_dm; if ( page_order ) *page_order = PAGE_ORDER_2M; goto out; } /* * Read and process L1 */ /* Need to __copy_from_user because the p2m is sparse and this * part might not exist */ pod_retry_l1: p2m_entry = &phys_to_machine_mapping[gfn]; ret = __copy_from_user(&l1e, p2m_entry, sizeof(l1e)); if ( ret == 0 ) { unsigned long l1e_mfn = l1e_get_pfn(l1e); p2mt = p2m_flags_to_type(l1e_get_flags(l1e)); ASSERT( mfn_valid(_mfn(l1e_mfn)) || !p2m_is_ram(p2mt) || p2m_is_paging(p2mt) ); if ( p2mt == p2m_populate_on_demand ) { /* The read has succeeded, so we know that the mapping * exits at this point. */ if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; /* Allocate failed. */ p2mt = p2m_invalid; goto out; } else { p2mt = p2m_populate_on_demand; goto out; } } if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) ) mfn = _mfn(l1e_mfn); else /* XXX see above */ p2mt = p2m_mmio_dm; } if ( page_order ) *page_order = PAGE_ORDER_4K; out: *t = p2mt; return mfn; }
// Returns 0 on error (out of memory) static int p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); void *table =map_domain_page(mfn_x(table_mfn)); unsigned long i, gfn_remainder = gfn; l1_pgentry_t *p2m_entry; l1_pgentry_t entry_content; l2_pgentry_t l2e_content; l3_pgentry_t l3e_content; int rv=0; unsigned int iommu_pte_flags = (p2mt == p2m_ram_rw) ? IOMMUF_readable|IOMMUF_writable: 0; unsigned long old_mfn = 0; if ( tb_init_done ) { struct { u64 gfn, mfn; int p2mt; int d:16,order:16; } t; t.gfn = gfn; t.mfn = mfn_x(mfn); t.p2mt = p2mt; t.d = p2m->domain->domain_id; t.order = page_order; __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t); } #if CONFIG_PAGING_LEVELS >= 4 if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L4_PAGETABLE_SHIFT - PAGE_SHIFT, L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) goto out; #endif /* * Try to allocate 1GB page table if this feature is supported. */ if ( page_order == PAGE_ORDER_1G ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, L3_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } /* * When using PAE Xen, we only allow 33 bits of pseudo-physical * address in translated guests (i.e. 8 GBytes). This restriction * comes from wanting to map the P2M table into the 16MB RO_MPT hole * in Xen's address space for translated PV guests. * When using AMD's NPT on PAE Xen, we are restricted to 4GB. */ else if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, ((CONFIG_PAGING_LEVELS == 3) ? (hap_enabled(p2m->domain) ? 4 : 8) : L3_PAGETABLE_ENTRIES), PGT_l2_page_table) ) goto out; if ( page_order == PAGE_ORDER_4K ) { if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) goto out; p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 0, L1_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) || p2m_is_paging(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn)); else entry_content = l1e_empty(); if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } /* level 1 entry */ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ } else if ( page_order == PAGE_ORDER_2M ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES); ASSERT(p2m_entry); /* FIXME: Deal with 4k replaced by 2meg pages */ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_is_magic(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE); else l2e_content = l2e_empty(); entry_content.l1 = l2e_content.l2; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } /* Track the highest gfn for which we have ever had a valid mapping */ if ( p2mt != p2m_invalid && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) ) p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1; if ( iommu_enabled && need_iommu(p2m->domain) ) { if ( iommu_hap_pt_share ) { if ( old_mfn && (old_mfn != mfn_x(mfn)) ) amd_iommu_flush_pages(p2m->domain, gfn, page_order); } else { if ( p2mt == p2m_ram_rw ) for ( i = 0; i < (1UL << page_order); i++ ) iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i, IOMMUF_readable|IOMMUF_writable); else for ( int i = 0; i < (1UL << page_order); i++ ) iommu_unmap_page(p2m->domain, gfn+i); } } /* Success */ rv = 1; out: unmap_domain_page(table); return rv; }
unsigned long hap_p2m_ga_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3, paddr_t ga, uint32_t *pfec, unsigned int *page_order) { uint32_t missing; mfn_t top_mfn; void *top_map; p2m_type_t p2mt; walk_t gw; unsigned long top_gfn; struct page_info *top_page; /* Get the top-level table's MFN */ top_gfn = cr3 >> PAGE_SHIFT; top_page = get_page_from_gfn_p2m(p2m->domain, p2m, top_gfn, &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; if ( top_page ) put_page(top_page); p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; if ( top_page ) put_page(top_page); return gfn_x(INVALID_GFN); } if ( !top_page ) { pfec[0] &= ~PFEC_page_present; goto out_tweak_pfec; } top_mfn = _mfn(page_to_mfn(top_page)); /* Map the top-level table and call the tree-walker */ ASSERT(mfn_valid(top_mfn)); top_map = map_domain_page(top_mfn); #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif missing = guest_walk_tables(v, p2m, ga, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); put_page(top_page); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_walk_to_gfn(&gw); struct page_info *page; page = get_page_from_gfn_p2m(p2m->domain, p2m, gfn_x(gfn), &p2mt, NULL, P2M_ALLOC | P2M_UNSHARE); if ( page ) put_page(page); if ( p2m_is_paging(p2mt) ) { ASSERT(p2m_is_hostp2m(p2m)); pfec[0] = PFEC_page_paged; p2m_mem_paging_populate(p2m->domain, gfn_x(gfn)); return gfn_x(INVALID_GFN); } if ( p2m_is_shared(p2mt) ) { pfec[0] = PFEC_page_shared; return gfn_x(INVALID_GFN); } if ( page_order ) *page_order = guest_walk_to_page_order(&gw); return gfn_x(gfn); } if ( missing & _PAGE_PRESENT ) pfec[0] &= ~PFEC_page_present; if ( missing & _PAGE_INVALID_BITS ) pfec[0] |= PFEC_reserved_bit; if ( missing & _PAGE_PKEY_BITS ) pfec[0] |= PFEC_prot_key; if ( missing & _PAGE_PAGED ) pfec[0] = PFEC_page_paged; if ( missing & _PAGE_SHARED ) pfec[0] = PFEC_page_shared; out_tweak_pfec: /* * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS: * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled. */ if ( !hvm_nx_enabled(v) && !hvm_smep_enabled(v) ) pfec[0] &= ~PFEC_insn_fetch; return gfn_x(INVALID_GFN); }
static mfn_t p2m_pt_get_entry(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a, p2m_query_t q, unsigned int *page_order) { mfn_t mfn; paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; l2_pgentry_t *l2e; l1_pgentry_t *l1e; unsigned long l1e_flags; p2m_type_t l1t; ASSERT(paging_mode_translate(p2m->domain)); /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ *t = p2m_mmio_dm; /* Not implemented except with EPT */ *a = p2m_access_rwx; if ( gfn > p2m->max_mapped_pfn ) /* This pfn is higher than the highest the p2m map currently holds */ return _mfn(INVALID_MFN); mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); { l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); l4e += l4_table_offset(addr); if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) { unmap_domain_page(l4e); return _mfn(INVALID_MFN); } mfn = _mfn(l4e_get_pfn(*l4e)); unmap_domain_page(l4e); } { l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); l3e += l3_table_offset(addr); pod_retry_l3: if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) { if ( p2m_flags_to_type(l3e_get_flags(*l3e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) ) goto pod_retry_l3; gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__); } else *t = p2m_populate_on_demand; } unmap_domain_page(l3e); return _mfn(INVALID_MFN); } else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) ) { mfn = _mfn(l3e_get_pfn(*l3e) + l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + l1_table_offset(addr)); *t = p2m_flags_to_type(l3e_get_flags(*l3e)); unmap_domain_page(l3e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_1G; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l3e_get_pfn(*l3e)); unmap_domain_page(l3e); } l2e = map_domain_page(mfn_x(mfn)); l2e += l2_table_offset(addr); pod_retry_l2: if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) { /* PoD: Try to populate a 2-meg chunk */ if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_2M, q) ) goto pod_retry_l2; } else *t = p2m_populate_on_demand; } unmap_domain_page(l2e); return _mfn(INVALID_MFN); } else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) { mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); *t = p2m_flags_to_type(l2e_get_flags(*l2e)); unmap_domain_page(l2e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); if ( page_order ) *page_order = PAGE_ORDER_2M; return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); } mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); l1e = map_domain_page(mfn_x(mfn)); l1e += l1_table_offset(addr); pod_retry_l1: l1e_flags = l1e_get_flags(*l1e); l1t = p2m_flags_to_type(l1e_flags); if ( ((l1e_flags & _PAGE_PRESENT) == 0) && (!p2m_is_paging(l1t)) ) { /* PoD: Try to populate */ if ( l1t == p2m_populate_on_demand ) { if ( q & P2M_ALLOC ) { if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) ) goto pod_retry_l1; } else *t = p2m_populate_on_demand; } unmap_domain_page(l1e); return _mfn(INVALID_MFN); } mfn = _mfn(l1e_get_pfn(*l1e)); *t = l1t; unmap_domain_page(l1e); ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t)); if ( page_order ) *page_order = PAGE_ORDER_4K; return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN); }
/* Returns: 0 for success, -errno for failure */ static int p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) { /* XXX -- this might be able to be faster iff current->domain == d */ mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); void *table = map_domain_page(mfn_x(table_mfn)); unsigned long i, gfn_remainder = gfn; l1_pgentry_t *p2m_entry; l1_pgentry_t entry_content; l2_pgentry_t l2e_content; l3_pgentry_t l3e_content; int rc; unsigned int iommu_pte_flags = (p2mt == p2m_ram_rw) ? IOMMUF_readable|IOMMUF_writable: 0; unsigned long old_mfn = 0; if ( tb_init_done ) { struct { u64 gfn, mfn; int p2mt; int d:16,order:16; } t; t.gfn = gfn; t.mfn = mfn_x(mfn); t.p2mt = p2mt; t.d = p2m->domain->domain_id; t.order = page_order; __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t); } rc = p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L4_PAGETABLE_SHIFT - PAGE_SHIFT, L4_PAGETABLE_ENTRIES, PGT_l3_page_table); if ( rc ) goto out; /* * Try to allocate 1GB page table if this feature is supported. */ if ( page_order == PAGE_ORDER_1G ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, L3_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 3); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } else { rc = p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, L3_PAGETABLE_ENTRIES, PGT_l2_page_table); if ( rc ) goto out; } if ( page_order == PAGE_ORDER_4K ) { rc = p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES, PGT_l1_page_table); if ( rc ) goto out; p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 0, L1_PAGETABLE_ENTRIES); ASSERT(p2m_entry); if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) || p2m_is_paging(p2mt) ) entry_content = p2m_l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn)); else entry_content = l1e_empty(); if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } /* level 1 entry */ p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 1); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ } else if ( page_order == PAGE_ORDER_2M ) { l1_pgentry_t old_entry = l1e_empty(); p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES); ASSERT(p2m_entry); /* FIXME: Deal with 4k replaced by 2meg pages */ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { /* We're replacing a non-SP page with a superpage. Make sure to * handle freeing the table properly. */ old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_is_pod(p2mt) ) l2e_content = l2e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE); else l2e_content = l2e_empty(); entry_content.l1 = l2e_content.l2; if ( entry_content.l1 != 0 ) { p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags); old_mfn = l1e_get_pfn(*p2m_entry); } p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn, entry_content, 2); /* NB: paging_write_p2m_entry() handles tlb flushes properly */ /* Free old intermediate tables if necessary */ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) p2m_free_entry(p2m, &old_entry, page_order); } /* Track the highest gfn for which we have ever had a valid mapping */ if ( p2mt != p2m_invalid && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) ) p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1; if ( iommu_enabled && need_iommu(p2m->domain) ) { if ( iommu_hap_pt_share ) { if ( old_mfn && (old_mfn != mfn_x(mfn)) ) amd_iommu_flush_pages(p2m->domain, gfn, page_order); } else { if ( p2mt == p2m_ram_rw ) for ( i = 0; i < (1UL << page_order); i++ ) iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i, IOMMUF_readable|IOMMUF_writable); else for ( int i = 0; i < (1UL << page_order); i++ ) iommu_unmap_page(p2m->domain, gfn+i); } } out: unmap_domain_page(table); return rc; }
int guest_remove_page(struct domain *d, unsigned long gmfn) { struct page_info *page; #ifdef CONFIG_X86 p2m_type_t p2mt; #endif unsigned long mfn; #ifdef CONFIG_X86 mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt)); if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); put_gfn(d, gmfn); /* If the page hasn't yet been paged out, there is an * actual page that needs to be released. */ if ( p2mt == p2m_ram_paging_out ) { ASSERT(mfn_valid(mfn)); page = mfn_to_page(mfn); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); } p2m_mem_paging_drop_page(d, gmfn, p2mt); return 1; } #else mfn = gmfn_to_mfn(d, gmfn); #endif if ( unlikely(!mfn_valid(mfn)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n", d->domain_id, gmfn); return 0; } #ifdef CONFIG_X86_64 if ( p2m_is_shared(p2mt) ) { /* Unshare the page, bail out on error. We unshare because * we might be the only one using this shared page, and we * need to trigger proper cleanup. Once done, this is * like any other page. */ if ( mem_sharing_unshare_page(d, gmfn, 0) ) { put_gfn(d, gmfn); (void)mem_sharing_notify_enomem(d, gmfn, 0); return 0; } /* Maybe the mfn changed */ mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt)); ASSERT(!p2m_is_shared(p2mt)); } #endif /* CONFIG_X86_64 */ page = mfn_to_page(mfn); if ( unlikely(!get_page(page, d)) ) { put_gfn(d, gmfn); gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id); return 0; } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) put_page_and_type(page); if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); put_gfn(d, gmfn); return 1; }
int guest_physmap_add_entry(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int page_order, p2m_type_t t) { struct p2m_domain *p2m = p2m_get_hostp2m(d); unsigned long i, ogfn; p2m_type_t ot; p2m_access_t a; mfn_t omfn; int pod_count = 0; int rc = 0; if ( !paging_mode_translate(d) ) { if ( need_iommu(d) && t == p2m_ram_rw ) { for ( i = 0; i < (1 << page_order); i++ ) { rc = iommu_map_page( d, mfn + i, mfn + i, IOMMUF_readable|IOMMUF_writable); if ( rc != 0 ) { while ( i-- > 0 ) iommu_unmap_page(d, mfn + i); return rc; } } } return 0; } p2m_lock(p2m); P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); /* First, remove m->p mappings for existing p->m mappings */ for ( i = 0; i < (1UL << page_order); i++ ) { omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL); if ( p2m_is_shared(ot) ) { /* Do an unshare to cleanly take care of all corner * cases. */ int rc; rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0); if ( rc ) { p2m_unlock(p2m); /* NOTE: Should a guest domain bring this upon itself, * there is not a whole lot we can do. We are buried * deep in locks from most code paths by now. So, fail * the call and don't try to sleep on a wait queue * while placing the mem event. * * However, all current (changeset 3432abcf9380) code * paths avoid this unsavoury situation. For now. * * Foreign domains are okay to place an event as they * won't go to sleep. */ (void)mem_sharing_notify_enomem(p2m->domain, gfn + i, 0); return rc; } omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, 0, NULL); ASSERT(!p2m_is_shared(ot)); } if ( p2m_is_grant(ot) ) { /* Really shouldn't be unmapping grant maps this way */ domain_crash(d); p2m_unlock(p2m); return -EINVAL; } else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) ) { ASSERT(mfn_valid(omfn)); set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } else if ( ot == p2m_populate_on_demand ) { /* Count how man PoD entries we'll be replacing if successful */ pod_count++; } else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) ) { /* We're plugging a hole in the physmap where a paged out page was */ atomic_dec(&d->paged_pages); } } /* Then, look for m->p mappings for this range and deal with them */ for ( i = 0; i < (1UL << page_order); i++ ) { if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) == dom_cow ) { /* This is no way to add a shared page to your physmap! */ gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom %hu " "physmap not allowed.\n", mfn+i, d->domain_id); p2m_unlock(p2m); return -EINVAL; } if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d ) continue; ogfn = mfn_to_gfn(d, _mfn(mfn+i)); if ( (ogfn != INVALID_M2P_ENTRY) && (ogfn != gfn + i) ) { /* This machine frame is already mapped at another physical * address */ P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", mfn + i, ogfn, gfn + i); omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL); if ( p2m_is_ram(ot) && !p2m_is_paged(ot) ) { ASSERT(mfn_valid(omfn)); P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", ogfn , mfn_x(omfn)); if ( mfn_x(omfn) == (mfn + i) ) p2m_remove_page(p2m, ogfn, mfn + i, 0); } } } /* Now, actually do the two-way mapping */ if ( mfn_valid(_mfn(mfn)) ) { if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t, p2m->default_access) ) { rc = -EINVAL; goto out; /* Failed to update p2m, bail without updating m2p. */ } if ( !p2m_is_grant(t) ) { for ( i = 0; i < (1UL << page_order); i++ ) set_gpfn_from_mfn(mfn+i, gfn+i); } } else { gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n", gfn, mfn); if ( !set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid, p2m->default_access) ) rc = -EINVAL; else { pod_lock(p2m); p2m->pod.entry_count -= pod_count; BUG_ON(p2m->pod.entry_count < 0); pod_unlock(p2m); } } out: p2m_unlock(p2m); return rc; }