/* Allocate a new page table page and hook it in via the given entry */
static int p2m_create_table(struct domain *d, lpae_t *entry)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    void *p;
    lpae_t pte;

    BUG_ON(entry->p2m.valid);

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);
    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM);

    write_pte(entry, pte);

    return 0;
}
/* Map the physical memory range start - start + len into virtual
 * memory and return the virtual address of the mapping.
 * start has to be 2MB aligned.
 * len has to be < EARLY_VMAP_VIRT_END - EARLY_VMAP_VIRT_START.
 */
void* __init early_ioremap(paddr_t start, size_t len, unsigned attributes)
{
    static unsigned long virt_start = EARLY_VMAP_VIRT_START;
    unsigned long ret_addr = virt_start;
    paddr_t end = start + len;

    ASSERT(!(start & (~SECOND_MASK)));
    ASSERT(!(virt_start & (~SECOND_MASK)));

    /* The range we need to map is too big */
    if ( virt_start + len >= EARLY_VMAP_VIRT_END )
        return NULL;

    while ( start < end )
    {
        lpae_t e = mfn_to_xen_entry(start >> PAGE_SHIFT);
        e.pt.ai = attributes;
        write_pte(xen_second + second_table_offset(virt_start), e);

        start += SECOND_SIZE;
        virt_start += SECOND_SIZE;
    }
    flush_xen_data_tlb_range_va(ret_addr, len);

    return (void*)ret_addr;
}
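A minimal usage sketch, not part of the original source: mapping a block of device memory during early boot, before the normal ioremap machinery is available. The physical base address, the MB(2) length and the DEV_SHARED attribute index are illustrative assumptions.

/* Illustration only: the base address and DEV_SHARED attribute index are assumed. */
static void __init example_early_device_map(void)
{
    /* 2MB-aligned physical base, as early_ioremap requires */
    void *regs = early_ioremap(0x1c000000, MB(2), DEV_SHARED);

    if ( regs == NULL )
        return; /* early VA window exhausted */

    /* ... access the device registers through regs ... */
}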
/* Create Xen's mappings of memory.
 * Mapping_size must be either 2MB or 32MB.
 * Base and virt must be mapping_size aligned.
 * Size must be a multiple of mapping_size.
 * second must be a contiguous set of second level page tables
 * covering the region starting at virt_offset. */
static void __init create_mappings(lpae_t *second,
                                   unsigned long virt_offset,
                                   unsigned long base_mfn,
                                   unsigned long nr_mfns,
                                   unsigned int mapping_size)
{
    unsigned long i, count;
    const unsigned long granularity = mapping_size >> PAGE_SHIFT;
    lpae_t pte, *p;

    ASSERT((mapping_size == MB(2)) || (mapping_size == MB(32)));
    ASSERT(!((virt_offset >> PAGE_SHIFT) % granularity));
    ASSERT(!(base_mfn % granularity));
    ASSERT(!(nr_mfns % granularity));

    count = nr_mfns / LPAE_ENTRIES;
    p = second + second_linear_offset(virt_offset);
    pte = mfn_to_xen_entry(base_mfn, WRITEALLOC);
    if ( granularity == 16 * LPAE_ENTRIES )
        pte.pt.contig = 1;  /* These maps are in 16-entry contiguous chunks. */
    for ( i = 0; i < count; i++ )
    {
        write_pte(p + i, pte);
        pte.pt.base += 1 << LPAE_SHIFT;
    }
    flush_xen_data_tlb_local();
}
/* Map a 4k page in a fixmap entry */
void set_fixmap(unsigned map, unsigned long mfn, unsigned attributes)
{
    lpae_t pte = mfn_to_xen_entry(mfn, attributes);
    pte.pt.table = 1; /* 4k mappings always have this bit set */
    pte.pt.xn = 1;
    write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
}
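A hedged usage sketch of the fixmap pair, assuming FIXMAP_CONSOLE and DEV_SHARED are defined as in contemporaneous Xen/ARM headers; this caller is not part of the original excerpt.

/* Illustration only: FIXMAP_CONSOLE and DEV_SHARED are assumed definitions. */
static void example_fixmap_console(paddr_t uart_paddr)
{
    set_fixmap(FIXMAP_CONSOLE, uart_paddr >> PAGE_SHIFT, DEV_SHARED);

    /* ... MMIO accesses via FIXMAP_ADDR(FIXMAP_CONSOLE) + page offset ... */

    clear_fixmap(FIXMAP_CONSOLE);
}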
/*
 * Allocate a new page table page and hook it in via the given entry.
 * apply_one_level relies on this returning 0 on success
 * and -ve on failure.
 *
 * If the existing entry is present then it must be a mapping and not
 * a table and it will be shattered into the next level down.
 *
 * level_shift is the number of bits at the level we want to create.
 */
static int p2m_create_table(struct domain *d, lpae_t *entry,
                            int level_shift, bool_t flush_cache)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    lpae_t *p;
    lpae_t pte;
    int splitting = p2m_valid(*entry);

    BUG_ON(p2m_table(*entry));

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    if ( splitting )
    {
        p2m_type_t t = entry->p2m.type;
        unsigned long base_pfn = entry->p2m.base;
        int i;

        /*
         * We are either splitting a first level 1G page into 512 second level
         * 2M pages, or a second level 2M page into 512 third level 4K pages.
         */
        for ( i=0 ; i < LPAE_ENTRIES; i++ )
        {
            pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)),
                                   MATTR_MEM, t, p2m->default_access);

            /*
             * First and second level super pages set p2m.table = 0, but
             * third level entries set table = 1.
             */
            if ( level_shift - LPAE_SHIFT )
                pte.p2m.table = 0;

            write_pte(&p[i], pte);
        }
    }
    else
        clear_page(p);

    if ( flush_cache )
        clean_dcache_va_range(p, PAGE_SIZE);

    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid,
                           p2m->default_access);

    p2m_write_pte(entry, pte, flush_cache);

    return 0;
}
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
                               p2m_type_t t, p2m_access_t a)
{
    paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT;
    /* sh, xn and write bit will be defined in the following switches
     * based on mattr and t. */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.read = 1,
        .p2m.mattr = mattr,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch (mattr)
    {
    case MATTR_MEM:
        e.p2m.sh = LPAE_SH_INNER;
        break;

    case MATTR_DEV:
        e.p2m.sh = LPAE_SH_OUTER;
        break;
    default:
        BUG();
        break;
    }

    p2m_set_permission(&e, t, a);

    ASSERT(!(pa & ~PAGE_MASK));
    ASSERT(!(pa & ~PADDR_MASK));

    e.bits |= pa;

    return e;
}

static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool_t flush_cache)
{
    write_pte(p, pte);
    if ( flush_cache )
        clean_dcache(*p);
}
static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
{
    lpae_t pte;
    int i;

    ASSERT(is_kernel(p) && is_kernel(p + l));
    /* Can only guard in page granularity */
    ASSERT(!((unsigned long) p & ~PAGE_MASK));
    ASSERT(!(l & ~PAGE_MASK));

    for ( i = (p - _start) / PAGE_SIZE;
          i < (p + l - _start) / PAGE_SIZE;
          i++ )
    {
        pte = xen_xenmap[i];
        switch ( mg )
        {
        case mg_clear:
            pte.pt.valid = 0;
            break;
        case mg_ro:
            pte.pt.valid = 1;
            pte.pt.pxn = 1;
            pte.pt.xn = 1;
            pte.pt.ro = 1;
            break;
        case mg_rw:
            pte.pt.valid = 1;
            pte.pt.pxn = 1;
            pte.pt.xn = 1;
            pte.pt.ro = 0;
            break;
        case mg_rx:
            pte.pt.valid = 1;
            pte.pt.pxn = 0;
            pte.pt.xn = 0;
            pte.pt.ro = 1;
            break;
        }
        write_pte(xen_xenmap + i, pte);
    }
    flush_xen_text_tlb();
}
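For context, a caller might use this helper along the lines sketched below to scrub and unmap the init section once boot completes; the __init_begin/__init_end linker symbols and the overall flow are assumptions here, not the original free_init_memory() code.

/* Hedged sketch; __init_begin/__init_end and the flow are assumptions. */
static void example_discard_init_section(void)
{
    unsigned long len = __init_end - __init_begin;

    set_pte_flags_on_range(__init_begin, len, mg_rw);    /* make it writable */
    memset(__init_begin, 0, len);                        /* scrub the contents */
    set_pte_flags_on_range(__init_begin, len, mg_clear); /* then unmap it */
}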
/* Create Xen's mappings of memory.
 * Base and virt must be 32MB aligned and size a multiple of 32MB. */
static void __init create_mappings(unsigned long virt,
                                   unsigned long base_mfn,
                                   unsigned long nr_mfns)
{
    unsigned long i, count;
    lpae_t pte, *p;

    ASSERT(!((virt >> PAGE_SHIFT) % (16 * LPAE_ENTRIES)));
    ASSERT(!(base_mfn % (16 * LPAE_ENTRIES)));
    ASSERT(!(nr_mfns % (16 * LPAE_ENTRIES)));

    count = nr_mfns / LPAE_ENTRIES;
    p = xen_second + second_linear_offset(virt);
    pte = mfn_to_xen_entry(base_mfn);
    pte.pt.hint = 1;  /* These maps are in 16-entry contiguous chunks. */
    for ( i = 0; i < count; i++ )
    {
        write_pte(p + i, pte);
        pte.pt.base += 1 << LPAE_SHIFT;
    }
    flush_xen_data_tlb();
}
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
                               p2m_type_t t)
{
    paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT;
    /* xn and write bit will be defined in the switch */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.sh = LPAE_SH_OUTER,
        .p2m.read = 1,
        .p2m.mattr = mattr,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch (t)
    {
    case p2m_ram_rw:
        e.p2m.xn = 0;
        e.p2m.write = 1;
        break;

    case p2m_ram_ro:
        e.p2m.xn = 0;
        e.p2m.write = 0;
        break;

    case p2m_map_foreign:
    case p2m_grant_map_rw:
    case p2m_mmio_direct:
        e.p2m.xn = 1;
        e.p2m.write = 1;
        break;

    case p2m_grant_map_ro:
    case p2m_invalid:
        e.p2m.xn = 1;
        e.p2m.write = 0;
        break;

    case p2m_max_real_type:
        BUG();
        break;
    }

    ASSERT(!(pa & ~PAGE_MASK));
    ASSERT(!(pa & ~PADDR_MASK));

    e.bits |= pa;

    return e;
}

/* Allocate a new page table page and hook it in via the given entry */
static int p2m_create_table(struct domain *d, lpae_t *entry)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    void *p;
    lpae_t pte;

    BUG_ON(entry->p2m.valid);

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);
    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

    write_pte(entry, pte);

    return 0;
}

enum p2m_operation {
    INSERT,
    ALLOCATE,
    REMOVE,
    RELINQUISH,
    CACHEFLUSH,
};

static int apply_p2m_changes(struct domain *d,
                             enum p2m_operation op,
                             paddr_t start_gpaddr,
                             paddr_t end_gpaddr,
                             paddr_t maddr,
                             int mattr,
                             p2m_type_t t)
{
    int rc;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_page = ~0,
                  cur_first_offset = ~0,
                  cur_second_offset = ~0;
    unsigned long count = 0;
    unsigned int flush = 0;
    bool_t populate = (op == INSERT || op == ALLOCATE);
    lpae_t pte;

    spin_lock(&p2m->lock);

    if ( d != current->domain )
        p2m_load_VTTBR(d);

    addr = start_gpaddr;
    while ( addr < end_gpaddr )
    {
        if ( cur_first_page != p2m_first_level_index(addr) )
        {
            if ( first ) unmap_domain_page(first);
            first = p2m_map_first(p2m, addr);
            if ( !first )
            {
                rc = -EINVAL;
                goto out;
            }
            cur_first_page = p2m_first_level_index(addr);
        }

        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + FIRST_SIZE) & FIRST_MASK;
                continue;
            }

            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + SECOND_SIZE) & SECOND_MASK;
                continue;
            }

            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        pte = third[third_table_offset(addr)];

        flush |= pte.p2m.valid;

        /* TODO: Handle other p2m type
         *
         * It's safe to do the put_page here because page_alloc will
         * flush the TLBs if the page is reallocated before the end of
         * this loop.
         */
        if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) )
        {
            unsigned long mfn = pte.p2m.base;

            ASSERT(mfn_valid(mfn));

            put_page(mfn_to_page(mfn));
        }

        /* Allocate a new RAM page and attach */
        switch (op)
        {
            case ALLOCATE:
                {
                    struct page_info *page;

                    ASSERT(!pte.p2m.valid);
                    rc = -ENOMEM;
                    page = alloc_domheap_page(d, 0);
                    if ( page == NULL )
                    {
                        printk("p2m_populate_ram: failed to allocate page\n");
                        goto out;
                    }

                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);

                    write_pte(&third[third_table_offset(addr)], pte);
                }
                break;
            case INSERT:
                {
                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
            case RELINQUISH:
            case REMOVE:
                {
                    if ( !pte.p2m.valid )
                    {
                        count++;
                        break;
                    }

                    count += 0x10;

                    memset(&pte, 0x00, sizeof(pte));
                    write_pte(&third[third_table_offset(addr)], pte);
                    count++;
                }
                break;

            case CACHEFLUSH:
                {
                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
                        break;

                    flush_page_to_ram(pte.p2m.base);
                }
                break;
        }

        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
        if ( op == RELINQUISH && count >= 0x2000 )
        {
            if ( hypercall_preempt_check() )
            {
                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
                rc = -EAGAIN;
                goto out;
            }
            count = 0;
        }

        /* Got the next page */
        addr += PAGE_SIZE;
    }

    if ( flush )
    {
        /* At the beginning of the function, Xen is updating VTTBR
         * with the domain where the mappings are created. In this
         * case it's only necessary to flush TLBs on every CPUs with
         * the current VMID (our domain).
         */
        flush_tlb();
    }

    if ( op == ALLOCATE || op == INSERT )
    {
        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
        unsigned long egfn = paddr_to_pfn(end_gpaddr);

        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
        p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn);
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    if ( d != current->domain )
        p2m_load_VTTBR(current->domain);

    spin_unlock(&p2m->lock);

    return rc;
}
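A hedged sketch of how a thin wrapper might drive apply_p2m_changes for a single RAM page. The wrapper name is hypothetical; only the apply_p2m_changes signature and the INSERT/MATTR_MEM/p2m_ram_rw arguments come from the code above.

/* Hypothetical wrapper; only the apply_p2m_changes() interface is from above. */
static int example_map_one_ram_page(struct domain *d, paddr_t gpaddr,
                                    paddr_t maddr)
{
    return apply_p2m_changes(d, INSERT, gpaddr, gpaddr + PAGE_SIZE,
                             maddr, MATTR_MEM, p2m_ram_rw);
}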
static int create_p2m_entries(struct domain *d,
                              enum p2m_operation op,
                              paddr_t start_gpaddr,
                              paddr_t end_gpaddr,
                              paddr_t maddr,
                              int mattr)
{
    int rc, flush;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_offset = ~0, cur_second_offset = ~0;

    spin_lock(&p2m->lock);

    /* XXX Don't actually handle 40 bit guest physical addresses */
    BUG_ON(start_gpaddr & 0x8000000000ULL);
    BUG_ON(end_gpaddr & 0x8000000000ULL);

    first = __map_domain_page(p2m->first_level);

    for(addr = start_gpaddr; addr < end_gpaddr; addr += PAGE_SIZE)
    {
        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        flush = third[third_table_offset(addr)].p2m.valid;

        /* Allocate a new RAM page and attach */
        switch (op)
        {
            case ALLOCATE:
                {
                    struct page_info *page;
                    lpae_t pte;

                    rc = -ENOMEM;
                    page = alloc_domheap_page(d, 0);
                    if ( page == NULL )
                    {
                        printk("p2m_populate_ram: failed to allocate page\n");
                        goto out;
                    }

                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr);

                    write_pte(&third[third_table_offset(addr)], pte);
                }
                break;
            case INSERT:
                {
                    lpae_t pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr);
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
            case REMOVE:
                {
                    lpae_t pte;
                    memset(&pte, 0x00, sizeof(pte));
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
        }

        if ( flush )
            flush_tlb_all_local();
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    spin_unlock(&p2m->lock);

    return rc;
}
/* Map a page of domheap memory */
void *map_domain_page(mfn_t mfn)
{
    unsigned long flags;
    lpae_t *map = this_cpu(xen_dommap);
    unsigned long slot_mfn = mfn_x(mfn) & ~LPAE_ENTRY_MASK;
    vaddr_t va;
    lpae_t pte;
    int i, slot;

    local_irq_save(flags);

    /* The map is laid out as an open-addressed hash table where each
     * entry is a 2MB superpage pte.  We use the available bits of each
     * PTE as a reference count; when the refcount is zero the slot can
     * be reused. */
    for ( slot = (slot_mfn >> LPAE_SHIFT) % DOMHEAP_ENTRIES, i = 0;
          i < DOMHEAP_ENTRIES;
          slot = (slot + 1) % DOMHEAP_ENTRIES, i++ )
    {
        if ( map[slot].pt.avail < 0xf &&
             map[slot].pt.base == slot_mfn &&
             map[slot].pt.valid )
        {
            /* This slot already points to the right place; reuse it */
            map[slot].pt.avail++;
            break;
        }
        else if ( map[slot].pt.avail == 0 )
        {
            /* Commandeer this 2MB slot */
            pte = mfn_to_xen_entry(slot_mfn, WRITEALLOC);
            pte.pt.avail = 1;
            write_pte(map + slot, pte);
            break;
        }
    }
    /* If the map fills up, the callers have misbehaved. */
    BUG_ON(i == DOMHEAP_ENTRIES);

#ifndef NDEBUG
    /* Searching the hash could get slow if the map starts filling up.
     * Cross that bridge when we come to it */
    {
        static int max_tries = 32;
        if ( i >= max_tries )
        {
            dprintk(XENLOG_WARNING, "Domheap map is filling: %i tries\n", i);
            max_tries *= 2;
        }
    }
#endif

    local_irq_restore(flags);

    va = (DOMHEAP_VIRT_START
          + (slot << SECOND_SHIFT)
          + ((mfn_x(mfn) & LPAE_ENTRY_MASK) << THIRD_SHIFT));

    /*
     * We may not have flushed this specific subpage at map time,
     * since we only flush the 4k page not the superpage
     */
    flush_xen_data_tlb_range_va_local(va, PAGE_SIZE);

    return (void *)va;
}
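The usual pairing with unmap_domain_page(), shown as a small sketch; the helper name is hypothetical, but the calls match the interfaces used elsewhere in this code.

/* Hypothetical helper showing the map/use/unmap pattern. */
static void example_zero_domheap_page(mfn_t mfn)
{
    void *va = map_domain_page(mfn);   /* takes a reference on the 2MB slot */

    clear_page(va);
    unmap_domain_page(va);             /* drops the reference; slot may be reused */
}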
/* Remove a mapping from a fixmap entry */
void clear_fixmap(unsigned map)
{
    lpae_t pte = {0};
    write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
    flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
}
/* Boot-time pagetable setup.
 * Changes here may need matching changes in head.S */
void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
{
    unsigned long dest_va;
    lpae_t pte, *p;
    int i;

    /* Map the destination in the boot misc area. */
    dest_va = BOOT_MISC_VIRT_START;
    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);

    /* Calculate virt-to-phys offset for the new location */
    phys_offset = xen_paddr - (unsigned long) _start;

    /* Copy */
    memcpy((void *) dest_va, _start, _end - _start);

    /* Beware!  Any state we modify between now and the PT switch may be
     * discarded when we switch over to the copy. */

    /* Update the copy of xen_pgtable to use the new paddrs */
    p = (void *) xen_pgtable + dest_va - (unsigned long) _start;
#ifdef CONFIG_ARM_64
    p[0].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
    p = (void *) xen_first + dest_va - (unsigned long) _start;
#endif
    for ( i = 0; i < 4; i++)
        p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    p = (void *) xen_second + dest_va - (unsigned long) _start;
    if ( boot_phys_offset != 0 )
    {
        /* Remove the old identity mapping of the boot paddr */
        vaddr_t va = (vaddr_t)_start + boot_phys_offset;
        p[second_linear_offset(va)].bits = 0;
    }
    for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
        if ( p[i].pt.valid )
            p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    /* Change pagetables to the copy in the relocated Xen */
    boot_ttbr = (uintptr_t) xen_pgtable + phys_offset;
    flush_xen_dcache(boot_ttbr);
    flush_xen_dcache_va_range((void*)dest_va, _end - _start);
    flush_xen_text_tlb();

    WRITE_SYSREG64(boot_ttbr, TTBR0_EL2);
    dsb();                         /* Ensure visibility of HTTBR update */
    flush_xen_text_tlb();

    /* Undo the temporary map */
    pte.bits = 0;
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_text_tlb();

    /* Link in the fixmap pagetable */
    pte = mfn_to_xen_entry((((unsigned long) xen_fixmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_table_offset(FIXMAP_ADDR(0)), pte);
    /*
     * No flush required here. Individual flushes are done in
     * set_fixmap as entries are used.
     */

    /* Break up the Xen mapping into 4k pages and protect them separately. */
    for ( i = 0; i < LPAE_ENTRIES; i++ )
    {
        unsigned long mfn = paddr_to_pfn(xen_paddr) + i;
        unsigned long va = XEN_VIRT_START + (i << PAGE_SHIFT);
        if ( !is_kernel(va) )
            break;
        pte = mfn_to_xen_entry(mfn);
        pte.pt.table = 1; /* 4k mappings always have this bit set */
        if ( is_kernel_text(va) || is_kernel_inittext(va) )
        {
            pte.pt.xn = 0;
            pte.pt.ro = 1;
        }
        if ( is_kernel_rodata(va) )
            pte.pt.ro = 1;
        write_pte(xen_xenmap + i, pte);
        /* No flush required here as page table is not hooked in yet. */
    }
    pte = mfn_to_xen_entry((((unsigned long) xen_xenmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
    /* TLBFLUSH and ISB would be needed here, but wait until we set WXN */

    /* From now on, no mapping may be both writable and executable. */
    WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
    /* Flush everything after setting WXN bit. */
    flush_xen_text_tlb();
}