void dump_pt_walk(lpae_t *first, paddr_t addr)
{
    lpae_t *second = NULL, *third = NULL;

    if ( first_table_offset(addr) >= LPAE_ENTRIES )
        return;

    printk("1ST[0x%x] = 0x%"PRIpaddr"\n", first_table_offset(addr),
           first[first_table_offset(addr)].bits);
    if ( !first[first_table_offset(addr)].walk.valid ||
         !first[first_table_offset(addr)].walk.table )
        goto done;

    second = map_domain_page(first[first_table_offset(addr)].walk.base);
    printk("2ND[0x%x] = 0x%"PRIpaddr"\n", second_table_offset(addr),
           second[second_table_offset(addr)].bits);
    if ( !second[second_table_offset(addr)].walk.valid ||
         !second[second_table_offset(addr)].walk.table )
        goto done;

    third = map_domain_page(second[second_table_offset(addr)].walk.base);
    printk("3RD[0x%x] = 0x%"PRIpaddr"\n", third_table_offset(addr),
           third[third_table_offset(addr)].bits);

done:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
}
/* Map the physical memory range start - start + len into virtual
 * memory and return the virtual address of the mapping.
 * start has to be 2MB aligned.
 * len has to be < EARLY_VMAP_VIRT_END - EARLY_VMAP_VIRT_START.
 */
void* __init early_ioremap(paddr_t start, size_t len, unsigned attributes)
{
    static unsigned long virt_start = EARLY_VMAP_VIRT_START;
    unsigned long ret_addr = virt_start;
    paddr_t end = start + len;

    ASSERT(!(start & (~SECOND_MASK)));
    ASSERT(!(virt_start & (~SECOND_MASK)));

    /* The range we need to map is too big */
    if ( virt_start + len >= EARLY_VMAP_VIRT_END )
        return NULL;

    while ( start < end )
    {
        lpae_t e = mfn_to_xen_entry(start >> PAGE_SHIFT);
        e.pt.ai = attributes;
        write_pte(xen_second + second_table_offset(virt_start), e);

        start += SECOND_SIZE;
        virt_start += SECOND_SIZE;
    }

    flush_xen_data_tlb_range_va(ret_addr, len);

    return (void*)ret_addr;
}
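/*
 * Usage sketch (not taken from the tree): an early-boot caller mapping a
 * device's registers before the regular ioremap machinery is available.
 * EXAMPLE_DEV_PADDR is a hypothetical, 2MB-aligned physical address; the
 * attribute index would normally be one of the memory-attribute indices
 * from asm/page.h (e.g. DEV_SHARED, exact name may vary between trees).
 */
#define EXAMPLE_DEV_PADDR 0x1c000000UL /* hypothetical, 2MB aligned */

static void __init example_map_device(void)
{
    void *regs = early_ioremap(EXAMPLE_DEV_PADDR, SECOND_SIZE, DEV_SHARED);

    if ( regs == NULL )
        panic("Unable to map the example device region");

    /* regs is now a virtual alias of the 2MB region at EXAMPLE_DEV_PADDR */
}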
/*
 * Lookup the MFN corresponding to a domain's PFN.
 *
 * There are no processor functions to do a stage 2 only lookup therefore we
 * do a software walk.
 */
paddr_t p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t pte, *first = NULL, *second = NULL, *third = NULL;
    paddr_t maddr = INVALID_PADDR;
    p2m_type_t _t;

    /* Allow t to be NULL */
    t = t ?: &_t;

    *t = p2m_invalid;

    spin_lock(&p2m->lock);

    first = p2m_map_first(p2m, paddr);
    if ( !first )
        goto err;

    pte = first[first_table_offset(paddr)];
    if ( !pte.p2m.valid || !pte.p2m.table )
        goto done;

    second = map_domain_page(pte.p2m.base);
    pte = second[second_table_offset(paddr)];
    if ( !pte.p2m.valid || !pte.p2m.table )
        goto done;

    third = map_domain_page(pte.p2m.base);
    pte = third[third_table_offset(paddr)];

    /* This bit must be one in the level 3 entry */
    if ( !pte.p2m.table )
        pte.bits = 0;

done:
    if ( pte.p2m.valid )
    {
        ASSERT(pte.p2m.type != p2m_invalid);
        maddr = (pte.bits & PADDR_MASK & PAGE_MASK) | (paddr & ~PAGE_MASK);
        *t = pte.p2m.type;
    }

    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

err:
    spin_unlock(&p2m->lock);

    return maddr;
}
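/*
 * Sketch of a typical caller (the helper name is hypothetical): translate a
 * guest frame number to a machine frame number, rejecting anything that is
 * not ordinary RAM. pfn_to_paddr()/paddr_to_pfn() are the usual
 * frame/address conversion helpers.
 */
static unsigned long example_gfn_to_ram_mfn(struct domain *d,
                                            unsigned long gfn)
{
    p2m_type_t t;
    paddr_t maddr = p2m_lookup(d, pfn_to_paddr(gfn), &t);

    if ( maddr == INVALID_PADDR || !p2m_is_ram(t) )
        return INVALID_MFN;

    return paddr_to_pfn(maddr);
}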
/*
 * Lookup the MFN corresponding to a domain's PFN.
 *
 * There are no processor functions to do a stage 2 only lookup therefore we
 * do a software walk.
 */
paddr_t p2m_lookup(struct domain *d, paddr_t paddr)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t pte, *first = NULL, *second = NULL, *third = NULL;
    paddr_t maddr = INVALID_PADDR;

    spin_lock(&p2m->lock);

    first = __map_domain_page(p2m->first_level);

    pte = first[first_table_offset(paddr)];
    if ( !pte.p2m.valid || !pte.p2m.table )
        goto done;

    second = map_domain_page(pte.p2m.base);
    pte = second[second_table_offset(paddr)];
    if ( !pte.p2m.valid || !pte.p2m.table )
        goto done;

    third = map_domain_page(pte.p2m.base);
    pte = third[third_table_offset(paddr)];

    /* This bit must be one in the level 3 entry */
    if ( !pte.p2m.table )
        pte.bits = 0;

done:
    if ( pte.p2m.valid )
        maddr = (pte.bits & PADDR_MASK & PAGE_MASK) | (paddr & ~PAGE_MASK);

    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    spin_unlock(&p2m->lock);

    return maddr;
}
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
                               p2m_type_t t)
{
    paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT;
    /* xn and write bit will be defined in the switch */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.sh = LPAE_SH_OUTER,
        .p2m.read = 1,
        .p2m.mattr = mattr,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch (t)
    {
    case p2m_ram_rw:
        e.p2m.xn = 0;
        e.p2m.write = 1;
        break;

    case p2m_ram_ro:
        e.p2m.xn = 0;
        e.p2m.write = 0;
        break;

    case p2m_map_foreign:
    case p2m_grant_map_rw:
    case p2m_mmio_direct:
        e.p2m.xn = 1;
        e.p2m.write = 1;
        break;

    case p2m_grant_map_ro:
    case p2m_invalid:
        e.p2m.xn = 1;
        e.p2m.write = 0;
        break;

    case p2m_max_real_type:
        BUG();
        break;
    }

    ASSERT(!(pa & ~PAGE_MASK));
    ASSERT(!(pa & ~PADDR_MASK));

    e.bits |= pa;

    return e;
}

/* Allocate a new page table page and hook it in via the given entry */
static int p2m_create_table(struct domain *d, lpae_t *entry)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    void *p;
    lpae_t pte;

    BUG_ON(entry->p2m.valid);

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);
    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

    write_pte(entry, pte);

    return 0;
}

enum p2m_operation {
    INSERT,
    ALLOCATE,
    REMOVE,
    RELINQUISH,
    CACHEFLUSH,
};

static int apply_p2m_changes(struct domain *d,
                             enum p2m_operation op,
                             paddr_t start_gpaddr,
                             paddr_t end_gpaddr,
                             paddr_t maddr,
                             int mattr,
                             p2m_type_t t)
{
    int rc;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_page = ~0,
                  cur_first_offset = ~0,
                  cur_second_offset = ~0;
    unsigned long count = 0;
    unsigned int flush = 0;
    bool_t populate = (op == INSERT || op == ALLOCATE);
    lpae_t pte;

    spin_lock(&p2m->lock);

    if ( d != current->domain )
        p2m_load_VTTBR(d);

    addr = start_gpaddr;
    while ( addr < end_gpaddr )
    {
        if ( cur_first_page != p2m_first_level_index(addr) )
        {
            if ( first ) unmap_domain_page(first);
            first = p2m_map_first(p2m, addr);
            if ( !first )
            {
                rc = -EINVAL;
                goto out;
            }
            cur_first_page = p2m_first_level_index(addr);
        }

        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + FIRST_SIZE) & FIRST_MASK;
                continue;
            }

            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + SECOND_SIZE) & SECOND_MASK;
                continue;
            }

            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        pte = third[third_table_offset(addr)];

        flush |= pte.p2m.valid;

        /* TODO: Handle other p2m types
         *
         * It's safe to do the put_page here because page_alloc will
         * flush the TLBs if the page is reallocated before the end of
         * this loop.
         */
        if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) )
        {
            unsigned long mfn = pte.p2m.base;

            ASSERT(mfn_valid(mfn));
            put_page(mfn_to_page(mfn));
        }

        /* Allocate a new RAM page and attach */
        switch (op) {
            case ALLOCATE:
                {
                    struct page_info *page;

                    ASSERT(!pte.p2m.valid);
                    rc = -ENOMEM;
                    page = alloc_domheap_page(d, 0);
                    if ( page == NULL ) {
                        printk("p2m_populate_ram: failed to allocate page\n");
                        goto out;
                    }

                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);

                    write_pte(&third[third_table_offset(addr)], pte);
                }
                break;
            case INSERT:
                {
                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
            case RELINQUISH:
            case REMOVE:
                {
                    if ( !pte.p2m.valid )
                    {
                        count++;
                        break;
                    }

                    count += 0x10;

                    memset(&pte, 0x00, sizeof(pte));
                    write_pte(&third[third_table_offset(addr)], pte);
                    count++;
                }
                break;

            case CACHEFLUSH:
                {
                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
                        break;

                    flush_page_to_ram(pte.p2m.base);
                }
                break;
        }

        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
        if ( op == RELINQUISH && count >= 0x2000 )
        {
            if ( hypercall_preempt_check() )
            {
                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
                rc = -EAGAIN;
                goto out;
            }
            count = 0;
        }

        /* Got the next page */
        addr += PAGE_SIZE;
    }

    if ( flush )
    {
        /* At the beginning of the function, Xen is updating VTTBR
         * with the domain where the mappings are created. In this
         * case it's only necessary to flush TLBs on every CPU with
         * the current VMID (our domain).
         */
        flush_tlb();
    }

    if ( op == ALLOCATE || op == INSERT )
    {
        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
        unsigned long egfn = paddr_to_pfn(end_gpaddr);

        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
        p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn);
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    if ( d != current->domain )
        p2m_load_VTTBR(current->domain);

    spin_unlock(&p2m->lock);

    return rc;
}
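/*
 * Sketch of how the operations above are typically driven (simplified from
 * the callers in the same file; exact wrapper names and signatures may
 * differ between versions). Each public helper is a thin wrapper that picks
 * the operation, the memory attributes and the p2m type, and lets
 * apply_p2m_changes() do the walk.
 */
int map_mmio_regions(struct domain *d,
                     paddr_t start_gaddr,
                     paddr_t end_gaddr,
                     paddr_t maddr)
{
    return apply_p2m_changes(d, INSERT, start_gaddr, end_gaddr,
                             maddr, MATTR_DEV, p2m_mmio_direct);
}

int p2m_populate_ram(struct domain *d, paddr_t start, paddr_t end)
{
    return apply_p2m_changes(d, ALLOCATE, start, end,
                             0, MATTR_MEM, p2m_ram_rw);
}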
static int create_p2m_entries(struct domain *d,
                              enum p2m_operation op,
                              paddr_t start_gpaddr,
                              paddr_t end_gpaddr,
                              paddr_t maddr,
                              int mattr)
{
    int rc, flush;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_offset = ~0, cur_second_offset = ~0;

    spin_lock(&p2m->lock);

    /* XXX Don't actually handle 40 bit guest physical addresses */
    BUG_ON(start_gpaddr & 0x8000000000ULL);
    BUG_ON(end_gpaddr & 0x8000000000ULL);

    first = __map_domain_page(p2m->first_level);

    for(addr = start_gpaddr; addr < end_gpaddr; addr += PAGE_SIZE)
    {
        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 ) {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 ) {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        flush = third[third_table_offset(addr)].p2m.valid;

        /* Allocate a new RAM page and attach */
        switch (op) {
            case ALLOCATE:
                {
                    struct page_info *page;
                    lpae_t pte;

                    rc = -ENOMEM;
                    page = alloc_domheap_page(d, 0);
                    if ( page == NULL ) {
                        printk("p2m_populate_ram: failed to allocate page\n");
                        goto out;
                    }

                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr);

                    write_pte(&third[third_table_offset(addr)], pte);
                }
                break;
            case INSERT:
                {
                    lpae_t pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr);
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
            case REMOVE:
                {
                    lpae_t pte;
                    memset(&pte, 0x00, sizeof(pte));
                    write_pte(&third[third_table_offset(addr)], pte);
                    maddr += PAGE_SIZE;
                }
                break;
        }

        if ( flush )
            flush_tlb_all_local();
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    spin_unlock(&p2m->lock);

    return rc;
}
void dump_pt_walk(paddr_t ttbr, paddr_t addr,
                  unsigned int root_level,
                  unsigned int nr_root_tables)
{
    static const char *level_strs[4] = { "0TH", "1ST", "2ND", "3RD" };
    const unsigned long root_pfn = paddr_to_pfn(ttbr);
    const unsigned int offsets[4] = {
        zeroeth_table_offset(addr),
        first_table_offset(addr),
        second_table_offset(addr),
        third_table_offset(addr)
    };
    lpae_t pte, *mapping;
    unsigned int level, root_table;

#ifdef CONFIG_ARM_32
    BUG_ON(root_level < 1);
#endif
    BUG_ON(root_level > 3);

    if ( nr_root_tables > 1 )
    {
        /*
         * Concatenated root-level tables. The table number will be
         * the offset at the previous level. It is not possible to
         * concatenate a level-0 root.
         */
        BUG_ON(root_level == 0);
        root_table = offsets[root_level - 1];
        printk("Using concatenated root table %u\n", root_table);
        if ( root_table >= nr_root_tables )
        {
            printk("Invalid root table offset\n");
            return;
        }
    }
    else
        root_table = 0;

    mapping = map_domain_page(_mfn(root_pfn + root_table));

    for ( level = root_level; ; level++ )
    {
        if ( offsets[level] > LPAE_ENTRIES )
            break;

        pte = mapping[offsets[level]];

        printk("%s[0x%x] = 0x%"PRIpaddr"\n",
               level_strs[level], offsets[level], pte.bits);

        if ( level == 3 || !pte.walk.valid || !pte.walk.table )
            break;

        /* For next iteration */
        unmap_domain_page(mapping);
        mapping = map_domain_page(_mfn(pte.walk.base));
    }

    unmap_domain_page(mapping);
}
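/*
 * Sketch of a caller (simplified from dump_hyp_walk() in the same file;
 * helper names such as HYP_PT_ROOT_LEVEL may differ between trees): the
 * hypervisor's own tables have a single, non-concatenated root, so
 * nr_root_tables is 1 and the root level comes from the paging
 * configuration.
 */
void dump_hyp_walk(vaddr_t addr)
{
    uint64_t ttbr = READ_SYSREG64(TTBR0_EL2);

    printk("Walking Hypervisor VA 0x%"PRIvaddr" via TTBR 0x%016"PRIx64"\n",
           addr, ttbr);

    dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
}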
/* Boot-time pagetable setup.
 * Changes here may need matching changes in head.S */
void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
{
    unsigned long dest_va;
    lpae_t pte, *p;
    int i;

    /* Map the destination in the boot misc area. */
    dest_va = BOOT_MISC_VIRT_START;
    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);

    /* Calculate virt-to-phys offset for the new location */
    phys_offset = xen_paddr - (unsigned long) _start;

    /* Copy */
    memcpy((void *) dest_va, _start, _end - _start);

    /* Beware! Any state we modify between now and the PT switch may be
     * discarded when we switch over to the copy. */

    /* Update the copy of xen_pgtable to use the new paddrs */
    p = (void *) xen_pgtable + dest_va - (unsigned long) _start;
#ifdef CONFIG_ARM_64
    p[0].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
    p = (void *) xen_first + dest_va - (unsigned long) _start;
#endif
    for ( i = 0; i < 4; i++)
        p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    p = (void *) xen_second + dest_va - (unsigned long) _start;
    if ( boot_phys_offset != 0 )
    {
        /* Remove the old identity mapping of the boot paddr */
        vaddr_t va = (vaddr_t)_start + boot_phys_offset;
        p[second_linear_offset(va)].bits = 0;
    }
    for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
        if ( p[i].pt.valid )
            p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    /* Change pagetables to the copy in the relocated Xen */
    boot_ttbr = (uintptr_t) xen_pgtable + phys_offset;
    flush_xen_dcache(boot_ttbr);
    flush_xen_dcache_va_range((void*)dest_va, _end - _start);
    flush_xen_text_tlb();

    WRITE_SYSREG64(boot_ttbr, TTBR0_EL2);
    dsb();                         /* Ensure visibility of HTTBR update */
    flush_xen_text_tlb();

    /* Undo the temporary map */
    pte.bits = 0;
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_text_tlb();

    /* Link in the fixmap pagetable */
    pte = mfn_to_xen_entry((((unsigned long) xen_fixmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_table_offset(FIXMAP_ADDR(0)), pte);
    /*
     * No flush required here. Individual flushes are done in
     * set_fixmap as entries are used.
     */

    /* Break up the Xen mapping into 4k pages and protect them separately. */
    for ( i = 0; i < LPAE_ENTRIES; i++ )
    {
        unsigned long mfn = paddr_to_pfn(xen_paddr) + i;
        unsigned long va = XEN_VIRT_START + (i << PAGE_SHIFT);
        if ( !is_kernel(va) )
            break;
        pte = mfn_to_xen_entry(mfn);
        pte.pt.table = 1; /* 4k mappings always have this bit set */
        if ( is_kernel_text(va) || is_kernel_inittext(va) )
        {
            pte.pt.xn = 0;
            pte.pt.ro = 1;
        }
        if ( is_kernel_rodata(va) )
            pte.pt.ro = 1;
        write_pte(xen_xenmap + i, pte);
        /* No flush required here as page table is not hooked in yet. */
    }

    pte = mfn_to_xen_entry((((unsigned long) xen_xenmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
    /* TLBFLUSH and ISB would be needed here, but wait until we set WXN */

    /* From now on, no mapping may be both writable and executable. */
    WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
    /* Flush everything after setting WXN bit. */
    flush_xen_text_tlb();
}
/*
 * Lookup the MFN corresponding to a domain's PFN.
 *
 * There are no processor functions to do a stage 2 only lookup therefore we
 * do a software walk.
 */
static paddr_t __p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    const unsigned int offsets[4] = {
        zeroeth_table_offset(paddr),
        first_table_offset(paddr),
        second_table_offset(paddr),
        third_table_offset(paddr)
    };
    const paddr_t masks[4] = {
        ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK
    };
    lpae_t pte, *map;
    paddr_t maddr = INVALID_PADDR;
    paddr_t mask = 0;
    p2m_type_t _t;
    unsigned int level, root_table;

    ASSERT(spin_is_locked(&p2m->lock));
    BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);

    /* Allow t to be NULL */
    t = t ?: &_t;

    *t = p2m_invalid;

    if ( P2M_ROOT_PAGES > 1 )
    {
        /*
         * Concatenated root-level tables. The table number will be
         * the offset at the previous level. It is not possible to
         * concatenate a level-0 root.
         */
        ASSERT(P2M_ROOT_LEVEL > 0);
        root_table = offsets[P2M_ROOT_LEVEL - 1];
        if ( root_table >= P2M_ROOT_PAGES )
            goto err;
    }
    else
        root_table = 0;

    map = __map_domain_page(p2m->root + root_table);

    ASSERT(P2M_ROOT_LEVEL < 4);

    for ( level = P2M_ROOT_LEVEL ; level < 4 ; level++ )
    {
        mask = masks[level];

        pte = map[offsets[level]];

        if ( level == 3 && !p2m_table(pte) )
            /* Invalid, clobber the pte */
            pte.bits = 0;
        if ( level == 3 || !p2m_table(pte) )
            /* Done */
            break;

        ASSERT(level < 3);

        /* Map for next level */
        unmap_domain_page(map);
        map = map_domain_page(_mfn(pte.p2m.base));
    }

    unmap_domain_page(map);

    if ( p2m_valid(pte) )
    {
        ASSERT(mask);
        ASSERT(pte.p2m.type != p2m_invalid);
        maddr = (pte.bits & PADDR_MASK & mask) | (paddr & ~mask);
        *t = pte.p2m.type;
    }

err:
    return maddr;
}
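/*
 * Sketch of the locking wrapper (simplified from the p2m_lookup() that
 * accompanies __p2m_lookup() in the same file): __p2m_lookup() asserts that
 * the p2m lock is held, so external callers go through a helper that takes
 * and releases it around the walk.
 */
paddr_t p2m_lookup(struct domain *d, paddr_t paddr, p2m_type_t *t)
{
    paddr_t ret;
    struct p2m_domain *p2m = &d->arch.p2m;

    spin_lock(&p2m->lock);
    ret = __p2m_lookup(d, paddr, t);
    spin_unlock(&p2m->lock);

    return ret;
}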