/* Creates MMIO mappings base..end as well as 4 SPIs from the given base. */
static int xgene_storm_pcie_specific_mapping(struct domain *d,
                                             const struct dt_device_node *node,
                                             paddr_t base, paddr_t end,
                                             int base_spi)
{
    int ret;

    printk("Mapping additional regions for PCIe device %s\n",
           dt_node_full_name(node));

    /* Map the PCIe bus resources */
    ret = map_one_mmio(d, "PCI MEMORY", paddr_to_pfn(base), paddr_to_pfn(end));
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTA", base_spi+0, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTB", base_spi+1, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTC", base_spi+2, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTD", base_spi+3, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = 0;
err:
    return ret;
}
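Every snippet in this collection leans on paddr_to_pfn() to turn a physical address into a page frame number. For reference, a minimal sketch of the helper pair in terms of PAGE_SHIFT (12 for 4KiB pages), matching the common definition; treat the typedef and exact macro form as assumptions rather than the actual header contents:

#include <stdint.h>

typedef uint64_t paddr_t;   /* physical address type, as used by the snippets */

#define PAGE_SHIFT        12 /* 4KiB pages */
#define paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)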
/* Additional mappings for dom0 (not in the DTS) */
static int exynos5250_specific_mapping(struct domain *d)
{
    /* Map the chip ID */
    map_mmio_regions(d, paddr_to_pfn(EXYNOS5_PA_CHIPID), 1,
                     paddr_to_pfn(EXYNOS5_PA_CHIPID));

    /* Map the PWM region */
    map_mmio_regions(d, paddr_to_pfn(EXYNOS5_PA_TIMER), 2,
                     paddr_to_pfn(EXYNOS5_PA_TIMER));

    return 0;
}
void __init vga_init(void)
{
    char *p;

    /* Look for 'keep' in the comma-separated options. */
    for ( p = opt_vga; p != NULL; p = strchr(p, ',') )
    {
        if ( *p == ',' )
            p++;
        if ( strncmp(p, "keep", 4) == 0 )
            vgacon_keep = 1;
    }

    switch ( vga_console_info.video_type )
    {
    case XEN_VGATYPE_TEXT_MODE_3:
        if ( page_is_ram_type(paddr_to_pfn(0xB8000), RAM_TYPE_CONVENTIONAL) ||
             ((video = ioremap(0xB8000, 0x8000)) == NULL) )
            return;
        outw(0x200a, 0x3d4); /* disable cursor */
        columns = vga_console_info.u.text_mode_3.columns;
        lines   = vga_console_info.u.text_mode_3.rows;
        memset(video, 0, columns * lines * 2);
        vga_puts = vga_text_puts;
        break;
    case XEN_VGATYPE_VESA_LFB:
    case XEN_VGATYPE_EFI_LFB:
        vesa_early_init();
        break;
    default:
        memset(&vga_console_info, 0, sizeof(vga_console_info));
        break;
    }
}
/* Map the FDT in the early boot page table */
void * __init early_fdt_map(paddr_t fdt_paddr)
{
    /* We are using 2MB superpages for mapping the FDT */
    paddr_t base_paddr = fdt_paddr & SECOND_MASK;
    paddr_t offset;
    void *fdt_virt;
    uint32_t size;

    /*
     * Check whether the physical FDT address is set and meets the minimum
     * alignment requirement. We rely on MIN_FDT_ALIGN being at least 8
     * bytes so that we can always access the magic and size fields of the
     * FDT header after mapping the first chunk; double-check that this is
     * indeed the case.
     */
    BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
    if ( !fdt_paddr || fdt_paddr % MIN_FDT_ALIGN )
        return NULL;

    /* The FDT is mapped using 2MB superpages */
    BUILD_BUG_ON(BOOT_FDT_VIRT_START % SZ_2M);
    create_mappings(boot_second, BOOT_FDT_VIRT_START,
                    paddr_to_pfn(base_paddr), SZ_2M >> PAGE_SHIFT, SZ_2M);

    offset = fdt_paddr % SECOND_SIZE;
    fdt_virt = (void *)BOOT_FDT_VIRT_START + offset;

    if ( fdt_magic(fdt_virt) != FDT_MAGIC )
        return NULL;

    size = fdt_totalsize(fdt_virt);
    if ( size > MAX_FDT_SIZE )
        return NULL;

    /* Map a second superpage if the FDT straddles the first one. */
    if ( (offset + size) > SZ_2M )
        create_mappings(boot_second, BOOT_FDT_VIRT_START + SZ_2M,
                        paddr_to_pfn(base_paddr + SZ_2M),
                        SZ_2M >> PAGE_SHIFT, SZ_2M);

    return fdt_virt;
}
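The alignment argument above works because the two header fields consulted, the magic and the total size, sit in the first eight bytes of the blob. A sketch of that prefix, assuming the standard flattened-device-tree header layout (big-endian fields, FDT_MAGIC being 0xd00dfeed):

#include <stdint.h>

/* First two fields of a flattened device tree header. Both are stored
 * big-endian, which is why the fdt_magic()/fdt_totalsize() accessors
 * byte-swap on little-endian hosts. */
struct fdt_header_prefix {
    uint32_t magic;     /* expected to be FDT_MAGIC (0xd00dfeed) */
    uint32_t totalsize; /* total size of the blob in bytes */
    /* ... remaining header fields follow ... */
};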
void __init discard_initial_modules(void)
{
    struct bootmodules *mi = &bootinfo.modules;
    int i;

    for ( i = 0; i < mi->nr_mods; i++ )
    {
        paddr_t s = mi->module[i].start;
        paddr_t e = s + PAGE_ALIGN(mi->module[i].size);

        if ( mi->module[i].kind == BOOTMOD_XEN )
            continue;

        if ( !mfn_valid(paddr_to_pfn(s)) || !mfn_valid(paddr_to_pfn(e)) )
            continue;

        dt_unreserved_regions(s, e, init_domheap_pages, 0);
    }

    mi->nr_mods = 0;

    remove_early_mappings();
}
int
get_xen_info_x86(void)
{
    unsigned long frame_table_vaddr;
    unsigned long xen_end;
    int i;

    if (SYMBOL(pgd_l2) == NOT_FOUND_SYMBOL &&
        SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) {
        ERRMSG("Can't get pgd.\n");
        return FALSE;
    }

    if (SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) {
        ERRMSG("non-PAE is not supported right now.\n");
        return FALSE;
    }

    if (SYMBOL(frame_table) == NOT_FOUND_SYMBOL) {
        ERRMSG("Can't get the symbol of frame_table.\n");
        return FALSE;
    }
    if (!readmem(VADDR_XEN, SYMBOL(frame_table), &frame_table_vaddr,
        sizeof(frame_table_vaddr))) {
        ERRMSG("Can't get the value of frame_table.\n");
        return FALSE;
    }
    info->frame_table_vaddr = frame_table_vaddr;

    if (SYMBOL(xenheap_phys_end) == NOT_FOUND_SYMBOL) {
        ERRMSG("Can't get the symbol of xenheap_phys_end.\n");
        return FALSE;
    }
    if (!readmem(VADDR_XEN, SYMBOL(xenheap_phys_end), &xen_end,
        sizeof(xen_end))) {
        ERRMSG("Can't get the value of xenheap_phys_end.\n");
        return FALSE;
    }
    info->xen_heap_start = 0;
    info->xen_heap_end = paddr_to_pfn(xen_end);

    /*
     * pickled_id == domain addr for x86
     */
    for (i = 0; i < info->num_domain; i++) {
        info->domain_list[i].pickled_id =
            info->domain_list[i].domain_addr;
    }

    return TRUE;
}
static void init_pdx(void)
{
    paddr_t bank_start, bank_size, bank_end;

    u64 mask = pdx_init_mask(bootinfo.mem.bank[0].start);
    int bank;

    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
    {
        bank_start = bootinfo.mem.bank[bank].start;
        bank_size = bootinfo.mem.bank[bank].size;

        mask |= bank_start | pdx_region_mask(bank_start, bank_size);
    }

    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
    {
        bank_start = bootinfo.mem.bank[bank].start;
        bank_size = bootinfo.mem.bank[bank].size;

        if (~mask & pdx_region_mask(bank_start, bank_size))
            mask = 0;
    }

    pfn_pdx_hole_setup(mask >> PAGE_SHIFT);

    for ( bank = 0 ; bank < bootinfo.mem.nr_banks; bank++ )
    {
        bank_start = bootinfo.mem.bank[bank].start;
        bank_size = bootinfo.mem.bank[bank].size;
        bank_end = bank_start + bank_size;

        set_pdx_range(paddr_to_pfn(bank_start),
                      paddr_to_pfn(bank_end));
    }
}
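The mask computed above marks the physical-address bits that actually vary across RAM banks; bits that never vary can be compressed out of the page index (PDX) space, shrinking the frame table. A hedged sketch of the mask helpers, written to match their usual behaviour (fill_mask() propagates the highest set bit downward so the result is contiguous from bit 0 upward); treat the exact definitions as an assumption:

#include <stdint.h>

/* Extend the mask downwards so it is contiguous from bit 0 up to its
 * highest set bit. */
static uint64_t fill_mask(uint64_t mask)
{
    while (mask & (mask + 1))
        mask |= mask + 1;
    return mask;
}

/* Bits which may vary below the base of the first bank. */
uint64_t pdx_init_mask(uint64_t base_addr)
{
    return fill_mask(base_addr - 1);
}

/* Bits which may vary within a [base, base + len) region: XORing the
 * first and last address exposes exactly the differing bits. */
uint64_t pdx_region_mask(uint64_t base, uint64_t len)
{
    return fill_mask(base ^ (base + len - 1));
}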
static struct mcinfo_bank *mca_init_bank(enum mca_source who,
                                         struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;

    if (!mi)
        return NULL;

    mib = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_bank));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return NULL;
    }

    memset(mib, 0, sizeof(struct mcinfo_bank));
    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof(struct mcinfo_bank);
    mib->mc_bank = bank;

    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
    {
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

        if (mfn_valid(paddr_to_pfn(mib->mc_addr)))
        {
            struct domain *d;

            d = maddr_get_owner(mib->mc_addr);
            if (d != NULL && (who == MCA_POLLER || who == MCA_CMCI_HANDLER))
                mib->mc_domid = d->domain_id;
        }
    }

    if (who == MCA_CMCI_HANDLER)
    {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }

    return mib;
}
/*
 * Xen does not currently support mapping the MMIO regions and
 * interrupts of bus child devices (referenced via the "ranges" and
 * "interrupt-map" properties) to domain 0. For now, map the necessary
 * resources manually instead.
 */
static int xgene_storm_specific_mapping(struct domain *d)
{
    int ret;

    /* Map the PCIe bus resources */
    ret = map_one_mmio(d, "PCI MEM REGION", paddr_to_pfn(0xe000000000UL),
                                            paddr_to_pfn(0xe010000000UL));
    if ( ret )
        goto err;

    ret = map_one_mmio(d, "PCI IO REGION", paddr_to_pfn(0xe080000000UL),
                                           paddr_to_pfn(0xe080010000UL));
    if ( ret )
        goto err;

    ret = map_one_mmio(d, "PCI CFG REGION", paddr_to_pfn(0xe0d0000000UL),
                                            paddr_to_pfn(0xe0d0200000UL));
    if ( ret )
        goto err;

    ret = map_one_mmio(d, "PCI MSI REGION", paddr_to_pfn(0xe010000000UL),
                                            paddr_to_pfn(0xe010800000UL));
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTA", 0xc2, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTB", 0xc3, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTC", 0xc4, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = map_one_spi(d, "PCI#INTD", 0xc5, DT_IRQ_TYPE_LEVEL_HIGH);
    if ( ret )
        goto err;

    ret = 0;
err:
    return ret;
}
static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank)
{
    struct mcinfo_bank *mib;

    if (!mi)
        return;

    mib = x86_mcinfo_reserve(mi, sizeof(*mib));
    if (!mib)
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return;
    }

    mib->mc_status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));

    mib->common.type = MC_TYPE_BANK;
    mib->common.size = sizeof(struct mcinfo_bank);
    mib->mc_bank = bank;

    if (mib->mc_status & MCi_STATUS_MISCV)
        mib->mc_misc = mca_rdmsr(MSR_IA32_MCx_MISC(bank));

    if (mib->mc_status & MCi_STATUS_ADDRV)
        mib->mc_addr = mca_rdmsr(MSR_IA32_MCx_ADDR(bank));

    if ((mib->mc_status & MCi_STATUS_MISCV) &&
        (mib->mc_status & MCi_STATUS_ADDRV) &&
        (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) &&
        (who == MCA_POLLER || who == MCA_CMCI_HANDLER) &&
        (mfn_valid(paddr_to_pfn(mib->mc_addr))))
    {
        struct domain *d;

        d = maddr_get_owner(mib->mc_addr);
        if (d)
            mib->mc_domid = d->domain_id;
    }

    if (who == MCA_CMCI_HANDLER)
    {
        mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
        rdtscll(mib->mc_tsc);
    }
}
/* Additional mappings for dom0 (not in the DTS) */
static int omap5_specific_mapping(struct domain *d)
{
    /* Map the PRM module */
    map_mmio_regions(d, paddr_to_pfn(OMAP5_PRM_BASE), 2,
                     paddr_to_pfn(OMAP5_PRM_BASE));

    /* Map the PRM_MPU */
    map_mmio_regions(d, paddr_to_pfn(OMAP5_PRCM_MPU_BASE), 1,
                     paddr_to_pfn(OMAP5_PRCM_MPU_BASE));

    /* Map the Wakeup Gen */
    map_mmio_regions(d, paddr_to_pfn(OMAP5_WKUPGEN_BASE), 1,
                     paddr_to_pfn(OMAP5_WKUPGEN_BASE));

    /* Map the on-chip SRAM */
    map_mmio_regions(d, paddr_to_pfn(OMAP5_SRAM_PA), 32,
                     paddr_to_pfn(OMAP5_SRAM_PA));

    return 0;
}
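This snippet and the Exynos one above repeat the same pattern: the guest frame number and the machine frame number are identical, giving dom0 a 1:1 view of the hardware. A hypothetical convenience wrapper capturing that pattern, assuming the map_mmio_regions(domain, start gfn, page count, base mfn) signature these calls use:

/* Hypothetical helper: identity-map nr pages of MMIO at base into the
 * domain (gfn == mfn). */
static int map_identity_mmio(struct domain *d, paddr_t base, unsigned long nr)
{
    return map_mmio_regions(d, paddr_to_pfn(base), nr, paddr_to_pfn(base));
}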
int
get_xen_basic_info_x86(void)
{
    if (SYMBOL(pgd_l2) == NOT_FOUND_SYMBOL &&
        SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) {
        ERRMSG("Can't get pgd.\n");
        return FALSE;
    }

    if (SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) {
        ERRMSG("non-PAE is not supported right now.\n");
        return FALSE;
    }

    if (SYMBOL(frame_table) != NOT_FOUND_SYMBOL) {
        unsigned long frame_table_vaddr;

        if (!readmem(VADDR_XEN, SYMBOL(frame_table), &frame_table_vaddr,
            sizeof(frame_table_vaddr))) {
            ERRMSG("Can't get the value of frame_table.\n");
            return FALSE;
        }
        info->frame_table_vaddr = frame_table_vaddr;
    } else
        info->frame_table_vaddr = FRAMETABLE_VIRT_START;

    if (!info->xen_crash_info.com ||
        info->xen_crash_info.com->xen_major_version < 4) {
        unsigned long xen_end;

        if (SYMBOL(xenheap_phys_end) == NOT_FOUND_SYMBOL) {
            ERRMSG("Can't get the symbol of xenheap_phys_end.\n");
            return FALSE;
        }
        if (!readmem(VADDR_XEN, SYMBOL(xenheap_phys_end), &xen_end,
            sizeof(xen_end))) {
            ERRMSG("Can't get the value of xenheap_phys_end.\n");
            return FALSE;
        }
        info->xen_heap_start = 0;
        info->xen_heap_end = paddr_to_pfn(xen_end);
    }

    return TRUE;
}
/* Boot-time pagetable setup.
 * Changes here may need matching changes in head.S */
void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
{
    unsigned long dest_va;
    lpae_t pte, *p;
    int i;

    /* Map the destination in the boot misc area. */
    dest_va = BOOT_MISC_VIRT_START;
    pte = mfn_to_xen_entry(xen_paddr >> PAGE_SHIFT);
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_data_tlb_range_va(dest_va, SECOND_SIZE);

    /* Calculate virt-to-phys offset for the new location */
    phys_offset = xen_paddr - (unsigned long) _start;

    /* Copy */
    memcpy((void *) dest_va, _start, _end - _start);

    /* Beware!  Any state we modify between now and the PT switch may be
     * discarded when we switch over to the copy. */

    /* Update the copy of xen_pgtable to use the new paddrs */
    p = (void *) xen_pgtable + dest_va - (unsigned long) _start;
#ifdef CONFIG_ARM_64
    p[0].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
    p = (void *) xen_first + dest_va - (unsigned long) _start;
#endif
    for ( i = 0; i < 4; i++)
        p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    p = (void *) xen_second + dest_va - (unsigned long) _start;
    if ( boot_phys_offset != 0 )
    {
        /* Remove the old identity mapping of the boot paddr */
        vaddr_t va = (vaddr_t)_start + boot_phys_offset;
        p[second_linear_offset(va)].bits = 0;
    }
    for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
        if ( p[i].pt.valid )
            p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

    /* Change pagetables to the copy in the relocated Xen */
    boot_ttbr = (uintptr_t) xen_pgtable + phys_offset;
    flush_xen_dcache(boot_ttbr);
    flush_xen_dcache_va_range((void*)dest_va, _end - _start);
    flush_xen_text_tlb();

    WRITE_SYSREG64(boot_ttbr, TTBR0_EL2);
    dsb();                         /* Ensure visibility of HTTBR update */
    flush_xen_text_tlb();

    /* Undo the temporary map */
    pte.bits = 0;
    write_pte(xen_second + second_table_offset(dest_va), pte);
    flush_xen_text_tlb();

    /* Link in the fixmap pagetable */
    pte = mfn_to_xen_entry((((unsigned long) xen_fixmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_table_offset(FIXMAP_ADDR(0)), pte);
    /*
     * No flush required here. Individual flushes are done in
     * set_fixmap as entries are used.
     */

    /* Break up the Xen mapping into 4k pages and protect them separately. */
    for ( i = 0; i < LPAE_ENTRIES; i++ )
    {
        unsigned long mfn = paddr_to_pfn(xen_paddr) + i;
        unsigned long va = XEN_VIRT_START + (i << PAGE_SHIFT);
        if ( !is_kernel(va) )
            break;
        pte = mfn_to_xen_entry(mfn);
        pte.pt.table = 1; /* 4k mappings always have this bit set */
        if ( is_kernel_text(va) || is_kernel_inittext(va) )
        {
            pte.pt.xn = 0;
            pte.pt.ro = 1;
        }
        if ( is_kernel_rodata(va) )
            pte.pt.ro = 1;
        write_pte(xen_xenmap + i, pte);
        /* No flush required here as page table is not hooked in yet. */
    }
    pte = mfn_to_xen_entry((((unsigned long) xen_xenmap) + phys_offset)
                           >> PAGE_SHIFT);
    pte.pt.table = 1;
    write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
    /* TLBFLUSH and ISB would be needed here, but wait until we set WXN */

    /* From now on, no mapping may be both writable and executable. */
    WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
    /* Flush everything after setting WXN bit. */
    flush_xen_text_tlb();
}
static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
                               p2m_type_t t)
{
    paddr_t pa = ((paddr_t) mfn) << PAGE_SHIFT;
    /* xn and write bit will be defined in the switch */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.sh = LPAE_SH_OUTER,
        .p2m.read = 1,
        .p2m.mattr = mattr,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch (t)
    {
    case p2m_ram_rw:
        e.p2m.xn = 0;
        e.p2m.write = 1;
        break;

    case p2m_ram_ro:
        e.p2m.xn = 0;
        e.p2m.write = 0;
        break;

    case p2m_map_foreign:
    case p2m_grant_map_rw:
    case p2m_mmio_direct:
        e.p2m.xn = 1;
        e.p2m.write = 1;
        break;

    case p2m_grant_map_ro:
    case p2m_invalid:
        e.p2m.xn = 1;
        e.p2m.write = 0;
        break;

    case p2m_max_real_type:
        BUG();
        break;
    }

    ASSERT(!(pa & ~PAGE_MASK));
    ASSERT(!(pa & ~PADDR_MASK));

    e.bits |= pa;

    return e;
}

/* Allocate a new page table page and hook it in via the given entry */
static int p2m_create_table(struct domain *d, lpae_t *entry)
{
    struct p2m_domain *p2m = &d->arch.p2m;
    struct page_info *page;
    void *p;
    lpae_t pte;

    BUG_ON(entry->p2m.valid);

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);
    unmap_domain_page(p);

    pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);

    write_pte(entry, pte);

    return 0;
}

enum p2m_operation {
    INSERT,
    ALLOCATE,
    REMOVE,
    RELINQUISH,
    CACHEFLUSH,
};

static int apply_p2m_changes(struct domain *d,
                             enum p2m_operation op,
                             paddr_t start_gpaddr,
                             paddr_t end_gpaddr,
                             paddr_t maddr,
                             int mattr,
                             p2m_type_t t)
{
    int rc;
    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t *first = NULL, *second = NULL, *third = NULL;
    paddr_t addr;
    unsigned long cur_first_page = ~0,
                  cur_first_offset = ~0,
                  cur_second_offset = ~0;
    unsigned long count = 0;
    unsigned int flush = 0;
    bool_t populate = (op == INSERT || op == ALLOCATE);
    lpae_t pte;

    spin_lock(&p2m->lock);

    if ( d != current->domain )
        p2m_load_VTTBR(d);

    addr = start_gpaddr;
    while ( addr < end_gpaddr )
    {
        if ( cur_first_page != p2m_first_level_index(addr) )
        {
            if ( first ) unmap_domain_page(first);
            first = p2m_map_first(p2m, addr);
            if ( !first )
            {
                rc = -EINVAL;
                goto out;
            }
            cur_first_page = p2m_first_level_index(addr);
        }

        if ( !first[first_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + FIRST_SIZE) & FIRST_MASK;
                continue;
            }

            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L1 failed\n");
                goto out;
            }
        }

        BUG_ON(!first[first_table_offset(addr)].p2m.valid);

        if ( cur_first_offset != first_table_offset(addr) )
        {
            if (second) unmap_domain_page(second);
            second = map_domain_page(first[first_table_offset(addr)].p2m.base);
            cur_first_offset = first_table_offset(addr);
        }
        /* else: second already valid */

        if ( !second[second_table_offset(addr)].p2m.valid )
        {
            if ( !populate )
            {
                addr = (addr + SECOND_SIZE) & SECOND_MASK;
                continue;
            }

            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
            if ( rc < 0 )
            {
                printk("p2m_populate_ram: L2 failed\n");
                goto out;
            }
        }

        BUG_ON(!second[second_table_offset(addr)].p2m.valid);

        if ( cur_second_offset != second_table_offset(addr) )
        {
            /* map third level */
            if (third) unmap_domain_page(third);
            third = map_domain_page(second[second_table_offset(addr)].p2m.base);
            cur_second_offset = second_table_offset(addr);
        }

        pte = third[third_table_offset(addr)];

        flush |= pte.p2m.valid;

        /* TODO: Handle other p2m types
         *
         * It's safe to do the put_page here because page_alloc will
         * flush the TLBs if the page is reallocated before the end of
         * this loop.
         */
        if ( pte.p2m.valid && p2m_is_foreign(pte.p2m.type) )
        {
            unsigned long mfn = pte.p2m.base;

            ASSERT(mfn_valid(mfn));
            put_page(mfn_to_page(mfn));
        }

        /* Allocate a new RAM page and attach */
        switch (op)
        {
        case ALLOCATE:
            {
                struct page_info *page;

                ASSERT(!pte.p2m.valid);
                rc = -ENOMEM;
                page = alloc_domheap_page(d, 0);
                if ( page == NULL )
                {
                    printk("p2m_populate_ram: failed to allocate page\n");
                    goto out;
                }

                pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);

                write_pte(&third[third_table_offset(addr)], pte);
            }
            break;
        case INSERT:
            {
                pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
                write_pte(&third[third_table_offset(addr)], pte);
                maddr += PAGE_SIZE;
            }
            break;
        case RELINQUISH:
        case REMOVE:
            {
                if ( !pte.p2m.valid )
                {
                    count++;
                    break;
                }

                count += 0x10;

                memset(&pte, 0x00, sizeof(pte));
                write_pte(&third[third_table_offset(addr)], pte);
                count++;
            }
            break;
        case CACHEFLUSH:
            {
                if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
                    break;

                flush_page_to_ram(pte.p2m.base);
            }
            break;
        }

        /* Preempt every 2MiB (mapped) or 32MiB (unmapped) - arbitrary */
        if ( op == RELINQUISH && count >= 0x2000 )
        {
            if ( hypercall_preempt_check() )
            {
                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
                rc = -EAGAIN;
                goto out;
            }
            count = 0;
        }

        /* Got the next page */
        addr += PAGE_SIZE;
    }

    if ( flush )
    {
        /* At the beginning of the function, Xen updated the VTTBR with
         * the domain in which the mappings are created, so it is only
         * necessary to flush the TLBs on every CPU for the current VMID
         * (our domain). */
        flush_tlb();
    }

    if ( op == ALLOCATE || op == INSERT )
    {
        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
        unsigned long egfn = paddr_to_pfn(end_gpaddr);

        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
        p2m->lowest_mapped_gfn = MIN(p2m->lowest_mapped_gfn, sgfn);
    }

    rc = 0;

out:
    if (third) unmap_domain_page(third);
    if (second) unmap_domain_page(second);
    if (first) unmap_domain_page(first);

    if ( d != current->domain )
        p2m_load_VTTBR(current->domain);

    spin_unlock(&p2m->lock);

    return rc;
}
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio;
    ioreq_t p = {
        .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
        .addr = addr,
        .size = size,
        .dir = dir,
        .df = df,
        .data = ram_gpa,
        .data_is_ptr = (p_data == NULL),
    };
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    struct page_info *ram_page;
    int rc;

    /* Check for paged out page */
    ram_page = get_page_from_gfn(curr->domain, ram_gfn, &p2mt, P2M_UNSHARE);
    if ( p2m_is_paging(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        p2m_mem_paging_populate(curr->domain, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
    {
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_RETRY;
    }

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( !p.data_is_ptr && (dir == IOREQ_WRITE) )
    {
        memcpy(&p.data, p_data, size);
        p_data = NULL;
    }

    vio = &curr->arch.hvm_vcpu.hvm_io;

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data, &vio->mmio_large_read[addr - pa], size);
                if ( ram_page )
                    put_page(ram_page);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( vio->io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        vio->io_state = HVMIO_none;
        if ( p_data == NULL )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_UNHANDLEABLE;
        }
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !p.data_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (vio->mmio_large_write_pa +
                       vio->mmio_large_write_bytes)) )
        {
            if ( ram_page )
                put_page(ram_page);
            return X86EMUL_RETRY;
        }
    default:
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( hvm_io_pending(curr) )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending?\n");
        if ( ram_page )
            put_page(ram_page);
        return X86EMUL_UNHANDLEABLE;
    }

    vio->io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    vio->io_size = size;

    /*
     * When retrying a repeated string instruction, force exit to guest after
     * completion of the retried iteration to allow handling of interrupts.
     */
    if ( vio->mmio_retrying )
        *reps = 1;

    p.count = *reps;

    if ( dir == IOREQ_WRITE )
        hvmtrace_io_assist(is_mmio, &p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(&p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(&p);
    }
    else
    {
        rc = hvm_portio_intercept(&p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p.count;
        p.state = STATE_IORESP_READY;
        if ( !vio->mmio_retry )
        {
            hvm_io_assist(&p);
            vio->io_state = HVMIO_none;
        }
        else
            /* Defer hvm_io_assist() invocation to hvm_do_resume(). */
            vio->io_state = HVMIO_handle_mmio_awaiting_completion;
        break;
    case X86EMUL_UNHANDLEABLE:
        /* If there is no backing DM, just ignore accesses */
        if ( !hvm_has_dm(curr->domain) )
        {
            rc = X86EMUL_OKAY;
            vio->io_state = HVMIO_none;
        }
        else
        {
            rc = X86EMUL_RETRY;
            if ( !hvm_send_assist_req(&p) )
                vio->io_state = HVMIO_none;
            else if ( p_data == NULL )
                rc = X86EMUL_OKAY;
        }
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
    {
        if ( ram_page )
            put_page(ram_page);
        return rc;
    }

 finish_access:
    if ( dir == IOREQ_READ )
        hvmtrace_io_assist(is_mmio, &p);

    if ( p_data != NULL )
        memcpy(p_data, &vio->io_data, size);

    if ( is_mmio && !p.data_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = vio->mmio_large_write_pa;
            unsigned int bytes = vio->mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                vio->mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = vio->mmio_large_read_pa;
            unsigned int bytes = vio->mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = vio->mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <= sizeof(vio->mmio_large_read)) )
            {
                memcpy(&vio->mmio_large_read[bytes], p_data, size);
                vio->mmio_large_read_bytes += size;
            }
        }
    }

    if ( ram_page )
        put_page(ram_page);
    return X86EMUL_OKAY;
}

int hvmemul_do_pio(
    unsigned long port, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(0, port, reps, size, ram_gpa, dir, df, p_data);
}

static int hvmemul_do_mmio(
    paddr_t gpa, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    return hvmemul_do_io(1, gpa, reps, size, ram_gpa, dir, df, p_data);
}

/*
 * Convert addr from linear to physical form, valid over the range
 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
 * @pfec indicates the access checks to be performed during page-table walks.
 */
static int hvmemul_linear_to_phys(
    unsigned long addr, paddr_t *paddr, unsigned int bytes_per_rep,
    unsigned long *reps, uint32_t pfec,
    struct hvm_emulate_ctxt *hvmemul_ctxt)
{
    struct vcpu *curr = current;
    unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
    int reverse;

    /*
     * Clip repetitions to a sensible maximum. This avoids extensive looping
     * in this function while still amortising the cost of I/O
     * trap-and-emulate.
     */
    *reps = min_t(unsigned long, *reps, 4096);

    /* With no paging it's easy: linear == physical. */
    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
    {
        *paddr = addr;
        return X86EMUL_OKAY;
    }

    /* Reverse mode if this is a backwards multi-iteration string operation. */
    reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);

    if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
    {
        /* Do page-straddling first iteration forwards via recursion. */
        paddr_t _paddr;
        unsigned long one_rep = 1;
        int rc = hvmemul_linear_to_phys(
            addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        pfn = _paddr >> PAGE_SHIFT;
    }
    else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
/*
 * If mem_access is in use it might have been the reason why
 * get_page_from_gva failed to fetch the page, as it uses the MMU for
 * the permission checking. Only in these cases do we perform a
 * software-based type check and fetch the page, if we indeed find a
 * conflicting mem_access setting.
 */
struct page_info*
p2m_mem_access_check_and_get_page(vaddr_t gva, unsigned long flag,
                                  const struct vcpu *v)
{
    long rc;
    paddr_t ipa;
    gfn_t gfn;
    mfn_t mfn;
    xenmem_access_t xma;
    p2m_type_t t;
    struct page_info *page = NULL;
    struct p2m_domain *p2m = &v->domain->arch.p2m;

    rc = gva_to_ipa(gva, &ipa, flag);
    if ( rc < 0 )
        goto err;

    gfn = _gfn(paddr_to_pfn(ipa));

    /*
     * We do this first as this is faster in the default case when no
     * permission is set on the page.
     */
    rc = __p2m_get_mem_access(v->domain, gfn, &xma);
    if ( rc < 0 )
        goto err;

    /* Let's check if mem_access limited the access. */
    switch ( xma )
    {
    default:
    case XENMEM_access_rwx:
    case XENMEM_access_rw:
        /*
         * If mem_access contains no rw perm restrictions at all then the
         * original fault was correct.
         */
        goto err;
    case XENMEM_access_n2rwx:
    case XENMEM_access_n:
    case XENMEM_access_x:
        /*
         * If no r/w is permitted by mem_access, this was a fault caused by
         * mem_access.
         */
        break;
    case XENMEM_access_wx:
    case XENMEM_access_w:
        /*
         * If this was a read then it was because of mem_access, but if it
         * was a write then the original get_page_from_gva fault was correct.
         */
        if ( flag == GV2M_READ )
            break;
        else
            goto err;
    case XENMEM_access_rx2rw:
    case XENMEM_access_rx:
    case XENMEM_access_r:
        /*
         * If this was a write then it was because of mem_access, but if it
         * was a read then the original get_page_from_gva fault was correct.
         */
        if ( flag == GV2M_WRITE )
            break;
        else
            goto err;
    }

    /*
     * We had a mem_access permission limiting the access, but the page type
     * could also be limiting, so we need to check that as well.
     */
    mfn = p2m_get_entry(p2m, gfn, &t, NULL, NULL);
    if ( mfn_eq(mfn, INVALID_MFN) )
        goto err;

    if ( !mfn_valid(mfn) )
        goto err;

    /*
     * Base type doesn't allow r/w
     */
    if ( t != p2m_ram_rw )
        goto err;

    page = mfn_to_page(mfn_x(mfn));

    if ( unlikely(!get_page(page, v->domain)) )
        page = NULL;

err:
    return page;
}
static int hvmemul_do_io(
    int is_mmio, paddr_t addr, unsigned long *reps, int size,
    paddr_t ram_gpa, int dir, int df, void *p_data)
{
    paddr_t value = ram_gpa;
    int value_is_ptr = (p_data == NULL);
    struct vcpu *curr = current;
    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
    ioreq_t *p = get_ioreq(curr);
    unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
    p2m_type_t p2mt;
    mfn_t ram_mfn;
    int rc;

    /* Check for paged out page */
    ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0);
    if ( p2m_is_paging(p2mt) )
    {
        p2m_mem_paging_populate(p2m, ram_gfn);
        return X86EMUL_RETRY;
    }
    if ( p2m_is_shared(p2mt) )
        return X86EMUL_RETRY;

    /*
     * Weird-sized accesses have undefined behaviour: we discard writes
     * and read all-ones.
     */
    if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
    {
        gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
        ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
        if ( dir == IOREQ_READ )
            memset(p_data, ~0, size);
        return X86EMUL_UNHANDLEABLE;
    }

    if ( (p_data != NULL) && (dir == IOREQ_WRITE) )
    {
        memcpy(&value, p_data, size);
        p_data = NULL;
    }

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
                return X86EMUL_OKAY;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
            {
                memcpy(p_data,
                       &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       size);
                return X86EMUL_OKAY;
            }
        }
    }

    switch ( curr->arch.hvm_vcpu.io_state )
    {
    case HVMIO_none:
        break;
    case HVMIO_completed:
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        if ( p_data == NULL )
            return X86EMUL_UNHANDLEABLE;
        goto finish_access;
    case HVMIO_dispatched:
        /* May have to wait for previous cycle of a multi-write to complete. */
        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
            return X86EMUL_RETRY;
    default:
        return X86EMUL_UNHANDLEABLE;
    }

    if ( p->state != STATE_IOREQ_NONE )
    {
        gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
                 p->state);
        return X86EMUL_UNHANDLEABLE;
    }

    curr->arch.hvm_vcpu.io_state =
        (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
    curr->arch.hvm_vcpu.io_size = size;

    p->dir = dir;
    p->data_is_ptr = value_is_ptr;
    p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
    p->size = size;
    p->addr = addr;
    p->count = *reps;
    p->df = df;
    p->data = value;

    hvmtrace_io_assist(is_mmio, p);

    if ( is_mmio )
    {
        rc = hvm_mmio_intercept(p);
        if ( rc == X86EMUL_UNHANDLEABLE )
            rc = hvm_buffered_io_intercept(p);
    }
    else
    {
        rc = hvm_portio_intercept(p);
    }

    switch ( rc )
    {
    case X86EMUL_OKAY:
    case X86EMUL_RETRY:
        *reps = p->count;
        p->state = STATE_IORESP_READY;
        hvm_io_assist();
        curr->arch.hvm_vcpu.io_state = HVMIO_none;
        break;
    case X86EMUL_UNHANDLEABLE:
        rc = X86EMUL_RETRY;
        if ( !hvm_send_assist_req(curr) )
            curr->arch.hvm_vcpu.io_state = HVMIO_none;
        else if ( p_data == NULL )
            rc = X86EMUL_OKAY;
        break;
    default:
        BUG();
    }

    if ( rc != X86EMUL_OKAY )
        return rc;

 finish_access:
    if ( p_data != NULL )
        memcpy(p_data, &curr->arch.hvm_vcpu.io_data, size);

    if ( is_mmio && !value_is_ptr )
    {
        /* Part of a multi-cycle read or write? */
        if ( dir == IOREQ_WRITE )
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
            if ( addr == (pa + bytes) )
                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
        }
        else
        {
            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
            if ( bytes == 0 )
                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
            if ( (addr == (pa + bytes)) &&
                 ((bytes + size) <
                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
            {
                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
                       p_data, size);
                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
            }
        }
    }

    return X86EMUL_OKAY;
}
void dump_pt_walk(paddr_t ttbr, paddr_t addr,
                  unsigned int root_level,
                  unsigned int nr_root_tables)
{
    static const char *level_strs[4] = { "0TH", "1ST", "2ND", "3RD" };
    const unsigned long root_pfn = paddr_to_pfn(ttbr);
    const unsigned int offsets[4] = {
        zeroeth_table_offset(addr),
        first_table_offset(addr),
        second_table_offset(addr),
        third_table_offset(addr)
    };
    lpae_t pte, *mapping;
    unsigned int level, root_table;

#ifdef CONFIG_ARM_32
    BUG_ON(root_level < 1);
#endif
    BUG_ON(root_level > 3);

    if ( nr_root_tables > 1 )
    {
        /*
         * Concatenated root-level tables. The table number will be
         * the offset at the previous level. It is not possible to
         * concatenate a level-0 root.
         */
        BUG_ON(root_level == 0);
        root_table = offsets[root_level - 1];
        printk("Using concatenated root table %u\n", root_table);
        if ( root_table >= nr_root_tables )
        {
            printk("Invalid root table offset\n");
            return;
        }
    }
    else
        root_table = 0;

    mapping = map_domain_page(_mfn(root_pfn + root_table));

    for ( level = root_level; ; level++ )
    {
        if ( offsets[level] > LPAE_ENTRIES )
            break;

        pte = mapping[offsets[level]];

        printk("%s[0x%x] = 0x%"PRIpaddr"\n",
               level_strs[level], offsets[level], pte.bits);

        if ( level == 3 || !pte.walk.valid || !pte.walk.table )
            break;

        /* For next iteration */
        unmap_domain_page(mapping);
        mapping = map_domain_page(_mfn(pte.walk.base));
    }

    unmap_domain_page(mapping);
}
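A sketch of how this walker might be invoked to dump the hypervisor's own stage-1 translation of a faulting address. The wrapper name is hypothetical, and the root level passed (0, i.e. a 4-level arm64 layout with a single, non-concatenated root table) is an assumption that depends on the paging configuration:

/* Hypothetical helper: dump the hypervisor stage-1 walk for addr using
 * the root table currently installed in TTBR0_EL2. */
static void dump_hyp_walk_sketch(vaddr_t addr)
{
    paddr_t ttbr = READ_SYSREG64(TTBR0_EL2);

    dump_pt_walk(ttbr, addr, 0 /* root level */, 1 /* one root table */);
}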
bool_t p2m_mem_access_check(paddr_t gpa, vaddr_t gla, const struct npfec npfec)
{
    int rc;
    bool_t violation;
    xenmem_access_t xma;
    vm_event_request_t *req;
    struct vcpu *v = current;
    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);

    /* Mem_access is not in use. */
    if ( !p2m->mem_access_enabled )
        return true;

    rc = p2m_get_mem_access(v->domain, _gfn(paddr_to_pfn(gpa)), &xma);
    if ( rc )
        return true;

    /* Now check for mem_access violation. */
    switch ( xma )
    {
    case XENMEM_access_rwx:
        violation = false;
        break;
    case XENMEM_access_rw:
        violation = npfec.insn_fetch;
        break;
    case XENMEM_access_wx:
        violation = npfec.read_access;
        break;
    case XENMEM_access_rx:
    case XENMEM_access_rx2rw:
        violation = npfec.write_access;
        break;
    case XENMEM_access_x:
        violation = npfec.read_access || npfec.write_access;
        break;
    case XENMEM_access_w:
        violation = npfec.read_access || npfec.insn_fetch;
        break;
    case XENMEM_access_r:
        violation = npfec.write_access || npfec.insn_fetch;
        break;
    default:
    case XENMEM_access_n:
    case XENMEM_access_n2rwx:
        violation = true;
        break;
    }

    if ( !violation )
        return true;

    /* First, handle rx2rw and n2rwx conversion automatically. */
    if ( npfec.write_access && xma == XENMEM_access_rx2rw )
    {
        rc = p2m_set_mem_access(v->domain, _gfn(paddr_to_pfn(gpa)), 1,
                                0, ~0, XENMEM_access_rw, 0);
        return false;
    }
    else if ( xma == XENMEM_access_n2rwx )
    {
        rc = p2m_set_mem_access(v->domain, _gfn(paddr_to_pfn(gpa)), 1,
                                0, ~0, XENMEM_access_rwx, 0);
    }

    /* Otherwise, check if there is a vm_event monitor subscriber */
    if ( !vm_event_check_ring(&v->domain->vm_event->monitor) )
    {
        /* No listener */
        if ( p2m->access_required )
        {
            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
                                  "no vm_event listener VCPU %d, dom %d\n",
                     v->vcpu_id, v->domain->domain_id);
            domain_crash(v->domain);
        }
        else
        {
            /* n2rwx was already handled */
            if ( xma != XENMEM_access_n2rwx )
            {
                /* A listener is not required, so clear the access
                 * restrictions. */
                rc = p2m_set_mem_access(v->domain, _gfn(paddr_to_pfn(gpa)),
                                        1, 0, ~0, XENMEM_access_rwx, 0);
            }
        }

        /* No need to reinject */
        return false;
    }

    req = xzalloc(vm_event_request_t);
    if ( req )
    {
        req->reason = VM_EVENT_REASON_MEM_ACCESS;

        /* Send request to mem access subscriber */
        req->u.mem_access.gfn = gpa >> PAGE_SHIFT;
        req->u.mem_access.offset = gpa & ((1 << PAGE_SHIFT) - 1);
        if ( npfec.gla_valid )
        {
            req->u.mem_access.flags |= MEM_ACCESS_GLA_VALID;
            req->u.mem_access.gla = gla;

            if ( npfec.kind == npfec_kind_with_gla )
                req->u.mem_access.flags |= MEM_ACCESS_FAULT_WITH_GLA;
            else if ( npfec.kind == npfec_kind_in_gpt )
                req->u.mem_access.flags |= MEM_ACCESS_FAULT_IN_GPT;
        }
        req->u.mem_access.flags |= npfec.read_access  ? MEM_ACCESS_R : 0;
        req->u.mem_access.flags |= npfec.write_access ? MEM_ACCESS_W : 0;
        req->u.mem_access.flags |= npfec.insn_fetch   ? MEM_ACCESS_X : 0;

        if ( monitor_traps(v, (xma != XENMEM_access_n2rwx), req) < 0 )
            domain_crash(v->domain);

        xfree(req);
    }

    return false;
}
/*
 * Cache the page's data.
 *
 * If an empty page cache location is available, take it. Otherwise, evict
 * the entry indexed by evict_index, and then bump evict_index. The
 * hit_count is only gathered for dump_diskdump_environment().
 *
 * If the page is compressed, uncompress it into the selected page cache
 * entry. If the page is raw, just copy it into the selected page cache
 * entry. If all works OK, update diskdump->curbufptr to point to the
 * page's uncompressed data.
 */
static int
cache_page(physaddr_t paddr)
{
    int i, ret;
    int found;
    ulong pfn;
    ulong desc_pos;
    off_t seek_offset;
    page_desc_t pd;
    const int block_size = dd->block_size;
    const off_t failed = (off_t)-1;
    ulong retlen;

    for (i = found = 0; i < DISKDUMP_CACHED_PAGES; i++) {
        if (DISKDUMP_VALID_PAGE(dd->page_cache_hdr[i].pg_flags))
            continue;
        found = TRUE;
        break;
    }

    if (!found) {
        i = dd->evict_index;
        dd->page_cache_hdr[i].pg_hit_count = 0;
        dd->evict_index = (dd->evict_index + 1) % DISKDUMP_CACHED_PAGES;
        dd->evictions++;
    }

    dd->page_cache_hdr[i].pg_flags = 0;
    dd->page_cache_hdr[i].pg_addr = paddr;
    dd->page_cache_hdr[i].pg_hit_count++;

    /* find page descriptor */
    pfn = paddr_to_pfn(paddr);
    desc_pos = pfn_to_pos(pfn);
    seek_offset = dd->data_offset +
        (off_t)(desc_pos - 1) * sizeof(page_desc_t);

    /* read page descriptor */
    if (FLAT_FORMAT()) {
        if (!read_flattened_format(dd->dfd, seek_offset, &pd, sizeof(pd)))
            return READ_ERROR;
    } else {
        if (lseek(dd->dfd, seek_offset, SEEK_SET) == failed)
            return SEEK_ERROR;
        if (read(dd->dfd, &pd, sizeof(pd)) != sizeof(pd))
            return READ_ERROR;
    }

    /* sanity check */
    if (pd.size > block_size)
        return READ_ERROR;

    /* read page data */
    if (FLAT_FORMAT()) {
        if (!read_flattened_format(dd->dfd, pd.offset,
            dd->compressed_page, pd.size))
            return READ_ERROR;
    } else {
        if (lseek(dd->dfd, pd.offset, SEEK_SET) == failed)
            return SEEK_ERROR;
        if (read(dd->dfd, dd->compressed_page, pd.size) != pd.size)
            return READ_ERROR;
    }

    if (pd.flags & DUMP_DH_COMPRESSED_ZLIB) {
        retlen = block_size;
        ret = uncompress((unsigned char *)dd->page_cache_hdr[i].pg_bufptr,
            &retlen, (unsigned char *)dd->compressed_page, pd.size);
        if ((ret != Z_OK) || (retlen != block_size)) {
            error(INFO, "%s: uncompress failed: %d\n",
                DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
                ret);
            return READ_ERROR;
        }
    } else if (pd.flags & DUMP_DH_COMPRESSED_LZO) {
        if (!(dd->flags & LZO_SUPPORTED)) {
            error(INFO,
                "%s: uncompress failed: no lzo compression support\n",
                DISKDUMP_VALID() ? "diskdump" : "compressed kdump");
            return READ_ERROR;
        }
#ifdef LZO
        retlen = block_size;
        ret = lzo1x_decompress_safe(
            (unsigned char *)dd->compressed_page, pd.size,
            (unsigned char *)dd->page_cache_hdr[i].pg_bufptr, &retlen,
            LZO1X_MEM_DECOMPRESS);
        if ((ret != LZO_E_OK) || (retlen != block_size)) {
            error(INFO, "%s: uncompress failed: %d\n",
                DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
                ret);
            return READ_ERROR;
        }
#endif
    } else
        memcpy(dd->page_cache_hdr[i].pg_bufptr,
            dd->compressed_page, block_size);

    dd->page_cache_hdr[i].pg_flags |= PAGE_VALID;
    dd->curbufptr = dd->page_cache_hdr[i].pg_bufptr;

    return TRUE;
}
/*
 * 0   == (P2M_ONE_DESCEND) continue to descend the tree
 * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
 *        entry, addr and maddr updated.  Return value is an
 *        indication of the amount of work done (for preemption).
 * -ve == (-Exxx) error.
 */
static int apply_one_level(struct domain *d,
                           lpae_t *entry,
                           unsigned int level,
                           bool_t flush_cache,
                           enum p2m_operation op,
                           paddr_t start_gpaddr,
                           paddr_t end_gpaddr,
                           paddr_t *addr,
                           paddr_t *maddr,
                           bool_t *flush,
                           int mattr,
                           p2m_type_t t,
                           p2m_access_t a)
{
    const paddr_t level_size = level_sizes[level];
    const paddr_t level_mask = level_masks[level];
    const paddr_t level_shift = level_shifts[level];

    struct p2m_domain *p2m = &d->arch.p2m;
    lpae_t pte;
    const lpae_t orig_pte = *entry;
    int rc;

    BUG_ON(level > 3);

    switch ( op )
    {
    case ALLOCATE:
        ASSERT(level < 3 || !p2m_valid(orig_pte));
        ASSERT(*maddr == 0);

        if ( p2m_valid(orig_pte) )
            return P2M_ONE_DESCEND;

        if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) &&
             /* We only create superpages when mem_access is not in use. */
             (level == 3 || (level < 3 && !p2m->mem_access_enabled)) )
        {
            struct page_info *page;

            page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
            if ( page )
            {
                rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a);
                if ( rc < 0 )
                {
                    free_domheap_page(page);
                    return rc;
                }

                pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t, a);
                if ( level < 3 )
                    pte.p2m.table = 0;
                p2m_write_pte(entry, pte, flush_cache);
                p2m->stats.mappings[level]++;

                *addr += level_size;

                return P2M_ONE_PROGRESS;
            }
            else if ( level == 3 )
                return -ENOMEM;
        }

        /* L3 is always suitably aligned for mapping (handled, above) */
        BUG_ON(level == 3);

        /*
         * If we get here then we failed to allocate a sufficiently
         * large contiguous region for this level (which can't be
         * L3) or mem_access is in use. Create a page table and
         * continue to descend so we try smaller allocations.
         */
        rc = p2m_create_table(d, entry, 0, flush_cache);
        if ( rc < 0 )
            return rc;

        return P2M_ONE_DESCEND;

    case INSERT:
        if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
             /*
              * We do not handle replacing an existing table with a
              * superpage, or when mem_access is in use.
              */
             (level == 3 || (!p2m_table(orig_pte) &&
                             !p2m->mem_access_enabled)) )
        {
            rc = p2m_mem_access_radix_set(p2m, paddr_to_pfn(*addr), a);
            if ( rc < 0 )
                return rc;

            /* New mapping is superpage aligned, make it */
            pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t, a);
            if ( level < 3 )
                pte.p2m.table = 0; /* Superpage entry */

            p2m_write_pte(entry, pte, flush_cache);

            *flush |= p2m_valid(orig_pte);

            *addr += level_size;
            *maddr += level_size;

            if ( p2m_valid(orig_pte) )
            {
                /*
                 * We can't currently get here for an existing table
                 * mapping, since we don't handle replacing an
                 * existing table with a superpage. If we did we would
                 * need to handle freeing (and accounting) for the bit
                 * of the p2m tree which we would be about to lop off.
                 */
                BUG_ON(level < 3 && p2m_table(orig_pte));
                if ( level == 3 )
                    p2m_put_l3_page(orig_pte);
            }
            else /* New mapping */
                p2m->stats.mappings[level]++;

            return P2M_ONE_PROGRESS;
        }
        else
        {
int __init dom0_setup_permissions(struct domain *d)
{
    unsigned long mfn;
    unsigned int i;
    int rc;

    /* The hardware domain is initially permitted full I/O capabilities. */
    rc = ioports_permit_access(d, 0, 0xFFFF);
    rc |= iomem_permit_access(d, 0UL, (1UL << (paddr_bits - PAGE_SHIFT)) - 1);
    rc |= irqs_permit_access(d, 1, nr_irqs_gsi - 1);

    /* Modify I/O port access permissions. */

    /* Master Interrupt Controller (PIC). */
    rc |= ioports_deny_access(d, 0x20, 0x21);
    /* Slave Interrupt Controller (PIC). */
    rc |= ioports_deny_access(d, 0xA0, 0xA1);
    /* Interval Timer (PIT). */
    rc |= ioports_deny_access(d, 0x40, 0x43);
    /* PIT Channel 2 / PC Speaker Control. */
    rc |= ioports_deny_access(d, 0x61, 0x61);
    /* ACPI PM Timer. */
    if ( pmtmr_ioport )
        rc |= ioports_deny_access(d, pmtmr_ioport, pmtmr_ioport + 3);
    /* PCI configuration space (NB. 0xcf8 has special treatment). */
    rc |= ioports_deny_access(d, 0xcfc, 0xcff);
    /* Command-line I/O ranges. */
    process_dom0_ioports_disable(d);

    /* Modify I/O memory access permissions. */

    /* Local APIC. */
    if ( mp_lapic_addr != 0 )
    {
        mfn = paddr_to_pfn(mp_lapic_addr);
        rc |= iomem_deny_access(d, mfn, mfn);
    }
    /* I/O APICs. */
    for ( i = 0; i < nr_ioapics; i++ )
    {
        mfn = paddr_to_pfn(mp_ioapics[i].mpc_apicaddr);
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
            rc |= iomem_deny_access(d, mfn, mfn);
    }
    /* MSI range. */
    rc |= iomem_deny_access(d, paddr_to_pfn(MSI_ADDR_BASE_LO),
                            paddr_to_pfn(MSI_ADDR_BASE_LO +
                                         MSI_ADDR_DEST_ID_MASK));
    /* HyperTransport range. */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
        rc |= iomem_deny_access(d, paddr_to_pfn(0xfdULL << 32),
                                paddr_to_pfn((1ULL << 40) - 1));

    /* Remove access to E820_UNUSABLE I/O regions above 1MB. */
    for ( i = 0; i < e820.nr_map; i++ )
    {
        unsigned long sfn, efn;

        sfn = max_t(unsigned long, paddr_to_pfn(e820.map[i].addr), 0x100ul);
        efn = paddr_to_pfn(e820.map[i].addr + e820.map[i].size - 1);
        if ( (e820.map[i].type == E820_UNUSABLE) &&
             (e820.map[i].size != 0) &&
             (sfn <= efn) )
            rc |= iomem_deny_access(d, sfn, efn);
    }

    /* Prevent access to HPET. */
    if ( hpet_address )
    {
        u8 prot_flags = hpet_flags & ACPI_HPET_PAGE_PROTECT_MASK;

        mfn = paddr_to_pfn(hpet_address);
        if ( prot_flags == ACPI_HPET_PAGE_PROTECT4 )
            rc |= iomem_deny_access(d, mfn, mfn);
        else if ( prot_flags == ACPI_HPET_PAGE_PROTECT64 )
            rc |= iomem_deny_access(d, mfn, mfn + 15);
        else if ( ro_hpet )
            rc |= rangeset_add_singleton(mmio_ro_ranges, mfn);
    }

    return rc;
}
/*
 * Read from a diskdump-created dumpfile.
 */
int
read_diskdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
{
    int ret;
    physaddr_t curpaddr;
    ulong pfn, page_offset;

    pfn = paddr_to_pfn(paddr);

    if (KDUMP_SPLIT()) {
        /* Find the proper dd */
        int i;
        unsigned long start_pfn;
        unsigned long end_pfn;

        for (i = 0; i < num_dumpfiles; i++) {
            start_pfn = dd_list[i]->sub_header_kdump->start_pfn;
            end_pfn = dd_list[i]->sub_header_kdump->end_pfn;
            if ((pfn >= start_pfn) && (pfn <= end_pfn)) {
                dd = dd_list[i];
                break;
            }
        }

        if (i == num_dumpfiles) {
            if (CRASHDEBUG(8))
                fprintf(fp, "read_diskdump: SEEK_ERROR: "
                    "paddr/pfn %llx/%lx beyond last dumpfile\n",
                    (ulonglong)paddr, pfn);
            return SEEK_ERROR;
        }
    }

    curpaddr = paddr & ~((physaddr_t)(dd->block_size - 1));
    page_offset = paddr & ((physaddr_t)(dd->block_size - 1));

    if ((pfn >= dd->header->max_mapnr) || !page_is_ram(pfn)) {
        if (CRASHDEBUG(8)) {
            fprintf(fp, "read_diskdump: SEEK_ERROR: "
                "paddr/pfn: %llx/%lx ",
                (ulonglong)paddr, pfn);
            if (pfn >= dd->header->max_mapnr)
                fprintf(fp, "max_mapnr: %x\n",
                    dd->header->max_mapnr);
            else
                fprintf(fp, "!page_is_ram\n");
        }
        return SEEK_ERROR;
    }

    if (!page_is_dumpable(pfn)) {
        if ((dd->flags & (ZERO_EXCLUDED|ERROR_EXCLUDED)) ==
            ERROR_EXCLUDED) {
            if (CRASHDEBUG(8))
                fprintf(fp, "read_diskdump: PAGE_EXCLUDED: "
                    "paddr/pfn: %llx/%lx\n",
                    (ulonglong)paddr, pfn);
            return PAGE_EXCLUDED;
        }
        if (CRASHDEBUG(8))
            fprintf(fp, "read_diskdump: zero-fill: "
                "paddr/pfn: %llx/%lx\n",
                (ulonglong)paddr, pfn);
        memset(bufptr, 0, cnt);
        return cnt;
    }

    if (!page_is_cached(curpaddr)) {
        if (CRASHDEBUG(8))
            fprintf(fp, "read_diskdump: paddr/pfn: %llx/%lx"
                " -> cache physical page: %llx\n",
                (ulonglong)paddr, pfn, (ulonglong)curpaddr);

        if ((ret = cache_page(curpaddr)) < 0) {
            if (CRASHDEBUG(8))
                fprintf(fp, "read_diskdump: "
                    "%s: cannot cache page: %llx\n",
                    ret == SEEK_ERROR ?
                    "SEEK_ERROR" : "READ_ERROR",
                    (ulonglong)curpaddr);
            return ret;
        }
    } else if (CRASHDEBUG(8))
        fprintf(fp, "read_diskdump: paddr/pfn: %llx/%lx"
            " -> physical page is cached: %llx\n",
            (ulonglong)paddr, pfn, (ulonglong)curpaddr);

    memcpy(bufptr, dd->curbufptr + page_offset, cnt);
    return cnt;
}
void pci_vtd_quirk(const struct pci_dev *pdev)
{
    int seg = pdev->seg;
    int bus = pdev->bus;
    int dev = PCI_SLOT(pdev->devfn);
    int func = PCI_FUNC(pdev->devfn);
    int pos;
    bool_t ff;
    u32 val, val2;
    u64 bar;
    paddr_t pa;
    const char *action;

    if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
         PCI_VENDOR_ID_INTEL )
        return;

    switch ( pci_conf_read16(seg, bus, dev, func, PCI_DEVICE_ID) )
    {
    /*
     * Mask reporting Intel VT-d faults to IOH core logic:
     * - Some platforms escalate VT-d faults to platform errors.
     * - This can cause system failure upon non-fatal VT-d faults.
     * - Potential security issue if a malicious guest triggers VT-d faults.
     */
    case 0x0e28: /* Xeon-E5v2 (IvyBridge) */
    case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
    case 0x3728: /* Xeon C5500/C3500 (JasperForest) */
    case 0x3c28: /* Sandybridge */
        val = pci_conf_read32(seg, bus, dev, func, 0x1AC);
        pci_conf_write32(seg, bus, dev, func, 0x1AC, val | (1 << 31));
        printk(XENLOG_INFO "Masked VT-d error signaling on %04x:%02x:%02x.%u\n",
               seg, bus, dev, func);
        break;

    /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
    case 0x3400 ... 0x3407: /* host bridges */
    case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */
    /* JasperForest (Intel Xeon Processor C5500/C3500) */
    case 0x3700 ... 0x370f: /* host bridges */
    case 0x3720 ... 0x3724: /* root ports */
    /* Sandybridge-EP (Romley) */
    case 0x3c00: /* host bridge */
    case 0x3c01 ... 0x3c0b: /* root ports */
        pos = pci_find_ext_capability(seg, bus, pdev->devfn,
                                      PCI_EXT_CAP_ID_ERR);
        if ( !pos )
        {
            pos = pci_find_ext_capability(seg, bus, pdev->devfn,
                                          PCI_EXT_CAP_ID_VNDR);
            while ( pos )
            {
                val = pci_conf_read32(seg, bus, dev, func,
                                      pos + PCI_VNDR_HEADER);
                if ( PCI_VNDR_HEADER_ID(val) == 4 &&
                     PCI_VNDR_HEADER_REV(val) == 1 )
                {
                    pos += PCI_VNDR_HEADER;
                    break;
                }
                pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
                                                   PCI_EXT_CAP_ID_VNDR);
            }
            ff = 0;
        }
        else
            ff = pcie_aer_get_firmware_first(pdev);
        if ( !pos )
        {
            printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
                   seg, bus, dev, func);
            break;
        }

        val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
        val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
        if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
            action = "Found masked";
        else if ( !ff )
        {
            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
                             val | PCI_ERR_UNC_UNSUP);
            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
                             val2 | PCI_ERR_COR_ADV_NFAT);
            action = "Masked";
        }
        else
            action = "Must not mask";

        /* XPUNCERRMSK Send Completion with Unsupported Request */
        val = pci_conf_read32(seg, bus, dev, func, 0x20c);
        pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));

        printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
               action, seg, bus, dev, func);
        break;

    case 0x0040: case 0x0044: case 0x0048: /* Nehalem/Westmere */
    case 0x0100: case 0x0104: case 0x0108: /* Sandybridge */
    case 0x0150: case 0x0154: case 0x0158: /* Ivybridge */
    case 0x0a00: case 0x0a04: case 0x0a08: case 0x0a0f: /* Haswell ULT */
    case 0x0c00: case 0x0c04: case 0x0c08: case 0x0c0f: /* Haswell */
    case 0x0d00: case 0x0d04: case 0x0d08: case 0x0d0f: /* Haswell */
    case 0x1600: case 0x1604: case 0x1608: case 0x160f: /* Broadwell */
    case 0x1610: case 0x1614: case 0x1618: /* Broadwell */
    case 0x1900: case 0x1904: case 0x1908: case 0x190c: case 0x190f:
        /* Skylake */
    case 0x1910: case 0x1918: case 0x191f: /* Skylake */
        bar = pci_conf_read32(seg, bus, dev, func, 0x6c);
        bar = (bar << 32) | pci_conf_read32(seg, bus, dev, func, 0x68);
        pa = bar & 0x7ffffff000UL; /* bits 12...38 */
        if ( (bar & 1) && pa &&
             page_is_ram_type(paddr_to_pfn(pa), RAM_TYPE_RESERVED) )
        {
            u32 __iomem *va = ioremap(pa, PAGE_SIZE);

            if ( va )
            {
                __set_bit(0x1c8 * 8 + 20, va);
                iounmap(va);
                printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
                       seg, bus, dev, func);
            }
            else
                printk(XENLOG_ERR "Could not map %"PRIpaddr" for %04x:%02x:%02x.%u\n",
                       pa, seg, bus, dev, func);
        }
        else
            printk(XENLOG_WARNING "Bogus DMIBAR %#"PRIx64" on %04x:%02x:%02x.%u\n",
                   bar, seg, bus, dev, func);
        break;
    }
}