/*
 * Build the initial pagetable.
 */
static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long start_address, end_address;
    unsigned long pfn_to_map, pt_pfn = *start_pfn;
    static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
    unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
    unsigned long offset;
    int count = 0;
    int rc;

    /* Be conservative: even if we know there will be more pages already
       mapped, start the loop at the very beginning. */
    pfn_to_map = *start_pfn;

    if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) )
    {
        minios_printk("WARNING: Mini-OS trying to use Xen virtual space. "
                      "Truncating memory from %luMB to ",
                      ((unsigned long)pfn_to_virt(*max_pfn) -
                       (unsigned long)&_text)>>20);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
        minios_printk("%luMB\n",
                      ((unsigned long)pfn_to_virt(*max_pfn) -
                       (unsigned long)&_text)>>20);
    }
/*
 * Build the initial pagetable.
 */
static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long start_address, end_address;
    unsigned long pfn_to_map, pt_pfn = *start_pfn;
    static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
    unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
    unsigned long offset;
    int count = 0;
    int rc;

    pfn_to_map =
        (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;

    if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) )
    {
        printk("WARNING: Mini-OS trying to use Xen virtual space. "
               "Truncating memory from %luMB to ",
               ((unsigned long)pfn_to_virt(*max_pfn) -
                (unsigned long)&_text)>>20);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
        printk("%luMB\n",
               ((unsigned long)pfn_to_virt(*max_pfn) -
                (unsigned long)&_text)>>20);
    }
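/*
 * Both build_pagetable() variants above lean on the same pfn/virt
 * conversion helpers. In Mini-OS these are thin macros over the fact
 * that the guest's pseudo-physical memory is mapped linearly at
 * VIRT_START. A minimal sketch, assuming that direct-map layout; the
 * names follow Mini-OS's mm.h, but treat the exact definitions as
 * illustrative rather than authoritative:
 */
#define to_phys(x)          ((unsigned long)(x) - VIRT_START)
#define to_virt(x)          ((void *)((unsigned long)(x) + VIRT_START))

#define PFN_DOWN(x)         ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)         ((uint64_t)(x) << PAGE_SHIFT)

#define pfn_to_virt(_pfn)   (to_virt((unsigned long)(_pfn) << PAGE_SHIFT))
#define virt_to_pfn(_virt)  (PFN_DOWN(to_phys(_virt)))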
void arch_mm_init(unsigned long *pt_pfn, unsigned long pt_base,
                  unsigned long nr_pt_frames, unsigned long nr_pages)
{
    unsigned long start_pfn =
        (nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;
    unsigned long end_pfn = nr_pages;

    build_page_tables((pgentry_t *)pt_base, pt_pfn,
                      pfn_to_virt(start_pfn), pfn_to_virt(end_pfn));
}
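/*
 * The start_pfn arithmetic is worth a concrete number. Each L1 frame
 * holds L1_PAGETABLE_ENTRIES PTEs (512 with 64-bit entries), so the
 * frames the domain builder installed beyond the NOT_L1_FRAMES
 * directory frames already map that many pages. first_unmapped_pfn()
 * is a hypothetical helper, shown only to make the computation
 * explicit:
 */
static inline unsigned long first_unmapped_pfn(unsigned long nr_pt_frames)
{
    /* e.g. nr_pt_frames = 6, NOT_L1_FRAMES = 3:
       3 L1 frames * 512 entries = pfn 1536 is the first one unmapped. */
    return (nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;
}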
static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_addr, gfp_t flag,
					unsigned long attrs)
{
	void *ret;

	/*
	 * Our max_low_pfn should have been backed off by 16MB in
	 * mm/init.c to create DMA coherent space.  Use that as the VA
	 * for the pool.
	 */
	if (coherent_pool == NULL) {
		coherent_pool = gen_pool_create(PAGE_SHIFT, -1);

		if (coherent_pool == NULL)
			panic("Can't create %s() memory pool!", __func__);
		else
			gen_pool_add(coherent_pool,
				(unsigned long)pfn_to_virt(max_low_pfn),
				hexagon_coherent_pool_size, -1);
	}

	ret = (void *) gen_pool_alloc(coherent_pool, size);

	if (ret) {
		memset(ret, 0, size);
		*dma_addr = (dma_addr_t) virt_to_phys(ret);
	} else
		*dma_addr = ~0;

	return ret;
}
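/*
 * For symmetry, the release path just hands the chunk back to the gen
 * pool. A minimal sketch of the counterpart (reconstructed from memory
 * of the upstream hexagon code, so treat the exact name and signature
 * as illustrative; the gen_pool_free() call itself is the real API):
 */
static void hexagon_free_coherent(struct device *dev, size_t size,
				  void *vaddr, dma_addr_t dma_addr,
				  unsigned long attrs)
{
	gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
}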
void init_mm(void)
{
    unsigned long start_pfn, max_pfn;

    printk("go_mm: initializing\n");

    arch_init_mm(&start_pfn, &max_pfn);

    printk("go_mm: initializing best fit page allocator for %lx-%lx\n",
           (unsigned long)pfn_to_virt(start_pfn),
           (unsigned long)pfn_to_virt(max_pfn));
    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
    printk("go_mm: done\n");

    arch_init_p2m(max_pfn);
    arch_init_demand_mapping_area(max_pfn);
}
static pgentry_t new_pt_page(unsigned long *pt_pfn,
                             pgentry_t *higher_tab,
                             unsigned int higher_off,
                             int level)
{
    // *pt_pfn is already mapped by the domain builder.
    // Remap *pt_pfn read-only, as required for a page table/directory,
    // update higher_tab[higher_off] with a reference to *pt_pfn,
    // then advance (*pt_pfn)++.
    pgentry_t prot_e, prot_t;

    prot_e = prot_t = 0;
    switch ( level )
    {
    case L1_FRAME:
        prot_e = L1_PROT;
        prot_t = L2_PROT;
        break;
    case L2_FRAME:
        prot_e = L2_PROT;
        prot_t = L3_PROT;
        break;
#if defined(__x86_64__)
    case L3_FRAME:
        prot_e = L3_PROT;
        prot_t = L4_PROT;
        break;
#endif
    default:
        fatal_error("new_pt_page: level?");
    }

    void *pt_va = pfn_to_virt(*pt_pfn);
    memset(pt_va, 0, PAGE_SIZE);    // all entries not present

    // Remap the frame read-only: Xen refuses to install a page as a
    // page table while a writable mapping of it exists.
    unsigned long pte0 =
        pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | (prot_e & ~_PAGE_RW);
    HYPERVISOR_update_va_mapping((unsigned long)pt_va, __pte(pte0),
                                 UVMF_INVLPG);

    // Hook the new table into the next level up.
    pgentry_t pte = (pfn_to_mfn(*pt_pfn) << PAGE_SHIFT) | prot_t;
    struct mmu_update mu;
    mu.ptr = (virt_to_mfn(higher_tab) << PAGE_SHIFT) +
             sizeof(pgentry_t) * higher_off;
    mu.val = pte;
    HYPERVISOR_mmu_update(&mu, 1, 0, DOMID_SELF);

    (*pt_pfn)++;
    return pte;
}
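/*
 * A sketch of how new_pt_page() might be driven: the caller walks one
 * level at a time and only allocates when an entry is not yet present.
 * ensure_l1_table() is a hypothetical helper; the offset macro and the
 * mfn/pfn conversions follow the naming used in the other snippets here.
 */
static pgentry_t *ensure_l1_table(unsigned long va, unsigned long *pt_pfn,
                                  pgentry_t *l2_tab)
{
    unsigned int off = l2_table_offset(va);
    pgentry_t e = l2_tab[off];

    if ( !(e & _PAGE_PRESENT) )
        e = new_pt_page(pt_pfn, l2_tab, off, L1_FRAME);

    // Descend into the (possibly fresh) L1 frame via the direct map.
    return (pgentry_t *)to_virt(mfn_to_pfn(pte_to_mfn(e)) << PAGE_SHIFT);
}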
unsigned long alloc_num_pages_aligned(unsigned long c, int zero_bits)
{
    unsigned long i, l, virt, align_mask;

    i = 0;
    /* 1UL so the shift is done in unsigned long width. */
    align_mask = ~((1UL << zero_bits) - 1);
    /* <= so a run ending exactly at mm.num_pages is still usable. */
    while(i + c <= mm.num_pages) {
        virt = (unsigned long)pfn_to_virt(i);
        if(virt != (virt & align_mask)) {
            /* Candidate start is not aligned; try the next pfn. */
            i++;
            continue;
        }
        l = bitmap_num_free(i, c);
        if(l == c) {
            bitmap_set(i, c);
            return virt;
        }
        /* Fewer than c free pages here; skip past the allocated page. */
        i += l + 1;
    }
    printk("Cannot allocate %lu pages\n", c);
    return 0;
}
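/*
 * bitmap_num_free() and bitmap_set() are not shown in this snippet. A
 * minimal sketch under the assumption of a one-bit-per-page map in an
 * array of longs; mm_bitmap, MAX_PAGES and BITS_PER_LONG are assumed
 * names here, and the real implementation may differ:
 */
static unsigned long mm_bitmap[MAX_PAGES / BITS_PER_LONG];

/* Count consecutive free (zero-bit) pages starting at pfn i, capped at c. */
static unsigned long bitmap_num_free(unsigned long i, unsigned long c)
{
    unsigned long n = 0;

    while ( n < c && !(mm_bitmap[(i + n) / BITS_PER_LONG] &
                       (1UL << ((i + n) % BITS_PER_LONG))) )
        n++;
    return n;
}

/* Mark c pages starting at pfn i as allocated. */
static void bitmap_set(unsigned long i, unsigned long c)
{
    for ( ; c > 0; c--, i++ )
        mm_bitmap[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
}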
/*
 * Make pt_pfn a new 'level' page table frame and hook it into the page
 * table at offset in previous level MFN (prev_l_mfn). pt_pfn is a guest
 * PFN.
 */
static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
                         unsigned long offset, unsigned long level)
{
    pgentry_t *tab = (pgentry_t *)start_info.pt_base;
    unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
    pgentry_t prot_e, prot_t;
    mmu_update_t mmu_updates[1];
    int rc;

    prot_e = prot_t = 0;
    DEBUG("Allocating new L%lu pt frame for pfn=%lx, "
          "prev_l_mfn=%lx, offset=%lx",
          level, *pt_pfn, prev_l_mfn, offset);

    /* We need to clear the page, otherwise we might fail to map it
       as a page table page */
    memset((void*) pt_page, 0, PAGE_SIZE);

    switch ( level )
    {
    case L1_FRAME:
        prot_e = L1_PROT;
        prot_t = L2_PROT;
        break;
    case L2_FRAME:
        prot_e = L2_PROT;
        prot_t = L3_PROT;
        break;
#if defined(__x86_64__)
    case L3_FRAME:
        prot_e = L3_PROT;
        prot_t = L4_PROT;
        break;
#endif
    default:
        printk("new_pt_frame() called with invalid level number %lu\n", level);
        do_exit();
        break;
    }

    /* Make PFN a page table page */
#if defined(__x86_64__)
    tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
#endif
    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);

    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
        sizeof(pgentry_t) * l1_table_offset(pt_page);
    mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
        (prot_e & ~_PAGE_RW);

    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
    {
        printk("ERROR: PTE for new page table page could not be updated\n");
        printk("       mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    /* Hook the new page table page into the hierarchy */
    mmu_updates[0].ptr =
        ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
    mmu_updates[0].val =
        (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;

    if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
    {
        printk("ERROR: mmu_update failed with rc=%d\n", rc);
        do_exit();
    }

    *pt_pfn += 1;
}
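/*
 * new_pt_frame() descends to the PTE mapping pt_page via pte_to_virt(),
 * which turns a page table entry back into the direct-map address of
 * the frame it references. A plausible definition in this style
 * (hedged; the real macros live in Mini-OS's x86 page headers):
 */
#define pte_to_mfn(_pte)  (((_pte) & (PADDR_MASK & PAGE_MASK)) >> PAGE_SHIFT)
#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT)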
    pfn_to_map =
        (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;

    if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) )
    {
        printk("WARNING: Mini-OS trying to use Xen virtual space. "
               "Truncating memory from %luMB to ",
               ((unsigned long)pfn_to_virt(*max_pfn) -
                (unsigned long)&_text)>>20);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
        printk("%luMB\n",
               ((unsigned long)pfn_to_virt(*max_pfn) -
                (unsigned long)&_text)>>20);
    }

    start_address = (unsigned long)pfn_to_virt(pfn_to_map);
    end_address = (unsigned long)pfn_to_virt(*max_pfn);

    /* We worked out the virtual memory range to map, now mapping loop */
    printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address);

    while ( start_address < end_address )
    {
        tab = (pgentry_t *)start_info.pt_base;
        pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));

#if defined(__x86_64__)
        offset = l4_table_offset(start_address);
        /* Need new L3 pt frame */
        if ( !(start_address & L3_MASK) )
            if ( need_pt_frame(start_address, L3_FRAME) )
                new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);

        page = tab[offset];
        pt_mfn = pte_to_mfn(page);
        tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT);
#endif
        /* The walk continues in the same pattern through the L3 and L2
           offsets, then queues the L1 PTE writes and flushes them in
           batches through HYPERVISOR_mmu_update() (see the flush sketch
           below). */
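/*
 * The mapping loop accumulates L1 PTE writes in mmu_updates[] and
 * pushes them to Xen in batches: when the array fills, and once more
 * for any tail. A minimal sketch of that flush step, reusing the
 * printk/do_exit error convention from the snippets above
 * (flush_mmu_update_batch is an illustrative name):
 */
static void flush_mmu_update_batch(mmu_update_t *updates, int *count)
{
    int rc;

    if ( *count == 0 )
        return;

    rc = HYPERVISOR_mmu_update(updates, *count, NULL, DOMID_SELF);
    if ( rc < 0 )
    {
        printk("ERROR: build_pagetable(): PTE could not be updated\n");
        printk("       mmu_update failed with rc=%d\n", rc);
        do_exit();
    }
    *count = 0;
}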
/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int fault, code = SEGV_MAPERR;

	cause = regs->scause;
	addr = regs->sbadaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->sstatus & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_INST_ACCESS:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_ACCESS:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_ACCESS:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		if (user_mode(regs))
			goto bad_area;

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, addr);
		pud_k = pud_offset(pgd_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses.  If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}
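/*
 * One detail worth isolating from vmalloc_fault above: on this early
 * RISC-V port the sptbr CSR holds the PFN of the active root page
 * table, so the kernel's linear map turns it into a usable pointer with
 * pfn_to_virt(). current_pgd() is a hypothetical wrapper, not part of
 * the port:
 */
static inline pgd_t *current_pgd(void)
{
	return (pgd_t *)pfn_to_virt(csr_read(sptbr));
}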