/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares (but this is /really/
 * inefficient, ugh).
 *
 * We return 1 after having slept, which makes the process start over
 * from the beginning for this process..
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	/* Empty PTE: nothing referencing the swap device here. */
	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		/*
		 * Resident page: it can only reference the swap device
		 * through the swap cache.  Only act on pages cached for
		 * the swap type being turned off.
		 */
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
			return 0;
		if (!in_swap_cache(page_nr))
			return 0;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return 0;
		/*
		 * Detach the page from the swap device and mark the PTE
		 * dirty: the on-disk copy is gone, so the page must not be
		 * silently discarded later.
		 */
		delete_from_swap_cache(page_nr);
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
	/* Swapped-out entry: skip entries belonging to other swap devices. */
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	/* Synchronously read the page back in.  This sleeps. */
	read_swap_page(pte_val(pte), (char *) page);
#if 0 /* Is this really needed here, hasn't it been solved elsewhere? */
	flush_page_to_ram(page);
#endif
	/*
	 * We slept in read_swap_page(), so the PTE may have changed under
	 * us; if so, throw away our copy and let the caller start over
	 * (hence return 1 either way below).
	 */
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	/* Install the page writable+dirty and release the swap slot. */
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	flush_tlb_page(vma, address);
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
// init_mm.page_table_lock must be held before calling! void pram_writeable(void * vaddr, unsigned long size, int rw) { unsigned long addr = (unsigned long)vaddr & PAGE_MASK; unsigned long end = (unsigned long)vaddr + size; unsigned long start = addr; do { pram_page_writeable(addr, rw); addr += PAGE_SIZE; } while (addr && (addr < end)); /* * FIXME: can't use flush_tlb_page/range() until these * routines support flushing memory regions owned by * init_mm (so far only PPC versions). */ #if 0 if (end <= start + PAGE_SIZE) flush_tlb_page(find_vma(&init_mm,start), start); else flush_tlb_range(&init_mm, start, end); #else flush_tlb_all(); #endif }
/*
 * Atomically read-and-clear a PTE, then invalidate the stale TLB entry
 * for @address so no CPU keeps using the old translation.
 * Returns the PTE value that was cleared.
 */
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = ptep_get_and_clear(vma->vm_mm, address, ptep);

	flush_tlb_page(vma, address);
	return old_pte;
}
/*
 * Page-walk callback: clear the cache-inhibit (_PAGE_CI) bit on one PTE
 * and flush its TLB entry so the new flags are picked up on next access.
 * Always returns 0 so the walk continues.
 * NOTE(review): the NULL vma argument relies on this architecture's
 * flush_tlb_page() not using it - confirm before reusing elsewhere.
 */
static int page_clear_nocache(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pte_val(*pte) &= ~_PAGE_CI;
	flush_tlb_page(NULL, addr);
	return 0;
}
/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures). The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	/* Not present: never mapped or swapped out - fault the page in. */
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	/* Software emulation of the accessed ("young") bit. */
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		/* Writable mapping: just dirty it early and return. */
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		return;
	}
	/* Write to a write-protected present page: copy-on-write path. */
	do_wp_page(current, vma, address, write_access);
}
/*
 * Test-and-clear the accessed ("young") bit of a PTE.  The TLB entry is
 * flushed only when the bit was actually set, so unchanged PTEs cost no
 * flush.  Returns the previous value of the bit (0 or non-zero).
 */
int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	int was_young = ptep_test_and_clear_young(vma, address, ptep);

	if (was_young)
		flush_tlb_page(vma, address);

	return was_young;
}
/*
 * Atomically read-and-clear a PTE and flush its TLB entry, skipping the
 * flush when the old PTE could not have been cached in the TLB (see
 * pte_accessible()).  Returns the cleared PTE value.
 */
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t old_pte;

	old_pte = ptep_get_and_clear(mm, address, ptep);
	/* Only a TLB-visible entry needs invalidating. */
	if (pte_accessible(mm, old_pte))
		flush_tlb_page(vma, address);

	return old_pte;
}
/*
 * mmap() handler for cramfs supporting execute-in-place (XIP): read-only
 * maps are mapped directly onto the linear flash/physical address instead
 * of being paged through the page cache.
 */
static int cramfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long address, length;
	struct inode *inode = file->f_dentry->d_inode;
	struct super_block *sb = inode->i_sb;

	/* this is only used in the case of read-only maps for XIP */
	if (vma->vm_flags & VM_WRITE)
		return generic_file_mmap(file, vma);

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	/* Physical address of the file data, offset by the mmap offset. */
	address = PAGE_ALIGN(sb->CRAMFS_SB_LINEAR_PHYS_ADDR + OFFSET(inode));
	address += vma->vm_pgoff << PAGE_SHIFT;

	/* Clamp the mapping length to the file size, page-aligned. */
	length = vma->vm_end - vma->vm_start;
	if (length > inode->i_size)
		length = inode->i_size;
	length = PAGE_ALIGN(length);

#if 0
	/* Doing the following makes it slower and more broken.  bdl */
	/*
	 * Accessing memory above the top the kernel knows about or
	 * through a file pointer that was marked O_SYNC will be
	 * done non-cached.
	 */
	vma->vm_page_prot =
		__pgprot((pgprot_val(vma->vm_page_prot) & ~_CACHE_MASK)
			| _CACHE_UNCACHED);
#endif

	/*
	 * Don't dump addresses that are not real memory to a core file.
	 */
	vma->vm_flags |= VM_IO;
	/* NOTE(review): flushing before remap_page_range() populates the
	 * range looks odd (and `address` here is physical, not virtual) -
	 * presumably clears any stale entry; verify against arch TLB code. */
	flush_tlb_page(vma, address);
	if (remap_page_range(vma->vm_start, address, length,
			     vma->vm_page_prot))
		return -EAGAIN;

#ifdef DEBUG_CRAMFS_XIP
	printk("cramfs_mmap: mapped %s at 0x%08lx, length %lu to vma 0x%08lx"
		", page_prot 0x%08lx\n",
		file->f_dentry->d_name.name, address, length,
		vma->vm_start, pgprot_val(vma->vm_page_prot));
#endif

	return 0;
}
/*
 * Update a PTE's access flags (dirty, accessed, writable).  The new
 * @entry is always at least as permissive as the old one, so no TLB
 * shoot-down of other CPUs' cached translations is required beyond the
 * local flush.  Returns non-zero when the PTE actually changed, telling
 * the caller to run things like update_mmu_cache(); sparc relies on this
 * being decided here rather than in the caller (minor-fault forcing on
 * sun4c).
 */
int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
	pte_t *ptep, pte_t entry, int dirty)
{
	int updated = 0;

	if (!pte_same(*ptep, entry)) {
		set_pte_at(vma->vm_mm, address, ptep, entry);
		flush_tlb_page(vma, address);
		updated = 1;
	}

	return updated;
}
/*
 * Reset and initialize the MPC8xx Communication Processor Module (CPM):
 * optionally reload the microcode patch, set SDMA arbitration, reclaim
 * dual-port RAM, and mark the host buffer page uncacheable.
 * @host_page_addr: kernel virtual address of the page used for CPM host
 * buffers.
 */
void m8xx_cpm_reset(uint host_page_addr)
{
	volatile immap_t *imp;
	volatile cpm8xx_t *commproc;
	pte_t *pte;

	imp = (immap_t *)IMAP_ADDR;
	commproc = (cpm8xx_t *)&imp->im_cpm;

#ifdef CONFIG_UCODE_PATCH
	/* Perform a reset. */
	commproc->cp_cpcr = (CPM_CR_RST | CPM_CR_FLG);

	/* Wait for it. */
	while (commproc->cp_cpcr & CPM_CR_FLG);

	cpm_load_patch(imp);
#endif

	/* Set SDMA Bus Request priority 5.
	 * On 860T, this also enables FEC priority 6.  I am not sure
	 * this is what we realy want for some applications, but the
	 * manual recommends it.
	 * Bit 25, FAM can also be set to use FEC aggressive mode (860T).
	 */
	imp->im_siu_conf.sc_sdcr = 1;

	/* Reclaim the DP memory for our use. */
	m8xx_cpm_dpinit();

	/* Set the host page for allocation. */
	host_buffer = host_page_addr;	/* Host virtual page address */
	host_end = host_page_addr + PAGE_SIZE;

	/* We need to get this page early, so I have to do it the
	 * hard way.
	 */
	if (get_pteptr(&init_mm, host_page_addr, &pte)) {
		/* DMA buffers must not be cached - mark the page no-cache
		 * and drop any stale TLB entry. */
		pte_val(*pte) |= _PAGE_NO_CACHE;
		flush_tlb_page(init_mm.mmap, host_buffer);
	}
	else {
		panic("Huh?  No CPM host page?");
	}

	/* Tell everyone where the comm processor resides.
	 */
	cpmp = (cpm8xx_t *)commproc;
}
/*
 * Page-walk callback: make one page cacheable again by clearing the
 * cache-inhibit (_PAGE_CI) bit.  Always returns 0 so the walk continues.
 */
static int page_clear_nocache(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pte_val(*pte) &= ~_PAGE_CI;

	/*
	 * Flush the page out of the TLB so that the new page flags get
	 * picked up next time there's an access
	 */
	flush_tlb_page(NULL, addr);

	return 0;
}
/*
 * This is called when relaxing access to a PTE. It's also called in the page
 * fault path when we don't hit any of the major fault cases, ie, a minor
 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
 * handled those two for us, we additionally deal with missing execute
 * permission here on some processors
 */
int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pte_t *ptep, pte_t entry, int dirty)
{
	int changed;

	/* Let the arch filter adjust the entry (e.g. exec permission). */
	entry = set_access_flags_filter(entry, vma, dirty);
	changed = !pte_same(*(ptep), entry);
	if (changed) {
		/* Hugetlb PTEs are protected differently; for normal pages
		 * the PTE lock must be held here. */
		if (!is_vm_hugetlb_page(vma))
			assert_pte_locked(vma->vm_mm, address);
		__ptep_set_access_flags(vma->vm_mm, ptep, entry, address);
		flush_tlb_page(vma, address);
	}
	return changed;
}
/*
 * Page-walk callback: mark one page cache-inhibited (_PAGE_CI), flush
 * its TLB entry, and write the page's cache lines back out of the
 * data cache so no stale cached data survives.  Always returns 0.
 * NOTE(review): the NULL vma argument relies on this architecture's
 * flush_tlb_page() not using it - confirm before reusing elsewhere.
 */
static int page_set_nocache(pte_t *pte, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	unsigned long cl;

	pte_val(*pte) |= _PAGE_CI;
	flush_tlb_page(NULL, addr);
	/* Flush/invalidate the page's lines from the dcache, one cache
	 * block at a time. */
	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo.dcache_block_size)
		mtspr(SPR_DCBFR, cl);

	return 0;
}
/*
 * Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */
static int filemap_sync_pte(pte_t *ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;

	/* Transfer the hardware dirty bit from the PTE to the struct page
	 * so the page gets written back by msync/writeback. */
	if (pte_present(pte) && pte_dirty(pte)) {
		struct page *page;
		unsigned long pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			/* Atomically clear the PTE dirty bit; flush the TLB
			 * so hardware re-dirties the PTE on the next write. */
			if (!PageReserved(page) &&
			    ptep_test_and_clear_dirty(ptep)) {
				flush_tlb_page(vma, address);
				set_page_dirty(page);
			}
		}
	}
	return 0;
}
/*
 * Page-walk callback: mark one page cache-inhibited (_PAGE_CI) and make
 * sure neither the TLB nor the data cache holds stale state for it.
 * Always returns 0 so the walk continues.
 */
static int page_set_nocache(pte_t *pte, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	unsigned long cl;

	pte_val(*pte) |= _PAGE_CI;

	/*
	 * Flush the page out of the TLB so that the new page flags get
	 * picked up next time there's an access
	 */
	flush_tlb_page(NULL, addr);

	/* Flush page out of dcache */
	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo.dcache_block_size)
		mtspr(SPR_DCBFR, cl);

	return 0;
}
/*
 * Reflect interrupt @num (0-255) to real mode via virtual-8086: read the
 * handler's cs:ip from the real-mode interrupt vector table (IVT), set up
 * a v86 stack frame, and execute the handler with call_v86().
 * Returns 0 unconditionally.
 */
int reflect_irq_to_realmode( SysCallRegs_s * psCallRegs, int num )
{
	pgd_t *pPgd = pgd_offset( g_psKernelSeg, 0 );
	pte_t *pPte = pte_offset( pPgd, 0 );
	Virtual86Struct_s sRegs;
	/* NOTE(review): NULL here is used as "pointer to linear address 0",
	 * where the real-mode IVT lives; dereferencing it below is only
	 * valid because the first page is mapped in just beneath.  Confirm
	 * the compiler is not allowed to exploit this as UB. */
	uint32 *pIntVects = NULL;
	uint32 *pnStack;
	uint32 nFlags;

	num &= 0xff;
	memset( &sRegs, 0, sizeof( sRegs ) );
	nFlags = cli();
	kassertw( get_processor_id() == g_nBootCPU );
	// We need access to the first page to read the IVT
	/* NOTE(review): the present bit is never cleared again after the
	 * call - verify this is intentional. */
	PTE_VALUE( *pPte ) |= PTE_PRESENT;
	flush_tlb_page( 0 );
	/* IVT entry: low 16 bits = offset, high 16 bits = segment. */
	sRegs.regs.eip = pIntVects[num] & 0xffff;
	sRegs.regs.cs = pIntVects[num] >> 16;
	pnStack = ( uint32 * )( ( v86Stack_seg << 4 ) + v86Stack_off );
	/* Sentinel return address the v86 monitor recognizes. */
	pnStack[0] = 0xffffffff;
	sRegs.regs.esp = v86Stack_off;
	sRegs.regs.ss = v86Stack_seg;
	/* Reserve our slice of the shared v86 stack for the duration. */
	v86Stack_off -= V86_STACK_SIZE;
	put_cpu_flags( nFlags );
	call_v86( &sRegs );
//	do_call_v86( &sRegs, psCallRegs );
	v86Stack_off += V86_STACK_SIZE;
	return ( 0 );
}
/*
 * Write-protect the PTE mapping @addr in @mm so the next write faults
 * and triggers copy-on-write.  Walks the page tables by hand; returns 0
 * on success or -EFAULT when no present mapping exists at @addr.
 */
inline int make_page_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			 unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	/* Manual pgd -> pud -> pmd -> pte walk; bail out at any hole. */
	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto no_page;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto no_page;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto no_page;

	/* Transparent huge pages are not expected here. */
	BUG_ON(pmd_trans_huge(*pmd));

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_present(*pte)) {
		spin_unlock(ptl);
		goto no_page;
	}

	/* Clear the write bit under the PTE lock, then flush the TLB. */
	ptep_set_wrprotect(mm, addr, pte);
	spin_unlock(ptl);

#if !defined(CONFIG_GRAPHENE_BULK_IPC) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
	my_flush_tlb_page(vma, addr);
#else
	flush_tlb_page(vma, addr);
#endif
	DEBUG("make page COW at %lx\n", addr);
	return 0;

no_page:
	return -EFAULT;
}
/*
 * Arch hook called after a PTE is installed: invalidate the old TLB
 * entry and keep the (aliasing) caches coherent for the newly mapped
 * page.  The two #if branches handle cache-aliasing vs non-aliasing
 * configurations; PG_arch_1 tracks per-page cache state.
 */
void update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
{
	unsigned long pfn = pte_pfn(*ptep);
	struct page *page;

	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);

	/* Invalidate old entry in TLBs */
	flush_tlb_page(vma, addr);

#if (DCACHE_WAY_SIZE > PAGE_SIZE)
	/* Aliasing dcache: flush/invalidate through a TLBTEMP alias of the
	 * physical page so the correct cache "color" lines are hit. */
	if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
		unsigned long phys = page_to_phys(page);
		unsigned long tmp;

		tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
		__flush_invalidate_dcache_page_alias(tmp, phys);
		tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
		__flush_invalidate_dcache_page_alias(tmp, phys);
		__invalidate_icache_page_alias(tmp, phys);

		clear_bit(PG_arch_1, &page->flags);
	}
#else
	/* Non-aliasing dcache: only executable mappings need the icache
	 * synchronized; PG_arch_1 records that it has been done. */
	if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)
	    && (vma->vm_flags & VM_EXEC) != 0) {
		unsigned long paddr = (unsigned long)kmap_atomic(page);
		__flush_dcache_page(paddr);
		__invalidate_icache_page(paddr);
		set_bit(PG_arch_1, &page->flags);
		kunmap_atomic((void *)paddr);
	}
#endif
}
/*
 * Move the PTE for one page of @vma from @old_addr to @new_addr (mremap
 * helper).  Returns 0 on success, -1 on failure (e.g. pte_chain
 * allocation).  Takes mm->page_table_lock around the PTE manipulation.
 */
static int move_one_page(struct vm_area_struct *vma,
	unsigned long old_addr, unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct pte_chain * pte_chain;
	int error = 0;
	pte_t *src, *dst;

	/* Allocate before taking the spinlock: GFP_KERNEL may sleep. */
	pte_chain = pte_chain_alloc(GFP_KERNEL);
	if (!pte_chain)
		return -1;
	spin_lock(&mm->page_table_lock);
	src = get_one_pte_map_nested(mm, old_addr);
	if (src) {
		/*
		 * Look to see whether alloc_one_pte_map needs to perform a
		 * memory allocation.  If it does then we need to drop the
		 * atomic kmap
		 */
		if (!page_table_present(mm, new_addr)) {
			pte_unmap_nested(src);
			src = NULL;
		}
		dst = alloc_one_pte_map(mm, new_addr);
		/* Re-map the source if we had to drop it above. */
		if (src == NULL)
			src = get_one_pte_map_nested(mm, old_addr);
		if (src) {
			error = copy_one_pte(vma, src, dst, old_addr,
					new_addr, &pte_chain);
			pte_unmap_nested(src);
		}
		pte_unmap(dst);
	}
	/* The old translation is gone - invalidate it. */
	flush_tlb_page(vma, old_addr);
	spin_unlock(&mm->page_table_lock);
	pte_chain_free(pte_chain);
	return error;
}
/*
 * Initialize the MPC8xx Communication Processor Module: set SDMA
 * arbitration, reclaim dual-port RAM, mark the host buffer page
 * uncacheable, and set up the CPM interrupt controller and handlers.
 * @host_page_addr: kernel virtual address of the page used for CPM host
 * buffers.
 */
void m8xx_cpm_reset(uint host_page_addr)
{
	volatile immap_t *imp;
	volatile cpm8xx_t *commproc;
	pte_t *pte;

	imp = (immap_t *)IMAP_ADDR;
	commproc = (cpm8xx_t *)&imp->im_cpm;

#ifdef notdef
	/* We can't do this.  It seems to blow away the microcode
	 * patch that EPPC-Bug loaded for us.  EPPC-Bug uses SCC1 for
	 * Ethernet, SMC1 for the console, and I2C for serial EEPROM.
	 * Our own drivers quickly reset all of these.
	 */

	/* Perform a reset. */
	commproc->cp_cpcr = (CPM_CR_RST | CPM_CR_FLG);

	/* Wait for it. */
	while (commproc->cp_cpcr & CPM_CR_FLG);
#endif

	/* Set SDMA Bus Request priority 5.
	 * On 860T, this also enables FEC priority 6.  I am not sure
	 * this is what we realy want for some applications, but the
	 * manual recommends it.
	 * Bit 25, FAM can also be set to use FEC aggressive mode (860T).
	 */
	imp->im_siu_conf.sc_sdcr = 1;

	/* Reclaim the DP memory for our use. */
	dp_alloc_base = CPM_DATAONLY_BASE;
	dp_alloc_top = dp_alloc_base + CPM_DATAONLY_SIZE;

	/* Set the host page for allocation. */
	host_buffer = host_page_addr;	/* Host virtual page address */
	host_end = host_page_addr + PAGE_SIZE;

	/* DMA buffers must be uncached: set no-cache on the page's PTE and
	 * drop any stale TLB entry.
	 * NOTE(review): passing current->mm->mmap (the first vma of the
	 * current process) to flush the init_task mapping looks dubious -
	 * presumably this arch's flush_tlb_page() only needs the address;
	 * confirm. */
	pte = va_to_pte(&init_task, host_page_addr);
	pte_val(*pte) |= _PAGE_NO_CACHE;
	flush_tlb_page(current->mm->mmap, host_buffer);

	/* Tell everyone where the comm processor resides.
	 */
	cpmp = (cpm8xx_t *)commproc;

	/* Initialize the CPM interrupt controller. */
	((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr =
	    (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
		(((5)/2) << 13) | CICR_HP_MASK;
	/* I hard coded the CPM interrupt to 5 above
	 * since the CPM_INTERRUPT define is relative to
	 * the linux irq structure not what the hardware
	 * belives. -- Cort
	 */
	((immap_t *)IMAP_ADDR)->im_cpic.cpic_cimr = 0;

	/* Set our interrupt handler with the core CPU. */
	if (request_irq(CPM_INTERRUPT, cpm_interrupt, 0, "cpm", NULL) != 0)
		panic("Could not allocate CPM IRQ!");

	/* Install our own error handler.
	 */
	cpm_install_handler(CPMVEC_ERROR, cpm_error_interrupt, NULL);
	((immap_t *)IMAP_ADDR)->im_cpic.cpic_cicr |= CICR_IEN;
}
/*
 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

	down_read(&mm->mmap_sem);
	/* Find the vma covering the fault, growing the stack if allowed. */
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;
survive:
		fault = handle_mm_fault(mm, vma, address, is_write);
		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				err = -ENOMEM;
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (fault & VM_FAULT_MAJOR)
			current->maj_flt++;
		else
			current->min_flt++;

		/* Walk the page tables; loop until the PTE is present
		 * (a concurrent unmap could have ripped it out again). */
		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;
	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly. So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	/* init must never be OOM-killed here: yield and retry instead. */
	if (is_global_init(current)) {
		up_read(&mm->mmap_sem);
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	goto out;
}
/*
 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				 (is_write ? FAULT_FLAG_WRITE : 0);

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

retry:
	down_read(&mm->mmap_sem);
	/* Find the vma covering the fault, growing the stack if allowed. */
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;

		fault = handle_mm_fault(mm, vma, address, flags);
		/* A fatal signal during a retried fault: give up, the
		 * mmap_sem was already released by handle_mm_fault(). */
		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			goto out_nosemaphore;

		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				/* Only one retry: switch to TRIED mode. */
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;

				goto retry;
			}
		}

		/* Walk the page tables; loop until the PTE is present
		 * (a concurrent unmap could have ripped it out again). */
		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;
	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly. So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

out_of_memory:
	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	up_read(&mm->mmap_sem);
	pagefault_out_of_memory();
	return 0;
}
/* mm->page_table_lock is held. mmap_sem is not held */
/*
 * Try to unmap one page from @vma at @address so it can be reclaimed.
 * Returns non-zero when the page became freeable, 0 otherwise.
 */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	/* Hardware dirty bit moves to the struct page before we decide. */
	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		{
			/* Freeable iff only our ref + (optional) buffers
			 * + the swap/page cache hold the page. */
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left: put the original PTE back untouched. */
preserve:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
/*
 * Establish a new mapping:
 *  - flush the old one
 *  - update the page tables
 *  - inform the TLB about the new one
 */
static inline void establish_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *page_table, pte_t entry)
{
	/* Order matters: write the PTE, invalidate the stale TLB entry,
	 * then let the arch update any external MMU caches. */
	set_pte(page_table, entry);
	flush_tlb_page(vma, address);
	update_mmu_cache(vma, address, entry);
}
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 */
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	/* Allocate the (possible) copy destination up front; may sleep. */
	new_page = __get_free_page(GFP_KERNEL);
	/* Walk the page tables down to the faulting PTE, bailing on holes
	 * or corruption. */
	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		goto end_wp_page;
	if (pgd_bad(*page_dir))
		goto bad_wp_pagedir;
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		goto end_wp_page;
	if (pmd_bad(*page_middle))
		goto bad_wp_pagemiddle;
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	/* Already writable: someone else resolved the COW while we slept. */
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= high_memory)
		goto bad_wp_page;
	tsk->min_flt++;
	/*
	 * Do we need to copy?
	 */
	if (mem_map[MAP_NR(old_page)].count != 1) {
		/* Shared page: copy into new_page and remap writable+dirty. */
		if (new_page) {
			if (PageReserved(mem_map + MAP_NR(old_page)))
				++vma->vm_mm->rss;
			copy_page(old_page,new_page);
			flush_page_to_ram(old_page);
			flush_page_to_ram(new_page);
			flush_cache_page(vma, address);
			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
			free_page(old_page);
			flush_tlb_page(vma, address);
			return;
		}
		/* No memory for the copy: poison the PTE and OOM-kill. */
		flush_cache_page(vma, address);
		set_pte(page_table, BAD_PAGE);
		flush_tlb_page(vma, address);
		free_page(old_page);
		oom(tsk);
		return;
	}
	/* Sole user of the page: just make it writable in place. */
	flush_cache_page(vma, address);
	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
	flush_tlb_page(vma, address);
	if (new_page)
		free_page(new_page);
	return;
bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagemiddle:
	printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagedir:
	printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
	send_sig(SIGKILL, tsk, 1);
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}
/*
 * Probe-time initialization of the MPC8xx CPM SCC Ethernet controller:
 * configure I/O port pins and clock routing, allocate BD rings in
 * dual-port RAM and uncached receive buffers in host memory, program the
 * SCC parameter RAM (addresses, CRC, frame limits), install the CPM
 * interrupt handler, register the net device, and enable Rx/Tx.
 * Returns 0.
 * NOTE(review): the kmalloc() and __get_free_page() results are used
 * unchecked, and __clear_user() is used on a kernel pointer - presumably
 * tolerated on this platform; confirm before reuse.
 */
int __init m8xx_enet_init(void)
{
	struct device *dev;
	struct cpm_enet_private *cep;
	int i, j;
	unsigned char *eap;
	unsigned long mem_addr;
	pte_t *pte;
	bd_t *bd;
	volatile cbd_t *bdp;
	volatile cpm8xx_t *cp;
	volatile scc_t *sccp;
	volatile scc_enet_t *ep;
	volatile immap_t *immap;

	cp = cpmp;	/* Get pointer to Communication Processor */

	immap = (immap_t *)IMAP_ADDR;	/* and to internal registers */

	bd = (bd_t *)res;

	/* Allocate some private information.
	 */
	cep = (struct cpm_enet_private *)kmalloc(sizeof(*cep), GFP_KERNEL);
	/*memset(cep, 0, sizeof(*cep));*/
	__clear_user(cep,sizeof(*cep));

	/* Create an Ethernet device instance.
	 */
	dev = init_etherdev(0, 0);

	/* Get pointer to SCC area in parameter RAM.
	 */
	ep = (scc_enet_t *)(&cp->cp_dparam[PROFF_ENET]);

	/* And another to the SCC register area.
	 */
	sccp = (volatile scc_t *)(&cp->cp_scc[SCC_ENET]);
	cep->sccp = (scc_t *)sccp;		/* Keep the pointer handy */

	/* Disable receive and transmit in case EPPC-Bug started it.
	 */
	sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT);

	/* Cookbook style from the MPC860 manual.....
	 * Not all of this is necessary if EPPC-Bug has initialized
	 * the network.
	 * So far we are lucky, all board configurations use the same
	 * pins, or at least the same I/O Port for these functions.....
	 * It can't last though......
	 */

	/* Configure port A pins for Txd and Rxd.
	 */
	immap->im_ioport.iop_papar |= (PA_ENET_RXD | PA_ENET_TXD);
	immap->im_ioport.iop_padir &= ~(PA_ENET_RXD | PA_ENET_TXD);
	immap->im_ioport.iop_paodr &= ~PA_ENET_TXD;

	/* Configure port C pins to enable CLSN and RENA.
	 */
	immap->im_ioport.iop_pcpar &= ~(PC_ENET_CLSN | PC_ENET_RENA);
	immap->im_ioport.iop_pcdir &= ~(PC_ENET_CLSN | PC_ENET_RENA);
	immap->im_ioport.iop_pcso |= (PC_ENET_CLSN | PC_ENET_RENA);

	/* Configure port A for TCLK and RCLK.
	 */
	immap->im_ioport.iop_papar |= (PA_ENET_TCLK | PA_ENET_RCLK);
	immap->im_ioport.iop_padir &= ~(PA_ENET_TCLK | PA_ENET_RCLK);

	/* Configure Serial Interface clock routing.
	 * First, clear all SCC bits to zero, then set the ones we want.
	 */
	cp->cp_sicr &= ~SICR_ENET_MASK;
	cp->cp_sicr |= SICR_ENET_CLKRT;

	/* Manual says set SDDR, but I can't find anything with that
	 * name.  I think it is a misprint, and should be SDCR.  This
	 * has already been set by the communication processor initialization.
	 */

	/* Allocate space for the buffer descriptors in the DP ram.
	 * These are relative offsets in the DP ram address space.
	 * Initialize base addresses for the buffer descriptors.
	 */
	i = m8xx_cpm_dpalloc(sizeof(cbd_t) * RX_RING_SIZE);
	ep->sen_genscc.scc_rbase = i;
	cep->rx_bd_base = (cbd_t *)&cp->cp_dpmem[i];

	i = m8xx_cpm_dpalloc(sizeof(cbd_t) * TX_RING_SIZE);
	ep->sen_genscc.scc_tbase = i;
	cep->tx_bd_base = (cbd_t *)&cp->cp_dpmem[i];

	cep->dirty_tx = cep->cur_tx = cep->tx_bd_base;
	cep->cur_rx = cep->rx_bd_base;

	/* Issue init Rx BD command for SCC.
	 * Manual says to perform an Init Rx parameters here.  We have
	 * to perform both Rx and Tx because the SCC may have been
	 * already running.
	 * In addition, we have to do it later because we don't yet have
	 * all of the BD control/status set properly.
	cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_RX) | CPM_CR_FLG;
	while (cp->cp_cpcr & CPM_CR_FLG);
	 */

	/* Initialize function code registers for big-endian.
	 */
	ep->sen_genscc.scc_rfcr = SCC_EB;
	ep->sen_genscc.scc_tfcr = SCC_EB;

	/* Set maximum bytes per receive buffer.
	 * This appears to be an Ethernet frame size, not the buffer
	 * fragment size.  It must be a multiple of four.
	 */
	ep->sen_genscc.scc_mrblr = PKT_MAXBLR_SIZE;

	/* Set CRC preset and mask.
	 */
	ep->sen_cpres = 0xffffffff;
	ep->sen_cmask = 0xdebb20e3;

	ep->sen_crcec = 0;	/* CRC Error counter */
	ep->sen_alec = 0;	/* alignment error counter */
	ep->sen_disfc = 0;	/* discard frame counter */

	ep->sen_pads = 0x8888;	/* Tx short frame pad character */
	ep->sen_retlim = 15;	/* Retry limit threshold */

	ep->sen_maxflr = PKT_MAXBUF_SIZE;   /* maximum frame length register */
	ep->sen_minflr = PKT_MINBUF_SIZE;  /* minimum frame length register */

	ep->sen_maxd1 = PKT_MAXBUF_SIZE;	/* maximum DMA1 length */
	ep->sen_maxd2 = PKT_MAXBUF_SIZE;	/* maximum DMA2 length */

	/* Clear hash tables.
	 */
	ep->sen_gaddr1 = 0;
	ep->sen_gaddr2 = 0;
	ep->sen_gaddr3 = 0;
	ep->sen_gaddr4 = 0;
	ep->sen_iaddr1 = 0;
	ep->sen_iaddr2 = 0;
	ep->sen_iaddr3 = 0;
	ep->sen_iaddr4 = 0;

	/* Set Ethernet station address.
	 *
	 * If we performed a MBX diskless boot, the Ethernet controller
	 * has been initialized and we copy the address out into our
	 * own structure.
	 */
	eap = (unsigned char *)&(ep->sen_paddrh);
#ifndef CONFIG_MBX
	/* Station address from the board descriptor, byte-reversed into
	 * the parameter RAM. */
	for (i=5; i>=0; i--)
		*eap++ = dev->dev_addr[i] = bd->bi_enetaddr[i];
#else
	/* MBX: firmware already programmed it - copy it out instead. */
	for (i=5; i>=0; i--)
		dev->dev_addr[i] = *eap++;
#endif

	ep->sen_pper = 0;	/* 'cause the book says so */
	ep->sen_taddrl = 0;	/* temp address (LSB) */
	ep->sen_taddrm = 0;
	ep->sen_taddrh = 0;	/* temp address (MSB) */

	/* Now allocate the host memory pages and initialize the
	 * buffer descriptors.
	 */
	bdp = cep->tx_bd_base;
	for (i=0; i<TX_RING_SIZE; i++) {

		/* Initialize the BD for every fragment in the page.
		 */
		bdp->cbd_sc = 0;
		bdp->cbd_bufaddr = 0;
		bdp++;
	}

	/* Set the last buffer to wrap.
	 */
	bdp--;
	bdp->cbd_sc |= BD_SC_WRAP;

	bdp = cep->rx_bd_base;
	for (i=0; i<CPM_ENET_RX_PAGES; i++) {

		/* Allocate a page.
		 */
		mem_addr = __get_free_page(GFP_KERNEL);

		/* Make it uncached.
		 * NOTE(review): flush_tlb_page() is handed the current
		 * process's first vma for an init_task mapping - presumably
		 * only the address matters on this arch; confirm. */
		pte = va_to_pte(&init_task, mem_addr);
		pte_val(*pte) |= _PAGE_NO_CACHE;
		flush_tlb_page(current->mm->mmap, mem_addr);

		/* Initialize the BD for every fragment in the page.
		 */
		for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
			bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
			bdp->cbd_bufaddr = __pa(mem_addr);
			mem_addr += CPM_ENET_RX_FRSIZE;
			bdp++;
		}
	}

	/* Set the last buffer to wrap.
	 */
	bdp--;
	bdp->cbd_sc |= BD_SC_WRAP;

	/* Let's re-initialize the channel now.  We have to do it later
	 * than the manual describes because we have just now finished
	 * the BD initialization.
	 */
	cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_TRX) | CPM_CR_FLG;
	while (cp->cp_cpcr & CPM_CR_FLG);

	cep->skb_cur = cep->skb_dirty = 0;

	sccp->scc_scce = 0xffff;	/* Clear any pending events */

	/* Enable interrupts for transmit error, complete frame
	 * received, and any transmit buffer we have also set the
	 * interrupt flag.
	 */
	sccp->scc_sccm = (SCCE_ENET_TXE | SCCE_ENET_RXF | SCCE_ENET_TXB);

	/* Install our interrupt handler.
	 */
	cpm_install_handler(CPMVEC_ENET, cpm_enet_interrupt, dev);

	/* Set GSMR_H to enable all normal operating modes.
	 * Set GSMR_L to enable Ethernet to MC68160.
	 */
	sccp->scc_gsmrh = 0;
	sccp->scc_gsmrl = (SCC_GSMRL_TCI | SCC_GSMRL_TPL_48 | SCC_GSMRL_TPP_10 | SCC_GSMRL_MODE_ENET);

	/* Set sync/delimiters.
	 */
	sccp->scc_dsr = 0xd555;

	/* Set processing mode.  Use Ethernet CRC, catch broadcast, and
	 * start frame search 22 bit times after RENA.
	 */
	sccp->scc_pmsr = (SCC_PMSR_ENCRC | SCC_PMSR_NIB22);

	/* It is now OK to enable the Ethernet transmitter.
	 * Unfortunately, there are board implementation differences here.
	 */
#ifdef CONFIG_MBX
	immap->im_ioport.iop_pcpar |= PC_ENET_TENA;
	immap->im_ioport.iop_pcdir &= ~PC_ENET_TENA;
#endif

#if defined(CONFIG_RPXLITE) || defined(CONFIG_RPXCLASSIC)
	cp->cp_pbpar |= PB_ENET_TENA;
	cp->cp_pbdir |= PB_ENET_TENA;

	/* And while we are here, set the configuration to enable ethernet.
	 */
	*((volatile uint *)RPX_CSR_ADDR) &= ~BCSR0_ETHLPBK;
	*((volatile uint *)RPX_CSR_ADDR) |= (BCSR0_ETHEN | BCSR0_COLTESTDIS | BCSR0_FULLDPLXDIS);
#endif

#ifdef CONFIG_BSEIP
	cp->cp_pbpar |= PB_ENET_TENA;
	cp->cp_pbdir |= PB_ENET_TENA;

	/* BSE uses port B and C for PHY control.
	 */
	cp->cp_pbpar &= ~(PB_BSE_POWERUP | PB_BSE_FDXDIS);
	cp->cp_pbdir |= (PB_BSE_POWERUP | PB_BSE_FDXDIS);
	cp->cp_pbdat |= (PB_BSE_POWERUP | PB_BSE_FDXDIS);

	immap->im_ioport.iop_pcpar &= ~PC_BSE_LOOPBACK;
	immap->im_ioport.iop_pcdir |= PC_BSE_LOOPBACK;
	immap->im_ioport.iop_pcso &= ~PC_BSE_LOOPBACK;
	immap->im_ioport.iop_pcdat &= ~PC_BSE_LOOPBACK;
#endif

	dev->base_addr = (unsigned long)ep;
	dev->priv = cep;
	dev->name = "CPM_ENET";

	/* The CPM Ethernet specific entries in the device structure.
	 */
	dev->open = cpm_enet_open;
	dev->hard_start_xmit = cpm_enet_start_xmit;
	dev->stop = cpm_enet_close;
	dev->get_stats = cpm_enet_get_stats;
	dev->set_multicast_list = set_multicast_list;

	/* And last, enable the transmit and receive processing.
	 */
	sccp->scc_gsmrl |= (SCC_GSMRL_ENR | SCC_GSMRL_ENT);

	printk("CPM ENET Version 0.1, ");
	for (i=0; i<5; i++)
		printk("%02x:", dev->dev_addr[i]);
	printk("%02x\n", dev->dev_addr[5]);

	return 0;
}
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	/* Snapshot the pte; only mapped, valid, non-reserved pages are
	 * candidates for unmapping. */
	pte = *page_table;
	if (!pte_present(pte))
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	/* Account one examined pte against this mm's scan quota. */
	if (mm->swap_cnt)
		mm->swap_cnt--;

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);

	/*
	 * If the page is in active use by us, or if the page
	 * is in active use by others, don't unmap it or
	 * (worse) start unneeded IO.
	 */
	if (page->age > 0)
		goto out_failed;

	/* Somebody else holds the page lock: leave it alone rather
	 * than block here. */
	if (TryLockPage(page))
		goto out_failed;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	pte = ptep_get_and_clear(page_table);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 *
	 * Return 0, as we didn't actually free any real
	 * memory, and we should just continue our scan.
	 *
	 * NOTE: the labels below (set_swap_pte / drop_pte /
	 * out_failed) are jump targets shared by the later dirty-
	 * and clean-page paths; the control flow deliberately
	 * funnels through this block.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		/* Take an extra reference on the swap entry and point the
		 * pte at it instead of at the physical page. */
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		UnlockPage(page);
		mm->rss--;
		flush_tlb_page(vma, address);
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		return 0;
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))
		goto drop_pte;

	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {
		set_page_dirty(page);
		goto drop_pte;
	}

	/*
	 * This is a dirty, swappable page. First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */

	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;

out_unlock_restore:
	/* Couldn't get swap space: put the original pte back. */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
/*
 * Fault in one page on behalf of task @taskid at @address.
 *
 * Returns 0 on success, 1 on failure (dead task, bad vma, repeated
 * faulting on the same address, or out of memory).  Uses static state
 * (last_address/last_task/loop_counter) to detect a task spinning on
 * the same fault and escalate: first force a write request, then fail
 * the request outright.  NOTE(review): the static state makes this
 * function non-reentrant — presumably callers serialize; confirm.
 */
static int fault_in_page(int taskid, struct vm_area_struct *vma, unsigned long address, int write)
{
	static unsigned last_address;
	static int last_task, loop_counter;
	struct task_struct *tsk = task[taskid];
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	/* Task slot empty or mm already torn down: nothing to fault in. */
	if (!tsk || !tsk->mm)
		return 1;

	if (!vma || (write && !(vma->vm_flags & VM_WRITE)))
		goto bad_area;
	if (vma->vm_start > address)
		goto bad_area;

	/* Track consecutive faults on the same (task, address) pair. */
	if (address == last_address && taskid == last_task) {
		loop_counter++;
	} else {
		loop_counter = 0;
		last_address = address; last_task = taskid;
	}

	/* Escalation step 1: retry as a write fault after WRITE_LIMIT
	 * identical read faults. */
	if (loop_counter == WRITE_LIMIT && !write) {
		printk("MSC bug? setting write request\n");
		stats.errors++;
		write = 1;
	}

	/* Escalation step 2: give up after LOOP_LIMIT identical faults. */
	if (loop_counter == LOOP_LIMIT) {
		printk("MSC bug? failing request\n");
		stats.errors++;
		return 1;
	}

	/* Walk (and allocate, if needed) the page tables down to the pte. */
	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_alloc(pgd,address);
	if(!pmd)
		goto no_memory;
	pte = pte_alloc(pmd, address);
	if(!pte)
		goto no_memory;
	if(!pte_present(*pte)) {
		/* Page not resident: let the generic fault handler bring it in. */
		handle_mm_fault(tsk, vma, address, write);
		goto finish_up;
	}
	/* Page is resident: refresh the accessed bit. */
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if(!write)
		goto finish_up;
	if(pte_write(*pte)) {
		/* Already writable: just mark it dirty. */
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		goto finish_up;
	}
	/* Write fault on a read-only resident page: needs copy-on-write. */
	handle_mm_fault(tsk, vma, address, write);

	/* Fall through for do_wp_page */
finish_up:
	stats.success++;
	return 0;

no_memory:
	stats.failure++;
	oom(tsk);
	return 1;

bad_area:
	/* Record the faulting address and deliver SIGSEGV to the task. */
	stats.failure++;
	tsk->tss.sig_address = address;
	tsk->tss.sig_desc = SUBSIG_NOMAPPING;
	send_sig(SIGSEGV, tsk, 1);
	return 1;
}
/*
 * sys_realint - execute real-mode BIOS interrupt @num via virtual-8086 mode.
 *
 * Copies the caller-supplied register set @rm into a v86 register frame,
 * looks up the interrupt's real-mode entry point in the IVT at physical
 * address 0, runs it with call_v86(), and copies the resulting registers
 * back into @rm.  Always returns 0.
 */
int sys_realint( int num, struct RMREGS *rm )
{
	pgd_t *pPgd = pgd_offset( g_psKernelSeg, 0 );
	pte_t *pPte = pte_offset( pPgd, 0 );
	Thread_s *psThread = CURRENT_THREAD;
	Virtual86Struct_s sRegs;
	/* NOTE: pIntVects stays NULL on purpose — the real-mode IVT lives
	 * at linear address 0, which is made readable below. */
	uint32 *pIntVects = NULL;
	uint32 *pnStack;
	uint32 nFlags;

	/* Load the caller's register image into the v86 frame. */
	sRegs.regs.eax = rm->EAX;
	sRegs.regs.orig_eax = rm->EAX;
	sRegs.regs.ebx = rm->EBX;
	sRegs.regs.ecx = rm->ECX;
	sRegs.regs.edx = rm->EDX;
	sRegs.regs.edi = rm->EDI;
	sRegs.regs.esi = rm->ESI;
	sRegs.regs.ebp = rm->EBP;
	sRegs.regs.eflags = rm->flags;
	sRegs.regs.ds = rm->DS;
	sRegs.regs.es = rm->ES;
	sRegs.regs.fs = rm->FS;
	sRegs.regs.gs = rm->GS;

	nFlags = cli();

	// We need access to the first page to read the IVT
	PTE_VALUE( *pPte ) |= PTE_PRESENT;
	flush_tlb_page( 0 );

	/* IVT entry: low 16 bits = offset, high 16 bits = segment. */
	sRegs.regs.eip = pIntVects[num] & 0xffff;
	sRegs.regs.cs = pIntVects[num] >> 16;

	//printk( "sys_realint(%d) -> %04x:%04lx\n", num, sRegs.regs.cs, sRegs.regs.eip );

	atomic_inc( &psThread->tr_nInV86 );
	kassertw( atomic_read( &psThread->tr_nInV86 ) == 1 );

	/* v86 calls must run on the boot CPU; reschedule until we land there. */
	while ( get_processor_id() != g_nBootCPU ) {
		// printk( "sys_call_v86() wrong CPU (%d), will schedule\n", get_processor_id() );
		Schedule();
	}

	/* Carve a slice off the shared v86 stack; the 0xffffffff sentinel
	 * marks the far-return slot for the v86 monitor. */
	pnStack = ( uint32 * )( ( v86Stack_seg << 4 ) + v86Stack_off );
	pnStack[0] = 0xffffffff;

	sRegs.regs.esp = v86Stack_off;
	sRegs.regs.ss = v86Stack_seg;
	v86Stack_off -= V86_STACK_SIZE;

	put_cpu_flags( nFlags );

	call_v86( &sRegs );

	/* Release our stack slice and leave v86 accounting. */
	v86Stack_off += V86_STACK_SIZE;
	atomic_dec( &psThread->tr_nInV86 );

	/* Copy the post-interrupt register state back to the caller. */
	rm->EAX = sRegs.regs.eax;
	rm->EBX = sRegs.regs.ebx;
	rm->ECX = sRegs.regs.ecx;
	rm->EDX = sRegs.regs.edx;
	rm->EDI = sRegs.regs.edi;
	rm->ESI = sRegs.regs.esi;
	rm->EBP = sRegs.regs.ebp;
	rm->flags = sRegs.regs.eflags;
	rm->DS = sRegs.regs.ds;
	rm->ES = sRegs.regs.es;
	rm->FS = sRegs.regs.fs;
	rm->GS = sRegs.regs.gs;

	return ( 0 );
}
int memory_remove_pte(struct local_page *lp) { unsigned long del_user_va = lp->user_va; unsigned long del_slab_va = lp->slab_va; unsigned long del_pfn = page_to_pfn(virt_to_page(del_slab_va)); struct vm_area_struct *del_vma = lp->vma; struct mm_struct *del_mm = del_vma->vm_mm; #if(DEBUG) printk("[%s]\n", __FUNCTION__); printk("del_user_va: %p\n", del_user_va); printk("del_slab_va: %p\n", del_slab_va); printk("del_pfn: %p\n", del_pfn); printk("del_vma: %p\n", del_vma); printk("del_mm: %p\n", del_mm); #endif // TODO: find PTE (need to be changed for x86) pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *ptep; pgd = pgd_offset(del_mm, del_user_va); if (pgd_none(*pgd) || pgd_bad(*pgd)) { printk("<error> invalid pgd\n"); return -1; } pud = pud_offset(pgd, del_user_va); if (pud_none(*pud) || pud_bad(*pud)) { printk("<error> invalid pud\n"); return -1; } pmd = pmd_offset(pud, del_user_va); if (pmd_none(*pmd) || pmd_bad(*pmd)) { printk("<error> invalid pmd\n"); return -1; } ptep = pte_offset_kernel(pmd, del_user_va); if (!ptep) { printk("<error> invalid pte\n"); return -1; } #if(DEBUG) printk("ptep: %p\n", ptep); printk("pte: %p\n", *ptep); printk("pfn: %p\n", pte_pfn(*ptep)); #endif // flush cache flush_cache_page(del_vma, del_user_va, del_pfn); // clear PTE pte_clear(del_mm, del_user_va, ptep); // flush TLB flush_tlb_page(del_vma, del_user_va); return 0; }