/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	int error = 0, cow;

	cow = (vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE;
	src_pgd = pgd_offset(src, address);
	dst_pgd = pgd_offset(dst, address);
	flush_cache_range(src, vma->vm_start, vma->vm_end);
	flush_cache_range(dst, vma->vm_start, vma->vm_end);
	while (address < end) {
		error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address, cow);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!address)
			break;	/* unsigned overflow */
#endif
	}
	/* Note that the src ptes get c-o-w treatment, so they change too. */
	flush_tlb_range(src, vma->vm_start, vma->vm_end);
	flush_tlb_range(dst, vma->vm_start, vma->vm_end);
	return error;
}
static int move_page_tables(struct mm_struct * mm,
	unsigned long new_addr, unsigned long old_addr, unsigned long len)
{
	unsigned long offset = len;

	flush_cache_range(mm, old_addr, old_addr + len);
	flush_tlb_range(mm, old_addr, old_addr + len);

	/*
	 * This is not the clever way to do this, but we're taking the
	 * easy way out on the assumption that most remappings will be
	 * only a few pages.. This also makes error recovery easier.
	 */
	while (offset) {
		offset -= PAGE_SIZE;
		if (move_one_page(mm, old_addr + offset, new_addr + offset))
			goto oops_we_failed;
	}
	return 0;

	/*
	 * Ok, the move failed because we didn't have enough pages for
	 * the new page table tree. This is unlikely, but we have to
	 * take the possibility into account. In that case we just move
	 * all the pages back (this will work, because we still have
	 * the old page tables)
	 */
oops_we_failed:
	flush_cache_range(mm, new_addr, new_addr + len);
	while ((offset += PAGE_SIZE) < len)
		move_one_page(mm, new_addr + offset, old_addr + offset);
	zap_page_range(mm, new_addr, new_addr + len);
	flush_tlb_range(mm, new_addr, new_addr + len);
	return -1;
}
static void vmtruncate_list(struct vm_area_struct *mpnt,
			    unsigned long pgoff, unsigned long partial)
{
	do {
		struct mm_struct *mm = mpnt->vm_mm;
		unsigned long start = mpnt->vm_start;
		unsigned long end = mpnt->vm_end;
		unsigned long len = end - start;
		unsigned long diff;

		/* mapping wholly truncated? */
		if (mpnt->vm_pgoff >= pgoff) {
			flush_cache_range(mm, start, end);
			zap_page_range(mm, start, len);
			flush_tlb_range(mm, start, end);
			continue;
		}

		/* mapping wholly unaffected? */
		len = len >> PAGE_SHIFT;
		diff = pgoff - mpnt->vm_pgoff;
		if (diff >= len)
			continue;

		/* Ok, partially affected.. */
		start += diff << PAGE_SHIFT;
		len = (len - diff) << PAGE_SHIFT;
		flush_cache_range(mm, start, end);
		zap_page_range(mm, start, len);
		flush_tlb_range(mm, start, end);
	} while ((mpnt = mpnt->vm_next_share) != NULL);
}
/* Note: this is only safe if the mm semaphore is held when called. */
int remap_page_range(unsigned long from, unsigned long phys_addr,
		     unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;

	phys_addr -= from;
	dir = pgd_offset(current->mm, from);
	flush_cache_range(current->mm, beg, end);
	if (from >= end)
		BUG();
	do {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = remap_pmd_range(pmd, from, end - from, phys_addr + from, prot);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (from && (from < end));
	flush_tlb_range(current->mm, beg, end);
	return error;
}
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	/* Acquire the lock early; it may be possible to avoid dropping
	 * and reacquiring it repeatedly.
	 */
	spin_lock(&vma->vm_mm->page_table_lock);

	dir = pgd_offset(vma->vm_mm, address);
	flush_cache_range(vma, address, end);
	if (address >= end)
		BUG();
	do {
		error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	flush_tlb_range(vma, end - size, end);

	spin_unlock(&vma->vm_mm->page_table_lock);

	return error;
}
/* sys_cacheflush -- flush the processor cache. */
asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len,
			      unsigned int op)
{
	struct vm_area_struct *vma;

	if (len == 0)
		return 0;

	/* We only support op 0 now, return error if op is non-zero. */
	if (op)
		return -EINVAL;

	/* Check for overflow */
	if (addr + len < addr)
		return -EFAULT;

	/*
	 * Verify that the specified address region actually belongs
	 * to this process.
	 */
	vma = find_vma(current->mm, addr);
	if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
		return -EFAULT;

	flush_cache_range(vma, addr, addr + len);

	return 0;
}
int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = address;
	unsigned long end = address + size;

	dir = pgd_offset(current->mm, address);
	flush_cache_range(current->mm, beg, end);
	if (address >= end)
		BUG();
	do {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, prot);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	flush_tlb_range(current->mm, beg, end);
	return error;
}
int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
	unsigned long pfn, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;
	struct mm_struct *mm = vma->vm_mm;
	int space = GET_IOSPACE(pfn);
	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;

	/* See comment in mm/memory.c remap_pfn_range */
	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
	vma->vm_pgoff = (offset >> PAGE_SHIFT) |
		((unsigned long)space << 28UL);

	offset -= from;
	dir = pgd_offset(mm, from);
	flush_cache_range(vma, beg, end);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(mm, dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = io_remap_pmd_range(mm, pmd, from, end - from, offset + from, prot, space);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(vma, beg, end);
	return error;
}
/*
 * We could optimize the case where the cache argument is not BCACHE but
 * that seems very atypical use ...
 */
asmlinkage int sys_cacheflush(unsigned long addr, unsigned long bytes,
	unsigned int cache)
{
	struct vm_area_struct* vma;

	if (bytes == 0)
		return 0;
	if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes))
		return -EFAULT;

	if (cache == DCACHE) {
		vma = find_vma(current->mm, (unsigned long) addr);
		if (vma) {
#ifdef CONFIG_MIPS_BRCM97XXX
			brcm_r4k_flush_cache_range(vma, (unsigned long)addr,
					((unsigned long)addr) + bytes);
#else
			flush_cache_range(vma, (unsigned long)addr,
					((unsigned long)addr) + bytes);
#endif
		} else {
			flush_cache_all();
		}
	} else if (cache == ICACHE) {
		/* THT PR17203 Added. We don't know if we can use IC_F_L on BMIPS3300 */
		flush_icache_range(addr, addr + bytes);
	} else {
		flush_cache_all();	/* THT PR17203, for now.... */
	}

	return 0;
}
int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;

	offset -= from;
	dir = pgd_offset(current->mm, from);
	flush_cache_range(current->mm, beg, end);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = remap_pmd_range(pmd, from, end - from, offset + from, prot);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!from)
			break;	/* unsigned overflow */
#endif
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = address;
	unsigned long end = address + size;
	pte_t zero_pte;

	zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
	dir = pgd_offset(current->mm, address);
	flush_cache_range(current->mm, beg, end);
	while (address < end) {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!address)
			break;	/* unsigned overflow */
#endif
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
int vmap_page_range(unsigned long from, unsigned long size, unsigned long vaddr)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;

	vaddr -= from;
	dir = pgd_offset(current->mm, from);
	flush_cache_range(current->mm, beg, end);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = vmap_pmd_range(pmd, from, end - from, vaddr + from);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
int io_remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot, int space)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;

	prot = __pgprot(pg_iobits);
	offset -= from;
	dir = pgd_offset(current->mm, from);
	flush_cache_range(current->mm, beg, end);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = io_remap_pmd_range(pmd, from, end - from, offset + from, prot, space);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(current->mm, beg, end);
	return error;
}
static unsigned long msync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long ret = 0;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync().
	 */
	if (vma->vm_flags & VM_HUGETLB)
		return 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(vma->vm_mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret += msync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	return ret;
}
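The routine above is the kernel-side page-table walk that services msync(2). As a point of reference, here is a minimal userspace sketch of the call it backs, using standard POSIX mmap/msync; the file name "data.bin" and the helper name are purely illustrative.

/*
 * Hypothetical userspace counterpart (not part of the kernel code above):
 * dirty a shared file mapping and use msync(2) to force the modified
 * pages back to the file.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int sync_mapping_example(void)
{
	int fd = open("data.bin", O_RDWR);
	if (fd < 0)
		return -1;

	char *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		close(fd);
		return -1;
	}

	memcpy(map, "hello", 5);		/* dirty the first page of the mapping */
	if (msync(map, 4096, MS_SYNC) < 0)	/* kernel walks the range and writes back dirty pages */
		perror("msync");

	munmap(map, 4096);
	close(fd);
	return 0;
}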
int io_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset,
	unsigned long size, pgprot_t prot, int space)
{
	int error = 0;
	pgd_t * dir;
	unsigned long beg = from;
	unsigned long end = from + size;
	struct mm_struct *mm = vma->vm_mm;

	prot = __pgprot(pg_iobits);
	offset -= from;
	dir = pgd_offset(mm, from);
	flush_cache_range(vma, beg, end);

	spin_lock(&mm->page_table_lock);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(current->mm, dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = io_remap_pmd_range(pmd, from, end - from, offset + from, prot, space);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	spin_unlock(&mm->page_table_lock);

	flush_tlb_range(vma, beg, end);
	return error;
}
/* sys_cacheflush -- flush the processor cache. */
asmlinkage int sys_cacheflush(unsigned long addr, int scope, int cache,
			      unsigned long len)
{
	struct vm_area_struct *vma;

	if (len == 0)
		return 0;

	/* Check for overflow */
	if (addr + len < addr)
		return -EFAULT;

	/*
	 * Verify that the specified address region actually belongs
	 * to this process.
	 */
	vma = find_vma(current->mm, addr);
	if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
		return -EFAULT;

	/* Ignore the scope and cache arguments. */
	flush_cache_range(vma, addr, addr + len);

	return 0;
}
/*
 * We could optimize the case where the cache argument is not BCACHE but
 * that seems very atypical use ...
 */
asmlinkage int sys_cacheflush(unsigned long addr, unsigned long bytes,
	unsigned int cache)
{
	struct vm_area_struct* vma;

	if (bytes == 0)
		return 0;
	if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes))
		return -EFAULT;

	if (cache & DCACHE) {
		vma = find_vma(current->mm, (unsigned long) addr);
		if (vma) {
			flush_cache_range(vma, (unsigned long)addr,
					((unsigned long)addr) + bytes);
		} else {
			__flush_cache_all();
		}
	}

	if (cache & ICACHE) {
		flush_icache_range(addr, addr + bytes);
	}

	return 0;
}
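This MIPS-style handler is what userspace reaches through cacheflush(2). A minimal sketch of how such a flush might be requested from a program follows, assuming a MIPS Linux target where <sys/cachectl.h> provides the wrapper and the ICACHE/DCACHE flags.

/*
 * Hypothetical userspace usage sketch (not from the kernel source above):
 * after writing instructions into a buffer, ask the kernel to write back
 * the D-cache and invalidate the I-cache for that range via cacheflush(2).
 */
#include <stdio.h>
#include <sys/cachectl.h>

static unsigned int code_buf[64];	/* region that was just written with code */

int flush_patched_code(void)
{
	if (cacheflush(code_buf, sizeof(code_buf), ICACHE | DCACHE) < 0) {
		perror("cacheflush");
		return -1;
	}
	return 0;
}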
/*
 * Some architectures need cache flushes when we set/clear a
 * breakpoint:
 */
static void kgdb_flush_swbreak_addr(unsigned long addr)
{
	if (!CACHE_FLUSH_IS_SAFE)
		return;

	if (current->mm && current->mm->mmap_cache) {
		flush_cache_range(current->mm->mmap_cache,
				  addr, addr + BREAK_INSTR_SIZE);
	}
	/* Force flush instruction cache if it was outside the mm */
	flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}
/*
 * For fun, we are using the MMU for this.
 */
static inline size_t read_zero_pagealigned(char * buf, size_t size)
{
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	unsigned long addr = (unsigned long)buf;

	mm = current->mm;
	/* Oops, this was forgotten before. -ben */
	down(&mm->mmap_sem);

	/* For private mappings, just map in zero pages. */
	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
		unsigned long count;

		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
			goto out_up;
		if (vma->vm_flags & VM_SHARED)
			break;
		count = vma->vm_end - addr;
		if (count > size)
			count = size;

		flush_cache_range(mm, addr, addr + count);
		zap_page_range(mm, addr, count);
		zeromap_page_range(addr, count, PAGE_COPY);
		flush_tlb_range(mm, addr, addr + count);

		size -= count;
		buf += count;
		addr += count;
		if (size == 0)
			goto out_up;
	}

	up(&mm->mmap_sem);

	/* The shared case is hard. Let's do the conventional zeroing. */
	do {
		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
		if (unwritten)
			return size + unwritten - PAGE_SIZE;
		if (current->need_resched)
			schedule();
		buf += PAGE_SIZE;
		size -= PAGE_SIZE;
	} while (size);

	return size;

out_up:
	up(&mm->mmap_sem);
	return size;
}
/*
 * This routine is called from the page fault handler to remove a
 * range of active mappings at this point
 */
void remove_mapping(struct task_struct *task, unsigned long start, unsigned long end)
{
	unsigned long beg = start;
	pgd_t *dir;

	down(&task->mm->mmap_sem);
	dir = pgd_offset(task->mm, start);
	flush_cache_range(task->mm, beg, end);
	while (start < end) {
		remove_mapping_pmd_range(dir, start, end - start);
		start = (start + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(task->mm, beg, end);
	up(&task->mm->mmap_sem);
}
/*
 * remove user pages in a given range.
 */
int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
	pgd_t * dir;
	unsigned long end = address + size;

	dir = pgd_offset(mm, address);
	flush_cache_range(mm, end - size, end);
	while (address < end) {
		zap_pmd_range(dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
#ifdef CONFIG_BESTA
		if (!address)
			break;	/* unsigned overflow */
#endif
		dir++;
	}
	flush_tlb_range(mm, end - size, end);
	return 0;
}
static int __filemap_sync(struct vm_area_struct *vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t *pgd;
	unsigned long end = address + size;
	unsigned long next;
	int i;
	int error = 0;

	/* Acquire the lock early; it may be possible to avoid dropping
	 * and reacquiring it repeatedly.
	 */
	spin_lock(&vma->vm_mm->page_table_lock);

	pgd = pgd_offset(vma->vm_mm, address);
	flush_cache_range(vma, address, end);

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync() */
	if (is_vm_hugetlb_page(vma))
		goto out;

	if (address >= end)
		BUG();

	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
		next = (address + PGDIR_SIZE) & PGDIR_MASK;
		if (next <= address || next > end)
			next = end;
		error |= filemap_sync_pud_range(pgd, address, next, vma, flags);
		address = next;
		pgd++;
	}

	/*
	 * Why flush ? filemap_sync_pte already flushed the tlbs with the
	 * dirty bits.
	 */
	flush_tlb_range(vma, end - size, end);
out:
	spin_unlock(&vma->vm_mm->page_table_lock);
	return error;
}
/*
 * Some architectures need cache flushes when we set/clear a
 * breakpoint:
 */
static void kgdb_flush_swbreak_addr(unsigned long addr)
{
	if (!CACHE_FLUSH_IS_SAFE)
		return;

	if (current->mm) {
		int i;

		for (i = 0; i < VMACACHE_SIZE; i++) {
			if (!current->vmacache[i])
				continue;
			flush_cache_range(current->vmacache[i],
					  addr, addr + BREAK_INSTR_SIZE);
		}
	}

	/* Force flush instruction cache if it was outside the mm */
	flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}
/*
 * remove user pages in a given range.
 */
void do_zap_page_range(struct mm_struct *mm, unsigned long address,
		       unsigned long size)
{
	mmu_gather_t *tlb;
	pgd_t * dir;
	unsigned long start = address, end = address + size;
	int freed = 0;

	dir = pgd_offset(mm, address);

	/*
	 * This is a long-lived spinlock. That's fine.
	 * There's no contention, because the page table
	 * lock only protects against kswapd anyway, and
	 * even if kswapd happened to be looking at this
	 * process we _want_ it to get stuck.
	 */
	if (address >= end)
		BUG();
	spin_lock(&mm->page_table_lock);
	flush_cache_range(mm, address, end);
	tlb = tlb_gather_mmu(mm);

	do {
		freed += zap_pmd_range(tlb, dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));

	/* this will flush any remaining tlb entries */
	tlb_finish_mmu(tlb, start, end);

	/*
	 * Update rss for the mm_struct (not necessarily current->mm)
	 * Notice that rss is an unsigned long.
	 */
	if (mm->rss > freed)
		mm->rss -= freed;
	else
		mm->rss = 0;

	spin_unlock(&mm->page_table_lock);
}
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	/* Acquire the lock early; it may be possible to avoid dropping
	 * and reacquiring it repeatedly.
	 */
	spin_lock(&vma->vm_mm->page_table_lock);

	dir = pgd_offset(vma->vm_mm, address);
	flush_cache_range(vma, address, end);

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync() */
	if (is_vm_hugetlb_page(vma))
		goto out;

	if (address >= end)
		BUG();
	do {
		error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	/*
	 * Why flush ? filemap_sync_pte already flushed the tlbs with the
	 * dirty bits.
	 */
	flush_tlb_range(vma, end - size, end);
out:
	spin_unlock(&vma->vm_mm->page_table_lock);
	return error;
}
/*
 * This isn't really reliable by any means..
 */
int mem_mmap(struct inode * inode, struct file * file,
	     struct vm_area_struct * vma)
{
	struct task_struct *tsk;
	pgd_t *src_dir, *dest_dir;
	pmd_t *src_middle, *dest_middle;
	pte_t *src_table, *dest_table;
	unsigned long stmp, dtmp, mapnr;
	struct vm_area_struct *src_vma = NULL;

	/* Get the source's task information */
	tsk = get_task(inode->i_ino >> 16);
	if (!tsk)
		return -ESRCH;

	/* Ensure that we have a valid source area.  (Has to be mmap'ed and
	   have valid page information.)  We can't map shared memory at the
	   moment because working out the vm_area_struct & nattach stuff isn't
	   worth it. */

	src_vma = tsk->mm->mmap;
	stmp = vma->vm_offset;
	while (stmp < vma->vm_offset + (vma->vm_end - vma->vm_start)) {
		while (src_vma && stmp > src_vma->vm_end)
			src_vma = src_vma->vm_next;
		if (!src_vma || (src_vma->vm_flags & VM_SHM))
			return -EINVAL;

		src_dir = pgd_offset(tsk->mm, stmp);
		if (pgd_none(*src_dir))
			return -EINVAL;
		if (pgd_bad(*src_dir)) {
			printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir));
			return -EINVAL;
		}
		src_middle = pmd_offset(src_dir, stmp);
		if (pmd_none(*src_middle))
			return -EINVAL;
		if (pmd_bad(*src_middle)) {
			printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle));
			return -EINVAL;
		}
		src_table = pte_offset(src_middle, stmp);
		if (pte_none(*src_table))
			return -EINVAL;

		if (stmp < src_vma->vm_start) {
			if (!(src_vma->vm_flags & VM_GROWSDOWN))
				return -EINVAL;
			if (src_vma->vm_end - stmp > current->rlim[RLIMIT_STACK].rlim_cur)
				return -EINVAL;
		}
		stmp += PAGE_SIZE;
	}

	src_vma = tsk->mm->mmap;
	stmp = vma->vm_offset;
	dtmp = vma->vm_start;

	flush_cache_range(vma->vm_mm, vma->vm_start, vma->vm_end);
	flush_cache_range(src_vma->vm_mm, src_vma->vm_start, src_vma->vm_end);
	while (dtmp < vma->vm_end) {
		while (src_vma && stmp > src_vma->vm_end)
			src_vma = src_vma->vm_next;

		src_dir = pgd_offset(tsk->mm, stmp);
		src_middle = pmd_offset(src_dir, stmp);
		src_table = pte_offset(src_middle, stmp);

		dest_dir = pgd_offset(current->mm, dtmp);
		dest_middle = pmd_alloc(dest_dir, dtmp);
		if (!dest_middle)
			return -ENOMEM;
		dest_table = pte_alloc(dest_middle, dtmp);
		if (!dest_table)
			return -ENOMEM;

		if (!pte_present(*src_table))
			do_no_page(tsk, src_vma, stmp, 1);

		if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table))
			do_wp_page(tsk, src_vma, stmp, 1);

		set_pte(src_table, pte_mkdirty(*src_table));
		set_pte(dest_table, *src_table);
		mapnr = MAP_NR(pte_page(*src_table));
		if (mapnr < MAP_NR(high_memory))
			mem_map[mapnr].count++;

		stmp += PAGE_SIZE;
		dtmp += PAGE_SIZE;
	}

	flush_tlb_range(vma->vm_mm, vma->vm_start, vma->vm_end);
	flush_tlb_range(src_vma->vm_mm, src_vma->vm_start, src_vma->vm_end);
	return 0;
}