/*
 * Map a kernel virtual address or kernel logical address to a physical
 * address.
 */
static inline u32 physical_address(u32 virt, int write)
{
	struct page *page;

	/* kernel static-mapped address */
	DPRINTK(" get physical address: virt %x , write %d\n", virt, write);
	if (virt_addr_valid(virt)) {
		return __pa((u32) virt);
	}
	if (virt >= (u32)high_memory)
		return 0;

	if (virt >= TASK_SIZE) {
		page = follow_page(find_extend_vma(&init_mm, virt),
				   (u32) virt, write);
	} else {
		page = follow_page(find_extend_vma(current->mm, virt),
				   (u32) virt, write);
	}

	if (pfn_valid(page_to_pfn(page))) {
		return ((page_to_pfn(page) << PAGE_SHIFT) |
			((u32) virt & (PAGE_SIZE - 1)));
	} else {
		return 0;
	}
}
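The return value composes the page frame number with the offset inside the page. A small, hypothetical user-space arithmetic check of that composition (not kernel code; it assumes 4 KiB pages, i.e. PAGE_SHIFT = 12):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)

int main(void)
{
	uint32_t pfn  = 0x12345;	/* stand-in for page_to_pfn(page) */
	uint32_t virt = 0x40001abc;	/* some virtual address */

	/* same composition as physical_address(): frame base | in-page offset */
	uint32_t phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1));

	printf("phys = 0x%08x\n", phys);	/* prints 0x12345abc */
	return 0;
}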
/*
 * This routine checks the page boundaries, and that the offset is
 * within the task area. It then calls get_long() to read a long.
 */
static int read_long(struct task_struct * tsk, unsigned long addr,
	unsigned long * result)
{
	struct vm_area_struct * vma = find_extend_vma(tsk, addr);

	if (!vma)
		return -EIO;
	if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
		unsigned long low,high;
		struct vm_area_struct * vma_high = vma;

		if (addr + sizeof(long) >= vma->vm_end) {
			vma_high = vma->vm_next;
			if (!vma_high || vma_high->vm_start != vma->vm_end)
				return -EIO;
		}
		low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
		high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
		switch (addr & (sizeof(long)-1)) {
		case 1:
			low >>= 8;
			low |= high << 24;
			break;
		case 2:
			low >>= 16;
			low |= high << 16;
			break;
		case 3:
			low >>= 24;
			low |= high << 8;
			break;
		}
		*result = low;
	} else
		*result = get_long(tsk, vma, addr);
	return 0;
}
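The unaligned case above rebuilds a long that straddles two aligned words by shifting and OR-ing the two pieces together. The following stand-alone user-space sketch is hypothetical (not part of the kernel source) and assumes a little-endian machine with 32-bit words, matching the 8/24, 16/16, 24/8 shift pairs in the switch; it only demonstrates the reassembly arithmetic:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char buf[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };
	uint32_t low, high, expected, rebuilt;
	unsigned int offset = 3;	/* unaligned by 3 bytes, as in "case 3" */

	memcpy(&low,      buf,          sizeof(low));      /* aligned word holding the start */
	memcpy(&high,     buf + 4,      sizeof(high));     /* next aligned word */
	memcpy(&expected, buf + offset, sizeof(expected)); /* what an unaligned read would see */

	/* same combination read_long() performs for case 3: low >>= 24; low |= high << 8; */
	rebuilt = (low >> (8 * offset)) | (high << (32 - 8 * offset));

	printf("expected %08x, rebuilt %08x\n", expected, rebuilt);
	return 0;
}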
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		   unsigned long start, int len, int write, int force,
		   struct page **pages, struct vm_area_struct **vmas)
{
	int i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);

		if (!vma ||
		    (!force &&
		     ((write && !(vma->vm_flags & VM_WRITE)) ||
		      (!write && !(vma->vm_flags & VM_READ))))) {
			if (i)
				return i;
			return -EFAULT;
		}

		spin_lock(&mm->page_table_lock);
		do {
			struct page *map;

			while (!(map = follow_page(mm, start, write))) {
				spin_unlock(&mm->page_table_lock);
				switch (handle_mm_fault(mm, vma, start, write)) {
				case 1:
					tsk->min_flt++;
					break;
				case 2:
					tsk->maj_flt++;
					break;
				case 0:
					if (i)
						return i;
					return -EFAULT;
				default:
					if (i)
						return i;
					return -ENOMEM;
				}
				spin_lock(&mm->page_table_lock);
			}
			if (pages) {
				pages[i] = get_page_map(map);
				/* FIXME: call the correct function,
				 * depending on the type of the found page
				 */
				if (pages[i])
					page_cache_get(pages[i]);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
		spin_unlock(&mm->page_table_lock);
	} while (len);
	return i;
}
/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
 * this case.
 *
 * A caller using such a combination of @nonblocking and @gup_flags
 * must therefore hold the mmap_sem for reading only, and recognize
 * when it's been released.  Otherwise, it must be held for either
 * reading or writing and will not be released.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags, nonblocking);
				continue;
			}
		}
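As the comment block notes, most callers go through get_user_pages() rather than __get_user_pages(). The following is a minimal, hypothetical sketch of the pin side of that contract, assuming the older task/mm-based get_user_pages() signature used elsewhere in this section; the helper name pin_one_user_page is illustrative only:

static int pin_one_user_page(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long uaddr, int write,
			     struct page **out)
{
	struct page *page;
	int ret;

	/* mmap_sem must be held across the walk; vmas stay valid only under it */
	down_read(&mm->mmap_sem);
	ret = get_user_pages(tsk, mm, uaddr & PAGE_MASK, 1, write, 0,
			     &page, NULL);
	up_read(&mm->mmap_sem);

	if (ret <= 0)
		return ret ? ret : -EFAULT;

	/*
	 * Caller must release the reference with put_page()/page_cache_release(),
	 * marking the page dirty first if it was written to.
	 */
	*out = page;
	return 0;
}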
/*
 * Map a kernel virtual address or kernel logical address to a physical
 * address.
 */
static inline u32 physical_address(u32 virt, int write)
{
	struct page *page;
	struct vm_area_struct *vm;
	struct mm_struct *mm = (virt >= TASK_SIZE) ? &init_mm : current->mm;
	unsigned int vm_flags;
	unsigned int flags;

	/* kernel static-mapped address */
	DPRINTK(" get physical address: virt %x , write %d\n", virt, write);
	if (virt_addr_valid(virt)) {
		return __pa((u32) virt);
	}
	if (virt >= (u32)high_memory)
		return 0;

	/*
	 * Require read or write permissions.
	 */
	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);

	vm = find_extend_vma(mm, virt);
	if (!vm || (vm->vm_flags & (VM_IO | VM_PFNMAP)) ||
	    !(vm_flags & vm->vm_flags)) {
		return 0;
	}

	flags = FOLL_PTE_EXIST | FOLL_TOUCH;
	flags |= write ? FOLL_WRITE : 0;

	page = follow_page(vm, (u32) virt, flags);

	if (pfn_valid(page_to_pfn(page))) {
		return ((page_to_pfn(page) << PAGE_SHIFT) |
			((u32) virt & (PAGE_SIZE - 1)));
	} else {
		/*
		 * page == 0; anything else should never happen, since it is
		 * checked inside follow_page->vm_normal_page.
		 */
		return 0;
	}
}
int __m4u_get_user_pages(int eModuleID, struct task_struct *tsk,
		struct mm_struct *mm, unsigned long start, int nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas)
{
	int i;
	unsigned long vm_flags;
	int trycnt;

	if (nr_pages <= 0)
		return 0;

	//VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
	if (!!pages != !!(gup_flags & FOLL_GET)) {
		M4UMSG(" error: __m4u_get_user_pages !!pages != !!(gup_flags & FOLL_GET), pages=0x%x, gup_flags & FOLL_GET=0x%x \n",
			(unsigned int)pages, gup_flags & FOLL_GET);
	}

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	M4UDBG("Trying to get_user_pages from start vaddr 0x%08x with %d pages\n",
		start, nr_pages);

	do {
		struct vm_area_struct *vma;

		M4UDBG("For a new vma area from 0x%08x\n", start);
		vma = find_extend_vma(mm, start);

		if (!vma) {
			M4UMSG("error: the vma is not found, start=0x%x, module=%d \n",
				(unsigned int)start, eModuleID);
			return i ? i : -EFAULT;
		}
		if (((~vma->vm_flags) & (VM_IO | VM_PFNMAP | VM_SHARED | VM_WRITE)) == 0) {
			M4UMSG("error: m4u_get_pages(): bypass pmem garbage pages! vma->vm_flags=0x%x, start=0x%x, module=%d \n",
				(unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
			return i ? i : -EFAULT;
		}
		if (vma->vm_flags & VM_IO) {
			M4UDBG("warning: vma is marked as VM_IO \n");
		}
		if (vma->vm_flags & VM_PFNMAP) {
			M4UMSG("error: vma permission is not correct, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
				(unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
			M4UMSG("hint: maybe the memory is remapped with un-permitted vma->vm_flags! \n");
			//m4u_dump_maps(start);
			return i ? i : -EFAULT;
		}
		if (!(vm_flags & vma->vm_flags)) {
			M4UMSG("error: vm_flags invalid, vm_flags=0x%x, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
				(unsigned int)vm_flags, (unsigned int)(vma->vm_flags),
				(unsigned int)start, eModuleID);
			//m4u_dump_maps(start);
			return i ? : -EFAULT;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start & (~0xFFF));
			page = follow_page(vma, start, foll_flags);
			MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);

			while (!page) {
				int ret;

				M4UDBG("Trying to allocate for %dth page(vaddr: 0x%08x)\n", i, start);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagStart, eModuleID, start & (~0xFFF));
				ret = handle_mm_fault(mm, vma, start,
					(foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagEnd, eModuleID, 0x1000);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						M4UMSG("handle_mm_fault() error: no memory, addr:0x%08lx (%d pages are allocated), module=%d\n",
							start, i, eModuleID);
						//m4u_dump_maps(start);
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON | VM_FAULT_SIGBUS)) {
						M4UMSG("handle_mm_fault() error: invalid memory address, vaddr:0x%lx (%d pages are allocated), module=%d\n",
							start, i, eModuleID);
						//m4u_dump_maps(start);
						return i ? i : -EFAULT;
					}
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start & (~0xFFF));
				page = follow_page(vma, start, foll_flags);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);
			}

			if (IS_ERR(page)) {
				M4UMSG("handle_mm_fault() error: faulty page is returned, vaddr:0x%lx (%d pages are allocated), module=%d \n",
					start, i, eModuleID);
				//m4u_dump_maps(start);
				return i ? i : PTR_ERR(page);
			}

			if (pages) {
				pages[i] = page;
				MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagStart, eModuleID, start & (~0xFFF));
				/* Use retry version to guarantee it will succeed in getting the lock */
				trycnt = 3000;
				do {
					if (trylock_page(page)) {
						mlock_vma_page(page);
						unlock_page(page);

						//make sure hw pte is not 0
						{
							int j;

							for (j = 0; j < 3000; j++) {
								if (!m4u_user_v2p(start)) {
									handle_mm_fault(mm, vma, start,
										(foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
									cond_resched();
								} else
									break;
							}
							if (j == 3000)
								M4UMSG("error: cannot handle_mm_fault to get hw pte: va=0x%x\n", start);
						}
						break;
					}
				} while (trycnt-- > 0);

				if (PageMlocked(page) == 0) {
					M4UMSG("Can't mlock page\n");
					dump_page(page);
				} else {
					unsigned int pfn = page_to_pfn(page);

					if (pfn < mlock_cnt_size) {
						pMlock_cnt[page_to_pfn(page)]++;
					} else {
						M4UERR("mlock_cnt_size is too small: pfn=%d, size=%d\n", pfn, mlock_cnt_size);
					}
					//M4UMSG("lock page:\n");
					//dump_page(page);
				}
				MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagEnd, eModuleID, 0x1000);
			}
			if (vmas)
				vmas[i] = vma;

			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	return i;
}
int access_process_vm(struct task_struct *tsk, unsigned long addr,
		void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	struct page *page;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		unsigned long paddr;
		int xip = 0;

#ifdef CONFIG_CRAMFS_XIP_DEBUGGABLE
		if (xip_enable_debug && !write) {
			vma = find_extend_vma(mm, addr);
			if (vma && (vma->vm_flags & VM_XIP))
				xip = find_xip_untouched_entry(mm, addr, &paddr);
		}
#endif
		if (xip) {
			maddr = ioremap(paddr, PAGE_SIZE);
			if (!maddr)
				break;
			page = NULL;
		} else {
			ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma);
			if (ret <= 0)
				break;
			maddr = kmap(page);
		}

		bytes = len;
		offset = addr & (PAGE_SIZE-1);
		if (bytes > PAGE_SIZE-offset)
			bytes = PAGE_SIZE-offset;

		if (write) {
			copy_to_user_page(vma, page, addr,
					  maddr + offset, buf, bytes);
			set_page_dirty_lock(page);
		} else {
			copy_from_user_page(vma, page, addr,
					    buf, maddr + offset, bytes);
		}

		if (xip)
			iounmap(maddr);
		else {
			kunmap(page);
			page_cache_release(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}
/*
 * Please read Documentation/cachetlb.txt before using this function,
 * accessing foreign memory spaces can cause cache coherency problems.
 *
 * Accessing a VM_IO area is even more dangerous, therefore the function
 * fails if pages is != NULL and a VM_IO area is found.
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		   unsigned long start, int len, int write, int force,
		   struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int flags;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma || (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags))
			return i ? : -EFAULT;

		spin_lock(&mm->page_table_lock);
		do {
			struct page *map;

			while (!(map = follow_page(mm, start, write))) {
				spin_unlock(&mm->page_table_lock);
				switch (handle_mm_fault(mm, vma, start, write)) {
				case 1:
					tsk->min_flt++;
					break;
				case 2:
					tsk->maj_flt++;
					break;
				case 0:
					if (i)
						return i;
					return -EFAULT;
				default:
					if (i)
						return i;
					return -ENOMEM;
				}
				spin_lock(&mm->page_table_lock);
			}
			if (pages) {
				pages[i] = get_page_map(map);
				/* FIXME: call the correct function,
				 * depending on the type of the found page
				 */
				if (!pages[i])
					goto bad_page;
				page_cache_get(pages[i]);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
		spin_unlock(&mm->page_table_lock);
	} while (len);
out:
	return i;

	/*
	 * We found an invalid page in the VMA. Release all we have
	 * so far and fail.
	 */
bad_page:
	spin_unlock(&mm->page_table_lock);
	while (i--)
		page_cache_release(pages[i]);
	i = -EFAULT;
	goto out;
}
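The kerneldoc for __get_user_pages above spells out the release side of this contract: every pinned page must be dropped again, and pages that were written to must be marked dirty first. A minimal, hypothetical helper for that cleanup (the name release_pinned_pages is illustrative; it assumes the set_page_dirty_lock()/page_cache_release() calls used elsewhere in this section) might look like:

static void release_pinned_pages(struct page **pages, long nr, int dirtied)
{
	long i;

	for (i = 0; i < nr; i++) {
		/* flush dirty state before dropping the reference if we wrote to the page */
		if (dirtied)
			set_page_dirty_lock(pages[i]);
		page_cache_release(pages[i]);
	}
}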
/* Translate virtual address to physical address. */
unsigned long xencomm_vtop(unsigned long vaddr)
{
	struct page *page;
	struct vm_area_struct *vma;

	if (vaddr == 0)
		return 0UL;

	if (REGION_NUMBER(vaddr) == 5) {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *ptep;

		/* On ia64, TASK_SIZE refers to current.  It is not initialized
		   during boot.  Furthermore the kernel is relocatable and
		   __pa() doesn't work on addresses.  */
		if (vaddr >= KERNEL_START
		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
			return vaddr - kernel_virtual_offset;

		/* In kernel area -- virtually mapped. */
		pgd = pgd_offset_k(vaddr);
		if (pgd_none(*pgd) || pgd_bad(*pgd))
			return ~0UL;

		pud = pud_offset(pgd, vaddr);
		if (pud_none(*pud) || pud_bad(*pud))
			return ~0UL;

		pmd = pmd_offset(pud, vaddr);
		if (pmd_none(*pmd) || pmd_bad(*pmd))
			return ~0UL;

		ptep = pte_offset_kernel(pmd, vaddr);
		if (!ptep)
			return ~0UL;

		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
	}

	if (vaddr > TASK_SIZE) {
		/* percpu variables */
		if (REGION_NUMBER(vaddr) == 7 &&
		    REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
			return ia64_tpa(vaddr);

		/* kernel address */
		return __pa(vaddr);
	}

	vma = find_extend_vma(current->mm, vaddr);
	if (!vma)
		return ~0UL;

	/* We assume the page is modified. */
	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
	if (!page)
		return ~0UL;

	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
}