/*
 * map a kernel virtual address or kernel logical address to a phys address
 */
static inline u32 physical_address(u32 virt, int write)
{
    struct page *page;

    /* kernel static-mapped address */
    DPRINTK(" get physical address: virt %x , write %d\n", virt, write);
    if (virt_addr_valid(virt)) {
        return __pa((u32) virt);
    }
    if (virt >= (u32) high_memory)
        return 0;

    if (virt >= TASK_SIZE) {
        page = follow_page(find_extend_vma(&init_mm, virt),
                           (u32) virt, write);
    } else {
        page = follow_page(find_extend_vma(current->mm, virt),
                           (u32) virt, write);
    }

    if (pfn_valid(page_to_pfn(page))) {
        return ((page_to_pfn(page) << PAGE_SHIFT) |
                ((u32) virt & (PAGE_SIZE - 1)));
    } else {
        return 0;
    }
}
static void dump_vdso_pages(struct vm_area_struct *vma)
{
    int i;

    if (!vma || test_thread_flag(TIF_32BIT)) {
        printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
        for (i = 0; i < vdso32_pages; i++) {
            struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
            struct page *upg = (vma && vma->vm_mm) ?
                follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
                : NULL;
            dump_one_vdso_page(pg, upg);
        }
    }
    if (!vma || !test_thread_flag(TIF_32BIT)) {
        printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
        for (i = 0; i < vdso64_pages; i++) {
            struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
            struct page *upg = (vma && vma->vm_mm) ?
                follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
                : NULL;
            dump_one_vdso_page(pg, upg);
        }
    }
}
ErrorStack MasstreeStoragePimpl::verify_single_thread_border(
  thread::Thread* context,
  KeySlice low_fence,
  HighFence high_fence,
  MasstreeBorderPage* page) {
  CHECK_ERROR(verify_page_basic(context, page, kMasstreeBorderPageType, low_fence, high_fence));

  // check consecutive_inserts_. this should be consistent whether it's moved or not.
  bool sorted = true;
  for (SlotIndex i = 1; i < page->get_key_count(); ++i) {
    KeySlice prev = page->get_slice(i - 1);
    KeySlice slice = page->get_slice(i);
    KeyLength prev_len = page->get_remainder_length(i - 1);
    KeyLength len = page->get_remainder_length(i);
    if (prev > slice || (prev == slice && prev_len > len)) {
      sorted = false;
      break;
    }
  }
  CHECK_AND_ASSERT(page->is_consecutive_inserts() == sorted);

  if (page->is_moved()) {
    CHECK_ERROR(verify_single_thread_border(
      context,
      low_fence,
      HighFence(page->get_foster_fence(), false),
      context->resolve_cast<MasstreeBorderPage>(page->get_foster_minor())));
    CHECK_ERROR(verify_single_thread_border(
      context,
      page->get_foster_fence(),
      high_fence,
      context->resolve_cast<MasstreeBorderPage>(page->get_foster_major())));
    return kRetOk;
  }

  CHECK_AND_ASSERT(!page->is_moved());
  CHECK_AND_ASSERT(page->get_key_count() <= kBorderPageMaxSlots);
  for (SlotIndex i = 0; i < page->get_key_count(); ++i) {
    CHECK_AND_ASSERT(!page->get_owner_id(i)->lock_.is_keylocked());
    CHECK_AND_ASSERT(!page->get_owner_id(i)->lock_.is_rangelocked());
    CHECK_AND_ASSERT(!page->get_owner_id(i)->xct_id_.is_being_written());
    CHECK_AND_ASSERT(page->get_owner_id(i)->xct_id_.get_epoch().is_valid());
    CHECK_AND_ASSERT(page->verify_slot_lengthes(i));
    KeySlice slice = page->get_slice(i);
    CHECK_AND_ASSERT(slice >= low_fence);
    CHECK_AND_ASSERT(slice < high_fence.slice_ || page->is_high_fence_supremum());
    if (page->does_point_to_layer(i)) {
      CHECK_AND_ASSERT(page->get_owner_id(i)->xct_id_.is_next_layer());
      CHECK_AND_ASSERT(!page->get_next_layer(i)->is_both_null());
      MasstreePage* next;
      // TASK(Hideaki) probably two versions: always follow volatile vs snapshot
      // so far check volatile only
      WRAP_ERROR_CODE(follow_page(context, true, page->get_next_layer(i), &next));
      CHECK_ERROR(verify_single_thread_layer(context, page->get_layer() + 1, next));
    } else {
      CHECK_AND_ASSERT(!page->get_owner_id(i)->xct_id_.is_next_layer());
    }
  }

  return kRetOk;
}
static unsigned int shrink_pages(struct mm_struct *mm,
                                 struct list_head *zone0_page_list,
                                 struct list_head *zone1_page_list,
                                 unsigned int num_to_scan)
{
    unsigned long addr;
    unsigned int isolate_pages_countter = 0;
    struct vm_area_struct *vma = mm->mmap;

    while (vma != NULL) {
        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
            struct page *page;
            /* get the page address from virtual memory address */
            page = follow_page(vma, addr, FOLL_GET);

            if (page && !IS_ERR(page)) {
                put_page(page);
                /* only moveable, anonymous and not dirty pages can be swapped */
                if ((!PageUnevictable(page)) && (!PageDirty(page))
                    && ((PageAnon(page)))
                    && (0 == page_is_file_cache(page))) {
                    switch (page_zone_id(page)) {
                    case 0:
                        if (!isolate_lru_page_compcache(page)) {
                            /* isolate page from LRU and add to temp list */
                            /* create new page list, it will be used in shrink_page_list */
                            list_add_tail(&page->lru, zone0_page_list);
                            isolate_pages_countter++;
                        }
                        break;
                    case 1:
                        if (!isolate_lru_page_compcache(page)) {
                            /* isolate page from LRU and add to temp list */
                            /* create new page list, it will be used in shrink_page_list */
                            list_add_tail(&page->lru, zone1_page_list);
                            isolate_pages_countter++;
                        }
                        break;
                    default:
                        break;
                    }
                }
            }

            if (isolate_pages_countter >= num_to_scan) {
                return isolate_pages_countter;
            }
        }
        vma = vma->vm_next;
    }

    return isolate_pages_countter;
}
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                   unsigned long start, int len, int write, int force,
                   struct page **pages, struct vm_area_struct **vmas)
{
    int i = 0;

    do {
        struct vm_area_struct *vma;

        vma = find_extend_vma(mm, start);

        if (!vma ||
            (!force &&
             ((write && (!(vma->vm_flags & VM_WRITE))) ||
              (!write && (!(vma->vm_flags & VM_READ)))))) {
            if (i)
                return i;
            return -EFAULT;
        }

        spin_lock(&mm->page_table_lock);
        do {
            struct page *map;

            while (!(map = follow_page(mm, start, write))) {
                spin_unlock(&mm->page_table_lock);
                switch (handle_mm_fault(mm, vma, start, write)) {
                case 1:
                    tsk->min_flt++;
                    break;
                case 2:
                    tsk->maj_flt++;
                    break;
                case 0:
                    if (i)
                        return i;
                    return -EFAULT;
                default:
                    if (i)
                        return i;
                    return -ENOMEM;
                }
                spin_lock(&mm->page_table_lock);
            }
            if (pages) {
                pages[i] = get_page_map(map);
                /* FIXME: call the correct function,
                 * depending on the type of the found page
                 */
                if (pages[i])
                    page_cache_get(pages[i]);
            }
            if (vmas)
                vmas[i] = vma;
            i++;
            start += PAGE_SIZE;
            len--;
        } while (len && start < vma->vm_end);
        spin_unlock(&mm->page_table_lock);
    } while (len);

    return i;
}
static int vma_lock_mapping_one(struct mm_struct *mm, unsigned long addr,
                                size_t len, unsigned long mappings[], int cnt)
{
    unsigned long end = addr + len;
    struct vm_area_struct *vma;
    struct page *page;

    for (vma = find_vma(mm, addr);
         vma && (vma->vm_start <= addr) && (addr < end);
         addr += vma->vm_end - vma->vm_start, vma = vma->vm_next) {
        struct anon_vma *anon;

        page = follow_page(vma, addr, 0);
        if (IS_ERR_OR_NULL(page) || !page->mapping)
            continue;

        anon = page_get_anon_vma(page);
        if (!anon) {
            struct address_space *mapping;

            get_page(page);
            mapping = page_mapping(page);
            if (mapping_can_locked((unsigned long)mapping,
                                   mappings, cnt)) {
                mutex_lock(&mapping->i_mmap_mutex);
                mappings[cnt++] = (unsigned long)mapping;
            }
            put_page(page);
        } else {
            if (mapping_can_locked(
                    (unsigned long)anon | PAGE_MAPPING_ANON,
                    mappings, cnt)) {
                anon_vma_lock_write(anon);
                mappings[cnt++] = (unsigned long)page->mapping;
            }
            put_anon_vma(anon);
        }

        if (cnt == G2D_MAX_VMA_MAPPING)
            break;
    }

    return cnt;
}
/*
 * map a kernel virtual address or kernel logical address to a phys address
 */
static inline u32 physical_address(u32 virt, int write)
{
    struct page *page;
    struct vm_area_struct *vm;
    struct mm_struct *mm = (virt >= TASK_SIZE) ? &init_mm : current->mm;
    unsigned int vm_flags;
    unsigned int flags;

    /* kernel static-mapped address */
    DPRINTK(" get physical address: virt %x , write %d\n", virt, write);
    if (virt_addr_valid(virt)) {
        return __pa((u32) virt);
    }
    if (virt >= (u32)high_memory)
        return 0;

    /*
     * Require read or write permissions.
     */
    vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);

    vm = find_extend_vma(mm, virt);
    if (!vm || (vm->vm_flags & (VM_IO | VM_PFNMAP))
        || !(vm_flags & vm->vm_flags)) {
        return 0;
    }

    flags = FOLL_PTE_EXIST | FOLL_TOUCH;
    flags |= (write) ? FOLL_WRITE : 0;

    page = follow_page(vm, (u32) virt, flags);

    if (pfn_valid(page_to_pfn(page))) {
        return ((page_to_pfn(page) << PAGE_SHIFT) |
                ((u32) virt & (PAGE_SIZE - 1)));
    } else {
        /*
         * page == 0; anything else should never happen, since it is
         * checked inside follow_page->vm_normal_page.
         */
        return 0;
    }
}
ErrorStack MasstreeStoragePimpl::verify_single_thread_intermediate(
  thread::Thread* context,
  KeySlice low_fence,
  HighFence high_fence,
  MasstreeIntermediatePage* page) {
  CHECK_ERROR(
    verify_page_basic(context, page, kMasstreeIntermediatePageType, low_fence, high_fence));

  if (page->is_moved()) {
    CHECK_ERROR(verify_single_thread_intermediate(
      context,
      low_fence,
      HighFence(page->get_foster_fence(), false),
      context->resolve_cast<MasstreeIntermediatePage>(page->get_foster_minor())));
    CHECK_ERROR(verify_single_thread_intermediate(
      context,
      page->get_foster_fence(),
      high_fence,
      context->resolve_cast<MasstreeIntermediatePage>(page->get_foster_major())));
    return kRetOk;
  }

  uint8_t key_count = page->get_key_count();
  CHECK_AND_ASSERT(key_count <= kMaxIntermediateSeparators);
  KeySlice previous_low = low_fence;
  for (uint8_t i = 0; i <= key_count; ++i) {
    HighFence mini_high(0, false);
    if (i < key_count) {
      mini_high.slice_ = page->get_separator(i);
      mini_high.supremum_ = false;
      CHECK_AND_ASSERT(high_fence.supremum_ || mini_high.slice_ < high_fence.slice_);
      if (i == 0) {
        CHECK_AND_ASSERT(mini_high.slice_ > low_fence);
      } else {
        CHECK_AND_ASSERT(mini_high.slice_ > page->get_separator(i - 1));
      }
    } else {
      mini_high = high_fence;
    }

    MasstreeIntermediatePage::MiniPage& minipage = page->get_minipage(i);
    uint8_t mini_count = minipage.key_count_;
    CHECK_AND_ASSERT(mini_count <= kMaxIntermediateMiniSeparators);
    KeySlice page_low = previous_low;
    for (uint8_t j = 0; j <= mini_count; ++j) {
      HighFence page_high(0, false);
      if (j < mini_count) {
        page_high.slice_ = minipage.separators_[j];
        page_high.supremum_ = false;
        CHECK_AND_ASSERT(page_high.slice_ < mini_high.slice_ || mini_high.supremum_);
        if (j == 0) {
          CHECK_AND_ASSERT(page_high.slice_ > previous_low);
        } else {
          CHECK_AND_ASSERT(page_high.slice_ > minipage.separators_[j - 1]);
        }
      } else {
        page_high = mini_high;
      }

      CHECK_AND_ASSERT(!minipage.pointers_[j].is_both_null());
      MasstreePage* next;
      // TASK(Hideaki) probably two versions: always follow volatile vs snapshot
      // so far check volatile only
      WRAP_ERROR_CODE(follow_page(context, true, &minipage.pointers_[j], &next));
      CHECK_AND_ASSERT(next->get_layer() == page->get_layer());
      CHECK_AND_ASSERT(next->get_btree_level() + 1U == page->get_btree_level());
      if (next->is_border()) {
        CHECK_ERROR(verify_single_thread_border(
          context, page_low, page_high, reinterpret_cast<MasstreeBorderPage*>(next)));
      } else {
        CHECK_ERROR(verify_single_thread_intermediate(
          context, page_low, page_high, reinterpret_cast<MasstreeIntermediatePage*>(next)));
      }

      page_low = page_high.slice_;
    }

    previous_low = mini_high.slice_;
  }

  return kRetOk;
}
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
{
    unsigned long ptr, end;
    int err;
    struct mm_struct *mm;
    struct vm_area_struct *vma = 0;
    struct page *map;
    int i;
    int datain = (rw == READ);

    /* Make sure the iobuf is not already mapped somewhere. */
    if (iobuf->nr_pages)
        return -EINVAL;

    mm = current->mm;
    dprintk("map_user_kiobuf: begin\n");

    ptr = va & PAGE_MASK;
    end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
    err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
    if (err)
        return err;

    down(&mm->mmap_sem);

    err = -EFAULT;
    iobuf->locked = 0;
    iobuf->offset = va & ~PAGE_MASK;
    iobuf->length = len;

    i = 0;

    /*
     * First of all, try to fault in all of the necessary pages
     */
    while (ptr < end) {
        if (!vma || ptr >= vma->vm_end) {
            vma = find_vma(current->mm, ptr);
            if (!vma)
                goto out_unlock;
            if (vma->vm_start > ptr) {
                if (!(vma->vm_flags & VM_GROWSDOWN))
                    goto out_unlock;
                if (expand_stack(vma, ptr))
                    goto out_unlock;
            }
            if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
                (!(vma->vm_flags & VM_READ))) {
                err = -EACCES;
                goto out_unlock;
            }
        }
        if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
            goto out_unlock;
        spin_lock(&mm->page_table_lock);
        map = follow_page(ptr);
        if (!map) {
            spin_unlock(&mm->page_table_lock);
            dprintk(KERN_ERR "Missing page in map_user_kiobuf\n");
            goto out_unlock;
        }
        map = get_page_map(map);
        if (map)
            atomic_inc(&map->count);
        else
            printk(KERN_INFO "Mapped page missing [%d]\n", i);
        spin_unlock(&mm->page_table_lock);
        iobuf->maplist[i] = map;
        iobuf->nr_pages = ++i;

        ptr += PAGE_SIZE;
    }

    up(&mm->mmap_sem);
    dprintk("map_user_kiobuf: end OK\n");
    return 0;

out_unlock:
    up(&mm->mmap_sem);
    unmap_kiobuf(iobuf);
    dprintk("map_user_kiobuf: end %d\n", err);
    return err;
}
int __m4u_get_user_pages(int eModuleID, struct task_struct *tsk, struct mm_struct *mm,
                         unsigned long start, int nr_pages, unsigned int gup_flags,
                         struct page **pages, struct vm_area_struct **vmas)
{
    int i;
    unsigned long vm_flags;
    int trycnt;

    if (nr_pages <= 0)
        return 0;

    //VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
    if (!!pages != !!(gup_flags & FOLL_GET)) {
        M4UMSG(" error: __m4u_get_user_pages !!pages != !!(gup_flags & FOLL_GET), pages=0x%x, gup_flags & FOLL_GET=0x%x \n",
               (unsigned int)pages, gup_flags & FOLL_GET);
    }

    /*
     * Require read or write permissions.
     * If FOLL_FORCE is set, we only require the "MAY" flags.
     */
    vm_flags  = (gup_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
    vm_flags &= (gup_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
    i = 0;

    M4UDBG("Trying to get_user_pages from start vaddr 0x%08x with %d pages\n", start, nr_pages);

    do {
        struct vm_area_struct *vma;

        M4UDBG("For a new vma area from 0x%08x\n", start);
        vma = find_extend_vma(mm, start);
        if (!vma) {
            M4UMSG("error: the vma is not found, start=0x%x, module=%d \n",
                   (unsigned int)start, eModuleID);
            return i ? i : -EFAULT;
        }
        if (((~vma->vm_flags) & (VM_IO | VM_PFNMAP | VM_SHARED | VM_WRITE)) == 0) {
            M4UMSG("error: m4u_get_pages(): bypass pmem garbage pages! vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                   (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
            return i ? i : -EFAULT;
        }
        if (vma->vm_flags & VM_IO) {
            M4UDBG("warning: vma is marked as VM_IO \n");
        }
        if (vma->vm_flags & VM_PFNMAP) {
            M4UMSG("error: vma permission is not correct, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                   (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
            M4UMSG("hint: maybe the memory is remapped with un-permitted vma->vm_flags! \n");
            //m4u_dump_maps(start);
            return i ? i : -EFAULT;
        }
        if (!(vm_flags & vma->vm_flags)) {
            M4UMSG("error: vm_flags invalid, vm_flags=0x%x, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                   (unsigned int)vm_flags, (unsigned int)(vma->vm_flags),
                   (unsigned int)start, eModuleID);
            //m4u_dump_maps(start);
            return i ? : -EFAULT;
        }

        do {
            struct page *page;
            unsigned int foll_flags = gup_flags;

            /*
             * If we have a pending SIGKILL, don't keep faulting
             * pages and potentially allocating memory.
             */
            if (unlikely(fatal_signal_pending(current)))
                return i ? i : -ERESTARTSYS;

            MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart,
                           eModuleID, start & (~0xFFF));
            page = follow_page(vma, start, foll_flags);
            MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd,
                           eModuleID, 0x1000);

            while (!page) {
                int ret;

                M4UDBG("Trying to allocate for %dth page(vaddr: 0x%08x)\n", i, start);
                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagStart,
                               eModuleID, start & (~0xFFF));
                ret = handle_mm_fault(mm, vma, start,
                                      (foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagEnd,
                               eModuleID, 0x1000);
                if (ret & VM_FAULT_ERROR) {
                    if (ret & VM_FAULT_OOM) {
                        M4UMSG("handle_mm_fault() error: no memory, vaddr:0x%08lx (%d pages are allocated), module=%d\n",
                               start, i, eModuleID);
                        //m4u_dump_maps(start);
                        return i ? i : -ENOMEM;
                    }
                    if (ret & (VM_FAULT_HWPOISON | VM_FAULT_SIGBUS)) {
                        M4UMSG("handle_mm_fault() error: invalid memory address, vaddr:0x%lx (%d pages are allocated), module=%d\n",
                               start, i, eModuleID);
                        //m4u_dump_maps(start);
                        return i ? i : -EFAULT;
                    }
                    BUG();
                }
                if (ret & VM_FAULT_MAJOR)
                    tsk->maj_flt++;
                else
                    tsk->min_flt++;

                /*
                 * The VM_FAULT_WRITE bit tells us that
                 * do_wp_page has broken COW when necessary,
                 * even if maybe_mkwrite decided not to set
                 * pte_write. We can thus safely do subsequent
                 * page lookups as if they were reads. But only
                 * do so when looping for pte_write is futile:
                 * in some cases userspace may also be wanting
                 * to write to the gotten user page, which a
                 * read fault here might prevent (a readonly
                 * page might get reCOWed by userspace write).
                 */
                if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
                    foll_flags &= ~FOLL_WRITE;

                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart,
                               eModuleID, start & (~0xFFF));
                page = follow_page(vma, start, foll_flags);
                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd,
                               eModuleID, 0x1000);
            }

            if (IS_ERR(page)) {
                M4UMSG("handle_mm_fault() error: faulty page is returned, vaddr:0x%lx (%d pages are allocated), module=%d \n",
                       start, i, eModuleID);
                //m4u_dump_maps(start);
                return i ? i : PTR_ERR(page);
            }

            if (pages) {
                pages[i] = page;
                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagStart,
                               eModuleID, start & (~0xFFF));
                /* Use retry version to guarantee it will succeed in getting the lock */
                trycnt = 3000;
                do {
                    if (trylock_page(page)) {
                        mlock_vma_page(page);
                        unlock_page(page);
                        //make sure hw pte is not 0
                        {
                            int i;
                            for (i = 0; i < 3000; i++) {
                                if (!m4u_user_v2p(start)) {
                                    handle_mm_fault(mm, vma, start,
                                                    (foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
                                    cond_resched();
                                } else {
                                    break;
                                }
                            }
                            if (i == 3000)
                                M4UMSG("error: cannot handle_mm_fault to get hw pte: va=0x%x\n", start);
                        }
                        break;
                    }
                } while (trycnt-- > 0);

                if (PageMlocked(page) == 0) {
                    M4UMSG("Can't mlock page\n");
                    dump_page(page);
                } else {
                    unsigned int pfn = page_to_pfn(page);
                    if (pfn < mlock_cnt_size) {
                        pMlock_cnt[page_to_pfn(page)]++;
                    } else {
                        M4UERR("mlock_cnt_size is too small: pfn=%d, size=%d\n", pfn, mlock_cnt_size);
                    }
                    //M4UMSG("lock page:\n");
                    //dump_page(page);
                }
                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagEnd,
                               eModuleID, 0x1000);
            }

            if (vmas)
                vmas[i] = vma;

            i++;
            start += PAGE_SIZE;
            nr_pages--;
        } while (nr_pages && start < vma->vm_end);
    } while (nr_pages);

    return i;
}
ErrorStack MasstreeStoragePimpl::fatify_first_root_double(thread::Thread* context) {
  MasstreeIntermediatePage* root;
  WRAP_ERROR_CODE(get_first_root(context, true, &root));
  ASSERT_ND(root->is_locked());
  ASSERT_ND(!root->is_moved());

  // assure that all children have volatile version
  for (MasstreeIntermediatePointerIterator it(root); it.is_valid(); it.next()) {
    if (it.get_pointer().volatile_pointer_.is_null()) {
      MasstreePage* child;
      WRAP_ERROR_CODE(follow_page(
        context,
        true,
        const_cast<DualPagePointer*>(&it.get_pointer()),
        &child));
    }
    ASSERT_ND(!it.get_pointer().volatile_pointer_.is_null());
  }

  std::vector<Child> original_children = list_children(root);
  ASSERT_ND(original_children.size() * 2U <= kMaxIntermediatePointers);
  std::vector<Child> new_children;
  for (const Child& child : original_children) {
    CHECK_ERROR(split_a_child(context, root, child, &new_children));
  }
  ASSERT_ND(new_children.size() >= original_children.size());

  memory::NumaCoreMemory* memory = context->get_thread_memory();
  memory::PagePoolOffset new_offset = memory->grab_free_volatile_page();
  if (new_offset == 0) {
    return ERROR_STACK(kErrorCodeMemoryNoFreePages);
  }
  // from now on no failure (we grabbed a free page).
  VolatilePagePointer new_pointer = combine_volatile_page_pointer(
    context->get_numa_node(),
    kVolatilePointerFlagSwappable,  // pointer to root page might be swapped!
    get_first_root_pointer().volatile_pointer_.components.mod_count + 1,
    new_offset);
  MasstreeIntermediatePage* new_root
    = context->resolve_newpage_cast<MasstreeIntermediatePage>(new_pointer);
  new_root->initialize_volatile_page(
    get_id(),
    new_pointer,
    0,
    root->get_btree_level(),  // same as current root. this is not grow_root
    kInfimumSlice,
    kSupremumSlice);

  // no concurrent access to the new page, but just for the sake of assertion in the func.
  PageVersionLockScope new_scope(context, new_root->get_version_address());
  new_root->split_foster_migrate_records_new_first_root(&new_children);
  ASSERT_ND(count_children(new_root) == new_children.size());
  verify_new_root(context, new_root, new_children);

  // set the new first-root pointer.
  assorted::memory_fence_release();
  get_first_root_pointer().volatile_pointer_.word = new_pointer.word;
  // first-root snapshot pointer is unchanged.

  // old root page and the direct children are now retired
  assorted::memory_fence_acq_rel();
  root->set_moved();  // not quite moved, but assertions assume that.
  root->set_retired();
  context->collect_retired_volatile_page(
    construct_volatile_page_pointer(root->header().page_id_));
  for (const Child& child : original_children) {
    MasstreePage* original_page = context->resolve_cast<MasstreePage>(child.pointer_);
    if (original_page->is_moved()) {
      PageVersionLockScope scope(context, original_page->get_version_address());
      original_page->set_retired();
      context->collect_retired_volatile_page(child.pointer_);
    } else {
      // This means the page had too few records to split. We must keep it.
    }
  }
  assorted::memory_fence_acq_rel();

  LOG(INFO) << "Split done. " << original_children.size() << " -> " << new_children.size();
  return kRetOk;
}
/*
 * Please read Documentation/cachetlb.txt before using this function,
 * accessing foreign memory spaces can cause cache coherency problems.
 *
 * Accessing a VM_IO area is even more dangerous, therefore the function
 * fails if pages is != NULL and a VM_IO area is found.
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                   unsigned long start, int len, int write, int force,
                   struct page **pages, struct vm_area_struct **vmas)
{
    int i;
    unsigned int flags;

    /*
     * Require read or write permissions.
     * If 'force' is set, we only require the "MAY" flags.
     */
    flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
    flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
    i = 0;

    do {
        struct vm_area_struct *vma;

        vma = find_extend_vma(mm, start);
        if (!vma || (pages && vma->vm_flags & VM_IO) || !(flags & vma->vm_flags))
            return i ? : -EFAULT;

        spin_lock(&mm->page_table_lock);
        do {
            struct page *map;

            while (!(map = follow_page(mm, start, write))) {
                spin_unlock(&mm->page_table_lock);
                switch (handle_mm_fault(mm, vma, start, write)) {
                case 1:
                    tsk->min_flt++;
                    break;
                case 2:
                    tsk->maj_flt++;
                    break;
                case 0:
                    if (i)
                        return i;
                    return -EFAULT;
                default:
                    if (i)
                        return i;
                    return -ENOMEM;
                }
                spin_lock(&mm->page_table_lock);
            }
            if (pages) {
                pages[i] = get_page_map(map);
                /* FIXME: call the correct function,
                 * depending on the type of the found page
                 */
                if (!pages[i])
                    goto bad_page;
                page_cache_get(pages[i]);
            }
            if (vmas)
                vmas[i] = vma;
            i++;
            start += PAGE_SIZE;
            len--;
        } while (len && start < vma->vm_end);
        spin_unlock(&mm->page_table_lock);
    } while (len);
out:
    return i;

    /*
     * We found an invalid page in the VMA. Release all we have
     * so far and fail.
     */
bad_page:
    spin_unlock(&mm->page_table_lock);
    while (i--)
        page_cache_release(pages[i]);
    i = -EFAULT;
    goto out;
}
/* Translate virtual address to physical address. */
unsigned long xencomm_vtop(unsigned long vaddr)
{
    struct page *page;
    struct vm_area_struct *vma;

    if (vaddr == 0)
        return 0UL;

    if (REGION_NUMBER(vaddr) == 5) {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep;

        /*
         * On ia64, TASK_SIZE refers to current. It is not initialized
         * during boot. Furthermore the kernel is relocatable and
         * __pa() doesn't work on such addresses.
         */
        if (vaddr >= KERNEL_START
            && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
            return vaddr - kernel_virtual_offset;

        /* In kernel area -- virtually mapped. */
        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd) || pgd_bad(*pgd))
            return ~0UL;

        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud) || pud_bad(*pud))
            return ~0UL;

        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd) || pmd_bad(*pmd))
            return ~0UL;

        ptep = pte_offset_kernel(pmd, vaddr);
        if (!ptep)
            return ~0UL;

        return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
    }

    if (vaddr > TASK_SIZE) {
        /* percpu variables */
        if (REGION_NUMBER(vaddr) == 7 &&
            REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
            return ia64_tpa(vaddr);

        /* kernel address */
        return __pa(vaddr);
    }

    vma = find_extend_vma(current->mm, vaddr);
    if (!vma)
        return ~0UL;

    /* We assume the page is modified. */
    page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
    if (!page)
        return ~0UL;

    return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
}
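Every kernel snippet above is a variation on one shape: resolve the VMA covering the address, check its permission flags, ask follow_page() for the backing struct page, and fall back to handle_mm_fault() when the page is not yet present. The sketch below distills that shape; it is a minimal illustration, not code from any of the sources above, and it assumes the kernel era in which follow_page() takes (vma, address, foll_flags) and handle_mm_fault() takes (mm, vma, address, fault_flags), with the caller holding mmap_sem for read. The name follow_user_page_sketch() is hypothetical.

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>

/*
 * Illustrative sketch only; caller must hold mm->mmap_sem for read.
 * On success a reference is taken via FOLL_GET; the caller drops it
 * with put_page() when done.
 */
static struct page *follow_user_page_sketch(struct mm_struct *mm,
                                            unsigned long addr, int write)
{
    struct vm_area_struct *vma;
    struct page *page;
    unsigned int foll_flags = FOLL_GET | (write ? FOLL_WRITE : 0);

    /* Locate (and, for a growable stack, possibly extend) the covering VMA. */
    vma = find_extend_vma(mm, addr);
    if (!vma ||
        (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
        !(vma->vm_flags & (write ? VM_WRITE : VM_READ)))
        return NULL;

    /* NULL from follow_page() means the page is not (yet) present: fault it in. */
    while (!(page = follow_page(vma, addr, foll_flags))) {
        int ret = handle_mm_fault(mm, vma, addr,
                                  write ? FAULT_FLAG_WRITE : 0);
        if (ret & VM_FAULT_ERROR)
            return NULL;    /* OOM, SIGBUS, ...: give up */
    }

    /* Special mappings may yield an ERR_PTR() instead of a struct page. */
    if (IS_ERR(page))
        return NULL;

    return page;
}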