Beispiel #1
0
/*
 * Map a kernel virtual address or kernel logical address to a phys address.
 *
 * @virt:  address to translate
 * @write: handed to follow_page() as its third argument
 *
 * Returns the physical address, or 0 when the address cannot be translated:
 * it lies at or above high_memory, no VMA is found for it, follow_page()
 * finds no page, or the resulting pfn is not valid.
 */
static inline u32 physical_address(u32 virt, int write)
{
    struct mm_struct *mm;
    struct vm_area_struct *vma;
    struct page *page;

    DPRINTK(" get physical address: virt %x , write %d\n", virt, write);

    /* kernel static-mapped address */
    if (virt_addr_valid(virt))
        return __pa((u32) virt);

    /*
     * Cast so that both sides of the comparison are integers
     * (high_memory is presumably a pointer -- confirm).
     */
    if (virt >= (u32) high_memory)
        return 0;

    /* Addresses at or above TASK_SIZE are looked up in init_mm. */
    mm = (virt >= TASK_SIZE) ? &init_mm : current->mm;

    /* A missing VMA must not be handed on to follow_page(). */
    vma = find_extend_vma(mm, virt);
    if (!vma)
        return 0;

    /*
     * No page mapped: bail out instead of computing a pfn from NULL.
     * NOTE(review): if follow_page() in this tree can also return ERR_PTR
     * values, those need an IS_ERR() check here as well.
     */
    page = follow_page(vma, (u32) virt, write);
    if (!page)
        return 0;

    if (!pfn_valid(page_to_pfn(page)))
        return 0;

    /* Page frame base plus the offset of virt inside its page. */
    return ((page_to_pfn(page) << PAGE_SHIFT) |
            ((u32) virt & (PAGE_SIZE - 1)));
}
Beispiel #2
0
/*
 * Print the 32-bit and/or 64-bit vDSO images page by page.
 *
 * For every page of an image, the page backing the kernel copy is passed to
 * dump_one_vdso_page() together with the page found by follow_page() at the
 * matching offset from vma->vm_start (NULL when no vma, or no vma->vm_mm,
 * is available).
 *
 * With vma == NULL both images are dumped; otherwise only the image selected
 * by the TIF_32BIT thread flag is dumped.
 */
static void dump_vdso_pages(struct vm_area_struct * vma)
{
	int n;

	/* 32-bit image */
	if (vma == NULL || test_thread_flag(TIF_32BIT)) {
		printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
		n = 0;
		while (n < vdso32_pages) {
			struct page *kernel_pg =
				virt_to_page(vdso32_kbase + n*PAGE_SIZE);
			struct page *user_pg = NULL;

			if (vma != NULL && vma->vm_mm)
				user_pg = follow_page(vma->vm_mm,
						      vma->vm_start + n*PAGE_SIZE, 0);
			dump_one_vdso_page(kernel_pg, user_pg);
			n++;
		}
	}

	/* 64-bit image */
	if (vma == NULL || !test_thread_flag(TIF_32BIT)) {
		printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
		n = 0;
		while (n < vdso64_pages) {
			struct page *kernel_pg =
				virt_to_page(vdso64_kbase + n*PAGE_SIZE);
			struct page *user_pg = NULL;

			if (vma != NULL && vma->vm_mm)
				user_pg = follow_page(vma->vm_mm,
						      vma->vm_start + n*PAGE_SIZE, 0);
			dump_one_vdso_page(kernel_pg, user_pg);
			n++;
		}
	}
}
/**
 * Verifies one border page and recurses into whatever it points to.
 *
 * Steps, in the order performed below:
 *  - verify_page_basic() with kMasstreeBorderPageType and the given fences;
 *  - is_consecutive_inserts() must agree with whether the slots are actually ordered by
 *    (slice, remainder length);
 *  - a moved page is not inspected slot by slot: its foster-minor and foster-major children
 *    are verified instead, with the key range split at the foster fence;
 *  - otherwise every slot is checked (lock / xct-id state, slot lengths, fence bounds) and
 *    next-layer pointers are followed into verify_single_thread_layer().
 *
 * NOTE(review): CHECK_ERROR, CHECK_AND_ASSERT and WRAP_ERROR_CODE are defined elsewhere;
 * presumably each makes this function return an error when its check fails -- confirm.
 *
 * @param context    thread used to resolve page pointers and follow next-layer pointers
 * @param low_fence  every slice in the page must be >= this value
 * @param high_fence every slice must be < high_fence.slice_, unless the page reports a
 *                   supremum high fence
 * @param page       border page to verify
 * @return kRetOk when every check passes
 */
ErrorStack MasstreeStoragePimpl::verify_single_thread_border(
  thread::Thread* context,
  KeySlice low_fence,
  HighFence high_fence,
  MasstreeBorderPage* page) {
  CHECK_ERROR(verify_page_basic(context, page, kMasstreeBorderPageType, low_fence, high_fence));
  // check consecutive_inserts_. this should be consistent whether it's moved or not.
  bool sorted = true;
  for (SlotIndex i = 1; i < page->get_key_count(); ++i) {
    KeySlice prev = page->get_slice(i - 1);
    KeySlice slice = page->get_slice(i);
    KeyLength prev_len = page->get_remainder_length(i - 1);
    KeyLength len = page->get_remainder_length(i);
    // out of order: a smaller slice follows, or the same slice with a shorter remainder follows.
    if (prev > slice || (prev == slice && prev_len > len)) {
      sorted = false;
      break;
    }
  }
  CHECK_AND_ASSERT(page->is_consecutive_inserts() == sorted);

  if (page->is_moved()) {
    // foster-minor child covers [low_fence, foster fence)
    CHECK_ERROR(verify_single_thread_border(
      context,
      low_fence,
      HighFence(page->get_foster_fence(), false),
      context->resolve_cast<MasstreeBorderPage>(page->get_foster_minor())));
    // foster-major child covers [foster fence, high_fence)
    CHECK_ERROR(verify_single_thread_border(
      context,
      page->get_foster_fence(),
      high_fence,
      context->resolve_cast<MasstreeBorderPage>(page->get_foster_major())));
    return kRetOk;
  }

  // the moved case returned above, so this re-check is expected to hold.
  CHECK_AND_ASSERT(!page->is_moved());
  CHECK_AND_ASSERT(page->get_key_count() <= kBorderPageMaxSlots);
  for (SlotIndex i = 0; i < page->get_key_count(); ++i) {
    // no slot may be locked or mid-write, and its epoch must be valid.
    CHECK_AND_ASSERT(!page->get_owner_id(i)->lock_.is_keylocked());
    CHECK_AND_ASSERT(!page->get_owner_id(i)->lock_.is_rangelocked());
    CHECK_AND_ASSERT(!page->get_owner_id(i)->xct_id_.is_being_written());
    CHECK_AND_ASSERT(page->get_owner_id(i)->xct_id_.get_epoch().is_valid());
    CHECK_AND_ASSERT(page->verify_slot_lengthes(i));
    KeySlice slice = page->get_slice(i);
    CHECK_AND_ASSERT(slice >= low_fence);
    CHECK_AND_ASSERT(slice < high_fence.slice_ || page->is_high_fence_supremum());
    if (page->does_point_to_layer(i)) {
      // the next-layer flag in the xct id must match, and the pointer must lead somewhere.
      CHECK_AND_ASSERT(page->get_owner_id(i)->xct_id_.is_next_layer());
      CHECK_AND_ASSERT(!page->get_next_layer(i)->is_both_null());
      MasstreePage* next;
      // TASK(Hideaki) probably two versions: always follow volatile vs snapshot
      // so far check volatile only
      WRAP_ERROR_CODE(follow_page(context, true, page->get_next_layer(i), &next));
      CHECK_ERROR(verify_single_thread_layer(context, page->get_layer() + 1, next));
    } else {
      CHECK_AND_ASSERT(!page->get_owner_id(i)->xct_id_.is_next_layer());
    }
  }

  return kRetOk;
}
Beispiel #4
0
/*
 * Walk every VMA of @mm page by page and isolate swappable pages from the LRU
 * onto a per-zone list.
 *
 * @mm:              address space whose VMA list (mm->mmap) is scanned
 * @zone0_page_list: receives isolated pages whose page_zone_id() is 0
 * @zone1_page_list: receives isolated pages whose page_zone_id() is 1
 * @num_to_scan:     stop as soon as at least this many pages were isolated
 *
 * A page is a candidate only if it is not unevictable, not dirty, anonymous
 * and not file cache. Pages in any other zone are skipped.
 *
 * Returns the number of pages isolated.
 */
static unsigned int shrink_pages(struct mm_struct *mm,
				 struct list_head *zone0_page_list,
				 struct list_head *zone1_page_list,
				 unsigned int num_to_scan)
{
	unsigned long addr;
	unsigned int isolated = 0;
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		for (addr = vma->vm_start; addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct list_head *target = NULL;
			struct page *page;

			/* get the page for this virtual address, with a reference */
			page = follow_page(vma, addr, FOLL_GET);

			if (page && !IS_ERR(page)) {
				/* only moveable, anonymous and not dirty pages can be swapped */
				if (!PageUnevictable(page)
				    && !PageDirty(page) && PageAnon(page)
				    && (0 == page_is_file_cache(page))) {
					switch (page_zone_id(page)) {
					case 0:
						target = zone0_page_list;
						break;
					case 1:
						target = zone1_page_list;
						break;
					default:
						break;
					}
				}

				/*
				 * Isolate the page from the LRU and queue it on the
				 * per-zone list; a zero return from
				 * isolate_lru_page_compcache() is treated as success.
				 */
				if (target && !isolate_lru_page_compcache(page)) {
					list_add_tail(&page->lru, target);
					isolated++;
				}

				/*
				 * Drop the FOLL_GET reference only now: the page flags
				 * were read and the page was isolated while we still
				 * held it, so it could not be freed underneath us.
				 */
				put_page(page);
			}

			if (isolated >= num_to_scan)
				return isolated;
		}
	}

	return isolated;
}
Beispiel #5
0
/*
 * Look up (faulting in where necessary) @len pages of @mm starting at @start.
 *
 * For every page, the result of get_page_map() is stored in @pages (when
 * non-NULL) and gets an extra reference via page_cache_get() if it is not
 * NULL; the covering VMA is stored in @vmas (when non-NULL).
 *
 * Unless @force is set, the VMA must have VM_WRITE for a write request and
 * VM_READ for a read request.
 *
 * Returns the number of pages processed. If none were processed, returns
 * -EFAULT (no VMA, permission mismatch, or handle_mm_fault() returned 0) or
 * -ENOMEM (any other unexpected handle_mm_fault() result).
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas)
{
	int done = 0;

	do {
		struct vm_area_struct *area = find_extend_vma(mm, start);

		if (!area)
			return done ? done : -EFAULT;

		/* Without 'force' the VMA must allow the requested access. */
		if (!force && !(area->vm_flags & (write ? VM_WRITE : VM_READ)))
			return done ? done : -EFAULT;

		spin_lock(&mm->page_table_lock);
		do {
			struct page *found;

			for (;;) {
				int fault;

				found = follow_page(mm, start, write);
				if (found)
					break;

				/* Not mapped yet: fault it in with the lock dropped. */
				spin_unlock(&mm->page_table_lock);
				fault = handle_mm_fault(mm, area, start, write);
				if (fault == 1)
					tsk->min_flt++;
				else if (fault == 2)
					tsk->maj_flt++;
				else if (fault == 0)
					return done ? done : -EFAULT;
				else
					return done ? done : -ENOMEM;
				spin_lock(&mm->page_table_lock);
			}

			if (pages) {
				pages[done] = get_page_map(found);
				/* FIXME: call the correct function,
				 * depending on the type of the found page
				 */
				if (pages[done])
					page_cache_get(pages[done]);
			}
			if (vmas)
				vmas[done] = area;

			done++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < area->vm_end);
		spin_unlock(&mm->page_table_lock);
	} while (len);

	return done;
}
/*
 * Lock the mapping behind one page of every VMA that overlaps
 * [addr, addr + len), walking contiguous VMAs starting at the one that
 * contains @addr.
 *
 * @mm:       address space to walk
 * @addr:     start of the range
 * @len:      length of the range in bytes
 * @mappings: table of mappings locked so far; newly locked ones are appended
 * @cnt:      number of entries already in @mappings
 *
 * For a page without an anon_vma, the address_space's i_mmap_mutex is taken;
 * otherwise the anon_vma is write-locked. mapping_can_locked() decides
 * whether a given mapping should be locked at all.
 *
 * Returns the new number of entries in @mappings.
 *
 * NOTE(review): @mappings is assumed to hold G2D_MAX_VMA_MAPPING entries,
 * based on the table-full check -- confirm against the caller.
 */
static int vma_lock_mapping_one(struct mm_struct *mm, unsigned long addr,
				size_t len, unsigned long mappings[], int cnt)
{
	unsigned long end = addr + len;
	struct vm_area_struct *vma;
	struct page *page;

	/*
	 * Step to the end of the current VMA rather than adding its size:
	 * when addr starts in the middle of a VMA, adding the size would land
	 * past the start of the next one (possibly past its end) and could
	 * terminate the walk before the range is covered.
	 *
	 * The table-full test sits in the loop condition so that a table that
	 * is already full on entry is never written to.
	 */
	for (vma = find_vma(mm, addr);
		vma && (vma->vm_start <= addr) && (addr < end) &&
		(cnt < G2D_MAX_VMA_MAPPING);
		addr = vma->vm_end, vma = vma->vm_next) {
		struct anon_vma *anon;

		page = follow_page(vma, addr, 0);
		if (IS_ERR_OR_NULL(page) || !page->mapping)
			continue;

		anon = page_get_anon_vma(page);
		if (!anon) {
			struct address_space *mapping;

			/* Hold the page while its mapping is looked up and locked. */
			get_page(page);
			mapping = page_mapping(page);
			if (mapping_can_locked(
				(unsigned long)mapping, mappings, cnt)) {
				mutex_lock(&mapping->i_mmap_mutex);
				mappings[cnt++] = (unsigned long)mapping;
			}
			put_page(page);
		} else {
			if (mapping_can_locked(
					(unsigned long)anon | PAGE_MAPPING_ANON,
					mappings, cnt)) {
				anon_vma_lock_write(anon);
				mappings[cnt++] = (unsigned long)page->mapping;
			}
			put_anon_vma(anon);
		}
	}

	return cnt;
}
Beispiel #7
0
/*
 * Map a kernel virtual address or kernel logical address to a phys address.
 *
 * @virt:  address to translate
 * @write: non-zero to require write permission on the VMA and to look the
 *         page up with FOLL_WRITE; zero to require read permission
 *
 * Returns the physical address, or 0 when the address cannot be translated:
 * it lies at or above high_memory, no suitable VMA is found (missing, VM_IO
 * or VM_PFNMAP, or lacking the required permission), follow_page() finds no
 * page, or the resulting pfn is not valid.
 */
static inline u32 physical_address(u32 virt, int write)
{
    struct page *page;
    struct vm_area_struct *vm;
    struct mm_struct * mm = (virt >= TASK_SIZE)? &init_mm : current->mm;
    unsigned int vm_flags;
    unsigned int flags;

    DPRINTK(" get physical address: virt %x , write %d\n", virt, write);

    /* kernel static-mapped address */
    if (virt_addr_valid(virt))
        return __pa((u32) virt);

    if (virt >= (u32)high_memory)
        return 0;

    /*
     * Require read or write permissions.
     */
    vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);

    vm = find_extend_vma(mm, virt);
    if (!vm || (vm->vm_flags & (VM_IO | VM_PFNMAP))
        || !(vm_flags & vm->vm_flags))
        return 0;

    flags = FOLL_PTE_EXIST | FOLL_TOUCH;
    if (write)
        flags |= FOLL_WRITE;

    /*
     * No page mapped: bail out explicitly instead of computing a pfn from
     * NULL and relying on pfn_valid() to reject the result.
     * NOTE(review): if follow_page() in this tree can also return ERR_PTR
     * values, those need an IS_ERR() check here as well.
     */
    page = follow_page(vm, (u32) virt, flags);
    if (!page)
        return 0;

    if (!pfn_valid(page_to_pfn(page)))
        return 0;

    /* Page frame base plus the offset of virt inside its page. */
    return ((page_to_pfn(page) << PAGE_SHIFT) |
            ((u32) virt & (PAGE_SIZE - 1)));
}
/**
 * Verifies one intermediate page and recurses into every child page it points to.
 *
 * After verify_page_basic(), a moved page is handled by verifying its foster-minor and
 * foster-major children with the key range split at the foster fence. Otherwise the page is
 * walked as key_count + 1 minipages, each holding mini_count + 1 pointers. For every pointer
 * the expected [low, high) key range is derived from the surrounding separators, the separators
 * are checked to be strictly increasing and inside the enclosing range, and the child page is
 * verified recursively as a border or intermediate page.
 *
 * NOTE(review): CHECK_ERROR, CHECK_AND_ASSERT and WRAP_ERROR_CODE are defined elsewhere;
 * presumably each makes this function return an error when its check fails -- confirm.
 *
 * @param context    thread used to resolve and follow page pointers
 * @param low_fence  lower bound of the key range this page is expected to cover
 * @param high_fence upper bound of that range (may be marked as supremum)
 * @param page       intermediate page to verify
 * @return kRetOk when every check passes
 */
ErrorStack MasstreeStoragePimpl::verify_single_thread_intermediate(
  thread::Thread* context,
  KeySlice low_fence,
  HighFence high_fence,
  MasstreeIntermediatePage* page) {
  CHECK_ERROR(
    verify_page_basic(context, page, kMasstreeIntermediatePageType, low_fence, high_fence));

  if (page->is_moved()) {
    // foster-minor child covers [low_fence, foster fence)
    CHECK_ERROR(verify_single_thread_intermediate(
      context,
      low_fence,
      HighFence(page->get_foster_fence(), false),
      context->resolve_cast<MasstreeIntermediatePage>(page->get_foster_minor())));
    // foster-major child covers [foster fence, high_fence)
    CHECK_ERROR(verify_single_thread_intermediate(
      context,
      page->get_foster_fence(),
      high_fence,
      context->resolve_cast<MasstreeIntermediatePage>(page->get_foster_major())));
    return kRetOk;
  }

  uint8_t key_count = page->get_key_count();
  CHECK_AND_ASSERT(key_count <= kMaxIntermediateSeparators);
  // lower bound of the minipage currently being examined
  KeySlice previous_low = low_fence;
  // key_count separators delimit key_count + 1 minipages, hence "<=".
  for (uint8_t i = 0; i <= key_count; ++i) {
    HighFence mini_high(0, false);
    if (i < key_count) {
      // upper bound of this minipage is the i-th separator; separators must be strictly
      // increasing and lie strictly inside (low_fence, high_fence).
      mini_high.slice_ = page->get_separator(i);
      mini_high.supremum_ = false;
      CHECK_AND_ASSERT(high_fence.supremum_ || mini_high.slice_ < high_fence.slice_);
      if (i == 0) {
        CHECK_AND_ASSERT(mini_high.slice_ > low_fence);
      } else {
        CHECK_AND_ASSERT(mini_high.slice_ > page->get_separator(i - 1));
      }
    } else {
      // the last minipage is bounded by the page's own high fence.
      mini_high = high_fence;
    }

    MasstreeIntermediatePage::MiniPage& minipage = page->get_minipage(i);
    uint8_t mini_count = minipage.key_count_;
    CHECK_AND_ASSERT(mini_count <= kMaxIntermediateMiniSeparators);
    // lower bound of the child pointer currently being examined
    KeySlice page_low = previous_low;
    // mini_count separators delimit mini_count + 1 child pointers, hence "<=".
    for (uint8_t j = 0; j <= mini_count; ++j) {
      HighFence page_high(0, false);
      if (j < mini_count) {
        // same rule one level down: strictly increasing, strictly inside the minipage range.
        page_high.slice_ = minipage.separators_[j];
        page_high.supremum_ = false;
        CHECK_AND_ASSERT(page_high.slice_ < mini_high.slice_ || mini_high.supremum_);
        if (j == 0) {
          CHECK_AND_ASSERT(page_high.slice_ > previous_low);
        } else {
          CHECK_AND_ASSERT(page_high.slice_ > minipage.separators_[j - 1]);
        }
      } else {
        // the last pointer of the minipage is bounded by the minipage's upper bound.
        page_high = mini_high;
      }
      CHECK_AND_ASSERT(!minipage.pointers_[j].is_both_null());
      MasstreePage* next;
      // TASK(Hideaki) probably two versions: always follow volatile vs snapshot
      // so far check volatile only
      WRAP_ERROR_CODE(follow_page(context, true, &minipage.pointers_[j], &next));
      // a child stays in the same layer and sits exactly one B-tree level below this page.
      CHECK_AND_ASSERT(next->get_layer() == page->get_layer());
      CHECK_AND_ASSERT(next->get_btree_level() + 1U == page->get_btree_level());
      if (next->is_border()) {
        CHECK_ERROR(verify_single_thread_border(
          context,
          page_low,
          page_high,
          reinterpret_cast<MasstreeBorderPage*>(next)));
      } else {
        CHECK_ERROR(verify_single_thread_intermediate(
          context,
          page_low,
          page_high,
          reinterpret_cast<MasstreeIntermediatePage*>(next)));
      }

      // the next child starts where this one ended.
      page_low = page_high.slice_;
    }

    // the next minipage starts where this one ended.
    previous_low = mini_high.slice_;
  }

  return kRetOk;
}
Beispiel #9
0
/*
 * Fault in the user pages backing [va, va + len) of the current process and
 * record them in @iobuf.
 *
 * @rw:    READ means data will be written into the user buffer, so the VMA
 *         must be writable in addition to readable
 * @iobuf: kiobuf to fill; must not already contain pages
 * @va:    user virtual start address (need not be page aligned)
 * @len:   length of the buffer in bytes
 *
 * Returns 0 on success, -EINVAL if @iobuf is already mapped, the error from
 * expand_kiobuf(), -EACCES on a permission mismatch, or -EFAULT for any other
 * failure. On failure after expansion the iobuf is unmapped again.
 */
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
{
	struct mm_struct *	mm;
	struct vm_area_struct *	area = NULL;
	struct page *		pg;
	unsigned long		cur, limit;
	int			rc;
	int			idx;
	int			to_memory = (rw == READ);

	/* Refuse an iobuf that already holds a mapping. */
	if (iobuf->nr_pages)
		return -EINVAL;

	mm = current->mm;
	dprintk ("map_user_kiobuf: begin\n");

	/* Round the range out to whole pages and make room for them. */
	cur = va & PAGE_MASK;
	limit = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
	rc = expand_kiobuf(iobuf, (limit - cur) >> PAGE_SHIFT);
	if (rc)
		return rc;

	down(&mm->mmap_sem);

	rc = -EFAULT;
	iobuf->locked = 0;
	iobuf->offset = va & ~PAGE_MASK;
	iobuf->length = len;

	/*
	 * Fault in and pin each page of the range in turn.
	 */
	for (idx = 0; cur < limit; cur += PAGE_SIZE) {
		/* Look up a new VMA whenever we step past the current one. */
		if (!area || cur >= area->vm_end) {
			area = find_vma(current->mm, cur);
			if (!area)
				goto fail;
			if (area->vm_start > cur) {
				/* Below the VMA: only acceptable if it can grow down. */
				if (!(area->vm_flags & VM_GROWSDOWN))
					goto fail;
				if (expand_stack(area, cur))
					goto fail;
			}
			if (!(area->vm_flags & VM_READ) ||
			    (to_memory && !(area->vm_flags & VM_WRITE))) {
				rc = -EACCES;
				goto fail;
			}
		}

		if (handle_mm_fault(current->mm, area, cur, to_memory) <= 0)
			goto fail;

		spin_lock(&mm->page_table_lock);
		pg = follow_page(cur);
		if (!pg) {
			spin_unlock(&mm->page_table_lock);
			dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
			goto fail;
		}
		pg = get_page_map(pg);
		if (pg)
			atomic_inc(&pg->count);
		else
			printk (KERN_INFO "Mapped page missing [%d]\n", idx);
		spin_unlock(&mm->page_table_lock);

		/* Record the page (possibly NULL) and publish the new count. */
		iobuf->maplist[idx] = pg;
		idx++;
		iobuf->nr_pages = idx;
	}

	up(&mm->mmap_sem);
	dprintk ("map_user_kiobuf: end OK\n");
	return 0;

 fail:
	up(&mm->mmap_sem);
	unmap_kiobuf(iobuf);
	dprintk ("map_user_kiobuf: end %d\n", rc);
	return rc;
}
Beispiel #10
0
int __m4u_get_user_pages(int eModuleID, struct task_struct *tsk, struct mm_struct *mm, 
                     unsigned long start, int nr_pages, unsigned int gup_flags,
                     struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned long vm_flags;
	int trycnt;

        if (nr_pages <= 0)
                return 0;

        //VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
        if(!!pages != !!(gup_flags & FOLL_GET)) {
            M4UMSG(" error: __m4u_get_user_pages !!pages != !!(gup_flags & FOLL_GET), pages=0x%x, gup_flags & FOLL_GET=0x%x \n",
                    (unsigned int)pages, gup_flags & FOLL_GET);
        }

        /*   
         * Require read or write permissions.
         * If FOLL_FORCE is set, we only require the "MAY" flags.
         */
        vm_flags  = (gup_flags & FOLL_WRITE) ?
                        (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= (gup_flags & FOLL_FORCE) ?
                        (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0; 

        M4UDBG("Trying to get_user_pages from start vaddr 0x%08x with %d pages\n", start, nr_pages);

        do { 
                struct vm_area_struct *vma;
                M4UDBG("For a new vma area from 0x%08x\n", start);
                vma = find_extend_vma(mm, start);

                if (!vma)
                {
                    M4UMSG("error: the vma is not found, start=0x%x, module=%d \n", 
                           (unsigned int)start, eModuleID);
                    return i ? i : -EFAULT;
                } 
                if( ((~vma->vm_flags) & (VM_IO|VM_PFNMAP|VM_SHARED|VM_WRITE)) == 0 )
                {
                    M4UMSG("error: m4u_get_pages(): bypass pmem garbage pages! vma->vm_flags=0x%x, start=0x%x, module=%d \n", 
                            (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
                	return i ? i : -EFAULT;;
                }                     
                if(vma->vm_flags & VM_IO)
                {
                	  M4UDBG("warning: vma is marked as VM_IO \n");
                }
                if(vma->vm_flags & VM_PFNMAP)
                {
                    M4UMSG("error: vma permission is not correct, vma->vm_flags=0x%x, start=0x%x, module=%d \n", 
                            (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
                    M4UMSG("hint: maybe the memory is remapped with un-permitted vma->vm_flags! \n");          
                    //m4u_dump_maps(start);
                    return i ? i : -EFAULT;;
                }
                if(!(vm_flags & vma->vm_flags)) 
                {
                    M4UMSG("error: vm_flags invalid, vm_flags=0x%x, vma->vm_flags=0x%x, start=0x%x, module=%d \n", 
                           (unsigned int)vm_flags,
                           (unsigned int)(vma->vm_flags), 
                           (unsigned int)start,
                            eModuleID);
                    //m4u_dump_maps(start);                  
                    return i ? : -EFAULT;
                }

                do {
                        struct page *page;
                        unsigned int foll_flags = gup_flags;
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory.
                         */
                        if (unlikely(fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
                        MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                        page = follow_page(vma, start, foll_flags);
                        MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);
                        while (!page) {
                                int ret;

                                M4UDBG("Trying to allocate for %dth page(vaddr: 0x%08x)\n", i, start);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                                ret = handle_mm_fault(mm, vma, start,
                                        (foll_flags & FOLL_WRITE) ?
                                        FAULT_FLAG_WRITE : 0);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagEnd, eModuleID, 0x1000);
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM) {
                                                M4UMSG("handle_mm_fault() error: no memory, aaddr:0x%08lx (%d pages are allocated), module=%d\n", 
                                                start, i, eModuleID);
                                                //m4u_dump_maps(start);
                                                return i ? i : -ENOMEM;
					                    }
                                        if (ret &
                                            (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) {
                                                M4UMSG("handle_mm_fault() error: invalide memory address, vaddr:0x%lx (%d pages are allocated), module=%d\n", 
                                                start, i, eModuleID);
                                                //m4u_dump_maps(start);
                                                return i ? i : -EFAULT;
					                    }
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                                page = follow_page(vma, start, foll_flags);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);
                        }
                        if (IS_ERR(page)) {
                                M4UMSG("handle_mm_fault() error: faulty page is returned, vaddr:0x%lx (%d pages are allocated), module=%d \n", 
                                        start, i, eModuleID);
                                //m4u_dump_maps(start);
                                return i ? i : PTR_ERR(page);
			            }
                        if (pages) {
                                pages[i] = page;
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagStart, eModuleID, start&(~0xFFF));
				
				/* Use retry version to guarantee it will succeed in getting the lock */
				trycnt = 3000;
				do {
					if (trylock_page(page)) {
						mlock_vma_page(page);
						unlock_page(page);

                        //make sure hw pte is not 0
                        {
                            int i;
                            for(i=0; i<3000; i++)
                            {   
                                if(!m4u_user_v2p(start))
                                {
                                    handle_mm_fault(mm, vma, start, (foll_flags & FOLL_WRITE)? FAULT_FLAG_WRITE : 0);
                                    cond_resched();
                                }
                                else
                                    break;
                            }
                            if(i==3000)
                                M4UMSG("error: cannot handle_mm_fault to get hw pte: va=0x%x\n", start);
                        }

                        break;
					}
				} while (trycnt-- > 0);

                                if(PageMlocked(page)==0)
                                {
                                    M4UMSG("Can't mlock page\n");
                                    dump_page(page);
                                }
                                else
                                {
                                    unsigned int pfn = page_to_pfn(page);
                                    if(pfn < mlock_cnt_size)
                                    {
                                        pMlock_cnt[page_to_pfn(page)]++;
                                    }
                                    else
                                    {
                                        M4UERR("mlock_cnt_size is too small: pfn=%d, size=%d\n", pfn, mlock_cnt_size);
                                    }
                                    
                                    //M4UMSG("lock page:\n");
                                    //dump_page(page);
                                }
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagEnd, eModuleID, 0x1000);

                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        nr_pages--;
                } while (nr_pages && start < vma->vm_end);
        } while (nr_pages);
/**
 * Replaces the first-layer root page with a new root that has more children, obtained by
 * splitting each existing child of the current root.
 *
 * Sequence, as performed below:
 *  1. fetch the current first root (asserted to be locked and not moved);
 *  2. make sure every child pointer has a volatile page by following it;
 *  3. call split_a_child() for every original child, collecting the results in new_children;
 *  4. grab a free volatile page, initialize it as the new root at the same B-tree level and
 *     migrate new_children into it;
 *  5. publish the new root pointer, then mark the old root moved and retired, and retire each
 *     original child that reports is_moved().
 *
 * @param context thread whose memory pool supplies the new page and which collects retired pages
 * @return kRetOk on success; kErrorCodeMemoryNoFreePages when no free volatile page is
 *         available; NOTE(review): WRAP_ERROR_CODE / CHECK_ERROR presumably propagate errors from
 *         get_first_root(), follow_page() and split_a_child() -- confirm.
 */
ErrorStack MasstreeStoragePimpl::fatify_first_root_double(thread::Thread* context) {
  MasstreeIntermediatePage* root;
  WRAP_ERROR_CODE(get_first_root(context, true, &root));
  ASSERT_ND(root->is_locked());
  ASSERT_ND(!root->is_moved());

  // assure that all children have volatile version
  for (MasstreeIntermediatePointerIterator it(root); it.is_valid(); it.next()) {
    if (it.get_pointer().volatile_pointer_.is_null()) {
      // "child" itself is unused; the call is made so that the volatile pointer gets set
      // (the assertion right after the if checks exactly that).
      MasstreePage* child;
      WRAP_ERROR_CODE(follow_page(
        context,
        true,
        const_cast<DualPagePointer*>(&it.get_pointer()),
        &child));
    }
    ASSERT_ND(!it.get_pointer().volatile_pointer_.is_null());
  }

  std::vector<Child> original_children = list_children(root);
  // the new root is expected to be able to hold up to twice as many pointers as the old one.
  ASSERT_ND(original_children.size() * 2U <= kMaxIntermediatePointers);
  std::vector<Child> new_children;
  for (const Child& child : original_children) {
    CHECK_ERROR(split_a_child(context, root, child, &new_children));
  }
  ASSERT_ND(new_children.size() >= original_children.size());

  memory::NumaCoreMemory* memory = context->get_thread_memory();
  memory::PagePoolOffset new_offset = memory->grab_free_volatile_page();
  // offset 0 is treated as "no page available".
  if (new_offset == 0) {
    return ERROR_STACK(kErrorCodeMemoryNoFreePages);
  }
  // from now on no failure (we grabbed a free page).

  // the new pointer carries the current root pointer's mod_count incremented by one.
  VolatilePagePointer new_pointer = combine_volatile_page_pointer(
    context->get_numa_node(),
    kVolatilePointerFlagSwappable,  // pointer to root page might be swapped!
    get_first_root_pointer().volatile_pointer_.components.mod_count + 1,
    new_offset);
  MasstreeIntermediatePage* new_root
    = context->resolve_newpage_cast<MasstreeIntermediatePage>(new_pointer);
  new_root->initialize_volatile_page(
    get_id(),
    new_pointer,
    0,
    root->get_btree_level(),  // same as current root. this is not grow_root
    kInfimumSlice,
    kSupremumSlice);
  // no concurrent access to the new page, but just for the sake of assertion in the func.
  PageVersionLockScope new_scope(context, new_root->get_version_address());
  new_root->split_foster_migrate_records_new_first_root(&new_children);
  ASSERT_ND(count_children(new_root) == new_children.size());
  verify_new_root(context, new_root, new_children);

  // set the new first-root pointer.
  // the release fence is issued after the new root is populated and before the pointer store.
  assorted::memory_fence_release();
  get_first_root_pointer().volatile_pointer_.word = new_pointer.word;
  // first-root snapshot pointer is unchanged.

  // old root page and the direct children are now retired
  assorted::memory_fence_acq_rel();
  root->set_moved();  // not quite moved, but assertions assume that.
  root->set_retired();
  context->collect_retired_volatile_page(
    construct_volatile_page_pointer(root->header().page_id_));
  for (const Child& child : original_children) {
    MasstreePage* original_page = context->resolve_cast<MasstreePage>(child.pointer_);
    if (original_page->is_moved()) {
      // a moved child is retired under its own page-version lock scope.
      PageVersionLockScope scope(context, original_page->get_version_address());
      original_page->set_retired();
      context->collect_retired_volatile_page(child.pointer_);
    } else {
      // This means, the page had too small records to split. We must keep it.
    }
  }
  assorted::memory_fence_acq_rel();

  LOG(INFO) << "Split done. " << original_children.size() << " -> " << new_children.size();

  return kRetOk;
}
Beispiel #12
0
/*
 * Look up (faulting in where necessary) @len pages of @mm starting at @start.
 *
 * Please read Documentation/cachetlb.txt before using this function,
 * accessing foreign memory spaces can cause cache coherency problems.
 *
 * Accessing a VM_IO area is even more dangerous, therefore the function
 * fails if pages is != NULL and a VM_IO area is found.
 *
 * Returns the number of pages processed. If none were processed, returns
 * -EFAULT (no VMA, VM_IO with @pages set, permission mismatch, or
 * handle_mm_fault() returned 0) or -ENOMEM (any other unexpected
 * handle_mm_fault() result). If get_page_map() rejects a page, every
 * reference taken so far is released and -EFAULT is returned.
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas)
{
	unsigned int required;
	int count = 0;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	required = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	required &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	do {
		struct vm_area_struct *area = find_extend_vma(mm, start);

		if (!area)
			return count ? count : -EFAULT;
		if (pages && (area->vm_flags & VM_IO))
			return count ? count : -EFAULT;
		if (!(required & area->vm_flags))
			return count ? count : -EFAULT;

		spin_lock(&mm->page_table_lock);
		do {
			struct page *found;

			for (;;) {
				int fault;

				found = follow_page(mm, start, write);
				if (found)
					break;

				/* Not mapped yet: fault it in with the lock dropped. */
				spin_unlock(&mm->page_table_lock);
				fault = handle_mm_fault(mm, area, start, write);
				if (fault == 1)
					tsk->min_flt++;
				else if (fault == 2)
					tsk->maj_flt++;
				else if (fault == 0)
					return count ? count : -EFAULT;
				else
					return count ? count : -ENOMEM;
				spin_lock(&mm->page_table_lock);
			}

			if (pages) {
				pages[count] = get_page_map(found);
				/* FIXME: call the correct function,
				 * depending on the type of the found page
				 */
				if (!pages[count]) {
					/*
					 * We found an invalid page in the VMA.
					 * Release all we have so far and fail.
					 */
					spin_unlock(&mm->page_table_lock);
					while (count--)
						page_cache_release(pages[count]);
					return -EFAULT;
				}
				page_cache_get(pages[count]);
			}
			if (vmas)
				vmas[count] = area;

			count++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < area->vm_end);
		spin_unlock(&mm->page_table_lock);
	} while (len);

	return count;
}
Beispiel #13
0
/*
 * Translate virtual address to physical address.
 *
 * @vaddr: virtual address to translate; 0 is passed through as 0
 *
 * Handles, in order: region-5 addresses (the kernel image range by fixed
 * offset, everything else by walking the kernel page tables), other
 * addresses above TASK_SIZE (per-cpu area via ia64_tpa(), the rest via
 * __pa()), and user addresses of the current process via follow_page().
 *
 * Returns the physical address, 0 for a zero @vaddr, or ~0UL when the
 * address cannot be translated.
 */
unsigned long
xencomm_vtop(unsigned long vaddr)
{
	struct page *page;
	struct vm_area_struct *vma;

	if (vaddr == 0)
		return 0UL;

	if (REGION_NUMBER(vaddr) == 5) {
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *ptep;

		/* On ia64, TASK_SIZE refers to current.  It is not initialized
		   during boot.
		   Furthermore the kernel is relocatable and __pa() doesn't
		   work on  addresses.  */
		if (vaddr >= KERNEL_START
		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE))
			return vaddr - kernel_virtual_offset;

		/* In kernel area -- virtually mapped.  */
		pgd = pgd_offset_k(vaddr);
		if (pgd_none(*pgd) || pgd_bad(*pgd))
			return ~0UL;

		pud = pud_offset(pgd, vaddr);
		if (pud_none(*pud) || pud_bad(*pud))
			return ~0UL;

		pmd = pmd_offset(pud, vaddr);
		if (pmd_none(*pmd) || pmd_bad(*pmd))
			return ~0UL;

		ptep = pte_offset_kernel(pmd, vaddr);
		if (!ptep)
			return ~0UL;

		/* Frame bits from the PTE plus the offset inside the page. */
		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
	}

	if (vaddr > TASK_SIZE) {
		/*
		 * percpu variables: use the translation produced by
		 * ia64_tpa() instead of discarding it and falling through
		 * to __pa().
		 */
		if (REGION_NUMBER(vaddr) == 7 &&
		    REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS))
			return ia64_tpa(vaddr);

		/* kernel address */
		return __pa(vaddr);
	}

	vma = find_extend_vma(current->mm, vaddr);
	if (!vma)
		return ~0UL;

	/* We assume the page is modified.  */
	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
	if (!page)
		return ~0UL;

	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
}