Example #1
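/*
 * Walk this node's zones from highest to lowest and reclaim pages in
 * each zone flagged as needing balance.  When try_to_free_pages()
 * makes no progress, the flag is cleared and kswapd naps for a second
 * before moving on; without kernel preemption it also yields between
 * zones if a reschedule is pending.  Returns nonzero if any zone still
 * needs balancing afterwards.
 */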
static int kswapd_balance_pgdat(pg_data_t * pgdat)
{
	int need_more_balance = 0, i;
	zone_t * zone;

	for (i = pgdat->nr_zones-1; i >= 0; i--) {
		zone = pgdat->node_zones + i;
		debug_lock_break(0);
#ifndef CONFIG_PREEMPT
		if (unlikely(current->need_resched))
			schedule();
#endif
		if (!zone->need_balance)
			continue;
		if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
			zone->need_balance = 0;
			__set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(HZ);
			continue;
		}
		if (check_classzone_need_balance(zone))
			need_more_balance = 1;
		else
			zone->need_balance = 0;
	}

	return need_more_balance;
}
Example #2
/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;
	int found = 0;

	DEFINE_LOCK_COUNT();

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
		next = tmp->next;
		if (!atomic_read(&dentry->d_count)) {
			list_del(&dentry->d_lru);
			list_add(&dentry->d_lru, dentry_unused.prev);
			found++;
		}
		if (TEST_LOCK_COUNT(500) && found > 10) {
			debug_lock_break(1);
			if (conditional_schedule_needed())
				goto out;
			RESET_LOCK_COUNT();
		}
		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
#ifdef DCACHE_DEBUG
printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
dentry->d_parent->d_name.name, dentry->d_name.name, found);
#endif
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_child.next; 
		this_parent = this_parent->d_parent;
#ifdef DCACHE_DEBUG
printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
#endif
		goto resume;
	}
out:
	spin_unlock(&dcache_lock);
	return found;
}
Example #3
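/*
 * Prune up to "count" dentries from the tail of the dentry_unused
 * list, putting recently referenced entries back on the list (with
 * their DCACHE_REFERENCED flag cleared) instead of freeing them.
 * The lock-break hooks from the low-latency patch periodically give
 * up dcache_lock and restart the scan when a reschedule is pending.
 */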
void prune_dcache(int count)
{
	DEFINE_LOCK_COUNT();

	spin_lock(&dcache_lock);

redo:
	for (;;) {
		struct dentry *dentry;
		struct list_head *tmp;

		if (TEST_LOCK_COUNT(100)) {
			RESET_LOCK_COUNT();
			debug_lock_break(1);
			if (conditional_schedule_needed()) {
				break_spin_lock(&dcache_lock);
				goto redo;
			}
		}

		tmp = dentry_unused.prev;

		if (tmp == &dentry_unused)
			break;
		list_del_init(tmp);
		dentry = list_entry(tmp, struct dentry, d_lru);

		/* If the dentry was recently referenced, don't free it. */
		if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
			dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
			list_add(&dentry->d_lru, &dentry_unused);
			continue;
		}
		dentry_stat.nr_unused--;

		/* Unused dentry with a count? */
		if (atomic_read(&dentry->d_count))
			BUG();

		prune_one_dentry(dentry);
		if (!--count)
			break;
	}
	spin_unlock(&dcache_lock);
}
Example #4
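/*
 * Point every pte in [address, address + size), clamped to this pmd,
 * at the write-protected zero page, clearing and forgetting whatever
 * was mapped there before.  The lock-break hook at the top lets the
 * low-latency patch briefly drop and re-take mm->page_table_lock.
 */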
static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
				     unsigned long address, unsigned long size,
				     pgprot_t prot)
{
	unsigned long end;

	debug_lock_break(1);
	break_spin_lock(&mm->page_table_lock);

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
		pte_t oldpage = ptep_get_and_clear(pte);
		set_pte(pte, zero_pte);
		forget_pte(oldpage);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
}
Example #5
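/*
 * Close every file still installed in this files_struct: scan the
 * open-fd bitmap one word at a time, close each descriptor whose bit
 * is set, and offer a voluntary reschedule point after each bit so a
 * large fd table does not hold up the CPU.
 */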
static inline void close_files(struct files_struct * files)
{
    int i, j;

    j = 0;
    for (;;) {
        unsigned long set;
        i = j * __NFDBITS;
        if (i >= files->max_fdset || i >= files->max_fds)
            break;
        set = files->open_fds->fds_bits[j++];
        while (set) {
            if (set & 1) {
                struct file * file = xchg(&files->fd[i], NULL);
                if (file)
                    filp_close(file, files);
            }
            i++;
            set >>= 1;
            debug_lock_break(1);
            conditional_schedule();
        }
    }
}
Example #6
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering (now preserved by swap_out()),
	 * which clusters forked address spaces together, most recent
	 * child immediately after parent.  If we race with dup_mmap(),
	 * we very much want to resolve parent before child, otherwise
	 * we may miss some entries: using last mm would invert that.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * mmput() removes mm from mmlist before exit_mmap() and its
	 * zap_page_range().  That's not too bad, those entries are
	 * on their way out, and handled faster there than here.
	 * do_munmap() behaves similarly, taking the range out of mm's
	 * vma list before zap_page_range().  But unfortunately, when
	 * unmapping a part of a vma, it takes the whole out first,
	 * then reinserts what's left after (might even reschedule if
	 * open() method called) - so swap entries may be invisible
	 * to swapoff for a while, then reappear - but that is rare.
	 */
	while ((i = find_next_to_unuse(si, i))) {
		/* 
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. 
		 */
		swap_map = &si->swap_map[i];
		entry = SWP_ENTRY(type, i);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page(page);
		lock_page(page);

		/*
		 * Remove all references to entry, without blocking.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		swcount = *swap_map;
		if (swcount > 1) {
			flush_page_to_ram(page);
			if (start_mm == &init_mm)
				shmem_unuse(entry, page);
			else
				unuse_process(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *mm;

			spin_lock(&mmlist_lock);
			while (*swap_map > 1 &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				swcount = *swap_map;
				if (mm == &init_mm) {
					set_start_mm = 1;
					shmem_unuse(entry, page);
				} else
					unuse_process(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					new_start_mm = mm;
					set_start_mm = 0;
				}
			}
			atomic_inc(&new_start_mm->mm_users);
			spin_unlock(&mmlist_lock);
			mmput(start_mm);
			start_mm = new_start_mm;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_list_lock();
			swap_device_lock(si);
			nr_swap_pages++;
			*swap_map = 1;
			swap_device_unlock(si);
			swap_list_unlock();
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_swap_out could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 * Note shmem_unuse already deleted it from the swap cache.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			rw_swap_page(WRITE, page);
			lock_page(page);
		}
		if (PageSwapCache(page))
			delete_from_swap_cache(page);

		/*
		 * So that we could skip searching mms once the swap count
		 * went to 1, we did not mark any present ptes as dirty:
		 * we must mark the page dirty so try_to_swap_out will
		 * preserve it.
		 */
		SetPageDirty(page);
		UnlockPage(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.  Interruptible check on
		 * signal_pending() would be nice, but changes the spec?
		 */
		debug_lock_break(551);
		if (current->need_resched)
			schedule();
	}