Example #1
/* 
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...  
 */
void swapin_readahead(swp_entry_t entry)
{
	int i, num;
	struct page *new_page;
	unsigned long offset;

	/*
	 * Get the number of handles we should do readahead io to. Also,
	 * grab temporary references on them, releasing them as io completes.
	 */
	num = valid_swaphandles(entry, &offset);
	for (i = 0; i < num; offset++, i++) {
		/* Don't block on I/O for read-ahead */
		if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster
				* (1 << page_cluster)) {
			while (i++ < num)
				swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
			break;
		}
		/* Ok, do the async read-ahead now */
		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
		if (new_page != NULL)
			page_cache_release(new_page);
		swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
	}
	return;
}
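
The header comment explains the policy: read an aligned block of (1 << page_cluster) entries so the extra reads cost no seek time. Below is a minimal user-space sketch of that alignment arithmetic, assuming a fixed cluster size and the convention that offset 0 is reserved for the swap header; readahead_window() and PAGE_CLUSTER are illustrative names, not the kernel's valid_swaphandles().

#include <stdio.h>

#define PAGE_CLUSTER 3	/* assumed value of the page_cluster tunable */

static void readahead_window(unsigned long target,
			     unsigned long *start, unsigned long *n)
{
	unsigned long mask = (1UL << PAGE_CLUSTER) - 1;

	*start = target & ~mask;	/* align down to the cluster boundary */
	*n = mask + 1;			/* cover the whole cluster */
	if (*start == 0) {		/* offset 0 holds the swap header */
		*start = 1;
		*n = mask;
	}
}

int main(void)
{
	unsigned long start, n;

	readahead_window(42, &start, &n);
	printf("read %lu entries starting at offset %lu\n", n, start);	/* 8 at 40 */
	return 0;
}

With PAGE_CLUSTER set to 3, a fault at offset 42 maps to the eight-entry window starting at offset 40, which includes the original request.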
Example #2
swp_entry_t get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned long offset;
	swp_entry_t entry;
	int type, wrapped = 0;

	entry.val = 0;	/* Out of memory */
	swap_list_lock();
	type = swap_list.next;
	if (type < 0)
		goto out;
	if (nr_swap_pages <= 0)
		goto out;

	while (1) {
		p = &swap_info[type];
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			swap_device_lock(p);
			offset = scan_swap_map(p);
			swap_device_unlock(p);
			if (offset) {
				entry = SWP_ENTRY(type, offset);
				type = swap_info[type].next;
				if (type < 0 ||
				    p->prio != swap_info[type].prio)
					swap_list.next = swap_list.head;
				else
					swap_list.next = type;
				goto out;
			}
		}
		type = p->next;
		if (!wrapped) {
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else
			if (type < 0)
				goto out;	/* out of swap space */
	}
out:
	swap_list_unlock();
	return entry;
}
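
get_swap_page() rotates swap_list.next round-robin among devices of equal priority and snaps back to the list head once the priority tier ends. The following is a simplified user-space model of just that cursor movement, assuming every allocation succeeds; the struct below only loosely mirrors swap_info_struct.

#include <stdio.h>

struct swap_dev {
	int prio;
	int next;	/* index of the next device in priority order, -1 at end */
};

static struct swap_dev devs[] = {
	{ .prio = 10, .next = 1 },
	{ .prio = 10, .next = 2 },
	{ .prio = 5,  .next = -1 },
};
static int head = 0, cursor = 0;

/* After a successful allocation, move the cursor the way get_swap_page()
 * moves swap_list.next: stay within the same priority tier, otherwise
 * snap back to the head of the list. */
static void advance(void)
{
	int next = devs[cursor].next;

	if (next < 0 || devs[cursor].prio != devs[next].prio)
		cursor = head;
	else
		cursor = next;
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		printf("allocate from device %d (prio %d)\n",
		       cursor, devs[cursor].prio);
		advance();
	}
	return 0;
}

With two priority-10 devices ahead of a priority-5 one, the cursor alternates 0, 1, 0, 1, ..., reaching device 2 only when the higher tier fills up (a case this model omits).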
Example #3
/* 
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...  
 */
void swapin_readahead(swp_entry_t entry)
{
	int i, num;
	struct page *new_page;
	unsigned long offset;

	/*
	 * Get the number of handles we should do readahead io to.
	 */
	num = valid_swaphandles(entry, &offset);
	for (i = 0; i < num; offset++, i++) {
		/* Ok, do the async read-ahead now */
		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
		if (!new_page)
			break;
		page_cache_release(new_page);
	}
	return;
}
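
Both readahead variants construct neighbouring entries with SWP_ENTRY(SWP_TYPE(entry), offset), keeping the device type while substituting nearby offsets. Here is a sketch of the bit-packing such macros imply; the shift and field widths are invented for illustration and match no particular architecture.

#include <stdio.h>

#define MY_TYPE_SHIFT 27	/* invented width: offset gets the low 27 bits */
#define MY_SWP_ENTRY(type, off) \
	(((unsigned long)(type) << MY_TYPE_SHIFT) | (off))
#define MY_SWP_TYPE(e)   ((e) >> MY_TYPE_SHIFT)
#define MY_SWP_OFFSET(e) ((e) & ((1UL << MY_TYPE_SHIFT) - 1))

int main(void)
{
	unsigned long e = MY_SWP_ENTRY(2, 12345);

	printf("type=%lu offset=%lu\n", MY_SWP_TYPE(e), MY_SWP_OFFSET(e));
	return 0;
}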
Example #4
unsigned long get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned long offset, entry;
	int type, wrapped = 0;

	type = swap_list.next;
	if (type < 0)
		return 0;
	if (nr_swap_pages == 0)
		return 0;

	while (1) {
		p = &swap_info[type];
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			offset = scan_swap_map(p);
			if (offset) {
				entry = SWP_ENTRY(type, offset);
				type = swap_info[type].next;
				if (type < 0 ||
				    p->prio != swap_info[type].prio)
					swap_list.next = swap_list.head;
				else
					swap_list.next = type;
				return entry;
			}
		}
		type = p->next;
		if (!wrapped) {
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else if (type < 0) {
			return 0;	/* out of swap space */
		}
	}
}
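
Both versions of get_swap_page() delegate the slot search to scan_swap_map(), which is not shown here. The following is therefore only an assumed model of its contract: scan the device's use-count map between lowest_bit and highest_bit, claim the first free slot, and return 0 (a reserved offset) when nothing is free.

#include <stdio.h>

#define MAP_SIZE 16

static unsigned short swap_map[MAP_SIZE];	/* 0 = free, >0 = use count */
static unsigned long lowest_bit = 1, highest_bit = MAP_SIZE - 1;

static unsigned long scan_map(void)
{
	for (unsigned long off = lowest_bit; off <= highest_bit; off++) {
		if (!swap_map[off]) {
			swap_map[off] = 1;	/* claim the slot */
			if (off == lowest_bit)
				lowest_bit++;	/* narrow the search window */
			return off;
		}
	}
	return 0;	/* offset 0 is reserved, so 0 means "device full" */
}

int main(void)
{
	printf("got slot %lu\n", scan_map());	/* 1 */
	printf("got slot %lu\n", scan_map());	/* 2 */
	return 0;
}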
Example #5
/*
 * We completely avoid races by reading each swap page in advance,
 * and then search for the process using it.  All the necessary
 * page table adjustments can then be made atomically.
 */
static int try_to_unuse(unsigned int type)
{
	struct swap_info_struct * si = &swap_info[type];
	struct mm_struct *start_mm;
	unsigned short *swap_map;
	unsigned short swcount;
	struct page *page;
	swp_entry_t entry;
	int i = 0;
	int retval = 0;
	int reset_overflow = 0;

	/*
	 * When searching mms for an entry, a good strategy is to
	 * start at the first mm we freed the previous entry from
	 * (though actually we don't notice whether we or coincidence
	 * freed the entry).  Initialize this start_mm with a hold.
	 *
	 * A simpler strategy would be to start at the last mm we
	 * freed the previous entry from; but that would take less
	 * advantage of mmlist ordering (now preserved by swap_out()),
	 * which clusters forked address spaces together, most recent
	 * child immediately after parent.  If we race with dup_mmap(),
	 * we very much want to resolve parent before child, otherwise
	 * we may miss some entries: using last mm would invert that.
	 */
	start_mm = &init_mm;
	atomic_inc(&init_mm.mm_users);

	/*
	 * Keep on scanning until all entries have gone.  Usually,
	 * one pass through swap_map is enough, but not necessarily:
	 * mmput() removes mm from mmlist before exit_mmap() and its
	 * zap_page_range().  That's not too bad, those entries are
	 * on their way out, and handled faster there than here.
	 * do_munmap() behaves similarly, taking the range out of mm's
	 * vma list before zap_page_range().  But unfortunately, when
	 * unmapping a part of a vma, it takes the whole out first,
	 * then reinserts what's left after (might even reschedule if
	 * open() method called) - so swap entries may be invisible
	 * to swapoff for a while, then reappear - but that is rare.
	 */
	while ((i = find_next_to_unuse(si, i))) {
		/* 
		 * Get a page for the entry, using the existing swap
		 * cache page if there is one.  Otherwise, get a clean
		 * page and read the swap into it. 
		 */
		swap_map = &si->swap_map[i];
		entry = SWP_ENTRY(type, i);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Either swap_duplicate() failed because entry
			 * has been freed independently, and will not be
			 * reused since sys_swapoff() already disabled
			 * allocation from here, or alloc_page() failed.
			 */
			if (!*swap_map)
				continue;
			retval = -ENOMEM;
			break;
		}

		/*
		 * Don't hold on to start_mm if it looks like exiting.
		 */
		if (atomic_read(&start_mm->mm_users) == 1) {
			mmput(start_mm);
			start_mm = &init_mm;
			atomic_inc(&init_mm.mm_users);
		}

		/*
		 * Wait for and lock page.  When do_swap_page races with
		 * try_to_unuse, do_swap_page can handle the fault much
		 * faster than try_to_unuse can locate the entry.  This
		 * apparently redundant "wait_on_page" lets try_to_unuse
		 * defer to do_swap_page in such a case - in some tests,
		 * do_swap_page and try_to_unuse repeatedly compete.
		 */
		wait_on_page(page);
		lock_page(page);

		/*
		 * Remove all references to entry, without blocking.
		 * Whenever we reach init_mm, there's no address space
		 * to search, but use it as a reminder to search shmem.
		 */
		swcount = *swap_map;
		if (swcount > 1) {
			flush_page_to_ram(page);
			if (start_mm == &init_mm)
				shmem_unuse(entry, page);
			else
				unuse_process(start_mm, entry, page);
		}
		if (*swap_map > 1) {
			int set_start_mm = (*swap_map >= swcount);
			struct list_head *p = &start_mm->mmlist;
			struct mm_struct *new_start_mm = start_mm;
			struct mm_struct *mm;

			spin_lock(&mmlist_lock);
			while (*swap_map > 1 &&
					(p = p->next) != &start_mm->mmlist) {
				mm = list_entry(p, struct mm_struct, mmlist);
				swcount = *swap_map;
				if (mm == &init_mm) {
					set_start_mm = 1;
					shmem_unuse(entry, page);
				} else
					unuse_process(mm, entry, page);
				if (set_start_mm && *swap_map < swcount) {
					new_start_mm = mm;
					set_start_mm = 0;
				}
			}
			atomic_inc(&new_start_mm->mm_users);
			spin_unlock(&mmlist_lock);
			mmput(start_mm);
			start_mm = new_start_mm;
		}

		/*
		 * How could swap count reach 0x7fff when the maximum
		 * pid is 0x7fff, and there's no way to repeat a swap
		 * page within an mm (except in shmem, where it's the
		 * shared object which takes the reference count)?
		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
		 *
		 * If that's wrong, then we should worry more about
		 * exit_mmap() and do_munmap() cases described above:
		 * we might be resetting SWAP_MAP_MAX too early here.
		 * We know "Undead"s can happen, they're okay, so don't
		 * report them; but do report if we reset SWAP_MAP_MAX.
		 */
		if (*swap_map == SWAP_MAP_MAX) {
			swap_list_lock();
			swap_device_lock(si);
			nr_swap_pages++;
			*swap_map = 1;
			swap_device_unlock(si);
			swap_list_unlock();
			reset_overflow = 1;
		}

		/*
		 * If a reference remains (rare), we would like to leave
		 * the page in the swap cache; but try_to_swap_out could
		 * then re-duplicate the entry once we drop page lock,
		 * so we might loop indefinitely; also, that page could
		 * not be swapped out to other storage meanwhile.  So:
		 * delete from cache even if there's another reference,
		 * after ensuring that the data has been saved to disk -
		 * since if the reference remains (rarer), it will be
		 * read from disk into another page.  Splitting into two
		 * pages would be incorrect if swap supported "shared
		 * private" pages, but they are handled by tmpfs files.
		 * Note shmem_unuse already deleted its page from the swap cache.
		 */
		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
			rw_swap_page(WRITE, page);
			lock_page(page);
		}
		if (PageSwapCache(page))
			delete_from_swap_cache(page);

		/*
		 * So we could skip searching mms once swap count went
		 * to 1, we did not mark any present ptes as dirty: must
		 * mark page dirty so try_to_swap_out will preserve it.
		 */
		SetPageDirty(page);
		UnlockPage(page);
		page_cache_release(page);

		/*
		 * Make sure that we aren't completely killing
		 * interactive performance.  Interruptible check on
		 * signal_pending() would be nice, but changes the spec?
		 */
		if (current->need_resched)
			schedule();
	}

	mmput(start_mm);
	if (reset_overflow)
		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
	return retval;
}
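
The long comment on start_mm motivates resuming the circular mmlist walk just after the mm that last freed an entry. Below is a greatly simplified user-space model of that resumption pattern; the real loop additionally prefers the mm whose reference count dropped the most.

#include <stdio.h>

struct node {
	int id;
	struct node *next;	/* circular, like mm_struct.mmlist */
};

/* Walk the ring starting just after 'start'; on success, the caller
 * would remember the returned node as the next starting point. */
static struct node *scan_from(struct node *start, int (*freed)(struct node *))
{
	for (struct node *n = start->next; n != start; n = n->next)
		if (freed(n))
			return n;
	return start;
}

static int freed_here(struct node *n)
{
	return n->id == 2;	/* stand-in for "this mm referenced the entry" */
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	c.next = &a;		/* close the ring */
	printf("resume next scan at node %d\n", scan_from(&a, freed_here)->id);
	return 0;
}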
Example #6
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
    struct shmid_kernel *shp;
    struct vm_area_struct *shmd;
    int err = -EINVAL;
    unsigned int id;
    unsigned long addr;
    unsigned long len;

    down(&current->mm->mmap_sem);
    lock_kernel();
    if (shmid < 0) {
        /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
        goto out;
    }

    shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
    if (shp == IPC_UNUSED || shp == IPC_NOID) {
        /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
        goto out;
    }

    if (!(addr = (ulong) shmaddr)) {
        if (shmflg & SHM_REMAP)
            goto out;
        err = -ENOMEM;
        addr = 0;
again:
        if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
            goto out;
        if(addr & (SHMLBA - 1)) {
            addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
            goto again;
        }
    } else if (addr & (SHMLBA-1)) {
        if (shmflg & SHM_RND)
            addr &= ~(SHMLBA-1);       /* round down */
        else
            goto out;
    }
    /*
     * Check if addr exceeds TASK_SIZE (from do_mmap)
     */
    len = PAGE_SIZE*shp->shm_npages;
    err = -EINVAL;
    if (addr >= TASK_SIZE || len > TASK_SIZE  || addr > TASK_SIZE - len)
        goto out;
    /*
     * If shm segment goes below stack, make sure there is some
     * space left for the stack to grow (presently 4 pages).
     */
    if (addr < current->mm->start_stack &&
            addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
    {
        /* printk("shmat() -> EINVAL because segment intersects stack\n"); */
        goto out;
    }
    if (!(shmflg & SHM_REMAP))
        if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
            /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
            	addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
            goto out;
        }

    err = -EACCES;
    if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
        goto out;
    err = -EIDRM;
    if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
        goto out;

    err = -ENOMEM;
    shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
    if (!shmd)
        goto out;
    if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
        kmem_cache_free(vm_area_cachep, shmd);
        err = -EIDRM;
        goto out;
    }

    shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
    shmd->vm_start = addr;
    shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
    shmd->vm_mm = current->mm;
    shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
    shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
                     | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
                     | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
    shmd->vm_file = NULL;
    shmd->vm_offset = 0;
    shmd->vm_ops = &shm_vm_ops;

    shp->u.shm_nattch++;            /* prevent destruction */
    if (shp->u.shm_nattch > 0xffff - NR_TASKS || (err = shm_map (shmd))) {
        if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
            killseg(id);
        kmem_cache_free(vm_area_cachep, shmd);
        goto out;
    }

    insert_attach(shp,shmd);  /* insert shmd into shp->attaches */

    shp->u.shm_lpid = current->pid;
    shp->u.shm_atime = CURRENT_TIME;

    *raddr = addr;
    err = 0;
out:
    unlock_kernel();
    up(&current->mm->mmap_sem);
    return err;
}
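
sys_shmat() applies SHMLBA masks in both directions: a kernel-chosen address is rounded up to the next boundary, while a caller-supplied address is rounded down when SHM_RND is set. A small sketch of that arithmetic follows, with an assumed SHMLBA; the real value is architecture-specific.

#include <stdio.h>

#define MY_SHMLBA 0x1000UL	/* illustrative; the real SHMLBA is per-arch */

static unsigned long round_down_shmlba(unsigned long addr)
{
	return addr & ~(MY_SHMLBA - 1);			/* SHM_RND case */
}

static unsigned long round_up_shmlba(unsigned long addr)
{
	return (addr + (MY_SHMLBA - 1)) & ~(MY_SHMLBA - 1);	/* kernel-chosen case */
}

int main(void)
{
	printf("0x%lx rounded down -> 0x%lx\n", 0x12345UL, round_down_shmlba(0x12345UL));
	printf("0x%lx rounded up   -> 0x%lx\n", 0x12345UL, round_up_shmlba(0x12345UL));
	return 0;
}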
Example #7
static int unswap_by_move(unsigned short *map, unsigned long max,
			  unsigned long start, unsigned long n_pages)
{
	struct task_struct *p;
	unsigned long entry, rover = (start == 1) ? n_pages+1 : 1;
	unsigned long i, j;
	int found;

	DPRINTK( "unswapping %lu..%lu by moving in swap\n",
			 start, start+n_pages-1 );
	
	/* can free the allocated pages by moving them to other swap pages */
	for( i = start; i < start+n_pages; ++i ) {
		if (!map[i]) {
			map[i] = SWAP_MAP_BAD;
			DPRINTK( "unswap: page %lu was free\n", i );
			continue;
		}
		else if (map[i] == SWAP_MAP_BAD) {
			printk( KERN_ERR "get_stram_region: page %lu already "
					"reserved??\n", i );
		}
		DPRINTK( "unswap: page %lu is alloced, count=%u\n", i, map[i] );

		/* find a free page not in our region */
		found = 0;
		for( j = rover; j != rover-1; j = (j == max-1) ? 1 : j+1 ) {
			if (j >= start && j < start+n_pages)
				continue;
			if (!map[j]) {
				rover = j+1;
				found = 1;
				break;
			}
		}
		if (!found) {
			/* scan wrapped around without finding a free page */
			printk( KERN_ERR "get_stram_region: not enough free swap "
					"pages now??\n" );
			return( -ENOMEM );
		}
		DPRINTK( "unswap: map[i=%lu]=%u map[j=%lu]=%u nr_swap=%u\n",
				 i, map[i], j, map[j], nr_swap_pages );
		
		--nr_swap_pages;
		entry = SWP_ENTRY( stram_swap_type, j );
		if (stram_swap_info->lowest_bit == j)
			stram_swap_info->lowest_bit++;
		if (stram_swap_info->highest_bit == j)
			stram_swap_info->highest_bit--;
		
		memcpy( SWAP_ADDR(j), SWAP_ADDR(i), PAGE_SIZE );
#ifdef DO_PROC
		stat_swap_move++;
#endif

		while( map[i] ) {
			read_lock(&tasklist_lock);
			for_each_task(p) {
				if (unswap_process( p->mm, SWP_ENTRY( stram_swap_type, i ),
									entry, 1 )) {
					read_unlock(&tasklist_lock);
					map[j]++;
					goto repeat;
				}
			}
			read_unlock(&tasklist_lock);
			if (map[i] && map[i] != SWAP_MAP_MAX) {
				printk( KERN_ERR "get_stram_region: ST-RAM swap page %lu "
						"not used by any process\n", i );
				/* quit while loop and overwrite bad map entry */
				break;
			}
			else if (!map[i]) {
				/* somebody else must have swapped in that page, so free the
				 * new one (we're moving to) */
				DPRINTK( "unswap: map[i] became 0, also clearing map[j]\n" );
				map[j] = 0;
			}
		  repeat: ;	/* null statement: a label must be attached to a statement */
		}

		DPRINTK( "unswap: map[i=%lu]=%u map[j=%lu]=%u nr_swap=%u\n",
				 i, map[i], j, map[j], nr_swap_pages );
		map[i] = SWAP_MAP_BAD;
		if (stram_swap_info->lowest_bit == i)
			stram_swap_info->lowest_bit++;
		if (stram_swap_info->highest_bit == i)
			stram_swap_info->highest_bit--;
		--nr_swap_pages;
	}
	return( 0 );
}
#endif

static int unswap_by_read(unsigned short *map, unsigned long max,
			  unsigned long start, unsigned long n_pages)
{
	struct task_struct *p;
	unsigned long entry, page;
	unsigned long i;
	struct page *page_map;

	DPRINTK( "unswapping %lu..%lu by reading in\n",
			 start, start+n_pages-1 );

	for( i = start; i < start+n_pages; ++i ) {
		if (map[i] == SWAP_MAP_BAD) {
			printk( KERN_ERR "get_stram_region: page %lu already "
					"reserved??\n", i );
			continue;
		}

		if (map[i]) {
			entry = SWP_ENTRY(stram_swap_type, i);
			DPRINTK("unswap: map[i=%lu]=%u nr_swap=%u\n",
				i, map[i], nr_swap_pages);

			/* Get a page for the entry, using the existing
			   swap cache page if there is one.  Otherwise,
			   get a clean page and read the swap into it. */
			page_map = read_swap_cache(entry);
			if (page_map) {
				page = page_address(page_map);
				read_lock(&tasklist_lock);
				for_each_task(p)
					unswap_process(p->mm, entry, page
						       /* , 0 */);
				read_unlock(&tasklist_lock);
				shm_unuse(entry, page);
				/* Now get rid of the extra reference to
				   the temporary page we've been using. */
				if (PageSwapCache(page_map))
					delete_from_swap_cache(page_map);
				__free_page(page_map);
	#ifdef DO_PROC
				stat_swap_force++;
	#endif
			}
			else if (map[i])
				return -ENOMEM;
		}

		DPRINTK( "unswap: map[i=%lu]=%u nr_swap=%u\n",
				 i, map[i], nr_swap_pages );
		map[i] = SWAP_MAP_BAD;
		if (stram_swap_info->lowest_bit == i)
			stram_swap_info->lowest_bit++;
		if (stram_swap_info->highest_bit == i)
			stram_swap_info->highest_bit--;
		--nr_swap_pages;
	}

	return 0;
}

/*
 * reserve a region in ST-RAM swap space for an allocation
 */
static void *get_stram_region( unsigned long n_pages )
{
	unsigned short *map = stram_swap_info->swap_map;
	unsigned long max = stram_swap_info->max;
	unsigned long start, total_free, region_free;
	int err;
	void *ret = NULL;
	
	DPRINTK( "get_stram_region(n_pages=%lu)\n", n_pages );

	down(&stram_swap_sem);

	/* disallow writing to the swap device now */
	stram_swap_info->flags = SWP_USED;

	/* find a region of n_pages pages in the swap space including as much free
	 * pages as possible (and excluding any already-reserved pages). */
	if (!(start = find_free_region( n_pages, &total_free, &region_free )))
		goto end;
	DPRINTK( "get_stram_region: region starts at %lu, has %lu free pages\n",
			 start, region_free );

#if 0
	err = ((total_free-region_free >= n_pages-region_free) ?
		   unswap_by_move( map, max, start, n_pages ) :
		   unswap_by_read( map, max, start, n_pages ));
#else
	err = unswap_by_read(map, max, start, n_pages);
#endif

	if (err)
		goto end;

	ret = SWAP_ADDR(start);
  end:
	/* allow using swap device again */
	stram_swap_info->flags = SWP_WRITEOK;
	up(&stram_swap_sem);
	DPRINTK( "get_stram_region: returning %p\n", ret );
	return( ret );
}
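
get_stram_region() leans on find_free_region() to choose an n_pages window containing as many free slots as possible and no reserved (SWAP_MAP_BAD) entries. That helper is not shown above, so the following is an assumed model of the selection, not its actual implementation.

#include <stdio.h>

#define MY_SWAP_MAP_BAD 0x8000	/* stand-in for the kernel's SWAP_MAP_BAD */

static unsigned long find_window(unsigned short *map, unsigned long max,
				 unsigned long n, unsigned long *nfree_out)
{
	unsigned long best = 0, best_free = 0;

	for (unsigned long s = 1; s + n <= max; s++) {	/* slot 0 is the header */
		unsigned long nfree = 0;
		int ok = 1;

		for (unsigned long i = s; i < s + n; i++) {
			if (map[i] == MY_SWAP_MAP_BAD) {
				ok = 0;		/* window contains a reserved page */
				break;
			}
			if (!map[i])
				nfree++;
		}
		if (ok && (!best || nfree > best_free)) {
			best = s;
			best_free = nfree;
		}
	}
	*nfree_out = best_free;
	return best;	/* 0 if no acceptable window exists */
}

int main(void)
{
	unsigned short map[8] = { 0, 1, 0, 0, 2, 0, 0, 0 };
	unsigned long nfree;
	unsigned long s = find_window(map, 8, 3, &nfree);

	printf("window at %lu with %lu free pages\n", s, nfree);	/* 5, 3 */
	return 0;
}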