/**
 * vxfs_get_page - read a page into memory.
 * @mapping:	address space to read the page from
 * @n:		page number
 *
 * Description:
 *   vxfs_get_page reads the @n th page of @mapping through
 *   read_cache_page(), waits for the page and kmaps it.
 *
 * Returns:
 *   The wanted page on success.  On failure an ERR_PTR() value is
 *   returned: either the error pointer handed back by read_cache_page(),
 *   or -EIO when the page is not uptodate or has its error flag set.
 */
struct page *
vxfs_get_page(struct address_space *mapping, u_long n)
{
	struct page *pp;

	pp = read_cache_page(mapping, n,
			     (filler_t *)mapping->a_ops->readpage, NULL);
	if (IS_ERR(pp))
		return pp;

	wait_on_page(pp);
	kmap(pp);

	/* No vxfs_check_page() pass is done here; only the page flags count. */
	if (Page_Uptodate(pp) && !PageError(pp))
		return pp;

	/* The read did not produce a usable page: hand it back and fail. */
	vxfs_put_page(pp);
	return ERR_PTR(-EIO);
}
/*
 * Read a range of the device through the page cache.
 *
 * Copies @len bytes starting at byte offset @from into @buf, one page at a
 * time.  *@retlen is reset to zero on entry and advanced after every page
 * that has been copied, so it is meaningful even when an error is returned.
 *
 * Returns 0 on success, the error from read_cache_page() if a page cannot
 * be obtained, or -EIO if a page is not uptodate after waiting for it.
 */
static int blkmtd_read(struct mtd_info *mtd, loff_t from, size_t len,
		       size_t *retlen, u_char *buf)
{
	mtd_raw_dev_data_t *rawdev = mtd->priv;
	int status = 0;
	int cur_page, npages;
	int pg_off;

	*retlen = 0;

	DEBUG(2, "blkmtd: read: dev = `%s' from = %ld len = %d buf = %p\n",
	      bdevname(rawdev->binding->bd_dev), (long int)from, len, buf);

	cur_page = from >> PAGE_SHIFT;
	pg_off = from - (cur_page << PAGE_SHIFT);
	npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	DEBUG(3, "blkmtd: read: pagenr = %d offset = %d, pages = %d\n",
	      cur_page, pg_off, npages);

	/* Walk the range page by page via readpage() - slow but easy. */
	for (; npages; npages--, cur_page++) {
		struct page *pg;
		int chunk;

		DEBUG(3, "blkmtd: read: looking for page: %d\n", cur_page);
		pg = read_cache_page(&rawdev->as, cur_page,
				     (filler_t *)blkmtd_readpage, rawdev->file);
		if (IS_ERR(pg))
			return PTR_ERR(pg);

		wait_on_page(pg);
		if (!Page_Uptodate(pg)) {
			/* error reading page */
			printk("blkmtd: read: page not uptodate\n");
			page_cache_release(pg);
			return -EIO;
		}

		/* At most one page, and never past the end of this page. */
		if (PAGE_SIZE > len)
			chunk = len;
		else
			chunk = PAGE_SIZE;
		if (pg_off + chunk > PAGE_SIZE)
			chunk = PAGE_SIZE - pg_off;

		memcpy(buf + *retlen, page_address(pg) + pg_off, chunk);
		page_cache_release(pg);

		/* Only the first page can start at a non-zero offset. */
		pg_off = 0;
		len -= chunk;
		*retlen += chunk;
	}

	DEBUG(2, "blkmtd: end read: retlen = %d, err = %d\n", *retlen, status);
	return status;
}
static struct page * dir_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; struct page *page = read_cache_page(mapping, n, (filler_t*)mapping->a_ops->readpage, NULL); if (!IS_ERR(page)) { wait_on_page(page); kmap(page); if (!Page_Uptodate(page)) goto fail; } return page; fail: dir_put_page(page); return ERR_PTR(-EIO); }
/*
 * Swap I/O on a page that is not in the page cache.
 *
 * The swap lock map insists that pages be in the page cache, so it cannot
 * be used for such pages.  Once the need for the lock map is gone, the
 * number of exported functions can be reduced.
 *
 * @buf must belong to a page that is locked, is not a swap cache page and
 * has no mapping; any violation triggers PAGE_BUG().
 */
void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf)
{
	struct page *pg = virt_to_page(buf);
	int started;

	if (!PageLocked(pg))
		PAGE_BUG(pg);
	if (PageSwapCache(pg))
		PAGE_BUG(pg);
	if (pg->mapping)
		PAGE_BUG(pg);

	/* sync_page is needed to wait for I/O completion: borrow swapper_space */
	pg->mapping = &swapper_space;

	/* A zero return from rw_swap_page_base() leaves the unlock to us. */
	started = rw_swap_page_base(rw, entry, pg);
	if (!started)
		UnlockPage(pg);

	wait_on_page(pg);
	pg->mapping = NULL;
}
/* * Setting up a new swap file needs a simple wrapper just to read the * swap signature. SysV shared memory also needs a simple wrapper. */ void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer) { struct page *page; page = mem_map + MAP_NR((unsigned long) buffer); wait_on_page(page); set_bit(PG_locked, &page->flags); if (test_and_set_bit(PG_swap_cache, &page->flags)) { printk ("VM: read_swap_page: page already in swap cache!\n"); return; } if (page->inode) { printk ("VM: read_swap_page: page already in page cache!\n"); return; } page->inode = &swapper_inode; page->offset = entry; atomic_inc(&page->count); /* Protect from shrink_mmap() */ rw_swap_page(rw, entry, buffer, 1); atomic_dec(&page->count); page->inode = 0; clear_bit(PG_swap_cache, &page->flags); }
/*
 * shrink_cache - try to free pages from the tail of the inactive list.
 * @nr_pages:	number of pages still wanted; decremented for each page freed
 * @classzone:	only pages for which memclass(page->zone, classzone) holds
 *		are considered
 * @gfp_mask:	allocation flags; __GFP_FS gates both waiting on laundered
 *		pages and calling ->writepage(), and the mask is passed on
 *		to try_to_release_page() and swap_out()
 * @priority:	divides nr_inactive_pages to bound the scan, scales the
 *		budget of "mapped" pages, and is passed on to swap_out()
 *
 * pagemap_lru_lock is taken here and released before every return.
 *
 * Returns the number of pages that still remain to be freed.
 */
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
{
	struct list_head * entry;
	int max_scan = nr_inactive_pages / priority;
	int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10);

	spin_lock(&pagemap_lru_lock);
	while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
		struct page * page;

		/* lock depth is 1 or 2 */
		if (unlikely(current->need_resched)) {
			/* Drop the LRU lock around the reschedule, then rescan. */
			spin_unlock(&pagemap_lru_lock);
			__set_current_state(TASK_RUNNING);
			schedule();
			spin_lock(&pagemap_lru_lock);
			continue;
		}

		page = list_entry(entry, struct page, lru);

		if (unlikely(!PageLRU(page)))
			BUG();
		if (unlikely(PageActive(page)))
			BUG();

		/*
		 * Rotate the page from the tail to the head of the inactive
		 * list so the next iteration looks at a different page.
		 */
		list_del(entry);
		list_add(entry, &inactive_list);

		/*
		 * Zero page counts can happen because we unlink the pages
		 * _after_ decrementing the usage count..
		 */
		if (unlikely(!page_count(page)))
			continue;

		if (!memclass(page->zone, classzone))
			continue;

		/* Racy check to avoid trylocking when not worthwhile */
		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
			goto page_mapped;

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
		 */
		if (unlikely(TryLockPage(page))) {
			if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
				/* Pin the page while we sleep without the LRU lock. */
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);
				wait_on_page(page);
				page_cache_release(page);
				spin_lock(&pagemap_lru_lock);
			}
			continue;
		}

		/* From here on we hold the page lock. */
		if ((PageDirty(page) || DelallocPage(page)) && is_page_cache_freeable(page) && page->mapping) {
			/*
			 * It is not critical here to write it only if
			 * the page is unmapped because any direct writer
			 * like O_DIRECT would set the PG_dirty bitflag
			 * on the physical page after having successfully
			 * pinned it and after the I/O to the page is finished,
			 * so the direct writes to the page cannot get lost.
			 */
			int (*writepage)(struct page *);

			writepage = page->mapping->a_ops->writepage;
			if ((gfp_mask & __GFP_FS) && writepage) {
				ClearPageDirty(page);
				SetPageLaunder(page);
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);

				/*
				 * No UnlockPage() on this path: the page lock is
				 * left to writepage().
				 */
				writepage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 */
		if (page->buffers) {
			spin_unlock(&pagemap_lru_lock);

			/* avoid freeing a locked page */
			page_cache_get(page);

			if (try_to_release_page(page, gfp_mask)) {
				if (!page->mapping) {
					/*
					 * We must not allow an anon page
					 * with no buffers to be visible on
					 * the LRU, so we unlock the page after
					 * taking the lru lock
					 */
					spin_lock(&pagemap_lru_lock);
					UnlockPage(page);
					__lru_cache_del(page);

					/* effectively free the page here */
					page_cache_release(page);

					if (--nr_pages)
						continue;
					break;
				} else {
					/*
					 * The page is still in pagecache so undo the stuff
					 * before the try_to_release_page since we've not
					 * finished and we can now try the next step.
					 */
					page_cache_release(page);

					spin_lock(&pagemap_lru_lock);
				}
			} else {
				/* failed to drop the buffers so stop here */
				UnlockPage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		spin_lock(&pagecache_lock);

		/*
		 * this is the non-racy check for busy page.
		 */
		if (!page->mapping || !is_page_cache_freeable(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			/*
			 * Also reached by goto from the racy check above, in
			 * which case neither the page lock nor pagecache_lock
			 * was ever taken for this page.
			 */
page_mapped:
			if (--max_mapped >= 0)
				continue;

			/*
			 * Alert! We've found too many mapped pages on the
			 * inactive list, so we start swapping out now!
			 */
			spin_unlock(&pagemap_lru_lock);
			swap_out(priority, gfp_mask, classzone);
			return nr_pages;
		}

		/*
		 * It is critical to check PageDirty _after_ we made sure
		 * the page is freeable so not in use by anybody.
		 */
		if (PageDirty(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			continue;
		}

		/* point of no return */
		if (likely(!PageSwapCache(page))) {
			__remove_inode_page(page);
			spin_unlock(&pagecache_lock);
		} else {
			swp_entry_t swap;
			/* Save the swap entry before the page leaves the swap cache. */
			swap.val = page->index;
			__delete_from_swap_cache(page);
			spin_unlock(&pagecache_lock);
			swap_free(swap);
		}

		__lru_cache_del(page);
		UnlockPage(page);

		/* effectively free the page here */
		page_cache_release(page);

		if (--nr_pages)
			continue;
		break;
	}
	spin_unlock(&pagemap_lru_lock);

	return nr_pages;
}
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait) { struct kiobuf *iobuf; int i, j; struct page *page, **ppage; int doublepage = 0; int repeat = 0; repeat: for (i = 0; i < nr; i++) { iobuf = iovec[i]; if (iobuf->locked) continue; iobuf->locked = 1; ppage = iobuf->maplist; for (j = 0; j < iobuf->nr_pages; ppage++, j++) { page = *ppage; if (!page) continue; if (TryLockPage(page)) goto retry; } } return 0; retry: /* * We couldn't lock one of the pages. Undo the locking so far, * wait on the page we got to, and try again. */ unlock_kiovec(nr, iovec); if (!wait) return -EAGAIN; /* * Did the release also unlock the page we got stuck on? */ if (!PageLocked(page)) { /* * If so, we may well have the page mapped twice * in the IO address range. Bad news. Of * course, it _might_ just be a coincidence, * but if it happens more than once, chances * are we have a double-mapped page. */ if (++doublepage >= 3) return -EINVAL; /* Try again... */ wait_on_page(page); } if (++repeat < 16) goto repeat; return -EAGAIN; }
/* * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. */ static int try_to_unuse(unsigned int type) { struct swap_info_struct * si = &swap_info[type]; struct mm_struct *start_mm; unsigned short *swap_map; unsigned short swcount; struct page *page; swp_entry_t entry; int i = 0; int retval = 0; int reset_overflow = 0; /* * When searching mms for an entry, a good strategy is to * start at the first mm we freed the previous entry from * (though actually we don't notice whether we or coincidence * freed the entry). Initialize this start_mm with a hold. * * A simpler strategy would be to start at the last mm we * freed the previous entry from; but that would take less * advantage of mmlist ordering (now preserved by swap_out()), * which clusters forked address spaces together, most recent * child immediately after parent. If we race with dup_mmap(), * we very much want to resolve parent before child, otherwise * we may miss some entries: using last mm would invert that. */ start_mm = &init_mm; atomic_inc(&init_mm.mm_users); /* * Keep on scanning until all entries have gone. Usually, * one pass through swap_map is enough, but not necessarily: * mmput() removes mm from mmlist before exit_mmap() and its * zap_page_range(). That's not too bad, those entries are * on their way out, and handled faster there than here. * do_munmap() behaves similarly, taking the range out of mm's * vma list before zap_page_range(). But unfortunately, when * unmapping a part of a vma, it takes the whole out first, * then reinserts what's left after (might even reschedule if * open() method called) - so swap entries may be invisible * to swapoff for a while, then reappear - but that is rare. */ while ((i = find_next_to_unuse(si, i))) { /* * Get a page for the entry, using the existing swap * cache page if there is one. 
Otherwise, get a clean * page and read the swap into it. */ swap_map = &si->swap_map[i]; entry = SWP_ENTRY(type, i); page = read_swap_cache_async(entry); if (!page) { /* * Either swap_duplicate() failed because entry * has been freed independently, and will not be * reused since sys_swapoff() already disabled * allocation from here, or alloc_page() failed. */ if (!*swap_map) continue; retval = -ENOMEM; break; } /* * Don't hold on to start_mm if it looks like exiting. */ if (atomic_read(&start_mm->mm_users) == 1) { mmput(start_mm); start_mm = &init_mm; atomic_inc(&init_mm.mm_users); } /* * Wait for and lock page. When do_swap_page races with * try_to_unuse, do_swap_page can handle the fault much * faster than try_to_unuse can locate the entry. This * apparently redundant "wait_on_page" lets try_to_unuse * defer to do_swap_page in such a case - in some tests, * do_swap_page and try_to_unuse repeatedly compete. */ wait_on_page(page); lock_page(page); /* * Remove all references to entry, without blocking. * Whenever we reach init_mm, there's no address space * to search, but use it as a reminder to search shmem. 
*/ swcount = *swap_map; if (swcount > 1) { flush_page_to_ram(page); if (start_mm == &init_mm) shmem_unuse(entry, page); else unuse_process(start_mm, entry, page); } if (*swap_map > 1) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; struct mm_struct *new_start_mm = start_mm; struct mm_struct *mm; spin_lock(&mmlist_lock); while (*swap_map > 1 && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); swcount = *swap_map; if (mm == &init_mm) { set_start_mm = 1; shmem_unuse(entry, page); } else unuse_process(mm, entry, page); if (set_start_mm && *swap_map < swcount) { new_start_mm = mm; set_start_mm = 0; } } atomic_inc(&new_start_mm->mm_users); spin_unlock(&mmlist_lock); mmput(start_mm); start_mm = new_start_mm; } /* * How could swap count reach 0x7fff when the maximum * pid is 0x7fff, and there's no way to repeat a swap * page within an mm (except in shmem, where it's the * shared object which takes the reference count)? * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. * * If that's wrong, then we should worry more about * exit_mmap() and do_munmap() cases described above: * we might be resetting SWAP_MAP_MAX too early here. * We know "Undead"s can happen, they're okay, so don't * report them; but do report if we reset SWAP_MAP_MAX. */ if (*swap_map == SWAP_MAP_MAX) { swap_list_lock(); swap_device_lock(si); nr_swap_pages++; *swap_map = 1; swap_device_unlock(si); swap_list_unlock(); reset_overflow = 1; } /* * If a reference remains (rare), we would like to leave * the page in the swap cache; but try_to_swap_out could * then re-duplicate the entry once we drop page lock, * so we might loop indefinitely; also, that page could * not be swapped out to other storage meanwhile. So: * delete from cache even if there's another reference, * after ensuring that the data has been saved to disk - * since if the reference remains (rarer), it will be * read from disk into another page. 
Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. * Note shmem_unuse already deleted its from swap cache. */ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { rw_swap_page(WRITE, page); lock_page(page); } if (PageSwapCache(page)) delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went * to 1, we did not mark any present ptes as dirty: must * mark page dirty so try_to_swap_out will preserve it. */ SetPageDirty(page); UnlockPage(page); page_cache_release(page); /* * Make sure that we aren't completely killing * interactive performance. Interruptible check on * signal_pending() would be nice, but changes the spec? */ if (current->need_resched) schedule(); }
/* * Reads or writes a swap page. * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O. * * Important prevention of race condition: The first thing we do is set a lock * on this swap page, which lasts until I/O completes. This way a * write_swap_page(entry) immediately followed by a read_swap_page(entry) * on the same entry will first complete the write_swap_page(). Fortunately, * not more than one write_swap_page() request can be pending per entry. So * all races the caller must catch are: multiple read_swap_page() requests * on the same entry. */ void rw_swap_page(int rw, unsigned long entry, char * buf, int wait) { unsigned long type, offset; struct swap_info_struct * p; struct page *page; type = SWP_TYPE(entry); if (type >= nr_swapfiles) { printk("Internal error: bad swap-device\n"); return; } p = &swap_info[type]; offset = SWP_OFFSET(entry); if (offset >= p->max) { printk("rw_swap_page: weirdness\n"); return; } if (p->swap_map && !p->swap_map[offset]) { printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry); return; } if (!(p->flags & SWP_USED)) { printk("Trying to swap to unused swap-device\n"); return; } /* Make sure we are the only process doing I/O with this swap page. */ while (set_bit(offset,p->swap_lockmap)) { run_task_queue(&tq_disk); sleep_on(&lock_queue); } if (rw == READ) kstat.pswpin++; else kstat.pswpout++; page = mem_map + MAP_NR(buf); atomic_inc(&page->count); wait_on_page(page); if (p->swap_device) { if (!wait) { set_bit(PG_free_after, &page->flags); set_bit(PG_decr_after, &page->flags); set_bit(PG_swap_unlock_after, &page->flags); page->swap_unlock_entry = entry; atomic_inc(&nr_async_pages); } ll_rw_page(rw,p->swap_device,offset,buf); /* * NOTE! We don't decrement the page count if we * don't wait - that will happen asynchronously * when the IO completes. 
*/ if (!wait) return; wait_on_page(page); } else if (p->swap_file) { struct inode *swapf = p->swap_file; unsigned int zones[PAGE_SIZE/512]; int i; if (swapf->i_op->bmap == NULL && swapf->i_op->smap != NULL){ /* With MsDOS, we use msdos_smap which return a sector number (not a cluster or block number). It is a patch to enable the UMSDOS project. Other people are working on better solution. It sounds like ll_rw_swap_file defined it operation size (sector size) based on PAGE_SIZE and the number of block to read. So using bmap or smap should work even if smap will require more blocks. */ int j; unsigned int block = offset << 3; for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){ if (!(zones[i] = swapf->i_op->smap(swapf,block++))) { printk("rw_swap_page: bad swap file\n"); return; } } }else{ int j; unsigned int block = offset << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize) if (!(zones[i] = bmap(swapf,block++))) { printk("rw_swap_page: bad swap file\n"); } } ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf); } else printk("rw_swap_page: no swap file or device\n"); atomic_dec(&page->count); if (offset && !clear_bit(offset,p->swap_lockmap)) printk("rw_swap_page: lock already cleared\n"); wake_up(&lock_queue); }
/* * Returns a pointer to a buffer containing at least LEN bytes of * filesystem starting at byte offset OFFSET into the filesystem. */ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len) { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; struct page *pages[BLKS_PER_BUF]; unsigned i, blocknr, buffer, unread; unsigned long devsize; int major, minor; char *data; if (!len) return NULL; blocknr = offset >> PAGE_CACHE_SHIFT; offset &= PAGE_CACHE_SIZE - 1; /* Check if an existing buffer already has the data.. */ for (i = 0; i < READ_BUFFERS; i++) { unsigned int blk_offset; if (buffer_dev[i] != sb) continue; if (blocknr < buffer_blocknr[i]) continue; blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT; blk_offset += offset; if (blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT; major = MAJOR(sb->s_dev); minor = MINOR(sb->s_dev); if (blk_size[major]) devsize = blk_size[major][minor] >> 2; /* Ok, read in BLKS_PER_BUF pages completely first. */ unread = 0; for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = NULL; if (blocknr + i < devsize) { page = read_cache_page(mapping, blocknr + i, (filler_t *)mapping->a_ops->readpage, NULL); /* synchronous error? */ if (IS_ERR(page)) page = NULL; } pages[i] = page; } for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = pages[i]; if (page) { wait_on_page(page); if (!Page_Uptodate(page)) { /* asynchronous error */ page_cache_release(page); pages[i] = NULL; } } } buffer = next_buffer; next_buffer = NEXT_BUFFER(buffer); buffer_blocknr[buffer] = blocknr; buffer_dev[buffer] = sb; data = read_buffers[buffer]; for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = pages[i]; if (page) { memcpy(data, kmap(page), PAGE_CACHE_SIZE); kunmap(page); page_cache_release(page); } else memset(data, 0, PAGE_CACHE_SIZE); data += PAGE_CACHE_SIZE; } return read_buffers[buffer] + offset; }
/*
 * shrink_cache - try to free pages from the tail of the inactive list.
 * @nr_pages:	   number of pages still wanted; decremented for each page
 *		   freed and by whatever kmem_cache_reap() returns
 * @classzone:	   only pages for which memclass(page_zone(page), classzone)
 *		   holds are considered; also passed on to swap_out() and
 *		   refill_inactive()
 * @gfp_mask:	   allocation flags; __GFP_FS gates both waiting on laundered
 *		   pages and calling ->writepage()
 * @failed_swapout: in/out flag; swap_out() is only attempted while it is
 *		   zero, and it is set to !swap_out(classzone) afterwards
 *
 * The first pagemap_lru_lock operation in here is an unlock, so the
 * function is presumably entered with that lock already held by the
 * caller (TODO confirm against callers).  The lock is not held on return.
 *
 * Returns the number of pages that still remain to be freed; this can be
 * zero or negative when kmem_cache_reap() covered the rest.
 */
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
{
	struct list_head * entry;
	int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
	int max_mapped = vm_mapped_ratio * nr_pages;

	while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
		struct page * page;

		if (unlikely(current->need_resched)) {
			/* Drop the LRU lock around the reschedule, then rescan. */
			spin_unlock(&pagemap_lru_lock);
			__set_current_state(TASK_RUNNING);
			schedule();
			spin_lock(&pagemap_lru_lock);
			continue;
		}

		page = list_entry(entry, struct page, lru);

		BUG_ON(!PageLRU(page));
		BUG_ON(PageActive(page));

		/*
		 * Rotate the page from the tail to the head of the inactive
		 * list so the next iteration looks at a different page.
		 */
		list_del(entry);
		list_add(entry, &inactive_list);

		/*
		 * Zero page counts can happen because we unlink the pages
		 * _after_ decrementing the usage count..
		 */
		if (unlikely(!page_count(page)))
			continue;

		if (!memclass(page_zone(page), classzone))
			continue;

		/* Only pages of the right zone count against the scan budget. */
		max_scan--;

		/* Racy check to avoid trylocking when not worthwhile */
		if (!page->buffers && (page_count(page) != 1 || !page->mapping))
			goto page_mapped;

		/*
		 * The page is locked. IO in progress?
		 * Move it to the back of the list.
		 */
		if (unlikely(TryLockPage(page))) {
			if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
				/* Pin the page while we sleep without the LRU lock. */
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);
				wait_on_page(page);
				page_cache_release(page);
				spin_lock(&pagemap_lru_lock);
			}
			continue;
		}

		/* From here on we hold the page lock. */
		if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) {
			/*
			 * It is not critical here to write it only if
			 * the page is unmapped because any direct writer
			 * like O_DIRECT would set the PG_dirty bitflag
			 * on the physical page after having successfully
			 * pinned it and after the I/O to the page is finished,
			 * so the direct writes to the page cannot get lost.
			 */
			int (*writepage)(struct page *);

			writepage = page->mapping->a_ops->writepage;
			if ((gfp_mask & __GFP_FS) && writepage) {
				ClearPageDirty(page);
				SetPageLaunder(page);
				page_cache_get(page);
				spin_unlock(&pagemap_lru_lock);

				/*
				 * No UnlockPage() on this path: the page lock is
				 * left to writepage().
				 */
				writepage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		/*
		 * If the page has buffers, try to free the buffer mappings
		 * associated with this page. If we succeed we try to free
		 * the page as well.
		 */
		if (page->buffers) {
			spin_unlock(&pagemap_lru_lock);

			/* avoid freeing a locked page */
			page_cache_get(page);

			if (try_to_release_page(page, gfp_mask)) {
				if (!page->mapping) {
					/*
					 * We must not allow an anon page
					 * with no buffers to be visible on
					 * the LRU, so we unlock the page after
					 * taking the lru lock
					 */
					spin_lock(&pagemap_lru_lock);
					UnlockPage(page);
					__lru_cache_del(page);

					/* effectively free the page here */
					page_cache_release(page);

					if (--nr_pages)
						continue;
					break;
				} else {
					/*
					 * The page is still in pagecache so undo the stuff
					 * before the try_to_release_page since we've not
					 * finished and we can now try the next step.
					 */
					page_cache_release(page);

					spin_lock(&pagemap_lru_lock);
				}
			} else {
				/* failed to drop the buffers so stop here */
				UnlockPage(page);
				page_cache_release(page);

				spin_lock(&pagemap_lru_lock);
				continue;
			}
		}

		spin_lock(&pagecache_lock);

		/*
		 * This is the non-racy check for busy page.
		 * It is critical to check PageDirty _after_ we made sure
		 * the page is freeable so not in use by anybody.
		 * At this point we're guaranteed that page->buffers is NULL,
		 * nobody can refill page->buffers under us because we still
		 * hold the page lock.
		 */
		if (!page->mapping || page_count(page) > 1) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			/*
			 * Also reached by goto from the racy check above, in
			 * which case neither the page lock nor pagecache_lock
			 * was ever taken for this page.
			 */
page_mapped:
			if (--max_mapped < 0) {
				/*
				 * Too many busy pages seen: with the LRU lock
				 * dropped, reap the slab caches, shrink the
				 * dcache/icache (and the quota cache when
				 * configured) and try to swap out.
				 */
				spin_unlock(&pagemap_lru_lock);

				nr_pages -= kmem_cache_reap(gfp_mask);
				if (nr_pages <= 0)
					goto out;

				shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
				shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
#ifdef CONFIG_QUOTA
				shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
#endif

				/* Once swap_out() has failed it is not retried. */
				if (!*failed_swapout)
					*failed_swapout = !swap_out(classzone);

				/* Start over with a fresh busy-page budget. */
				max_mapped = nr_pages * vm_mapped_ratio;

				spin_lock(&pagemap_lru_lock);
				refill_inactive(nr_pages, classzone);
			}
			continue;

		}
		if (PageDirty(page)) {
			spin_unlock(&pagecache_lock);
			UnlockPage(page);
			continue;
		}

		__lru_cache_del(page);

		/* point of no return */
		if (likely(!PageSwapCache(page))) {
			__remove_inode_page(page);
			spin_unlock(&pagecache_lock);
		} else {
			swp_entry_t swap;
			/* Save the swap entry before the page leaves the swap cache. */
			swap.val = page->index;
			__delete_from_swap_cache(page);
			spin_unlock(&pagecache_lock);
			swap_free(swap);
		}

		UnlockPage(page);

		/* effectively free the page here */
		page_cache_release(page);

		if (--nr_pages)
			continue;
		break;
	}
	spin_unlock(&pagemap_lru_lock);

	/* "out" is jumped to with pagemap_lru_lock already released. */
 out:
	return nr_pages;
}
static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait) { unsigned long type, offset; struct swap_info_struct * p; int zones[PAGE_SIZE/512]; int zones_used; kdev_t dev = 0; int block_size; #ifdef DEBUG_SWAP printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n", (rw == READ) ? "read" : "write", entry, (char *) page_address(page), atomic_read(&page->count), wait ? "wait" : "nowait"); #endif type = SWP_TYPE(entry); if (type >= nr_swapfiles) { printk("Internal error: bad swap-device\n"); return; } /* Don't allow too many pending pages in flight.. */ if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster) wait = 1; p = &swap_info[type]; offset = SWP_OFFSET(entry); if (offset >= p->max) { printk("rw_swap_page: weirdness\n"); return; } if (p->swap_map && !p->swap_map[offset]) { printk(KERN_ERR "rw_swap_page: " "Trying to %s unallocated swap (%08lx)\n", (rw == READ) ? "read" : "write", entry); return; } if (!(p->flags & SWP_USED)) { printk(KERN_ERR "rw_swap_page: " "Trying to swap to unused swap-device\n"); return; } if (!PageLocked(page)) { printk(KERN_ERR "VM: swap page is unlocked\n"); return; } if (PageSwapCache(page)) { /* Make sure we are the only process doing I/O with this swap page. */ if (test_and_set_bit(offset, p->swap_lockmap)) { struct wait_queue __wait; __wait.task = current; add_wait_queue(&lock_queue, &__wait); for (;;) { current->state = TASK_UNINTERRUPTIBLE; mb(); if (!test_and_set_bit(offset, p->swap_lockmap)) break; run_task_queue(&tq_disk); schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&lock_queue, &__wait); } /* * Make sure that we have a swap cache association for this * page. We need this to find which swap page to unlock once * the swap IO has completed to the physical page. If the page * is not already in the cache, just overload the offset entry * as if it were: we are not allowed to manipulate the inode * hashing for locked pages. 
*/ if (page->offset != entry) { printk ("swap entry mismatch"); return; } } if (rw == READ) { clear_bit(PG_uptodate, &page->flags); kstat.pswpin++; } else kstat.pswpout++; atomic_inc(&page->count); if (p->swap_device) { zones[0] = offset; zones_used = 1; dev = p->swap_device; block_size = PAGE_SIZE; } else if (p->swap_file) { struct inode *swapf = p->swap_file->d_inode; int i; if (swapf->i_op->bmap == NULL && swapf->i_op->smap != NULL){ /* With MS-DOS, we use msdos_smap which returns a sector number (not a cluster or block number). It is a patch to enable the UMSDOS project. Other people are working on better solution. It sounds like ll_rw_swap_file defined its operation size (sector size) based on PAGE_SIZE and the number of blocks to read. So using bmap or smap should work even if smap will require more blocks. */ int j; unsigned int block = offset << 3; for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){ if (!(zones[i] = swapf->i_op->smap(swapf,block++))) { printk("rw_swap_page: bad swap file\n"); return; } } block_size = 512; }else{ int j; unsigned int block = offset << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits); block_size = swapf->i_sb->s_blocksize; for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) if (!(zones[i] = bmap(swapf,block++))) { printk("rw_swap_page: bad swap file\n"); return; } zones_used = i; dev = swapf->i_dev; } } else { printk(KERN_ERR "rw_swap_page: no swap file or device\n"); /* Do some cleaning up so if this ever happens we can hopefully * trigger controlled shutdown. */ if (PageSwapCache(page)) { if (!test_and_clear_bit(offset,p->swap_lockmap)) printk("swap_after_unlock_page: lock already cleared\n"); wake_up(&lock_queue); } atomic_dec(&page->count); return; } if (!wait) { set_bit(PG_decr_after, &page->flags); atomic_inc(&nr_async_pages); } if (PageSwapCache(page)) { /* only lock/unlock swap cache pages! 
*/ set_bit(PG_swap_unlock_after, &page->flags); } set_bit(PG_free_after, &page->flags); /* block_size == PAGE_SIZE/zones_used */ brw_page(rw, page, dev, zones, block_size, 0); /* Note! For consistency we do all of the logic, * decrementing the page count, and unlocking the page in the * swap lock map - in the IO completion handler. */ if (!wait) return; wait_on_page(page); /* This shouldn't happen, but check to be sure. */ if (atomic_read(&page->count) == 0) printk(KERN_ERR "rw_swap_page: page unused while waiting!\n"); #ifdef DEBUG_SWAP printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n", (rw == READ) ? "read" : "write", (char *) page_adddress(page), atomic_read(&page->count)); #endif }
struct dentry *umsdos_solve_hlink (struct dentry *hlink) { /* root is our root for resolving pseudo-hardlink */ struct dentry *base = hlink->d_sb->s_root; struct dentry *dentry_dst; char *path, *pt; int len; struct address_space *mapping = hlink->d_inode->i_mapping; struct page *page; page=read_cache_page(mapping,0,(filler_t *)mapping->a_ops->readpage,NULL); dentry_dst=(struct dentry *)page; if (IS_ERR(page)) goto out; wait_on_page(page); if (!Page_Uptodate(page)) goto async_fail; dentry_dst = ERR_PTR(-ENOMEM); path = (char *) kmalloc (PATH_MAX, GFP_KERNEL); if (path == NULL) goto out_release; memcpy(path, kmap(page), hlink->d_inode->i_size); kunmap(page); page_cache_release(page); len = hlink->d_inode->i_size; /* start at root dentry */ dentry_dst = dget(base); path[len] = '\0'; pt = path; if (*path == '/') pt++; /* skip leading '/' */ if (base->d_inode == pseudo_root) pt += (UMSDOS_PSDROOT_LEN + 1); while (1) { struct dentry *dir = dentry_dst, *demd; char *start = pt; int real; while (*pt != '\0' && *pt != '/') pt++; len = (int) (pt - start); if (*pt == '/') *pt++ = '\0'; real = 1; demd = umsdos_get_emd_dentry(dir); if (!IS_ERR(demd)) { if (demd->d_inode) real = 0; dput(demd); } #ifdef UMSDOS_DEBUG_VERBOSE printk ("umsdos_solve_hlink: dir %s/%s, name=%s, real=%d\n", dir->d_parent->d_name.name, dir->d_name.name, start, real); #endif dentry_dst = umsdos_lookup_dentry(dir, start, len, real); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ if (real) d_drop(dir); dput (dir); if (IS_ERR(dentry_dst)) break; /* not found? stop search ... */ if (!dentry_dst->d_inode) { break; } if (*pt == '\0') /* we're finished! 
*/ break; } /* end while */ if (!IS_ERR(dentry_dst)) { struct inode *inode = dentry_dst->d_inode; if (inode) { inode->u.umsdos_i.i_is_hlink = 1; #ifdef UMSDOS_DEBUG_VERBOSE printk ("umsdos_solve_hlink: resolved link %s/%s, ino=%ld\n", dentry_dst->d_parent->d_name.name, dentry_dst->d_name.name, inode->i_ino); #endif } else { #ifdef UMSDOS_DEBUG_VERBOSE printk ("umsdos_solve_hlink: resolved link %s/%s negative!\n", dentry_dst->d_parent->d_name.name, dentry_dst->d_name.name); #endif } } else printk(KERN_WARNING "umsdos_solve_hlink: err=%ld\n", PTR_ERR(dentry_dst)); kfree (path); out: dput(hlink); /* original hlink no longer needed */ return dentry_dst; async_fail: dentry_dst = ERR_PTR(-EIO); out_release: page_cache_release(page); goto out; }