/*
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 */
void swapin_readahead(swp_entry_t entry)
{
	int i, num;
	struct page *new_page;
	unsigned long offset;

	/*
	 * Get the number of handles we should do readahead io to. Also,
	 * grab temporary references on them, releasing them as io completes.
	 */
	num = valid_swaphandles(entry, &offset);
	for (i = 0; i < num; offset++, i++) {
		/* Don't block on I/O for read-ahead */
		if (atomic_read(&nr_async_pages) >=
		    pager_daemon.swap_cluster * (1 << page_cluster)) {
			while (i++ < num)
				swap_free(SWP_ENTRY(SWP_TYPE(entry), offset++));
			break;
		}
		/* Ok, do the async read-ahead now */
		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
		if (new_page != NULL)
			page_cache_release(new_page);
		swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
	}
	return;
}
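/*
 * A minimal userspace sketch of the clustering arithmetic the comment above
 * describes: readahead covers an aligned block of (1 << page_cluster)
 * entries around the faulting offset, clipped to the size of the swap area.
 * valid_swaphandles() is assumed to do roughly this (plus taking the
 * temporary references); the helper name and parameters below are
 * illustrative only, not the kernel's.
 */
#include <stdio.h>

static int page_cluster = 3;			/* assumption: 8-entry clusters */

/* Return how many entries to read ahead and the aligned start offset. */
static int sketch_valid_swaphandles(unsigned long entry_offset,
				    unsigned long max,	/* entries in this swap area */
				    unsigned long *start)
{
	unsigned long cluster = 1UL << page_cluster;

	*start = entry_offset & ~(cluster - 1);	/* align down to a cluster boundary */
	if (*start == 0) {
		*start = 1;			/* offset 0 holds the swap header */
		cluster--;
	}
	if (*start + cluster > max)
		cluster = max - *start;
	return (int) cluster;
}

int main(void)
{
	unsigned long start;
	int num = sketch_valid_swaphandles(37, 1024, &start);

	printf("readahead %d entries starting at offset %lu\n", num, start);
	return 0;
}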
/*
 * Trying to stop swapping from a file is fraught with races, so
 * we repeat quite a bit here when we have to pause. swapoff()
 * isn't exactly timing-critical, so who cares (but this is /really/
 * inefficient, ugh).
 *
 * We return 1 after having slept, which makes the process start over
 * from the beginning for this process..
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		unsigned long page_nr = MAP_NR(pte_page(pte));
		if (page_nr >= MAP_NR(high_memory))
			return 0;
		if (!in_swap_cache(page_nr))
			return 0;
		if (SWP_TYPE(in_swap_cache(page_nr)) != type)
			return 0;
		delete_from_swap_cache(page_nr);
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
#if 0 /* Is this really needed here, hasn't it been solved elsewhere? */
	flush_page_to_ram(page);
#endif
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	flush_tlb_page(vma, address);
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
void swap_duplicate(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return;
	offset = SWP_OFFSET(entry);
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		return;
	if (type >= nr_swapfiles) {
		printk("Trying to duplicate nonexistent swap-page\n");
		return;
	}
	p = type + swap_info;
	if (offset >= p->max) {
		printk("swap_duplicate: weirdness\n");
		return;
	}
	if (!p->swap_map[offset]) {
		printk("swap_duplicate: trying to duplicate unused page\n");
		return;
	}
	p->swap_map[offset]++;
	return;
}
/*
 * Verify that a swap entry is valid and increment its swap map count.
 *
 * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
 * "permanent", but will be reclaimed by the next swapoff.
 */
int swap_duplicate(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;
	int result = 0;

	if (!entry)
		goto out;
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		goto out;
	if (type >= nr_swapfiles)
		goto bad_file;
	p = type + swap_info;
	offset = SWP_OFFSET(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_unused;
	/*
	 * Entry is valid, so increment the map count.
	 */
	if (p->swap_map[offset] < SWAP_MAP_MAX)
		p->swap_map[offset]++;
	else {
		static int overflow = 0;
		if (overflow++ < 5)
			printk(KERN_WARNING
				"swap_duplicate: entry %08lx map count=%d\n",
				entry, p->swap_map[offset]);
		p->swap_map[offset] = SWAP_MAP_MAX;
	}
	result = 1;
#ifdef DEBUG_SWAP
	printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n",
	       entry, p->swap_map[offset]);
#endif
out:
	return result;

bad_file:
	printk(KERN_ERR
		"swap_duplicate: entry %08lx, nonexistent swap file\n", entry);
	goto out;
bad_offset:
	printk(KERN_ERR
		"swap_duplicate: entry %08lx, offset exceeds max\n", entry);
	goto out;
bad_unused:
	printk(KERN_ERR
		"swap_duplicate at %8p: entry %08lx, unused page\n",
	       __builtin_return_address(0), entry);
	goto out;
}
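/*
 * A minimal userspace sketch (not kernel code) of the saturating map count
 * used by swap_duplicate() above: counts increment normally until they hit
 * SWAP_MAP_MAX, after which the entry is pinned as "permanent" and only a
 * later swapoff reclaims it.  The array, the helper name and the value of
 * SKETCH_SWAP_MAP_MAX here are illustrative assumptions.
 */
#include <stdio.h>

#define SKETCH_SWAP_MAP_MAX	0x7fff		/* illustrative limit */

static unsigned short sketch_swap_map[8];

/* Increment a map count, saturating at the limit.  Returns 1 on success. */
static int sketch_duplicate(unsigned long offset)
{
	if (!sketch_swap_map[offset])
		return 0;			/* duplicating an unused page is an error */
	if (sketch_swap_map[offset] < SKETCH_SWAP_MAP_MAX)
		sketch_swap_map[offset]++;
	else
		sketch_swap_map[offset] = SKETCH_SWAP_MAP_MAX;	/* stay pinned */
	return 1;
}

int main(void)
{
	sketch_swap_map[3] = 1;			/* entry already in use once */
	sketch_duplicate(3);			/* e.g. a fork() sharing the swapped pte */
	printf("count now %u\n", sketch_swap_map[3]);
	return 0;
}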
int swap_count(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;
	int retval = 0;

	if (!entry)
		goto bad_entry;
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		goto out;
	if (type >= nr_swapfiles)
		goto bad_file;
	p = type + swap_info;
	offset = SWP_OFFSET(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_unused;
	retval = p->swap_map[offset];
#ifdef DEBUG_SWAP
	printk("DebugVM: swap_count(entry %08lx, count %d)\n",
	       entry, retval);
#endif
out:
	return retval;

bad_entry:
	printk(KERN_ERR "swap_count: null entry!\n");
	goto out;
bad_file:
	printk(KERN_ERR "swap_count: entry %08lx, nonexistent swap file!\n", entry);
	goto out;
bad_offset:
	printk(KERN_ERR "swap_count: entry %08lx, offset exceeds max!\n", entry);
	goto out;
bad_unused:
	printk(KERN_ERR "swap_count at %8p: entry %08lx, unused page!\n",
	       __builtin_return_address(0), entry);
	goto out;
}
int add_to_swap_cache(unsigned long index, unsigned long entry)
{
	struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];

#ifdef SWAP_CACHE_INFO
	swap_cache_add_total++;
#endif
	if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
		entry = xchg(swap_cache + index, entry);
		if (entry) {
			printk("swap_cache: replacing non-NULL entry\n");
		}
#ifdef SWAP_CACHE_INFO
		swap_cache_add_success++;
#endif
		return 1;
	}
	return 0;
}
/*
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time. We also make sure to queue
 * the 'original' request together with the readahead ones...
 */
void swapin_readahead(swp_entry_t entry)
{
	int i, num;
	struct page *new_page;
	unsigned long offset;

	/*
	 * Get the number of handles we should do readahead io to.
	 */
	num = valid_swaphandles(entry, &offset);
	for (i = 0; i < num; offset++, i++) {
		/* Ok, do the async read-ahead now */
		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
		if (!new_page)
			break;
		page_cache_release(new_page);
	}
	return;
}
/* This is run when asynchronous page I/O has completed. */
void swap_after_unlock_page (unsigned long entry)
{
	unsigned long type, offset;
	struct swap_info_struct * p;

	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles) {
		printk("swap_after_unlock_page: bad swap-device\n");
		return;
	}
	p = &swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("swap_after_unlock_page: weirdness\n");
		return;
	}
	if (!clear_bit(offset,p->swap_lockmap))
		printk("swap_after_unlock_page: lock already cleared\n");
	wake_up(&lock_queue);
}
static struct swap_info_struct * swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;
	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = & swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = SWP_OFFSET(entry);
	if (offset >= p->max)
		goto bad_offset;
	if (!p->swap_map[offset])
		goto bad_free;
	swap_list_lock();
	if (p->prio > swap_info[swap_list.next].prio)
		swap_list.next = type;
	swap_device_lock(p);
	return p;

bad_free:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
	goto out;
bad_offset:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
	goto out;
bad_device:
	printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
	goto out;
bad_nofile:
	printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
out:
	return NULL;
}
void swap_free(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return;
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		return;
	if (type >= nr_swapfiles) {
		printk("Trying to free nonexistent swap-page\n");
		return;
	}
	p = & swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("swap_free: weirdness\n");
		return;
	}
	if (!(p->flags & SWP_USED)) {
		printk("Trying to free swap from unused swap-device\n");
		return;
	}
	if (offset < p->lowest_bit)
		p->lowest_bit = offset;
	if (offset > p->highest_bit)
		p->highest_bit = offset;
	if (!p->swap_map[offset])
		printk("swap_free: swap-space map null (entry %08lx)\n", entry);
	else if (p->swap_map[offset] == SWAP_MAP_RESERVED)
		printk("swap_free: swap-space reserved (entry %08lx)\n", entry);
	else if (!--p->swap_map[offset])
		nr_swap_pages++;
	if (p->prio > swap_info[swap_list.next].prio)
		swap_list.next = swap_list.head;
}
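/*
 * A tiny sketch of the lowest_bit/highest_bit hints that swap_free() keeps
 * up to date above: freeing an entry widens the window the allocator scans
 * for free slots, so regions of swap_map[] known to be fully in use are not
 * rescanned.  The structure and function names are illustrative.
 */
#include <stdio.h>

struct sketch_swap_info {
	unsigned long lowest_bit;	/* no free slot below this offset */
	unsigned long highest_bit;	/* no free slot above this offset */
};

static void sketch_note_freed(struct sketch_swap_info *p, unsigned long offset)
{
	if (offset < p->lowest_bit)
		p->lowest_bit = offset;
	if (offset > p->highest_bit)
		p->highest_bit = offset;
}

int main(void)
{
	struct sketch_swap_info si = { .lowest_bit = 100, .highest_bit = 200 };

	sketch_note_freed(&si, 7);	/* freeing offset 7 widens the window */
	printf("scan window now [%lu, %lu]\n", si.lowest_bit, si.highest_bit);
	return 0;
}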
/*
 * Reads or writes a swap page.
 * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
 *
 * Important prevention of race condition: the first thing we do is set a lock
 * on this swap page, which lasts until I/O completes. This way a
 * write_swap_page(entry) immediately followed by a read_swap_page(entry)
 * on the same entry will first complete the write_swap_page(). Fortunately,
 * not more than one write_swap_page() request can be pending per entry. So
 * all races the caller must catch are: multiple read_swap_page() requests
 * on the same entry.
 */
void rw_swap_page(int rw, unsigned long entry, char * buf, int wait)
{
	unsigned long type, offset;
	struct swap_info_struct * p;
	struct page *page;

	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles) {
		printk("Internal error: bad swap-device\n");
		return;
	}
	p = &swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("rw_swap_page: weirdness\n");
		return;
	}
	if (p->swap_map && !p->swap_map[offset]) {
		printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
		return;
	}
	if (!(p->flags & SWP_USED)) {
		printk("Trying to swap to unused swap-device\n");
		return;
	}
	/* Make sure we are the only process doing I/O with this swap page. */
	while (set_bit(offset,p->swap_lockmap)) {
		run_task_queue(&tq_disk);
		sleep_on(&lock_queue);
	}
	if (rw == READ)
		kstat.pswpin++;
	else
		kstat.pswpout++;
	page = mem_map + MAP_NR(buf);
	atomic_inc(&page->count);
	wait_on_page(page);
	if (p->swap_device) {
		if (!wait) {
			set_bit(PG_free_after, &page->flags);
			set_bit(PG_decr_after, &page->flags);
			set_bit(PG_swap_unlock_after, &page->flags);
			page->swap_unlock_entry = entry;
			atomic_inc(&nr_async_pages);
		}
		ll_rw_page(rw,p->swap_device,offset,buf);
		/*
		 * NOTE! We don't decrement the page count if we
		 * don't wait - that will happen asynchronously
		 * when the IO completes.
		 */
		if (!wait)
			return;
		wait_on_page(page);
	} else if (p->swap_file) {
		struct inode *swapf = p->swap_file;
		unsigned int zones[PAGE_SIZE/512];
		int i;
		if (swapf->i_op->bmap == NULL
			&& swapf->i_op->smap != NULL){
			/*
			   With MS-DOS, we use msdos_smap which returns
			   a sector number (not a cluster or block number).
			   It is a patch to enable the UMSDOS project.
			   Other people are working on a better solution.

			   It sounds like ll_rw_swap_file defined its
			   operation size (sector size) based on PAGE_SIZE
			   and the number of blocks to read. So using bmap
			   or smap should work even if smap will require
			   more blocks.
			*/
			int j;
			unsigned int block = offset << 3;

			for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
				if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					return;
				}
			}
		}else{
			int j;
			unsigned int block = offset
				<< (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);

			for (i=0, j=0; j< PAGE_SIZE ; i++, j += swapf->i_sb->s_blocksize)
				if (!(zones[i] = bmap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					return;
				}
		}
		ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
	} else
		printk("rw_swap_page: no swap file or device\n");
	atomic_dec(&page->count);
	if (offset && !clear_bit(offset,p->swap_lockmap))
		printk("rw_swap_page: lock already cleared\n");
	wake_up(&lock_queue);
}
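/*
 * A minimal userspace sketch (C11 atomics) of the per-offset bit lock that
 * rw_swap_page() takes in swap_lockmap before starting I/O: whoever sets the
 * bit first owns that swap page until the I/O completes; later readers or
 * writers wait until the bit is cleared (the kernel sleeps on lock_queue,
 * this sketch simply spins).  All names here are illustrative.
 */
#include <stdatomic.h>
#include <stdio.h>

#define SKETCH_BITS_PER_WORD	(8 * sizeof(unsigned long))
#define SKETCH_LOCKMAP_WORDS	16

static atomic_ulong sketch_lockmap[SKETCH_LOCKMAP_WORDS];

/* Atomically set bit 'offset'; return the bit's previous value. */
static int sketch_test_and_set(unsigned long offset)
{
	unsigned long mask = 1UL << (offset % SKETCH_BITS_PER_WORD);
	unsigned long old = atomic_fetch_or(&sketch_lockmap[offset / SKETCH_BITS_PER_WORD], mask);

	return (old & mask) != 0;
}

static void sketch_clear_bit(unsigned long offset)
{
	unsigned long mask = 1UL << (offset % SKETCH_BITS_PER_WORD);

	atomic_fetch_and(&sketch_lockmap[offset / SKETCH_BITS_PER_WORD], ~mask);
}

int main(void)
{
	unsigned long offset = 42;

	while (sketch_test_and_set(offset))
		;	/* the kernel version sleeps on lock_queue here */
	/* ... the page I/O for this swap offset would run here ... */
	sketch_clear_bit(offset);
	printf("offset %lu locked and released\n", offset);
	return 0;
}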
static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
{
	unsigned long type, offset;
	struct swap_info_struct * p;
	int zones[PAGE_SIZE/512];
	int zones_used;
	kdev_t dev = 0;
	int block_size;

#ifdef DEBUG_SWAP
	printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
		(rw == READ) ? "read" : "write",
		entry, (char *) page_address(page),
		atomic_read(&page->count), wait ? "wait" : "nowait");
#endif

	type = SWP_TYPE(entry);
	if (type >= nr_swapfiles) {
		printk("Internal error: bad swap-device\n");
		return;
	}

	/* Don't allow too many pending pages in flight.. */
	if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster)
		wait = 1;

	p = &swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("rw_swap_page: weirdness\n");
		return;
	}
	if (p->swap_map && !p->swap_map[offset]) {
		printk(KERN_ERR "rw_swap_page: "
			"Trying to %s unallocated swap (%08lx)\n",
			(rw == READ) ? "read" : "write", entry);
		return;
	}
	if (!(p->flags & SWP_USED)) {
		printk(KERN_ERR "rw_swap_page: "
			"Trying to swap to unused swap-device\n");
		return;
	}

	if (!PageLocked(page)) {
		printk(KERN_ERR "VM: swap page is unlocked\n");
		return;
	}

	if (PageSwapCache(page)) {
		/* Make sure we are the only process doing I/O with this swap page. */
		if (test_and_set_bit(offset, p->swap_lockmap)) {
			struct wait_queue __wait;

			__wait.task = current;
			add_wait_queue(&lock_queue, &__wait);
			for (;;) {
				current->state = TASK_UNINTERRUPTIBLE;
				mb();
				if (!test_and_set_bit(offset, p->swap_lockmap))
					break;
				run_task_queue(&tq_disk);
				schedule();
			}
			current->state = TASK_RUNNING;
			remove_wait_queue(&lock_queue, &__wait);
		}

		/*
		 * Make sure that we have a swap cache association for this
		 * page. We need this to find which swap page to unlock once
		 * the swap IO has completed to the physical page. If the page
		 * is not already in the cache, just overload the offset entry
		 * as if it were: we are not allowed to manipulate the inode
		 * hashing for locked pages.
		 */
		if (page->offset != entry) {
			printk ("swap entry mismatch\n");
			return;
		}
	}
	if (rw == READ) {
		clear_bit(PG_uptodate, &page->flags);
		kstat.pswpin++;
	} else
		kstat.pswpout++;

	atomic_inc(&page->count);
	if (p->swap_device) {
		zones[0] = offset;
		zones_used = 1;
		dev = p->swap_device;
		block_size = PAGE_SIZE;
	} else if (p->swap_file) {
		struct inode *swapf = p->swap_file->d_inode;
		int i;
		if (swapf->i_op->bmap == NULL
			&& swapf->i_op->smap != NULL){
			/*
			   With MS-DOS, we use msdos_smap which returns
			   a sector number (not a cluster or block number).
			   It is a patch to enable the UMSDOS project.
			   Other people are working on a better solution.

			   It sounds like ll_rw_swap_file defined its
			   operation size (sector size) based on PAGE_SIZE
			   and the number of blocks to read. So using bmap
			   or smap should work even if smap will require
			   more blocks.
			*/
			int j;
			unsigned int block = offset << 3;

			for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
				if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					return;
				}
			}
			block_size = 512;
		}else{
			int j;
			unsigned int block = offset
				<< (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);

			block_size = swapf->i_sb->s_blocksize;
			for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
				if (!(zones[i] = bmap(swapf,block++))) {
					printk("rw_swap_page: bad swap file\n");
					return;
				}
		}
		zones_used = i;
		dev = swapf->i_dev;
	} else {
		printk(KERN_ERR "rw_swap_page: no swap file or device\n");
		/* Do some cleaning up so if this ever happens we can hopefully
		 * trigger controlled shutdown.
		 */
		if (PageSwapCache(page)) {
			if (!test_and_clear_bit(offset,p->swap_lockmap))
				printk("swap_after_unlock_page: lock already cleared\n");
			wake_up(&lock_queue);
		}
		atomic_dec(&page->count);
		return;
	}
	if (!wait) {
		set_bit(PG_decr_after, &page->flags);
		atomic_inc(&nr_async_pages);
	}
	if (PageSwapCache(page)) {
		/* only lock/unlock swap cache pages! */
		set_bit(PG_swap_unlock_after, &page->flags);
	}
	set_bit(PG_free_after, &page->flags);

	/* block_size == PAGE_SIZE/zones_used */
	brw_page(rw, page, dev, zones, block_size, 0);

	/* Note! For consistency we do all of the logic,
	 * decrementing the page count, and unlocking the page in the
	 * swap lock map - in the IO completion handler.
	 */
	if (!wait)
		return;
	wait_on_page(page);

	/* This shouldn't happen, but check to be sure. */
	if (atomic_read(&page->count) == 0)
		printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");

#ifdef DEBUG_SWAP
	printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
		(rw == READ) ? "read" : "write",
		(char *) page_address(page),
		atomic_read(&page->count));
#endif
}
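/*
 * A short sketch of the zone arithmetic rw_swap_page_base() sets up for
 * brw_page() above: a swap file is mapped block by block, so one page is
 * covered by PAGE_SIZE / block_size on-disk blocks (zones), while a swap
 * partition is addressed as a single PAGE_SIZE-sized "block" at the swap
 * offset.  The values below are illustrative.
 */
#include <stdio.h>

#define SKETCH_PAGE_SIZE	4096

int main(void)
{
	/* swap file on e.g. a 1k-block filesystem */
	int block_size = 1024;
	int zones_used = SKETCH_PAGE_SIZE / block_size;
	printf("swap file:      one page = %d zones of %d bytes\n",
	       zones_used, block_size);

	/* swap partition: zones[0] is just the page's swap offset */
	block_size = SKETCH_PAGE_SIZE;
	zones_used = 1;
	printf("swap partition: one page = %d zone  of %d bytes\n",
	       zones_used, block_size);
	return 0;
}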