/*
 * Write fault on a page that is present but write-protected.
 *
 * If can_share_swap_page() allows it (checked while we hold the page lock),
 * the existing pte is simply upgraded to writable/dirty/young.  Otherwise
 * the contents are copied into a newly allocated page and the pte is
 * switched to the copy, provided the pte did not change while
 * page_table_lock was dropped for the allocation.
 *
 * Protection checks are assumed to have been done by the caller (normally
 * the low-level page fault routine), so the pte is made writable without
 * further checks.  It is marked dirty immediately, before the write has
 * actually taken place.
 *
 * Locking: entered with the mm semaphore and mm->page_table_lock held;
 * page_table_lock has been released on every return path.
 *
 * Returns 1 (minor fault) when the fault was handled, -1 when the pte
 * refers to an invalid page or no page could be allocated for the copy.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *orig_page = pte_page(pte);
	struct page *copy;
	struct page *drop;

	if (!VALID_PAGE(orig_page)) {
		spin_unlock(&mm->page_table_lock);
		printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)orig_page);
		return -1;
	}

	/* In-place reuse is only considered when the page lock was taken. */
	if (!TryLockPage(orig_page)) {
		int reuse_ok = can_share_swap_page(orig_page);

		unlock_page(orig_page);
		if (reuse_ok) {
#ifndef CONFIG_SUPERH	/* Not needed for VIPT cache */
			flush_cache_page(vma, address);
#endif
			establish_pte(vma, address, page_table,
				      pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
			spin_unlock(&mm->page_table_lock);
			return 1;	/* Minor fault */
		}
	}

	/*
	 * A copy is required.  Take a reference on the original before
	 * dropping the lock for the allocation.
	 */
	page_cache_get(orig_page);
	spin_unlock(&mm->page_table_lock);

	copy = alloc_page(GFP_HIGHUSER);
	if (!copy) {
		page_cache_release(orig_page);
		return -1;
	}
	copy_cow_page(orig_page, copy, address);

	/*
	 * The lock was dropped, so the pte must be re-checked.  If it is
	 * unchanged the copy is installed and the original gets an extra
	 * release below; otherwise the unused copy is released instead.
	 */
	spin_lock(&mm->page_table_lock);
	drop = copy;
	if (pte_same(*page_table, pte)) {
		if (PageReserved(orig_page))
			++mm->rss;
		break_cow(vma, copy, address, page_table);
		lru_cache_add(copy);
		drop = orig_page;
	}
	spin_unlock(&mm->page_table_lock);

	page_cache_release(drop);
	page_cache_release(orig_page);	/* pairs with page_cache_get() above */
	return 1;	/* Minor fault */
}
/*
 * Add @page to the address space @mapping at @offset by way of
 * _add_to_page_cache(); when that succeeds the page is also handed to
 * lru_cache_add().
 *
 * Returns 0 on success, or -1 if _add_to_page_cache() returned a negative
 * value (in which case the page is not passed to lru_cache_add()).
 */
int add_to_page_cache(struct page *page, struct address_space *mapping, unsigned int offset)
{
	if (_add_to_page_cache(page, mapping, offset) >= 0) {
		lru_cache_add(page);
		return 0;
	}

	return -1;
}
/*
 * Hand @page to the LRU-add helper matching its PG_active state, then drop
 * one reference with put_page().
 */
static inline void move_to_lru(struct page *page)
{
	if (!PageActive(page)) {
		lru_cache_add(page);
	} else {
		/*
		 * lru_cache_add_active checks that the PG_active bit is off,
		 * so it has to be cleared before the call.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	}

	put_page(page);
}
/** * lru_cache_add_active_or_unevictable * @page: the page to be added to LRU * @vma: vma in which page is mapped for determining reclaimability * * Place @page on the active or unevictable LRU list, depending on its * evictability. Note that if the page is not evictable, it goes * directly back onto it's zone's unevictable list, it does NOT use a * per cpu pagevec. */ void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma) { VM_BUG_ON_PAGE(PageLRU(page), page); if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) SetPageActive(page); else if (!TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte * lock is held(spinlock), which implies preemption disabled. */ __mod_zone_page_state(page_zone(page), NR_MLOCK, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } lru_cache_add(page); }
/*
 * Populate an empty pte for a mapping that has no nopage handler.
 *
 * A read fault maps ZERO_PAGE read-only.  A write fault allocates and
 * clears a private page and maps it writable and dirty.
 *
 * Locking: entered with the MM semaphore and mm->page_table_lock held, to
 * protect against concurrent faults in multithreaded programs.  The
 * spinlock is dropped around the allocation and has been released on every
 * return path.
 *
 * Returns 1 (minor fault) when the pte was installed or when another
 * thread filled the pte while the lock was dropped; -1 when no page could
 * be allocated.
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
	/* Default: read-only mapping of ZERO_PAGE. */
	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* A write access gets its own private page instead. */
	if (write_access) {
		struct page *fresh;

		spin_unlock(&mm->page_table_lock);

		fresh = alloc_page(GFP_HIGHUSER);
		if (!fresh)
			return -1;
		clear_user_highpage(fresh, addr);

		spin_lock(&mm->page_table_lock);

		/* The pte was filled in while the lock was dropped. */
		if (!pte_none(*page_table)) {
			page_cache_release(fresh);
			spin_unlock(&mm->page_table_lock);
			return 1;
		}

		++mm->rss;
		flush_page_to_ram(fresh);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(fresh, vma->vm_page_prot)));
		lru_cache_add(fresh);
		mark_page_accessed(fresh);
	}

	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;	/* Minor fault */
}
/* * This is a little more tricky than the file -> pipe splicing. There are * basically three cases: * * - Destination page already exists in the address space and there * are users of it. For that case we have no other option that * copying the data. Tough luck. * - Destination page already exists in the address space, but there * are no users of it. Make sure it's uptodate, then drop it. Fall * through to last case. * - Destination page does not exist, we can add the pipe page to * the page cache and avoid the copy. * * If asked to move pages to the output file (SPLICE_F_MOVE is set in * sd->flags), we attempt to migrate pages from the pipe to the output * file address space page cache. This is possible if no one else has * the pipe page referenced outside of the pipe and page cache. If * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. */ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; pgoff_t index; int ret; /* * make sure the data in this buffer is uptodate */ ret = buf->ops->pin(pipe, buf); if (unlikely(ret)) return ret; index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; this_len = sd->len; if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; /* * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full * page. */ if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* * If steal succeeds, buf->page is now pruned from the * pagecache and we can reuse it. The page will also be * locked on successful return. 
*/ if (buf->ops->steal(pipe, buf)) goto find_page; page = buf->page; if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { unlock_page(page); goto find_page; } page_cache_get(page); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add(page); } else { find_page: page = find_lock_page(mapping, index); if (!page) { ret = -ENOMEM; page = page_cache_alloc_cold(mapping); if (unlikely(!page)) goto out_ret; /* * This will also lock the page */ ret = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); if (unlikely(ret)) goto out; } /* * We get here with the page locked. If the page is also * uptodate, we don't need to do more. If it isn't, we * may need to bring it in if we are not going to overwrite * the full page. */ if (!PageUptodate(page)) { if (this_len < PAGE_CACHE_SIZE) { ret = mapping->a_ops->readpage(file, page); if (unlikely(ret)) goto out; lock_page(page); if (!PageUptodate(page)) { /* * Page got invalidated, repeat. */ if (!page->mapping) { unlock_page(page); page_cache_release(page); goto find_page; } ret = -EIO; goto out; } } else SetPageUptodate(page); } } ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); if (unlikely(ret)) { loff_t isize = i_size_read(mapping->host); if (ret != AOP_TRUNCATED_PAGE) unlock_page(page); page_cache_release(page); if (ret == AOP_TRUNCATED_PAGE) goto find_page; /* * prepare_write() may have instantiated a few blocks * outside i_size. Trim these off again. */ if (sd->pos + this_len > isize) vmtruncate(mapping->host, isize); goto out_ret; } if (buf->page != page) { /* * Careful, ->map() uses KM_USER0! */ char *src = buf->ops->map(pipe, buf, 1); char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); if (!ret) { /* * Return the number of bytes written and mark page as * accessed, we are now done! 
*/ ret = this_len; mark_page_accessed(page); balance_dirty_pages_ratelimited(mapping); } else if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; } out: page_cache_release(page); unlock_page(page); out_ret: return ret; }
/*
 * do_no_page() creates a mapping for a pte that is currently empty.
 *
 * Mappings without a nopage handler are passed on to do_anonymous_page().
 * Otherwise the handler supplies the page.  For a write access to a
 * mapping that is not VM_SHARED, the page is copied into a newly allocated
 * private page straight away, which avoids a second fault.
 *
 * As this is called only for pages that do not currently exist, we do not
 * need to flush old virtual caches or the TLB.
 *
 * Locking: called with the MM semaphore and the page table spinlock held;
 * the spinlock is released before the nopage handler runs and is not held
 * on return.
 *
 * Returns, on the nopage path: 0 if the handler had no page available
 * (SIGBUS), -1 on out-of-memory, 1 if another thread filled the pte first,
 * and 2 (major fault) once the pte has been installed.  Without a nopage
 * handler the result of do_anonymous_page() is returned.
 */
static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
	unsigned long address, int write_access, pte_t *page_table)
{
	struct page *fault_page;
	pte_t entry;

	if (!vma->vm_ops || !vma->vm_ops->nopage)
		return do_anonymous_page(mm, vma, page_table, write_access, address);

	spin_unlock(&mm->page_table_lock);

	fault_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
	if (fault_page == NULL)	/* no page was available -- SIGBUS */
		return 0;
	if (fault_page == NOPAGE_OOM)
		return -1;

	/*
	 * Early C-O-W break: a write to a non-shared mapping gets a private
	 * copy right now.
	 */
	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		struct page *private_copy = alloc_page(GFP_HIGHUSER);

		if (!private_copy) {
			page_cache_release(fault_page);
			return -1;
		}
		copy_highpage(private_copy, fault_page);
		page_cache_release(fault_page);
		lru_cache_add(private_copy);
		fault_page = private_copy;
	}

	spin_lock(&mm->page_table_lock);

	/* One of our sibling threads was faster, back out. */
	if (!pte_none(*page_table)) {
		page_cache_release(fault_page);
		spin_unlock(&mm->page_table_lock);
		return 1;
	}

	/*
	 * This silly early PAGE_DIRTY setting removes a race
	 * due to the bad i386 page protection. But it's valid
	 * for other architectures too.
	 *
	 * Note that if write_access is true, we either now have
	 * an exclusive copy of the page, or this is a shared mapping,
	 * so we can make it writable and dirty to avoid having to
	 * handle that later.
	 */
	++mm->rss;
	flush_page_to_ram(fault_page);
	flush_icache_page(vma, fault_page);
	entry = mk_pte(fault_page, vma->vm_page_prot);
	if (write_access)
		entry = pte_mkwrite(pte_mkdirty(entry));
	set_pte(page_table, entry);

	/* no need to invalidate: a not-present page shouldn't be cached */
	update_mmu_cache(vma, address, entry);
	spin_unlock(&mm->page_table_lock);
	return 2;	/* Major fault */
}