/**
 * drm_gem_put_pages - helper to free backing pages for a GEM object
 * @obj: obj in question
 * @pages: pages to free
 * @dirty: if true, pages will be marked as dirty
 * @accessed: if true, the pages will be marked as accessed
 */
void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
		       bool dirty, bool accessed)
{
	int i, npages;

	/* We already BUG_ON() for non-page-aligned sizes in
	 * drm_gem_object_init(), so we should never hit this unless
	 * driver author is doing something really wrong:
	 */
	WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0);

	npages = obj->size >> PAGE_SHIFT;

	for (i = 0; i < npages; i++) {
		if (dirty)
			set_page_dirty(pages[i]);

		if (accessed)
			mark_page_accessed(pages[i]);

		/* Undo the reference we took when populating the table */
		page_cache_release(pages[i]);
	}

	drm_free_large(pages);
}
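/*
 * Hypothetical usage sketch (not part of the source above): a driver that
 * cached the array returned by drm_gem_get_pages() hands it back through
 * drm_gem_put_pages() once the object's backing store is torn down.  The
 * "struct my_gem_object" wrapper and its "pages" field are illustrative
 * assumptions, not DRM core API.
 */
struct my_gem_object {
	struct drm_gem_object base;
	struct page **pages;	/* filled in earlier by drm_gem_get_pages() */
};

static void my_gem_release_pages(struct my_gem_object *my_obj)
{
	if (!my_obj->pages)
		return;

	/* The GPU may have written to the pages: mark them dirty and accessed. */
	drm_gem_put_pages(&my_obj->base, my_obj->pages, true, true);
	my_obj->pages = NULL;
}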
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct vm_page *page;

	BUG_ON(obj->userptr.work != NULL);
	__i915_gem_userptr_set_active(obj, false);

	if (obj->mm.madv != I915_MADV_WILLNEED)
		obj->mm.dirty = false;

	i915_gem_gtt_finish_pages(obj, pages);

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}
int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
		       char *buf, loff_t *pos, unsigned size)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	unsigned long index = *pos / PAGE_CACHE_SIZE;
	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
	unsigned copied = 0;
	unsigned amt;
	struct page *page;
	void *p;

	do {
		amt = size - copied;
		if (offset + size > PAGE_CACHE_SIZE)
			amt = PAGE_CACHE_SIZE - offset;
		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);
		p = kmap_atomic(page, KM_USER0);
		memcpy(buf + copied, p + offset, amt);
		kunmap_atomic(p, KM_USER0);
		mark_page_accessed(page);
		page_cache_release(page);
		copied += amt;
		index++;
		offset = 0;
	} while (copied < size);

	(*pos) += size;

	return size;
}
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	int i;
	int ret = -ENOMEM;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	if (!persistent_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (IS_ERR(swap_storage)) {
			pr_err("Failed allocating swap storage\n");
			return PTR_ERR(swap_storage);
		}
	} else {
		swap_storage = persistent_swap_storage;
	}

	swap_space = swap_storage->f_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		gfp_t gfp_mask = mapping_gfp_mask(swap_space);

		gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ?
			     __GFP_RETRY_MAYFAIL : 0);

		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;

		to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
		if (IS_ERR(to_page)) {
			ret = PTR_ERR(to_page);
			goto out_err;
		}
		copy_highpage(to_page, from_page);
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		put_page(to_page);
	}

	ttm_tt_unpopulate(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;

	return 0;
out_err:
	if (!persistent_swap_storage)
		fput(swap_storage);

	return ret;
}
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	int i;
	int ret = -ENOMEM;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	if (!persistent_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (unlikely(IS_ERR(swap_storage))) {
			pr_err("Failed allocating swap storage\n");
			return PTR_ERR(swap_storage);
		}
	} else
		swap_storage = persistent_swap_storage;

	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = shmem_read_mapping_page(swap_space, i);
		if (unlikely(IS_ERR(to_page))) {
			ret = PTR_ERR(to_page);
			goto out_err;
		}
		preempt_disable();
		copy_highpage(to_page, from_page);
		preempt_enable();
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		page_cache_release(to_page);
	}

	ttm->bdev->driver->ttm_tt_unpopulate(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;

	return 0;
out_err:
	if (!persistent_swap_storage)
		fput(swap_storage);

	return ret;
}
/*
 * unlocks pages after btrfs_file_write is done with them
 */
static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;

	for (i = 0; i < num_pages; i++) {
		if (!pages[i])
			break;
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		mark_page_accessed(pages[i]);
		page_cache_release(pages[i]);
	}
}
struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
	struct address_space *mapping = gl->gl_aspace->i_mapping;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct page *page;
	struct buffer_head *bh;
	unsigned int shift;
	unsigned long index;
	unsigned int bufnum;

	shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
	index = blkno >> shift;             /* convert block to page */
	bufnum = blkno - (index << shift);  /* block buf index within page */

	if (create) {
		for (;;) {
			page = grab_cache_page(mapping, index);
			if (page)
				break;
			yield();
		}
	} else {
		page = find_lock_page(mapping, index);
		if (!page)
			return NULL;
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);

	/* Locate header for our buffer within our page */
	for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
		/* Do nothing */;
	get_bh(bh);

	if (!buffer_mapped(bh))
		map_bh(bh, sdp->sd_vfs, blkno);

	unlock_page(page);
	mark_page_accessed(page);
	page_cache_release(page);

	return bh;
}
/*
 * We are called with the MM semaphore and page_table_lock
 * spinlock held to protect against concurrent faults in
 * multithreaded programs.
 */
static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
			     pte_t *page_table, int write_access, unsigned long addr)
{
	pte_t entry;

	/* Read-only mapping of ZERO_PAGE. */
	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));

	/* ..except if it's a write access */
	if (write_access) {
		struct page *page;

		/* Allocate our own private page. */
		spin_unlock(&mm->page_table_lock);

		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			goto no_mem;
		clear_user_highpage(page, addr);

		spin_lock(&mm->page_table_lock);
		if (!pte_none(*page_table)) {
			page_cache_release(page);
			spin_unlock(&mm->page_table_lock);
			return 1;
		}
		mm->rss++;
		flush_page_to_ram(page);
		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
		lru_cache_add(page);
		mark_page_accessed(page);
	}

	set_pte(page_table, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;	/* Minor fault */

no_mem:
	return -1;
}
/**
 * drm_gem_put_pages - helper to free backing pages for a GEM object
 * @obj: obj in question
 * @pages: pages to free
 * @dirty: if true, pages will be marked as dirty
 * @accessed: if true, the pages will be marked as accessed
 */
void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
			bool dirty, bool accessed)
{
	int i, npages;

	npages = obj->size >> PAGE_SHIFT;

	for (i = 0; i < npages; i++) {
		if (dirty)
			set_page_dirty(pages[i]);

		if (accessed)
			mark_page_accessed(pages[i]);

		/* Undo the reference we took when populating the table */
		page_cache_release(pages[i]);
	}

	drm_free_large(pages);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte) || pte_file(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_numa(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page)) {
		if ((flags & FOLL_DUMP) ||
		    !is_zero_pfn(pte_pfn(pte)))
			goto bad_page;
		page = pte_page(pte);
	}

	if (flags & FOLL_GET)
		get_page_foll(page);
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();  /* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
	pte_unmap_unlock(ptep, ptl);
	return page;
bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);

no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	void *from_virtual;
	void *to_virtual;
	int i;
	int ret = -ENOMEM;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	/*
	 * For user buffers, just unpin the pages, as there should be
	 * vma references.
	 */

	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
		ttm_tt_free_user_pages(ttm);
		ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
		ttm->swap_storage = NULL;
		return 0;
	}

	if (!persistant_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (unlikely(IS_ERR(swap_storage))) {
			printk(KERN_ERR "Failed allocating swap storage.\n");
			return PTR_ERR(swap_storage);
		}
	} else
		swap_storage = persistant_swap_storage;

	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = shmem_read_mapping_page(swap_space, i);
		if (unlikely(IS_ERR(to_page))) {
			ret = PTR_ERR(to_page);
			goto out_err;
		}
		preempt_disable();
#ifdef VMW_HAS_STACK_KMAP_ATOMIC
		from_virtual = kmap_atomic(from_page);
		to_virtual = kmap_atomic(to_page);
#else
		from_virtual = kmap_atomic(from_page, KM_USER0);
		to_virtual = kmap_atomic(to_page, KM_USER1);
#endif
		memcpy(to_virtual, from_virtual, PAGE_SIZE);
#ifdef VMW_HAS_STACK_KMAP_ATOMIC
		kunmap_atomic(to_virtual);
		kunmap_atomic(from_virtual);
#else
		kunmap_atomic(to_virtual, KM_USER1);
		kunmap_atomic(from_virtual, KM_USER0);
#endif
		preempt_enable();
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		page_cache_release(to_page);
	}

	ttm_tt_free_alloced_pages(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistant_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP;

	return 0;
out_err:
	if (!persistant_swap_storage)
		fput(swap_storage);

	return ret;
}
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma,
				  unsigned long address, pte_t * page_table,
				  struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		{
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no other option than
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	struct file *file = sd->file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	pgoff_t index;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->pin(pipe, buf);
	if (unlikely(ret))
		return ret;

	index = sd->pos >> PAGE_CACHE_SHIFT;
	offset = sd->pos & ~PAGE_CACHE_MASK;

	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

find_page:
	page = find_lock_page(mapping, index);
	if (!page) {
		ret = -ENOMEM;
		page = page_cache_alloc_cold(mapping);
		if (unlikely(!page))
			goto out_ret;

		/*
		 * This will also lock the page
		 */
		ret = add_to_page_cache_lru(page, mapping, index,
					    GFP_KERNEL);
		if (unlikely(ret))
			goto out;
	}

	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
	if (unlikely(ret)) {
		loff_t isize = i_size_read(mapping->host);

		if (ret != AOP_TRUNCATED_PAGE)
			unlock_page(page);
		page_cache_release(page);
		if (ret == AOP_TRUNCATED_PAGE)
			goto find_page;

		/*
		 * prepare_write() may have instantiated a few blocks
		 * outside i_size. Trim these off again.
		 */
		if (sd->pos + this_len > isize)
			vmtruncate(mapping->host, isize);

		goto out_ret;
	}

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}

	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
	if (ret) {
		if (ret == AOP_TRUNCATED_PAGE) {
			page_cache_release(page);
			goto find_page;
		}
		if (ret < 0)
			goto out;
		/*
		 * Partial write has happened, so 'ret' is already
		 * initialized to the number of bytes written; there is
		 * nothing more we have to do here.
		 */
	} else
		ret = this_len;

	/*
	 * Return the number of bytes written and mark page as
	 * accessed, we are now done!
	 */
	mark_page_accessed(page);
	balance_dirty_pages_ratelimited(mapping);
out:
	page_cache_release(page);
	unlock_page(page);
out_ret:
	return ret;
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;

		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();  /* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}
/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *	- Destination page already exists in the address space and there
 *	  are users of it. For that case we have no other option than
 *	  copying the data. Tough luck.
 *	- Destination page already exists in the address space, but there
 *	  are no users of it. Make sure it's uptodate, then drop it. Fall
 *	  through to last case.
 *	- Destination page does not exist, we can add the pipe page to
 *	  the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	struct file *file = sd->file;
	struct address_space *mapping = file->f_mapping;
	unsigned int offset, this_len;
	struct page *page;
	pgoff_t index;
	int ret;

	/*
	 * make sure the data in this buffer is uptodate
	 */
	ret = buf->ops->pin(pipe, buf);
	if (unlikely(ret))
		return ret;

	index = sd->pos >> PAGE_CACHE_SHIFT;
	offset = sd->pos & ~PAGE_CACHE_MASK;

	this_len = sd->len;
	if (this_len + offset > PAGE_CACHE_SIZE)
		this_len = PAGE_CACHE_SIZE - offset;

	/*
	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
	 * page.
	 */
	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
		/*
		 * If steal succeeds, buf->page is now pruned from the
		 * pagecache and we can reuse it. The page will also be
		 * locked on successful return.
		 */
		if (buf->ops->steal(pipe, buf))
			goto find_page;

		page = buf->page;
		if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) {
			unlock_page(page);
			goto find_page;
		}

		page_cache_get(page);

		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
			lru_cache_add(page);
	} else {
find_page:
		page = find_lock_page(mapping, index);
		if (!page) {
			ret = -ENOMEM;
			page = page_cache_alloc_cold(mapping);
			if (unlikely(!page))
				goto out_ret;

			/*
			 * This will also lock the page
			 */
			ret = add_to_page_cache_lru(page, mapping, index,
						    GFP_KERNEL);
			if (unlikely(ret))
				goto out;
		}

		/*
		 * We get here with the page locked. If the page is also
		 * uptodate, we don't need to do more. If it isn't, we
		 * may need to bring it in if we are not going to overwrite
		 * the full page.
		 */
		if (!PageUptodate(page)) {
			if (this_len < PAGE_CACHE_SIZE) {
				ret = mapping->a_ops->readpage(file, page);
				if (unlikely(ret))
					goto out;

				lock_page(page);

				if (!PageUptodate(page)) {
					/*
					 * Page got invalidated, repeat.
					 */
					if (!page->mapping) {
						unlock_page(page);
						page_cache_release(page);
						goto find_page;
					}
					ret = -EIO;
					goto out;
				}
			} else
				SetPageUptodate(page);
		}
	}

	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
	if (unlikely(ret)) {
		loff_t isize = i_size_read(mapping->host);

		if (ret != AOP_TRUNCATED_PAGE)
			unlock_page(page);
		page_cache_release(page);
		if (ret == AOP_TRUNCATED_PAGE)
			goto find_page;

		/*
		 * prepare_write() may have instantiated a few blocks
		 * outside i_size. Trim these off again.
		 */
		if (sd->pos + this_len > isize)
			vmtruncate(mapping->host, isize);

		goto out_ret;
	}

	if (buf->page != page) {
		/*
		 * Careful, ->map() uses KM_USER0!
		 */
		char *src = buf->ops->map(pipe, buf, 1);
		char *dst = kmap_atomic(page, KM_USER1);

		memcpy(dst + offset, src + buf->offset, this_len);
		flush_dcache_page(page);
		kunmap_atomic(dst, KM_USER1);
		buf->ops->unmap(pipe, buf, src);
	}

	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
	if (!ret) {
		/*
		 * Return the number of bytes written and mark page as
		 * accessed, we are now done!
		 */
		ret = this_len;
		mark_page_accessed(page);
		balance_dirty_pages_ratelimited(mapping);
	} else if (ret == AOP_TRUNCATED_PAGE) {
		page_cache_release(page);
		goto find_page;
	}
out:
	page_cache_release(page);
	unlock_page(page);
out_ret:
	return ret;
}
/*
 * We hold the mm semaphore and the page_table_lock on entry and
 * should release the pagetable lock on exit..
 */
static int do_swap_page(struct mm_struct * mm,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, pte_t orig_pte, int write_access)
{
	struct page *page;
	swp_entry_t entry = pte_to_swp_entry(orig_pte);
	pte_t pte;
	int ret = 1;

	spin_unlock(&mm->page_table_lock);
	page = lookup_swap_cache(entry);
	if (!page) {
		swapin_readahead(entry);
		page = read_swap_cache_async(entry);
		if (!page) {
			/*
			 * Back out if somebody else faulted in this pte while
			 * we released the page table lock.
			 */
			int retval;
			spin_lock(&mm->page_table_lock);
			retval = pte_same(*page_table, orig_pte) ? -1 : 1;
			spin_unlock(&mm->page_table_lock);
			return retval;
		}

		/* Had to read the page from swap area: Major fault */
		ret = 2;
	}

	mark_page_accessed(page);

	lock_page(page);

	/*
	 * Back out if somebody else faulted in this pte while we
	 * released the page table lock.
	 */
	spin_lock(&mm->page_table_lock);
	if (!pte_same(*page_table, orig_pte)) {
		spin_unlock(&mm->page_table_lock);
		unlock_page(page);
		page_cache_release(page);
		return 1;
	}

	/* The page isn't present yet, go ahead with the fault. */

	swap_free(entry);
	if (vm_swap_full())
		remove_exclusive_swap_page(page);

	mm->rss++;
	pte = mk_pte(page, vma->vm_page_prot);
	if (write_access && can_share_swap_page(page))
		pte = pte_mkdirty(pte_mkwrite(pte));
	unlock_page(page);

	flush_page_to_ram(page);
	flush_icache_page(vma, page);
	set_pte(page_table, pte);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, address, pte);
	spin_unlock(&mm->page_table_lock);
	return ret;
}
ssize_t GSFS_file_read(struct file *filp, char __user *charp, size_t len, loff_t *off)
{
	struct inode		*inode = filp->f_mapping->host;
	struct GSFS_inode	*inf = (struct GSFS_inode *)inode->i_private;
	sector_t		sec_start, sec_end, sec_len;
	struct page		**res, **res2, **restemp;
	unsigned int		*pn, *pn2, *pntemp;
	int			i, odirect = filp->f_flags & O_SYNC, j, lock, pncount, pn2count;
	unsigned long		pagestartbyte = 0, pageendbyte, bufstart, bytes_in_first_buf_page;
	unsigned long		pagelen;
	size_t			rlen;

	gwf(printk("<0>" "File read with inode :%lu size:%lu offset:%llu , off:%llu, charp:%lx, pid:%u\n",
		   inode->i_ino, len, *off, filp->f_pos, (unsigned long)charp, current->pid));

	if (*off >= inode->i_size) {
		gwf(printk("<0>" "File read ended for *pos<size with inode :%lu size:%lu offset:%llu , off:%llu, charp:%lx, pid:%u\n",
			   inode->i_ino, len, *off, filp->f_pos, (unsigned long)charp, current->pid));
		return 0;
	}

	if (!access_ok(VERIFY_WRITE, charp, len)) {
		gwf(printk("<0>" "File read ended for access_nok with inode :%lu size:%lu offset:%llu , off:%llu, charp:%lx, pid:%u\n",
			   inode->i_ino, len, *off, filp->f_pos, (unsigned long)charp, current->pid));
		return -EIO;
	}

	sec_start = (*off) >> Block_Size_Bits;
	if ((*off + len) > inode->i_size)
		len = inode->i_size - *off;
	sec_end = (*off + len - 1) >> Block_Size_Bits;
	sec_len = sec_end - sec_start + 1;

	bytes_in_first_buf_page = (1 + ~((*off) & ((unsigned long)Block_Size - 1))) & (Block_Size - 1);
	if (!bytes_in_first_buf_page)
		bytes_in_first_buf_page = Block_Size;

	pn = kzalloc(sec_len * sizeof(unsigned int), GFP_KERNEL);
	pn2 = kzalloc(sec_len * sizeof(unsigned int), GFP_KERNEL);
	res = kzalloc(sec_len * sizeof(struct page *), GFP_KERNEL);
	res2 = kzalloc(sec_len * sizeof(struct page *), GFP_KERNEL);

	for (i = sec_start, j = 0; i <= sec_end; i++, j++)
		pn[j] = i;

	gwf(printk("<0>" "GSFS_file_read: sec_start:%lu, sec_end:%lu, sec_len:%lu, bytes_in_first_buf_page: %lu\n",
		   sec_start, sec_end, sec_len, bytes_in_first_buf_page));

	pncount = GSFS_get_data_pages_of_inode(inode, pn, sec_len, res, odirect);

	rlen = 0;
	pn2count = 0;
	lock = 0;

	do {
		for (j = 0; j < pncount; j++) {
			if (unlikely(!res[j]))
				continue;

			if (lock && PageLocked(res[j])) {
				wait_on_page_locked(res[j]);
				lock = 0;
			} else if (PageLocked(res[j])) {
				/* Page still locked: retry it in the next pass. */
				pn2[pn2count] = pn[j];
				res2[pn2count] = res[j];
				pn2count++;
				continue;
			}

			/* The page is available for copying to the user buffer. */
			if (pn[j] == sec_start) {
				pagestartbyte = (*off) & (Block_Size - 1);
				bufstart = (unsigned long)charp;
			} else {
				pagestartbyte = 0;
				bufstart = (unsigned long)charp + bytes_in_first_buf_page +
					   ((pn[j] - sec_start - 1) << Block_Size_Bits);
			}

			if (pn[j] == sec_end)
				pageendbyte = (*off + len - 1) & (Block_Size - 1);
			else
				pageendbyte = Block_Size - 1;

			pagelen = (unsigned long)(pageendbyte - pagestartbyte + 1);

			if (inf->igflags & igflag_secure) {
				struct GSFS_page *gp = (struct GSFS_page *)page_private(res[j]);

				if (unlikely(!gp || !gp->sip) ||
				    unlikely(!(gp->sip->spflags & spflag_page_is_ready_for_read))) {
					/*
					 * Page is not ready for read; nothing is
					 * copied, so count no bytes for it below.
					 */
					i = pagelen;
					goto add_cont;
				}
			}

			i = __copy_to_user_inatomic((void *)bufstart,
						    page_address(res[j]) + pagestartbyte,
						    pagelen);
add_cont:
			rlen += (pagelen - i);
			mark_page_accessed(res[j]);
			GSFS_put_data_page_of_inode(inode, res[j]);

			/*
			 * gwf(printk("<0>" "file read for inode:%lu, j:%u pn[j]:%u pagestartbyte:%lx bufstart:%lx pagelen:%lu i:%u sec_start:%lu\n",
			 *	      inode->i_ino, j, pn[j], (unsigned long)pagestartbyte,
			 *	      (unsigned long)bufstart, pagelen, i, sec_start));
			 */
		}

		lock = 1;

		pncount = pn2count;
		pn2count = 0;

		pntemp = pn2;
		pn2 = pn;
		pn = pntemp;

		restemp = res2;
		res2 = res;
		res = restemp;

		gwf(printk("<0>" "file read for inode:%lu pncount:%u\n", inode->i_ino, pncount));
	} while (pncount);

	kfree(pn);
	kfree(pn2);
	kfree(res);
	kfree(res2);

	(*off) += rlen;

	gwf(printk("<0>" "file read ends rlen=%lu len:%lu\n", rlen, len));

	return rlen;
}
ssize_t GSFS_file_write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct inode		*inode = filp->f_mapping->host;
	struct address_space	*mapping = filp->f_mapping;
	sector_t		sec_start, sec_end, sec_len;
	unsigned long		bufstart, bytes_in_first_buf_page, bytes_in_last_buf_page,
				pagestartbyte, pageendbyte, pagelen;
	size_t			rlen;
	struct page		*res[2], *page, **pages;
	unsigned int		i, j, pages_count, start_read, end_read;

	gwf(printk("<0>" "File write with inode :%lu len:%lu offset:%llu , filepos:%llu, buf:%lx inode_size:%llu, pid:%u\n",
		   inode->i_ino, len, *off, filp->f_pos, (unsigned long)buf, inode->i_size, current->pid));

	if (unlikely(!access_ok(VERIFY_READ, buf, len)))
		return -1;

	mutex_lock(&inode->i_mutex);

	if ((*off + len) > inode->i_size) {
		if ((*off + len) > inode->i_sb->s_maxbytes) {
			/* Don't return with i_mutex still held. */
			mutex_unlock(&inode->i_mutex);
			return -1;
		}
		inode->i_size = (*off + len);
		GSFS_truncate(inode);
	}

	if (filp->f_flags & O_APPEND)
		*off = inode->i_size;

	current->backing_dev_info = mapping->backing_dev_info;
	file_remove_suid(filp);
	file_update_time(filp);

	sec_start = (*off) >> Block_Size_Bits;
	sec_end = (*off + len - 1) >> Block_Size_Bits;
	sec_len = sec_end - sec_start + 1;

	pages = kzalloc(sizeof(struct page *) * sec_len, GFP_KERNEL);

	bytes_in_first_buf_page = Block_Size - ((*off) & ((unsigned long)Block_Size - 1));
	bytes_in_last_buf_page = (*off + len) & ((unsigned long)Block_Size - 1);
	if (bytes_in_last_buf_page == 0)
		bytes_in_last_buf_page = Block_Size;

	start_read = (bytes_in_first_buf_page != Block_Size) ? 1 : 0;
	end_read = (bytes_in_last_buf_page != Block_Size && inode->i_size > (*off + len)) ? 1 : 0;

	gwf(printk("<0>" "GSFS write bytes_in_first_buf_page:%lu, bytes_in_last_buf_page:%lu\n",
		   bytes_in_first_buf_page, bytes_in_last_buf_page));
	gwf(printk("<0>" "GSFS write start_read:%u, end_read:%u, sec_start:%lu, sec_end:%lu\n",
		   start_read, end_read, sec_start, sec_end));

	if (sec_start == sec_end) {
		if (start_read || end_read) {
			res[0] = GSFS_get_data_page_of_inode_with_read(inode, sec_start);
			gwf(printk("<0>" "sec_start==sec_end, start_read || end_read , res[0]=%lx", (unsigned long)res[0]));
		} else {
			res[0] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_start);
			if (likely(res[0]))
				unlock_page(res[0]);
			gwf(printk("<0>" "sec_start==sec_end, !(start_read || end_read) , res[0]=%lx", (unsigned long)res[0]));
		}
		res[1] = 0;

		if (unlikely(!res[0])) {
			gwf(printk("<0>" "GSFS write len:-1\n"));
			mutex_unlock(&inode->i_mutex);
			printk("<1>" "GSFS write len:-1\n");
			kfree(pages);
			return len;
		}
	} else {
		if (start_read) {
			res[0] = GSFS_get_data_page_of_inode_with_read(inode, sec_start);
			gwf(printk("<0>" "sec_start!=sec_end, start_read, res[0]=%lx", (unsigned long)res[0]));
		} else {
			res[0] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_start);
			if (likely(res[0]))
				unlock_page(res[0]);
			gwf(printk("<0>" "sec_start!=sec_end, !start_read, res[0]=%lx", (unsigned long)res[0]));
		}
	}

	pages_count = 0;
	if (sec_len > 1)
		for (i = sec_start + 1; i <= sec_end - 1; i++)
			pages[pages_count++] = GSFS_get_locked_data_page_of_inode_without_read(inode, i);

	if (sec_start != sec_end) {
		if (end_read) {
			res[1] = GSFS_get_data_page_of_inode_with_read(inode, sec_end);
			gwf(printk("<0>" "sec_start!=sec_end, end_read, res[1]=%lx", (unsigned long)res[1]));
		} else {
			res[1] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_end);
			if (likely(res[1]))
				unlock_page(res[1]);
			gwf(printk("<0>" "sec_start!=sec_end, !end_read, res[1]=%lx", (unsigned long)res[1]));
		}

		if (unlikely(!res[0] || !res[1])) {
			gwf(printk("<0>" "GSFS write len:-1\n"));
			printk("<1>" "GSFS write len:-1\n");
			mutex_unlock(&inode->i_mutex);
			kfree(pages);
			return len;
		}
	}

	rlen = 0;
	bufstart = (unsigned long)buf + bytes_in_first_buf_page;
	pagelen = Block_Size;

	/* Complete pages in the middle of the range are copied whole. */
	pages_count = 0;
	if (sec_len > 1)
		for (i = sec_start + 1; i <= sec_end - 1; i++) {
			gwf(printk("<0>" "write page complete pages, i:%u, bufstart:%lx, rlen=%lu\n", i, bufstart, rlen));

			page = pages[pages_count++];
			if (unlikely(!page))
				goto buf_cont;

			j = __copy_from_user_inatomic(page_address(page), (void *)bufstart, pagelen);
			rlen += (Block_Size - j);

			mark_page_accessed(page);
			set_page_dirty(page);
			put_page(page);
			unlock_page(page);
buf_cont:
			bufstart += pagelen;
		}

	/* First and last page, which are not necessarily complete. */
	for (i = 0; i < 2 && res[i]; i++) {
		page = res[i];

		wait_on_page_locked(page);
		lock_page(page);

		if (page->index == sec_start) {
			bufstart = (unsigned long)buf;
			pagestartbyte = Block_Size - bytes_in_first_buf_page;
			if (sec_start == sec_end)
				pageendbyte = pagestartbyte + len - 1;
			else
				pageendbyte = Block_Size - 1;
		} else {
			bufstart = (unsigned long)buf + bytes_in_first_buf_page + ((sec_len - 2) << Block_Size_Bits);
			pageendbyte = bytes_in_last_buf_page - 1;
			pagestartbyte = 0;
		}

		gwf(printk("<0>" "gsfs_write for first and last page, i=%u, page:%lx, bufstart:%lx, pagestartbyte:%lu, pageendbyte:%lu\n",
			   i, (unsigned long)page, bufstart, pagestartbyte, pageendbyte));

		pagelen = pageendbyte - pagestartbyte + 1;
		j = __copy_from_user_inatomic(page_address(page) + pagestartbyte, (void *)bufstart, pagelen);
		rlen += (pagelen - j);

		mark_page_accessed(page);
		set_page_dirty(page);
		put_page(page);
		unlock_page(page);
	}

	mutex_unlock(&inode->i_mutex);

	(*off) += rlen;

	gwf(printk("<0>" "GSFS write rlen:%lu\n", rlen));

	kfree(pages);

	if (filp->f_flags & O_SYNC)
		write_inode_now(inode, 1);

	return rlen;
}