int ext4_bio_write_page(struct ext4_io_submit *io,
			struct page *page,
			int len,
			struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned block_start, block_end, blocksize;
	struct ext4_io_page *io_page;
	struct buffer_head *bh, *head;
	int ret = 0;

	blocksize = 1 << inode->i_blkbits;

	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
	if (!io_page) {
		set_page_dirty(page);
		unlock_page(page);
		return -ENOMEM;
	}
	io_page->p_page = page;
	atomic_set(&io_page->p_count, 1);
	get_page(page);
	set_page_writeback(page);
	ClearPageError(page);

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_start >= len) {
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}
		clear_buffer_dirty(bh);
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else
			 * we can do but mark the page as dirty, and
			 * better luck next time.
			 */
			set_page_dirty(page);
			break;
		}
	}
	unlock_page(page);
	/*
	 * If the page was truncated before we could do the writeback,
	 * or we had a memory allocation error while trying to write
	 * the first buffer head, we won't have submitted any pages for
	 * I/O.  In that case we need to make sure we've cleared the
	 * PageWriteback bit from the page to prevent the system from
	 * wedging later on.
	 */
	put_io_page(io_page);
	return ret;
}
/* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. * If we're over `background_thresh' then the writeback threads are woken to * perform some writeout. */ static void balance_dirty_pages(struct address_space *mapping, unsigned long write_chunk) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; unsigned long background_thresh; unsigned long dirty_thresh; unsigned long bdi_thresh; unsigned long pages_written = 0; unsigned long pause = 1; struct backing_dev_info *bdi = mapping->backing_dev_info; for (;;) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, .range_cyclic = 1, }; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); nr_reclaimable = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); nr_writeback = global_page_state(NR_WRITEBACK); bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the bdi limits are ramping up. */ if (nr_reclaimable + nr_writeback < (background_thresh + dirty_thresh) / 2) break; if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. * Only move pages to writeback if this bdi is over its * threshold otherwise wait until the disk writes catch * up. */ if (bdi_nr_reclaimable > bdi_thresh) { writeback_inodes_wb(&bdi->wb, &wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); } /* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ if (bdi_thresh < 2*bdi_stat_error(bdi)) { bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); } else if (bdi_nr_reclaimable) { bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); } if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; if (pages_written >= write_chunk) break; /* We've done our duty */ __set_current_state(TASK_UNINTERRUPTIBLE); io_schedule_timeout(pause); /* * Increase the delay for each loop, up to our previous * default of taking a 100ms nap. */ pause <<= 1; if (pause > HZ / 10) pause = HZ / 10; } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && bdi->dirty_exceeded) bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* * In laptop mode, we wait until hitting the higher threshold before * starting background writeout, and then write out all the way down * to the lower threshold. So slow writers cause minimal disk activity. * * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. 
*/ if ((laptop_mode && pages_written) || (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) bdi_start_background_writeback(bdi); } void set_page_dirty_balance(struct page *page, int page_mkwrite) { if (set_page_dirty(page) || page_mkwrite) { struct address_space *mapping = page_mapping(page); if (mapping) balance_dirty_pages_ratelimited(mapping); } } static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0; /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * On really big machines, get_writeback_state is expensive, so try to avoid * calling it too often (ratelimiting). But once we're over the dirty memory * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { unsigned long ratelimit; unsigned long *p; ratelimit = ratelimit_pages; if (mapping->backing_dev_info->dirty_exceeded) ratelimit = 8; /* * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */ preempt_disable(); p = &__get_cpu_var(bdp_ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { ratelimit = sync_writeback_pages(*p); *p = 0; preempt_enable(); balance_dirty_pages(mapping, ratelimit); return; } preempt_enable(); }
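/*
 * Illustrative sketch only (not part of the original source): how a write
 * path is expected to drive the throttling above.  A caller dirties a
 * pagecache page and then calls balance_dirty_pages_ratelimited(), which
 * funnels into balance_dirty_pages_ratelimited_nr() and, when over the
 * limits, into balance_dirty_pages().  The function and variable names in
 * this sketch are hypothetical.
 */
static void example_dirty_one_page(struct address_space *mapping,
				   struct page *page)
{
	lock_page(page);
	/* ... copy the new data into the page here ... */
	set_page_dirty(page);
	unlock_page(page);

	/* may throttle the caller if the system is over its dirty limits */
	balance_dirty_pages_ratelimited(mapping);
}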
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage) { struct address_space *swap_space; struct file *swap_storage; struct page *from_page; struct page *to_page; void *from_virtual; void *to_virtual; int i; int ret = -ENOMEM; BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated); BUG_ON(ttm->caching_state != tt_cached); /* * For user buffers, just unpin the pages, as there should be * vma references. */ if (ttm->page_flags & TTM_PAGE_FLAG_USER) { ttm_tt_free_user_pages(ttm); ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; ttm->swap_storage = NULL; return 0; } if (!persistant_swap_storage) { swap_storage = shmem_file_setup("ttm swap", ttm->num_pages << PAGE_SHIFT, 0); if (unlikely(IS_ERR(swap_storage))) { printk(KERN_ERR "Failed allocating swap storage.\n"); return PTR_ERR(swap_storage); } } else swap_storage = persistant_swap_storage; swap_space = swap_storage->f_path.dentry->d_inode->i_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; if (unlikely(from_page == NULL)) continue; to_page = read_mapping_page(swap_space, i, NULL); if (unlikely(IS_ERR(to_page))) { ret = PTR_ERR(to_page); goto out_err; } preempt_disable(); from_virtual = kmap_atomic(from_page, KM_USER0); to_virtual = kmap_atomic(to_page, KM_USER1); memcpy(to_virtual, from_virtual, PAGE_SIZE); kunmap_atomic(to_virtual, KM_USER1); kunmap_atomic(from_virtual, KM_USER0); preempt_enable(); set_page_dirty(to_page); mark_page_accessed(to_page); page_cache_release(to_page); } ttm_tt_free_alloced_pages(ttm); ttm->swap_storage = swap_storage; ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; if (persistant_swap_storage) ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP; return 0; out_err: if (!persistant_swap_storage) fput(swap_storage); return ret; }
static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", (unsigned long) inode->i_ino); WARN_ON(1); return -EINVAL; } node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) return PTR_ERR(node_page); ri = F2FS_INODE(node_page); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); i_gid_write(inode, le32_to_cpu(ri->i_gid)); set_nlink(inode, le32_to_cpu(ri->i_links)); inode->i_size = le64_to_cpu(ri->i_size); inode->i_blocks = le64_to_cpu(ri->i_blocks); inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime); inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime); inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); fi->i_current_depth = le32_to_cpu(ri->i_current_depth); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; if (f2fs_init_extent_tree(inode, &ri->i_ext)) set_page_dirty(node_page); get_inline_info(fi, ri); /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); if (__written_first_block(ri)) set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); return 0; }
/* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; f2fs_hash_t dentry_hash; struct f2fs_dir_entry *de; unsigned int nbucket, nblock; size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); struct page *page; int err = 0; int i; dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { level = F2FS_I(dir)->clevel; F2FS_I(dir)->chash = 0; } start: if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; /* Increase the depth, if required */ if (level == current_depth) ++current_depth; nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); bit_pos = room_for_filename(dentry_blk, slots); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } /* Move to next level to find the empty slot for new dentry */ ++level; goto start; add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); page = init_inode_metadata(inode, dir, name); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; } de = &dentry_blk->dentry[bit_pos]; de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); memcpy(dentry_blk->filename[bit_pos], name->name, name->len); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); for (i = 0; i < slots; i++) test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); set_page_dirty(dentry_page); /* we don't need to mark_inode_dirty now */ F2FS_I(inode)->i_pino = dir->i_ino; update_inode(inode, page); f2fs_put_page(page, 1); update_parent_metadata(dir, inode, current_depth); fail: up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; }
int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
		  const void *value, size_t value_len)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_xattr_header *header = NULL;
	struct f2fs_xattr_entry *here, *last;
	struct page *page;
	void *base_addr;
	int error, found, free, name_len, newsize;
	char *pval;

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);

	if (value == NULL)
		value_len = 0;

	if (name_len > 255 || value_len > MAX_VALUE_LEN)
		return -ERANGE;

	mutex_lock_op(sbi, NODE_NEW);

	if (!fi->i_xattr_nid) {
		/* Allocate new attribute block */
		struct dnode_of_data dn;

		if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
			mutex_unlock_op(sbi, NODE_NEW);
			return -ENOSPC;
		}
		set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
		mark_inode_dirty(inode);

		page = new_node_page(&dn, XATTR_NODE_OFFSET);
		if (IS_ERR(page)) {
			alloc_nid_failed(sbi, fi->i_xattr_nid);
			fi->i_xattr_nid = 0;
			mutex_unlock_op(sbi, NODE_NEW);
			return PTR_ERR(page);
		}

		alloc_nid_done(sbi, fi->i_xattr_nid);
		base_addr = page_address(page);
		header = XATTR_HDR(base_addr);
		header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
		header->h_refcount = cpu_to_le32(1);
	} else {
		/* The inode already has an extended attribute block. */
		page = get_node_page(sbi, fi->i_xattr_nid);
		if (IS_ERR(page)) {
			mutex_unlock_op(sbi, NODE_NEW);
			return PTR_ERR(page);
		}

		base_addr = page_address(page);
		header = XATTR_HDR(base_addr);
	}

	if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
		error = -EIO;
		goto cleanup;
	}

	/* find entry with wanted name. */
	found = 0;
	list_for_each_xattr(here, base_addr) {
		if (here->e_name_index != name_index)
			continue;
		if (here->e_name_len != name_len)
			continue;
		if (!memcmp(here->e_name, name, name_len)) {
			found = 1;
			break;
		}
	}

	last = here;
	while (!IS_XATTR_LAST_ENTRY(last))
		last = XATTR_NEXT_ENTRY(last);

	newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
			name_len + value_len);

	/* 1. Check space */
	if (value) {
		/*
		 * If value is NULL, this is a remove operation.
		 * For an update operation, we calculate the free space.
		 */
		free = MIN_OFFSET - ((char *)last - (char *)header);
		if (found)
			free = free - ENTRY_SIZE(here);

		if (free < newsize) {
			error = -ENOSPC;
			goto cleanup;
		}
	}

	/* 2. Remove old entry */
	if (found) {
		/*
		 * If the entry is found, remove the old entry.
		 * If not found, there is nothing to remove.
		 */
		struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
		int oldsize = ENTRY_SIZE(here);

		memmove(here, next, (char *)last - (char *)next);
		last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
		memset(last, 0, oldsize);
	}

	/* 3. Write new entry */
	if (value) {
		/*
		 * The old entry (if any) has been removed above,
		 * so just write the new entry.
		 */
		memset(last, 0, newsize);
		last->e_name_index = name_index;
		last->e_name_len = name_len;
		memcpy(last->e_name, name, name_len);
		pval = last->e_name + name_len;
		memcpy(pval, value, value_len);
		last->e_value_size = cpu_to_le16(value_len);
	}

	set_page_dirty(page);
	f2fs_put_page(page, 1);

	if (is_inode_flag_set(fi, FI_ACL_MODE)) {
		inode->i_mode = fi->i_acl_mode;
		inode->i_ctime = CURRENT_TIME_SEC;
		clear_inode_flag(fi, FI_ACL_MODE);
	}

	f2fs_write_inode(inode, NULL);
	mutex_unlock_op(sbi, NODE_NEW);

	return 0;
cleanup:
	f2fs_put_page(page, 1);
	mutex_unlock_op(sbi, NODE_NEW);
	return error;
}
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; int dirty, err; if (!f2fs_exist_data(dn->inode)) goto clear_out; err = f2fs_reserve_block(dn, 0); if (err) return err; f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); read_inline_data(page, dn->inode_page); set_page_dirty(page); /* clear dirty state */ dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); fio.old_blkaddr = dn->data_blkaddr; write_data_page(dn, &fio); f2fs_wait_on_page_writeback(page, DATA, true); if (dirty) inode_dec_dirty_pages(dn->inode); /* this converted inline_data should be recovered. */ set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ truncate_inline_inode(dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); f2fs_put_dnode(dn); return 0; } int f2fs_convert_inline_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; struct page *ipage, *page; int err = 0; if (!f2fs_has_inline_data(inode)) return 0; page = f2fs_grab_cache_page(inode->i_mapping, 0, false); if (!page) return -ENOMEM; f2fs_lock_op(sbi); ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); goto out; } set_new_dnode(&dn, inode, ipage, ipage, 0); if (f2fs_has_inline_data(inode)) err = f2fs_convert_inline_page(&dn, page); f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); f2fs_balance_fs(sbi, dn.node_changed); return err; } int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; struct dnode_of_data dn; int err; set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; if (!f2fs_has_inline_data(inode)) { f2fs_put_dnode(&dn); return -EAGAIN; } f2fs_bug_on(F2FS_I_SB(inode), page->index); f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); set_page_dirty(dn.inode_page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); clear_inline_node(dn.inode_page); f2fs_put_dnode(&dn); return 0; }
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; int err; f2fs_balance_fs(sbi); vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); if (err) { f2fs_unlock_op(sbi); goto out; } f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || !PageUptodate(page))) { unlock_page(page); err = -EFAULT; goto out; } /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) goto mapped; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { unsigned offset; offset = i_size_read(inode) & ~PAGE_CACHE_MASK; zero_user_segment(page, offset, PAGE_CACHE_SIZE); } set_page_dirty(page); SetPageUptodate(page); trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ f2fs_wait_on_page_writeback(page, DATA); /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); /* if gced page is attached, don't write to cold segment */ clear_cold_data(page); out: return block_page_mkwrite_return(err); }
int truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blocksize = inode->i_sb->s_blocksize;
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct page *ipage;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);

	if (lock)
		f2fs_lock_op(sbi);

	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		if (truncate_inline_inode(ipage, from))
			set_page_dirty(ipage);
		f2fs_put_page(ipage, 1);
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);

	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; struct gfs2_qadata *qa; loff_t size; int ret; /* Wait if fs is frozen. This is racy so we check again later on * and retry if the fs has been frozen after the page lock has * been acquired */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(page); } goto out_unlock; } ret = -ENOMEM; qa = gfs2_qadata_get(ip); if (qa == NULL) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) goto out_alloc_put; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (ret) goto out_quota_unlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; rblocks += gfs2_rg_blocks(ip); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; lock_page(page); ret = -EINVAL; size = i_size_read(inode); last_index = (size - 1) >> PAGE_CACHE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; ret = -EAGAIN; /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) goto out_trans_end; /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); if (ret == 0) ret = gfs2_allocate_page_backing(page); out_trans_end: if (ret) unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); out_alloc_put: gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&gh); out: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); /* This check must be post dropping of transaction lock */ if (inode->i_sb->s_frozen == SB_UNFROZEN) { wait_on_page_writeback(page); } else { ret = -EAGAIN; unlock_page(page); } } return block_page_mkwrite_return(ret); }
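/*
 * Minimal sketch (an assumption, not code from any of the filesystems in
 * this listing) of the ->page_mkwrite pattern the handlers above and below
 * share: revalidate the faulting page under the page lock, make sure it has
 * backing blocks, then mark it dirty and keep it locked on success.
 * example_allocate_backing() is a hypothetical placeholder for the
 * filesystem-specific allocation step.
 */
static int example_page_mkwrite(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	int ret;

	lock_page(page);
	if (page->mapping != inode->i_mapping ||
	    page_offset(page) > i_size_read(inode) || !PageUptodate(page)) {
		/* raced with truncate or invalidation: make the caller retry */
		unlock_page(page);
		ret = -EFAULT;
		goto out;
	}

	ret = example_allocate_backing(page);	/* hypothetical fs-specific work */
	if (ret) {
		unlock_page(page);
		goto out;
	}

	set_page_dirty(page);
	wait_on_page_writeback(page);	/* page stays locked on success */
out:
	return block_page_mkwrite_return(ret);
}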
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	set_page_dirty(page);
	return 0;
}
static void free_pagelist(PAGELIST_T *pagelist, int actual) { unsigned long *need_release; struct page **pages; unsigned int num_pages, i; vchiq_log_trace(vchiq_arm_log_level, "free_pagelist - %x, %d", (unsigned int)pagelist, actual); num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / PAGE_SIZE; need_release = (unsigned long *)(pagelist->addrs + num_pages); pages = (struct page **)(pagelist->addrs + num_pages + 1); /* Deal with any partial cache lines (fragments) */ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { FRAGMENTS_T *fragments = g_fragments_base + (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS); int head_bytes, tail_bytes; head_bytes = (CACHE_LINE_SIZE - pagelist->offset) & (CACHE_LINE_SIZE - 1); tail_bytes = (pagelist->offset + actual) & (CACHE_LINE_SIZE - 1); if ((actual >= 0) && (head_bytes != 0)) { if (head_bytes > actual) head_bytes = actual; memcpy((char *)page_address(pages[0]) + pagelist->offset, fragments->headbuf, head_bytes); } if ((actual >= 0) && (head_bytes < actual) && (tail_bytes != 0)) { memcpy((char *)page_address(pages[num_pages - 1]) + ((pagelist->offset + actual) & (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)), fragments->tailbuf, tail_bytes); } down(&g_free_fragments_mutex); *(FRAGMENTS_T **) fragments = g_free_fragments; g_free_fragments = fragments; up(&g_free_fragments_mutex); up(&g_free_fragments_sema); } if (*need_release) { for (i = 0; i < num_pages; i++) { if (pagelist->type != PAGELIST_WRITE) set_page_dirty(pages[i]); page_cache_release(pages[i]); } } kfree(pagelist); }
int intel_scu_ipc_write_umip(u8 *data, int len, int offset)
{
	int i, ret = 0, offset_align;
	int remainder, len_align = 0;
	u32 dptr, sptr, cmd;
	u8 cs, tbl_cs = 0, *buf = NULL;
	Sector sect;
	struct block_device *bdev;
	char *buffer = NULL;
	int *holderId = NULL;
	int sect_no;
	u8 checksum;

	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_CLOVERVIEW) {
		/* Opening the mmcblk0boot0 */
		bdev = get_emmc_bdev();
		if (bdev == NULL) {
			pr_err("%s: get_emmc failed!\n", __func__);
			return -ENODEV;
		}

		/* make sure the block device is open rw */
		ret = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, holderId);
		if (ret < 0) {
			pr_err("%s: blk_dev_get failed!\n", __func__);
			return ret;
		}

		/* get memmap of the UMIP header */
		sect_no = offset / SECTOR_SIZE;
		remainder = offset % SECTOR_SIZE;
		buffer = read_dev_sector(bdev, sect_no +
					 UMIP_HEADER_HEADROOM_SECTOR, &sect);

		/* Shouldn't need to access UMIP sector 0/1 */
		if (sect_no < UMIP_TOTAL_HEADER_SECTOR_NO) {
			pr_err("invalid umip offset\n");
			ret = -EINVAL;
			goto bd_put;
		} else if (data == NULL || buffer == NULL) {
			pr_err("buffer is empty\n");
			ret = -ENODEV;
			goto bd_put;
		} else if (len > (SECTOR_SIZE - remainder)) {
			pr_err("too much data to write\n");
			ret = -EINVAL;
			goto bd_put;
		}

		lock_page(sect.v);
		memcpy(buffer + remainder, data, len);
		checksum = calc_checksum(buffer, SECTOR_SIZE);
		set_page_dirty(sect.v);
		unlock_page(sect.v);
		sync_blockdev(bdev);
		put_dev_sector(sect);

		/*
		 * Update the checksum table in sector 0 (starting at UMIP
		 * offset 0x08): we maintain 4 bytes to track changes to each
		 * sector individually.  For example, the dword at offset 0x08
		 * holds the data-integrity checksum of sector number 2, and
		 * so forth.  It's worth noting that only the first byte of
		 * each 4-byte slot stores the checksum.  For details, please
		 * check the CTP FAS UMIP header definition.
		 */
		buffer = read_dev_sector(bdev, UMIP_HEADER_SECTOR +
					 UMIP_HEADER_HEADROOM_SECTOR, &sect);
		if (buffer == NULL) {
			pr_err("buffer is empty\n");
			ret = -ENODEV;
			goto bd_put;
		}

		lock_page(sect.v);
		memcpy(buffer + 4 * (sect_no - UMIP_TOTAL_HEADER_SECTOR_NO) +
		       UMIP_START_CHKSUM_ADDR, &checksum, 1/* one byte */);

		/* Change UMIP prologue chksum to zero */
		*(buffer + UMIP_HEADER_CHKSUM_ADDR) = 0;

		for (i = 0; i < UMIP_TOTAL_CHKSUM_ENTRY; i++) {
			tbl_cs ^= *(u8 *)(buffer + 4 * i +
					  UMIP_START_CHKSUM_ADDR);
		}

		/* Finish up with re-calculating the UMIP prologue checksum */
		cs = dword_to_byte_chksum(xorblock((u32 *)buffer,
						   SECTOR_SIZE));

		*(buffer + UMIP_HEADER_CHKSUM_ADDR) = tbl_cs ^ cs;
		set_page_dirty(sect.v);
		unlock_page(sect.v);
		sync_blockdev(bdev);
bd_put:
		if (buffer)
			put_dev_sector(sect);
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
		return ret;
	} else {
		if (!intel_mip_base)
			return -ENODEV;

		if (offset + len > IPC_MIP_MAX_ADDR)
			return -EINVAL;

		rpmsg_global_lock();

		offset_align = offset & (~0x3);
		len_align = (len + (offset - offset_align) + 3) & (~0x3);

		if (len != len_align) {
			buf = kzalloc(len_align, GFP_KERNEL);
			if (!buf) {
				pr_err("Alloc memory failed\n");
				ret = -ENOMEM;
				goto fail;
			}
			ret = read_mip(buf, len_align, offset_align, 0);
			if (ret)
				goto fail;
			memcpy(buf + offset - offset_align, data, len);
		} else {
			buf = data;
		}

		dptr = offset_align;
		sptr = len_align / 4;
		cmd = IPC_CMD_UMIP_WR << 12 | IPCMSG_MIP_ACCESS;
		memcpy(intel_mip_base, buf, len_align);

		ret = rpmsg_send_raw_command(mip_instance, cmd, 0, NULL, NULL,
					     0, 0, sptr, dptr);
fail:
		if (buf && len_align != len)
			kfree(buf);
		rpmsg_global_unlock();
		return ret;
	}
}
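/*
 * Illustrative helper (an assumption, not part of the driver) spelling out
 * the checksum-table layout described in the comment above: each data sector
 * past the UMIP header owns one 4-byte slot starting at
 * UMIP_START_CHKSUM_ADDR, and only the first byte of that slot carries the
 * checksum.  example_umip_chksum_slot() is a hypothetical name.
 */
static u8 *example_umip_chksum_slot(u8 *header_sector, int sect_no)
{
	/* sect_no counts upward from UMIP_TOTAL_HEADER_SECTOR_NO */
	return header_sector + UMIP_START_CHKSUM_ADDR +
	       4 * (sect_no - UMIP_TOTAL_HEADER_SECTOR_NO);
}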
static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct file *filp, struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; struct extent_info ei; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; /* if in-place-update policy is enabled, don't waste time here */ if (need_inplace_update(inode)) return -EINVAL; pg_start = range->start >> PAGE_CACHE_SHIFT; pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT; f2fs_balance_fs(sbi, true); inode_lock(inode); /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); if (err) goto out; /* * lookup mapping info in extent cache, skip defragmenting if physical * block addresses are continuous. */ if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { if (ei.fofs + ei.len >= pg_end) goto out; } map.m_lblk = pg_start; /* * lookup mapping info in dnode page cache, skip defragmenting if all * physical block addresses are continuous even if there are hole(s) * in logical blocks. */ while (map.m_lblk < pg_end) { map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; continue; } if (blk_end && blk_end != map.m_pblk) { fragmented = true; break; } blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; } if (!fragmented) goto out; map.m_lblk = pg_start; map.m_len = pg_end - pg_start; sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; /* * make sure there are enough free section for LFS allocation, this can * avoid defragment running in SSR mode when free section are allocated * intensively */ if (has_not_enough_free_secs(sbi, sec_num)) { err = -EAGAIN; goto out; } while (map.m_lblk < pg_end) { pgoff_t idx; int cnt = 0; do_map: map.m_len = pg_end - map.m_lblk; err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ); if (err) goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { map.m_lblk++; continue; } set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { struct page *page; page = get_lock_data_page(inode, idx, true); if (IS_ERR(page)) { err = PTR_ERR(page); goto clear_out; } set_page_dirty(page); f2fs_put_page(page, 1); idx++; cnt++; total++; } map.m_lblk = idx; if (idx < pg_end && cnt < blk_per_seg) goto do_map; clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); out: inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_CACHE_SHIFT; return err; }
static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *txattr_addr, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = 0; void *xattr_addr; struct page *xpage; nid_t new_nid = 0; int err; inline_size = inline_xattr_size(inode); if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) return -ENOSPC; /* write to inline xattr */ if (inline_size) { struct page *page = NULL; void *inline_addr; if (ipage) { inline_addr = inline_xattr_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { alloc_nid_failed(sbi, new_nid); return PTR_ERR(page); } inline_addr = inline_xattr_addr(page); f2fs_wait_on_page_writeback(page, NODE, true); } memcpy(inline_addr, txattr_addr, inline_size); f2fs_put_page(page, 1); /* no need to use xattr node block */ if (hsize <= inline_size) { err = truncate_xattr_node(inode, ipage); alloc_nid_failed(sbi, new_nid); return err; } } /* write to xattr node block */ if (F2FS_I(inode)->i_xattr_nid) { xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); } f2fs_bug_on(sbi, new_nid); f2fs_wait_on_page_writeback(xpage, NODE, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); } alloc_nid_done(sbi, new_nid); } xattr_addr = page_address(xpage); memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - sizeof(struct node_footer)); set_page_dirty(xpage); f2fs_put_page(xpage, 1); /* need to checkpoint during fsync */ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); return 0; }
static int __exchange_data_block(struct inode *inode, pgoff_t src, pgoff_t dst, bool full) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; block_t new_addr; bool do_replace = false; int ret; set_new_dnode(&dn, inode, NULL, NULL, 0); ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { new_addr = NULL_ADDR; } else { new_addr = dn.data_blkaddr; if (!is_checkpointed_data(sbi, new_addr)) { dn.data_blkaddr = NULL_ADDR; /* do not invalidate this block address */ set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); do_replace = true; } f2fs_put_dnode(&dn); } if (new_addr == NULL_ADDR) return full ? truncate_hole(inode, dst, dst + 1) : 0; if (do_replace) { struct page *ipage = get_node_page(sbi, inode->i_ino); struct node_info ni; if (IS_ERR(ipage)) { ret = PTR_ERR(ipage); goto err_out; } set_new_dnode(&dn, inode, ipage, NULL, 0); ret = f2fs_reserve_block(&dn, dst); if (ret) goto err_out; truncate_data_blocks_range(&dn, 1); get_node_info(sbi, dn.nid, &ni); f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, ni.version, true); f2fs_put_dnode(&dn); } else { struct page *psrc, *pdst; psrc = get_lock_data_page(inode, src, true); if (IS_ERR(psrc)) return PTR_ERR(psrc); pdst = get_new_data_page(inode, NULL, dst, false); if (IS_ERR(pdst)) { f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } f2fs_copy_page(psrc, pdst); set_page_dirty(pdst); f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); return truncate_hole(inode, src, src + 1); } return 0; err_out: if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) { dn.data_blkaddr = new_addr; set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); f2fs_put_dnode(&dn); } return ret; }
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; loff_t size; int ret; sb_start_pagefault(inode->i_sb); /* Update file times before taking page lock */ file_update_time(vma->vm_file); ret = get_write_access(inode); if (ret) goto out; ret = gfs2_rs_alloc(ip); if (ret) goto out_write_access; gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(page); } goto out_unlock; } ret = gfs2_rindex_update(sdp); if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_quota_lock_check(ip, &ap); if (ret) goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; lock_page(page); ret = -EINVAL; size = i_size_read(inode); last_index = (size - 1) >> PAGE_CACHE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; ret = -EAGAIN; /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) goto out_trans_end; /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); if (ret == 0) ret = gfs2_allocate_page_backing(page); out_trans_end: if (ret) unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } out_write_access: put_write_access(inode); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, }; /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * * There is no need to get a lock here unless we should be updating * atime. We ignore any locking errors since the only consequence is * a missed atime update (which will just be deferred until later). 
* * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); if (!(file->f_flags & O_NOATIME) && !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; /* grab lock to update inode */ gfs2_glock_dq_uninit(&i_gh); file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; return 0; } /** * gfs2_open_common - This is common to open and atomic_open * @inode: The inode being opened * @file: The file being opened * * This maybe called under a glock or not depending upon how it has * been called. We must always be called under a glock for regular * files, however. For other file types, it does not matter whether * we hold the glock or not. * * Returns: Error code or 0 for success */ int gfs2_open_common(struct inode *inode, struct file *file) { struct gfs2_file *fp; int ret; if (S_ISREG(inode->i_mode)) { ret = generic_file_open(inode, file); if (ret) return ret; } fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); if (!fp) return -ENOMEM; mutex_init(&fp->f_fl_mutex); gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; return 0; } /** * gfs2_open - open a file * @inode: the inode to open * @file: the struct file for this opening * * After atomic_open, this function is only used for opening files * which are already cached. We must still get the glock for regular * files to ensure that we have the file size uptodate for the large * file check which is in the common code. That is only an issue for * regular files though. * * Returns: errno */ static int gfs2_open(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; int error; bool need_unlock = false; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; need_unlock = true; } error = gfs2_open_common(inode, file); if (need_unlock) gfs2_glock_dq_uninit(&i_gh); return error; } /** * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); kfree(file->private_data); file->private_data = NULL; if (!(file->f_mode & FMODE_WRITE)) return 0; gfs2_rs_delete(ip, &inode->i_writecount); return 0; } /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry * @start: the start position in the file to sync * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * * We split the data flushing here so that we don't wait for the data * until after we've also sent the metadata to disk. Note that for * data=ordered, we will write & wait for the data at the log flush * stage anyway, so this is unlikely to make much of a difference * except in the data=writeback case. * * If the fdatawrite fails due to any reason except -EIO, we will * continue the remainder of the fsync, although we'll still report * the error at the end. This is to match filemap_write_and_wait_range() * behaviour. 
* * Returns: errno */ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; int sync_state = inode->i_state & I_DIRTY_ALL; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; if (mapping->nrpages) { ret1 = filemap_fdatawrite_range(mapping, start, end); if (ret1 == -EIO) return ret1; } if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME); if (sync_state) { ret = sync_inode_metadata(inode, 1); if (ret) return ret; if (gfs2_is_jdata(ip)) filemap_write_and_wait(mapping); gfs2_ail_flush(ip->i_gl, 1); } if (mapping->nrpages) ret = filemap_fdatawait_range(mapping, start, end); return ret ? ret : ret1; } /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments * @pos: The file position * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong * offset. There is still a race, but provided the app is using its * own file locking, this will make O_APPEND work as expected. * */ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; ret = gfs2_rs_alloc(ip); if (ret) return ret; gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (iocb->ki_flags & IOCB_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) return ret; gfs2_glock_dq_uninit(&gh); } return generic_file_write_iter(iocb, from); } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, int mode) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; unsigned int nr_blks; sector_t lblock = offset >> inode->i_blkbits; error = gfs2_meta_inode_buffer(ip, &dibh); if (unlikely(error)) return error; gfs2_trans_add_meta(ip->i_gl, dibh); if (gfs2_is_stuffed(ip)) { error = gfs2_unstuff_dinode(ip, NULL); if (unlikely(error)) goto out; } while (len) { struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; bh_map.b_size = len; set_buffer_zeronew(&bh_map); error = gfs2_block_map(inode, lblock, &bh_map, 1); if (unlikely(error)) goto out; len -= bh_map.b_size; nr_blks = bh_map.b_size >> inode->i_blkbits; lblock += nr_blks; if (!buffer_new(&bh_map)) continue; if (unlikely(!buffer_zeronew(&bh_map))) { error = -EIO; goto out; } } out: brelse(dibh); return error; } /** * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of * blocks, determine how many bytes can be written. * @ip: The inode in question. * @len: Max cap of bytes. What we return in *len must be <= this. * @data_blocks: Compute and return the number of data blocks needed * @ind_blocks: Compute and return the number of indirect blocks needed * @max_blocks: The total blocks available to work with. * * Returns: void, but @len, @data_blocks and @ind_blocks are filled in. 
*/ static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len, unsigned int *data_blocks, unsigned int *ind_blocks, unsigned int max_blocks) { loff_t max = *len; const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); max_data -= tmp; } *data_blocks = max_data; *ind_blocks = max_blocks - max_data; *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; if (*len > max) { *len = max; gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); } } static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes, max_blks = UINT_MAX; int error; const loff_t pos = offset; const loff_t count = len; loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; offset &= bsize_mask; len = next - offset; bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; if (!bytes) bytes = UINT_MAX; bytes &= bsize_mask; if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; gfs2_size_hint(file, offset, len); gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks); ap.min_target = data_blocks + ind_blocks; while (len > 0) { if (len < bytes) bytes = len; if (!gfs2_write_alloc_required(ip, offset, bytes)) { len -= bytes; offset += bytes; continue; } /* We need to determine how many bytes we can actually * fallocate without exceeding quota or going over the * end of the fs. We start off optimistically by assuming * we can write max_bytes */ max_bytes = (len > max_chunk_size) ? max_chunk_size : len; /* Since max_bytes is most likely a theoretical max, we * calculate a more realistic 'bytes' to serve as a good * starting point for the number of bytes we may be able * to write */ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; error = gfs2_quota_lock_check(ip, &ap); if (error) return error; /* ap.allowed tells us how many blocks quota will allow * us to write. Check if this reduces max_blks */ if (ap.allowed && ap.allowed < max_blks) max_blks = ap.allowed; error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; /* check if the selected rgrp limits our max_blks further */ if (ap.allowed && ap.allowed < max_blks) max_blks = ap.allowed; /* Almost done. Calculate bytes that can be written using * max_blks. We also recompute max_bytes, data_blocks and * ind_blocks */ calc_max_reserv(ip, &max_bytes, &data_blocks, &ind_blocks, max_blks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? 
data_blocks : 1; error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); if (error) goto out_trans_fail; error = fallocate_chunk(inode, offset, max_bytes, mode); gfs2_trans_end(sdp); if (error) goto out_trans_fail; len -= max_bytes; offset += max_bytes; gfs2_inplace_release(ip); gfs2_quota_unlock(ip); } if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { i_size_write(inode, pos + count); /* Marks the inode as dirty */ file_update_time(file); } return generic_write_sync(file, pos, count); out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); return error; } static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip)) return -EOPNOTSUPP; mutex_lock(&inode->i_mutex); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; if (!(mode & FALLOC_FL_KEEP_SIZE) && (offset + len) > inode->i_size) { ret = inode_newsize_ok(inode, offset + len); if (ret) goto out_unlock; } ret = get_write_access(inode); if (ret) goto out_unlock; ret = gfs2_rs_alloc(ip); if (ret) goto out_putw; ret = __gfs2_fallocate(file, mode, offset, len); if (ret) gfs2_rs_deltree(ip->i_res); out_putw: put_write_access(inode); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); mutex_unlock(&inode->i_mutex); return ret; } static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { int error; struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); error = gfs2_rs_alloc(ip); if (error) return (ssize_t)error; gfs2_size_hint(out, *ppos, len); return iter_file_splice_write(pipe, out, ppos, len, flags); } #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer * @cmd: either modify or retrieve lock state, possibly wait * @fl: type and range of lock * * Returns: errno */ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); struct lm_lockstruct *ls = &sdp->sd_lockstruct; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (cmd == F_CANCELLK) { /* Hack: */ cmd = F_SETLK; fl->fl_type = F_UNLCK; } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { if (fl->fl_type == F_UNLCK) locks_lock_file_wait(file, fl); return -EIO; } if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); else return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); } static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; int flags; int error = 0; int sleeptime; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; flags = (IS_SETLKW(cmd) ? 
0 : LM_FLAG_TRY_1CB) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); gl = fl_gh->gh_gl; if (gl) { if (fl_gh->gh_state == state) goto out; locks_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else {
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; bool need_cp = false; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; if (unlikely(f2fs_readonly(inode->i_sb))) return 0; trace_f2fs_sync_file_enter(inode); /* if fdatasync is triggered, let's do in-place-update */ if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(fi, FI_NEED_IPU); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); clear_inode_flag(fi, FI_NEED_IPU); if (ret) { trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; } /* if the inode is dirty, let's recover all the time */ if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { update_inode_page(inode); goto go_write; } /* * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; } go_write: /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ down_read(&fi->i_sem); need_cp = need_do_checkpoint(inode); up_read(&fi->i_sem); if (need_cp) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); /* * We've secured consistency through sync_fs. Following pino * will be used only for fsynced inodes after checkpoint. */ try_to_fix_pino(inode); clear_inode_flag(fi, FI_APPEND_WRITE); clear_inode_flag(fi, FI_UPDATE_WRITE); goto out; } sync_nodes: sync_node_pages(sbi, ino, &wbc); /* if cp_error was enabled, we should avoid infinite loop */ if (unlikely(f2fs_cp_error(sbi))) goto out; if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); goto sync_nodes; } ret = wait_on_node_pages_writeback(sbi, ino); if (ret) goto out; /* once recovery info is written, don't need to tack this */ remove_dirty_inode(sbi, ino, APPEND_INO); clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: remove_dirty_inode(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { struct pagevec pvec; int nr_pages; if (whence != SEEK_DATA) return 0; /* find first dirty page index */ pagevec_init(&pvec, 0); nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); pgofs = nr_pages ? 
pvec.pages[0]->index : LONG_MAX; pagevec_release(&pvec); return pgofs; } static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, int whence) { switch (whence) { case SEEK_DATA: if ((blkaddr == NEW_ADDR && dirty == pgofs) || (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) return true; break; case SEEK_HOLE: if (blkaddr == NULL_ADDR) return true; break; } return false; } static inline int unsigned_offsets(struct file *file) { return file->f_mode & FMODE_UNSIGNED_OFFSET; } static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) { if (offset < 0 && !unsigned_offsets(file)) return -EINVAL; if (offset > maxsize) return -EINVAL; if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; } return offset; } static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes = inode->i_sb->s_maxbytes; struct dnode_of_data dn; pgoff_t pgofs, end_offset, dirty; loff_t data_ofs = offset; loff_t isize; int err = 0; mutex_lock(&inode->i_mutex); isize = i_size_read(inode); if (offset >= isize) goto fail; /* handle inline data case */ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; } pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { /* direct node does not exists */ if (whence == SEEK_DATA) { pgofs = PGOFS_OF_NEXT_DNODE(pgofs, F2FS_I(inode)); continue; } else { goto found; } } end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; dn.ofs_in_node++, pgofs++, data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); if (__found_offset(blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } } f2fs_put_dnode(&dn); } if (whence == SEEK_DATA) goto fail; found: if (whence == SEEK_HOLE && data_ofs > isize) data_ofs = isize; mutex_unlock(&inode->i_mutex); return vfs_setpos(file, data_ofs, maxbytes); fail: mutex_unlock(&inode->i_mutex); return -ENXIO; } static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes = inode->i_sb->s_maxbytes; switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END: return generic_file_llseek_size(file, offset, whence, maxbytes); case SEEK_DATA: case SEEK_HOLE: if (offset < 0) return -ENXIO; return f2fs_seek_block(file, offset, whence); } return -EINVAL; } static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); /* we don't need to use inline_data strictly */ if (f2fs_has_inline_data(inode)) { int err = f2fs_convert_inline_inode(inode); if (err) return err; } file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; } int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; __le32 *addr; raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + ofs; for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); if (blkaddr == NULL_ADDR) continue; 
dn->data_blkaddr = NULL_ADDR; set_data_blkaddr(dn); f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(F2FS_I(dn->inode), FI_FIRST_BLOCK_WRITTEN); nr_free++; } if (nr_free) { dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } dn->ofs_in_node = ofs; trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); return nr_free; }