/*
 * Scan the byte range [pos, pos + count) for holes and unwritten
 * (allocated-but-not-yet-initialized) extents.
 *
 * Returns 1 if a hole or unwritten extent is found anywhere in the
 * range, 0 if the entire range is backed by written allocation, and a
 * negative error code if the extent lookup fails.
 */
static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
				       size_t count)
{
	struct super_block *sb = inode->i_sb;
	u32 cur, remaining, phys, ext_len;
	unsigned int ext_flags;
	int rc = 0;

	/* Translate the byte range into a cluster range. */
	cur = pos >> OCFS2_SB(sb)->s_clustersize_bits;
	remaining = ocfs2_clusters_for_bytes(sb, pos + count) - cur;

	for (; remaining != 0; cur += ext_len, remaining -= ext_len) {
		rc = ocfs2_get_clusters(inode, cur, &phys, &ext_len,
					&ext_flags);
		if (rc < 0) {
			mlog_errno(rc);
			break;
		}

		/* A zero physical cluster marks a hole. */
		if (phys == 0 || (ext_flags & OCFS2_EXT_UNWRITTEN)) {
			rc = 1;
			break;
		}

		/* Clamp so we never step past the end of the range. */
		if (ext_len > remaining)
			ext_len = remaining;
	}

	return rc;
}
/*
 * Core worker for moving/defragmenting a range of extents described by
 * context->range on context->inode.
 *
 * NOTE(review): only the preamble of this function is visible in this
 * chunk; the body continues past the `else` below — verify the full
 * definition before relying on this summary.
 */
static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
				      struct ocfs2_move_extents_context *context)
{
	int ret = 0, flags, do_defrag, skip = 0;
	u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
	u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;

	struct inode *inode = context->inode;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_move_extents *range = context->range;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/* Nothing to do for an empty file or a zero-length request. */
	if ((inode->i_size == 0) || (range->me_len == 0))
		return 0;

	/* Inline data lives in the dinode itself; no extents to move. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		return 0;

	context->refcount_loc = le64_to_cpu(di->i_refcount_loc);

	ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
	ocfs2_init_dealloc_ctxt(&context->dealloc);

	/*
	 * TO-DO XXX:
	 *
	 * - xattr extents.
	 */

	do_defrag = context->auto_defrag;

	/*
	 * extents moving happens in unit of clusters, for the sake
	 * of simplicity, we may ignore two clusters where 'byte_start'
	 * and 'byte_start + len' were within.
	 */
	move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
	len_to_move = (range->me_start + range->me_len) >>
		osb->s_clustersize_bits;
	/* Guard against underflow when the rounded end precedes the
	 * rounded-up start (tiny ranges within one cluster). */
	if (len_to_move >= move_start)
		len_to_move -= move_start;
	else
		len_to_move = 0;

	if (do_defrag) {
		/* Threshold in clusters below which defrag is pointless. */
		defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
		if (defrag_thresh <= 1)
			goto done;
	} else
/*
 * Extend the allocation of @inode out to @new_i_size, zeroing the
 * newly exposed region, and (when called from setattr) update i_size.
 *
 * A tail_to_skip value > 0 indicates that we're being called from
 * ocfs2_file_aio_write(). This has the following implications:
 *
 * - we don't want to update i_size
 * - di_bh will be NULL, which is fine because it's only used in the
 *   case where we want to update i_size.
 * - ocfs2_zero_extend() will then only be filling the hole created
 *   between i_size and the start of the write.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size,
			     size_t tail_to_skip)
{
	int ret = 0;
	u32 clusters_to_add = 0;

	/* The setattr path (tail_to_skip == 0) must supply the dinode
	 * buffer so i_size can be updated on disk. */
	BUG_ON(!tail_to_skip && !di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	if (i_size_read(inode) == new_i_size)
		goto out;
	/* Shrinking the file is truncate's job, not ours. */
	BUG_ON(new_i_size < i_size_read(inode));

	/*
	 * Sparse-capable filesystems need no preallocation or zeroing
	 * here; holes are filled at write time. Skipping also bypasses
	 * the data lock, which out_unlock mirrors below.
	 */
	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
		BUG_ON(tail_to_skip != 0);
		goto out_update_size;
	}

	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
		OCFS2_I(inode)->ip_clusters;

	/*
	 * protect the pages that ocfs2_zero_extend is going to be
	 * pulling into the page cache.. we do this before the
	 * metadata extend so that we don't get into the situation
	 * where we've extended the metadata but can't get the data
	 * lock to zero.
	 */
	ret = ocfs2_data_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	if (clusters_to_add) {
		ret = ocfs2_extend_allocation(inode, clusters_to_add);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	/*
	 * Call this even if we don't add any clusters to the tree. We
	 * still need to zero the area between the old i_size and the
	 * new i_size.
	 */
	ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

out_update_size:
	if (!tail_to_skip) {
		/* We're being called from ocfs2_setattr() which wants
		 * us to update i_size */
		ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
		if (ret < 0)
			mlog_errno(ret);
	}

out_unlock:
	/* Only non-sparse filesystems took the data lock above. */
	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		ocfs2_data_unlock(inode, 1);

out:
	return ret;
}
/*
 * Take the cluster locks needed for a write at *ppos for @count bytes
 * and perform any extension/suid-stripping required before the write
 * proceeds.
 *
 * On O_APPEND (@appending != 0), *ppos is rewritten to the sampled
 * i_size. *direct_io may be cleared to force the caller to fall back
 * to buffered I/O (extending writes, or holes in the target range).
 *
 * Returns 0 on success, negative error code otherwise. The metadata
 * lock is held on return; meta_level is set to -1 when the lock was
 * never acquired so the exit path skips the unlock.
 */
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
					 loff_t *ppos,
					 size_t count,
					 int appending,
					 int *direct_io)
{
	int ret = 0, meta_level = appending;
	struct inode *inode = dentry->d_inode;
	u32 clusters;
	loff_t newsize, saved_pos;

	/*
	 * We sample i_size under a read level meta lock to see if our write
	 * is extending the file, if it is we back off and get a write level
	 * meta lock.
	 */
	for(;;) {
		ret = ocfs2_meta_lock(inode, NULL, meta_level);
		if (ret < 0) {
			/* Lock not held: flag it so out: doesn't unlock. */
			meta_level = -1;
			mlog_errno(ret);
			goto out;
		}

		/* Clear suid / sgid if necessary. We do this here
		 * instead of later in the write path because
		 * remove_suid() calls ->setattr without any hint that
		 * we may have already done our cluster locking. Since
		 * ocfs2_setattr() *must* take cluster locks to
		 * proceed, this will lead us to recursively lock the
		 * inode. There's also the dinode i_size state which
		 * can be lost via setattr during extending writes (we
		 * set inode->i_size at the end of a write.) */
		if (should_remove_suid(dentry)) {
			if (meta_level == 0) {
				/* Need a write-level lock; retry. */
				ocfs2_meta_unlock(inode, meta_level);
				meta_level = 1;
				continue;
			}

			ret = ocfs2_write_remove_suid(inode);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_unlock;
			}
		}

		/* work on a copy of ppos until we're sure that we won't have
		 * to recalculate it due to relocking. */
		if (appending) {
			saved_pos = i_size_read(inode);
			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
		} else {
			saved_pos = *ppos;
		}

		if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
			loff_t end = saved_pos + count;

			/*
			 * Skip the O_DIRECT checks if we don't need
			 * them.
			 */
			if (!direct_io || !(*direct_io))
				break;

			/*
			 * Allowing concurrent direct writes means
			 * i_size changes wouldn't be synchronized, so
			 * one node could wind up truncating another
			 * node's writes.
			 */
			if (end > i_size_read(inode)) {
				*direct_io = 0;
				break;
			}

			/*
			 * We don't fill holes during direct io, so
			 * check for them here. If any are found, the
			 * caller will have to retake some cluster
			 * locks and initiate the io as buffered.
			 */
			ret = ocfs2_check_range_for_holes(inode, saved_pos,
							  count);
			if (ret == 1) {
				/* Holes found: not an error, just force
				 * the buffered path. */
				*direct_io = 0;
				ret = 0;
			} else if (ret < 0)
				mlog_errno(ret);
			break;
		}

		/*
		 * The rest of this loop is concerned with legacy file
		 * systems which don't support sparse files.
		 */

		newsize = count + saved_pos;

		mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
		     (long long) saved_pos, (long long) newsize,
		     (long long) i_size_read(inode));

		/* No need for a higher level metadata lock if we're
		 * never going past i_size. */
		if (newsize <= i_size_read(inode))
			break;

		if (meta_level == 0) {
			/* Extending: upgrade to a write-level lock. */
			ocfs2_meta_unlock(inode, meta_level);
			meta_level = 1;
			continue;
		}

		/* ip_clusters is protected by ip_lock. */
		spin_lock(&OCFS2_I(inode)->ip_lock);
		clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) -
			OCFS2_I(inode)->ip_clusters;
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		mlog(0, "Writing at EOF, may need more allocation: "
		     "i_size = %lld, newsize = %lld, need %u clusters\n",
		     (long long) i_size_read(inode), (long long) newsize,
		     clusters);

		/* We only want to continue the rest of this loop if
		 * our extend will actually require more
		 * allocation. */
		if (!clusters)
			break;

		/* Passing count as tail_to_skip: the write itself will
		 * fill that tail, so don't zero it here. */
		ret = ocfs2_extend_file(inode, NULL, newsize, count);
		if (ret < 0) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out_unlock;
		}
		break;
	}

	if (appending)
		*ppos = saved_pos;

out_unlock:
	ocfs2_meta_unlock(inode, meta_level);

out:
	return ret;
}