/* * For a given allocation, determine which allocators will need to be * accessed, and lock them, reserving the appropriate number of bits. * * Called from ocfs2_extend_allocation() for file systems which don't * support holes, and from ocfs2_write() for file systems which * understand sparse inodes. */ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, u32 clusters_to_add, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac) { int ret, num_free_extents; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); *meta_ac = NULL; *data_ac = NULL; mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " "clusters_to_add = %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), le32_to_cpu(di->i_clusters), clusters_to_add); num_free_extents = ocfs2_num_free_extents(osb, inode, di); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); goto out; } /* * Sparse allocation file systems need to be more conservative * with reserving room for expansion - the actual allocation * happens while we've got a journal handle open so re-taking * a cluster lock (because we ran out of room for another * extent) will violate ordering rules. * * Most of the time we'll only be seeing this 1 cluster at a time * anyway. */ if (!num_free_extents || (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } } ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } out: if (ret) { if (*meta_ac) { ocfs2_free_alloc_context(*meta_ac); *meta_ac = NULL; } /* * We cannot have an error and a non null *data_ac. */ } return ret; }
/* * TODO: Make this into a generic get_blocks function. * * From do_direct_io in direct-io.c: * "So what we do is to permit the ->get_blocks function to populate * bh.b_size with the size of IO which is permitted at this offset and * this i_blkbits." * * This function is called directly from get_more_blocks in direct-io.c. * * called like this: dio->get_blocks(dio->inode, fs_startblk, * fs_count, map_bh, dio->rw == WRITE); */ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret; u64 p_blkno, inode_blocks, contig_blocks; unsigned int ext_flags; unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; /* This function won't even be called if the request isn't all * nicely aligned and of the right size, so there's no need * for us to check any of that. */ inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); /* * Any write past EOF is not allowed because we'd be extending. */ if (create && (iblock + max_blocks) > inode_blocks) { ret = -EIO; goto bail; } /* This figures out the size of the next contiguous block, and * our logical offset */ ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &contig_blocks, &ext_flags); if (ret) { mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", (unsigned long long)iblock); ret = -EIO; goto bail; } if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { ocfs2_error(inode->i_sb, "Inode %llu has a hole at block %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)iblock); ret = -EROFS; goto bail; } /* * get_more_blocks() expects us to describe a hole by clearing * the mapped bit on bh_result(). * * Consider an unwritten extent as a hole. */ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) map_bh(bh_result, inode->i_sb, p_blkno); else { /* * ocfs2_prepare_inode_for_write() should have caught * the case where we'd be filling a hole and triggered * a buffered write instead. */ if (create) { ret = -EIO; mlog_errno(ret); goto bail; } clear_buffer_mapped(bh_result); } /* make sure we don't map more than max_blocks blocks here as that's all the kernel will handle at this point. */ if (max_blocks < contig_blocks) contig_blocks = max_blocks; bh_result->b_size = contig_blocks << blocksize_bits; bail: return ret; }
static int ocfs2_prepare_inode_for_write(struct dentry *dentry, loff_t *ppos, size_t count, int appending, int *direct_io) { int ret = 0, meta_level = appending; struct inode *inode = dentry->d_inode; u32 clusters; loff_t newsize, saved_pos; /* * We sample i_size under a read level meta lock to see if our write * is extending the file, if it is we back off and get a write level * meta lock. */ for(;;) { ret = ocfs2_meta_lock(inode, NULL, meta_level); if (ret < 0) { meta_level = -1; mlog_errno(ret); goto out; } /* Clear suid / sgid if necessary. We do this here * instead of later in the write path because * remove_suid() calls ->setattr without any hint that * we may have already done our cluster locking. Since * ocfs2_setattr() *must* take cluster locks to * proceeed, this will lead us to recursively lock the * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ if (should_remove_suid(dentry)) { if (meta_level == 0) { ocfs2_meta_unlock(inode, meta_level); meta_level = 1; continue; } ret = ocfs2_write_remove_suid(inode); if (ret < 0) { mlog_errno(ret); goto out_unlock; } } /* work on a copy of ppos until we're sure that we won't have * to recalculate it due to relocking. */ if (appending) { saved_pos = i_size_read(inode); mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); } else { saved_pos = *ppos; } if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { loff_t end = saved_pos + count; /* * Skip the O_DIRECT checks if we don't need * them. */ if (!direct_io || !(*direct_io)) break; /* * Allowing concurrent direct writes means * i_size changes wouldn't be synchronized, so * one node could wind up truncating another * nodes writes. */ if (end > i_size_read(inode)) { *direct_io = 0; break; } /* * We don't fill holes during direct io, so * check for them here. If any are found, the * caller will have to retake some cluster * locks and initiate the io as buffered. */ ret = ocfs2_check_range_for_holes(inode, saved_pos, count); if (ret == 1) { *direct_io = 0; ret = 0; } else if (ret < 0) mlog_errno(ret); break; } /* * The rest of this loop is concerned with legacy file * systems which don't support sparse files. */ newsize = count + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, (long long) i_size_read(inode)); /* No need for a higher level metadata lock if we're * never going past i_size. */ if (newsize <= i_size_read(inode)) break; if (meta_level == 0) { ocfs2_meta_unlock(inode, meta_level); meta_level = 1; continue; } spin_lock(&OCFS2_I(inode)->ip_lock); clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - OCFS2_I(inode)->ip_clusters; spin_unlock(&OCFS2_I(inode)->ip_lock); mlog(0, "Writing at EOF, may need more allocation: " "i_size = %lld, newsize = %lld, need %u clusters\n", (long long) i_size_read(inode), (long long) newsize, clusters); /* We only want to continue the rest of this loop if * our extend will actually require more * allocation. */ if (!clusters) break; ret = ocfs2_extend_file(inode, NULL, newsize, count); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out_unlock; } break; } if (appending) *ppos = saved_pos; out_unlock: ocfs2_meta_unlock(inode, meta_level); out: return ret; }
static int ocfs2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int err = 0; unsigned int ext_flags; u64 max_blocks = bh_result->b_size >> inode->i_blkbits; u64 p_blkno, count, past_eof; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, (unsigned long long)iblock, bh_result, create); if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", inode, inode->i_ino); if (S_ISLNK(inode->i_mode)) { /* this always does I/O for some reason. */ err = ocfs2_symlink_get_block(inode, iblock, bh_result, create); goto bail; } err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, &ext_flags); if (err) { mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " "%llu, NULL)\n", err, inode, (unsigned long long)iblock, (unsigned long long)p_blkno); goto bail; } if (max_blocks < count) count = max_blocks; /* * ocfs2 never allocates in this function - the only time we * need to use BH_New is when we're extending i_size on a file * system which doesn't support holes, in which case BH_New * allows block_prepare_write() to zero. * * If we see this on a sparse file system, then a truncate has * raced us and removed the cluster. In this case, we clear * the buffers dirty and uptodate bits and let the buffer code * ignore it as a hole. */ if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) { clear_buffer_dirty(bh_result); clear_buffer_uptodate(bh_result); goto bail; } /* Treat the unwritten extent as a hole for zeroing purposes. */ if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) map_bh(bh_result, inode->i_sb, p_blkno); bh_result->b_size = count << inode->i_blkbits; if (!ocfs2_sparse_alloc(osb)) { if (p_blkno == 0) { err = -EIO; mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n", (unsigned long long)iblock, (unsigned long long)p_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); dump_stack(); } past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, (unsigned long long)past_eof); if (create && (iblock >= past_eof)) set_buffer_new(bh_result); } bail: if (err < 0) err = -EIO; mlog_exit(err); return err; }