static char *ocfs2_fast_symlink_getlink(struct inode *inode, struct buffer_head **bh) { int status; char *link = NULL; struct ocfs2_dinode *fe; mlog_entry_void(); status = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, bh, OCFS2_BH_CACHED, inode); if (status < 0) { mlog_errno(status); link = ERR_PTR(status); goto bail; } fe = (struct ocfs2_dinode *) (*bh)->b_data; link = (char *) fe->id2.i_symlink; bail: mlog_exit(status); return link; }
/*
 * Try to claim bits from the single block group stored at @gd_blkno.
 *
 * The group descriptor is read and checked, ac->ac_group_search() chooses
 * the bits, and then the allocator dinode counts and the group bitmap are
 * updated under @handle.
 *
 * On success *bit_off and *num_bits describe the claimed range and
 * *bits_left is loaded from the group's bg_free_bits_count.  Returns 0 or
 * a negative errno; -ENOSPC from the group search is returned unlogged.
 */
static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
				  handle_t *handle,
				  u32 bits_wanted,
				  u32 min_bits,
				  u16 *bit_off,
				  unsigned int *num_bits,
				  u64 gd_blkno,
				  u16 *bits_left)
{
	struct inode *alloc_inode = ac->ac_inode;
	struct buffer_head *gd_bh = NULL;
	struct ocfs2_group_desc *desc;
	u16 nfound;
	int rc;

	rc = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, &gd_bh,
			      OCFS2_BH_CACHED, alloc_inode);
	if (rc < 0) {
		mlog_errno(rc);
		return rc;
	}

	desc = (struct ocfs2_group_desc *) gd_bh->b_data;
	if (!OCFS2_IS_VALID_GROUP_DESC(desc)) {
		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, desc);
		rc = -EIO;
		goto release;
	}

	rc = ac->ac_group_search(alloc_inode, gd_bh, bits_wanted, min_bits,
				 bit_off, &nfound);
	if (rc < 0) {
		if (rc != -ENOSPC)
			mlog_errno(rc);
		goto release;
	}
	*num_bits = nfound;

	rc = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
					      *num_bits,
					      le16_to_cpu(desc->bg_chain));
	if (rc < 0) {
		mlog_errno(rc);
		goto release;
	}

	rc = ocfs2_block_group_set_bits(handle, alloc_inode, desc, gd_bh,
					*bit_off, *num_bits);
	if (rc < 0)
		mlog_errno(rc);

	/* Filled in even when setting the bits reported an error. */
	*bits_left = le16_to_cpu(desc->bg_free_bits_count);

release:
	brelse(gd_bh);
	return rc;
}
int ocfs2_init_slot_info(struct ocfs2_super *osb) { int status, i; u64 blkno; struct inode *inode = NULL; struct buffer_head *bh = NULL; struct ocfs2_slot_info *si; si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); if (!si) { status = -ENOMEM; mlog_errno(status); goto bail; } spin_lock_init(&si->si_lock); si->si_num_slots = osb->max_slots; si->si_size = OCFS2_MAX_SLOTS; for(i = 0; i < si->si_num_slots; i++) si->si_global_node_nums[i] = OCFS2_INVALID_SLOT; inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } status = ocfs2_extent_map_get_blocks(inode, 0ULL, 1, &blkno, NULL); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_read_block(osb, blkno, &bh, 0, inode); if (status < 0) { mlog_errno(status); goto bail; } si->si_inode = inode; si->si_bh = bh; osb->slot_info = si; bail: if (status < 0 && si) ocfs2_free_slot_info(si); return status; }
static int ocfs2_write_remove_suid(struct inode *inode) { int ret; struct buffer_head *bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di; mlog_entry("(Inode %llu, mode 0%o)\n", (unsigned long long)oi->ip_blkno, inode->i_mode); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (handle == NULL) { ret = -ENOMEM; mlog_errno(ret); goto out; } ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); if (ret < 0) { mlog_errno(ret); goto out_trans; } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_bh; } inode->i_mode &= ~S_ISUID; if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP)) inode->i_mode &= ~S_ISGID; di = (struct ocfs2_dinode *) bh->b_data; di->i_mode = cpu_to_le16(inode->i_mode); ret = ocfs2_journal_dirty(handle, bh); if (ret < 0) mlog_errno(ret); out_bh: brelse(bh); out_trans: ocfs2_commit_trans(osb, handle); out: mlog_exit(ret); return ret; }
/* * TODO: this should probably be merged into ocfs2_get_block * * However, you now need to pay attention to the cont_prepare_write() * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much * expects never to extend). */ struct buffer_head *ocfs2_bread(struct inode *inode, int block, int *err, int reada) { struct buffer_head *bh = NULL; int tmperr; u64 p_blkno; int readflags = OCFS2_BH_CACHED; if (reada) readflags |= OCFS2_BH_READAHEAD; if (((u64)block << inode->i_sb->s_blocksize_bits) >= i_size_read(inode)) { BUG_ON(!reada); return NULL; } down_read(&OCFS2_I(inode)->ip_alloc_sem); tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); up_read(&OCFS2_I(inode)->ip_alloc_sem); if (tmperr < 0) { mlog_errno(tmperr); goto fail; } tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh, readflags, inode); if (tmperr < 0) goto fail; tmperr = 0; *err = 0; return bh; fail: if (bh) { brelse(bh); bh = NULL; } *err = -EIO; return NULL; }
static int ocfs2_readpage_inline(struct inode *inode, struct page *page) { int ret; struct buffer_head *di_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, OCFS2_BH_CACHED, inode); if (ret) { mlog_errno(ret); goto out; } ret = ocfs2_read_inline_data(inode, page, di_bh); out: unlock_page(page); brelse(di_bh); return ret; }
/*
 * Read the group descriptor at @gd_blkno through the inode's cache,
 * validating it with ocfs2_validate_group_descriptor and then against its
 * parent dinode @di via ocfs2_validate_gd_parent().
 *
 * @bh may point at NULL or at a buffer supplied by the caller.  When it
 * pointed at NULL, the buffer that was read is stored there on success.
 * When the parent check fails the buffer is released.
 * Returns 0 or the error from the read or the validation.
 */
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
				u64 gd_blkno, struct buffer_head **bh)
{
	struct buffer_head *gd_bh = *bh;
	int rc;

	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &gd_bh,
			      ocfs2_validate_group_descriptor);
	if (rc)
		return rc;

	rc = ocfs2_validate_gd_parent(inode->i_sb, di, gd_bh, 0);
	if (rc) {
		brelse(gd_bh);
		return rc;
	}

	/* Only hand a buffer up when the caller did not bring its own. */
	if (*bh == NULL)
		*bh = gd_bh;

	return rc;
}
/*
 * get_block callback for symlinks whose target is stored in allocated
 * blocks; calling it for a fast symlink is a BUG.
 *
 * Maps logical block @iblock of @inode onto @bh_result, using the first
 * extent record of the dinode as the base.  For a new inode whose data
 * is not yet up to date in @bh_result, the contents are copied over from
 * the buffer cache first.
 *
 * Returns 0 on success and -EIO on every failure path.
 */
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  &bh, OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
		     fe->i_signature);
		goto bail;
	}

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				/*
				 * bail only releases the dinode buffer, so
				 * the sb_getblk() reference must be dropped
				 * here or it is leaked.
				 */
				brelse(buffer_cache_bh);
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh_result);
		}
		brelse(buffer_cache_bh);
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	if (bh)
		brelse(bh);

	mlog_exit(err);
	return err;
}
static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args) { struct super_block *sb; struct ocfs2_super *osb; struct ocfs2_dinode *fe; struct buffer_head *bh = NULL; int status, can_lock; u32 generation = 0; mlog_entry("(0x%p, 0x%p)\n", inode, args); status = -EINVAL; if (inode == NULL || inode->i_sb == NULL) { mlog(ML_ERROR, "bad inode\n"); return status; } sb = inode->i_sb; osb = OCFS2_SB(sb); if (!args) { mlog(ML_ERROR, "bad inode args\n"); make_bad_inode(inode); return status; } /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. * * Generally, OCFS2 never trusts the contents of an inode * unless it's holding a cluster lock, so taking it here isn't * a correctness issue as much as it is a performance * improvement. * * There are three times when taking the lock is not a good idea: * * 1) During startup, before we have initialized the DLM. * * 2) If we are reading certain system files which never get * cluster locks (local alloc, truncate log). * * 3) If the process doing the iget() is responsible for * orphan dir recovery. We're holding the orphan dir lock and * can get into a deadlock with another process on another * node in ->delete_inode(). * * #1 and #2 can be simply solved by never taking the lock * here for system files (which are the only type we read * during mount). It's a heavier approach, but our main * concern is user-accesible files anyway. * * #3 works itself out because we'll eventually take the * cluster lock before trusting anything anyway. */ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) && !ocfs2_mount_local(osb); /* * To maintain backwards compatibility with older versions of * ocfs2-tools, we still store the generation value for system * files. The only ones that actually matter to userspace are * the journals, but it's easier and inexpensive to just flag * all system files similarly. 
*/ if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) generation = osb->fs_generation; ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, OCFS2_LOCK_TYPE_META, generation, inode); ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, OCFS2_LOCK_TYPE_OPEN, 0, inode); if (can_lock) { status = ocfs2_open_lock(inode); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } status = ocfs2_meta_lock(inode, NULL, 0); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } } if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { status = ocfs2_try_open_lock(inode, 0); if (status) { make_bad_inode(inode); return status; } } status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, can_lock ? inode : NULL); if (status < 0) { mlog_errno(status); goto bail; } status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { mlog(0, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)args->fi_blkno, 7, fe->i_signature); goto bail; } /* * This is a code bug. Right now the caller needs to * understand whether it is asking for a system file inode or * not so the proper lock names can be built. */ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), "Inode %llu: system file state is ambigous\n", (unsigned long long)args->fi_blkno); if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); if (ocfs2_populate_inode(inode, fe, 0) < 0) goto bail; BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); status = 0; bail: if (can_lock) ocfs2_meta_unlock(inode, 0); if (status < 0) make_bad_inode(inode); if (args && bh) brelse(bh); mlog_exit(status); return status; }
/* * Extend the filesystem to the new number of clusters specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. */ int ocfs2_group_extend(struct inode * inode, int new_clusters) { int ret; handle_t *handle; struct buffer_head *main_bm_bh = NULL; struct buffer_head *group_bh = NULL; struct inode *main_bm_inode = NULL; struct ocfs2_dinode *fe = NULL; struct ocfs2_group_desc *group = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u16 cl_bpc; u32 first_new_cluster; u64 lgd_blkno; mlog_entry_void(); if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; if (new_clusters < 0) return -EINVAL; else if (new_clusters == 0) return 0; main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { ret = -EINVAL; mlog_errno(ret); goto out; } mutex_lock(&main_bm_inode->i_mutex); ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (ret < 0) { mlog_errno(ret); goto out_mutex; } fe = (struct ocfs2_dinode *)main_bm_bh->b_data; if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small. 
" "Force to do offline resize."); ret = -EINVAL; goto out_unlock; } if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); ret = -EIO; goto out_unlock; } first_new_cluster = le32_to_cpu(fe->i_clusters); lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } group = (struct ocfs2_group_desc *)group_bh->b_data; ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); if (ret) { mlog_errno(ret); goto out_unlock; } cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { ret = -EINVAL; goto out_unlock; } mlog(0, "extend the last group at %llu, new clusters = %d\n", (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; goto out_unlock; } /* update the last group descriptor and inode. */ ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, main_bm_bh, group_bh, first_new_cluster, new_clusters); if (ret) { mlog_errno(ret); goto out_commit; } ocfs2_update_super_and_backups(main_bm_inode, new_clusters); out_commit: ocfs2_commit_trans(osb, handle); out_unlock: brelse(group_bh); brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: mutex_unlock(&main_bm_inode->i_mutex); iput(main_bm_inode); out: mlog_exit_void(); return ret; }
/* * We want to free the bitmap bits outside of any recovery context as * we'll need a cluster lock to do so, but we must clear the local * alloc before giving up the recovered nodes journal. To solve this, * we kmalloc a copy of the local alloc before it's change for the * caller to process with ocfs2_complete_local_alloc_recovery */ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, int slot_num, struct ocfs2_dinode **alloc_copy) { int status = 0; struct buffer_head *alloc_bh = NULL; struct inode *inode = NULL; struct ocfs2_dinode *alloc; mlog_entry("(slot_num = %d)\n", slot_num); *alloc_copy = NULL; inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } mutex_lock(&inode->i_mutex); status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &alloc_bh, 0, inode); if (status < 0) { mlog_errno(status); goto bail; } *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); if (!(*alloc_copy)) { status = -ENOMEM; goto bail; } memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); alloc = (struct ocfs2_dinode *) alloc_bh->b_data; ocfs2_clear_local_alloc(alloc); status = ocfs2_write_block(osb, alloc_bh, inode); if (status < 0) mlog_errno(status); bail: if ((status < 0) && (*alloc_copy)) { kfree(*alloc_copy); *alloc_copy = NULL; } if (alloc_bh) brelse(alloc_bh); if (inode) { mutex_unlock(&inode->i_mutex); iput(inode); } mlog_exit(status); return status; }
/*
 * Add @clusters_to_add clusters of allocation to @inode.
 *
 * The work is done in a loop driven by two labels: restart_all re-locks
 * the allocators and starts a fresh transaction, restarted_transaction
 * continues inside the current (extended) transaction.  Which one is
 * taken depends on the reason ocfs2_do_extend_allocation() reports in
 * 'why'.
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_extend_allocation(struct inode *inode, u32 clusters_to_add)
{
	int status = 0;
	int restart_func = 0;
	int drop_alloc_sem = 0;
	int credits;
	u32 prev_clusters, logical_start;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe = NULL;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

	/*
	 * This function only exists for file systems which don't
	 * support holes.
	 */
	BUG_ON(ocfs2_sparse_alloc(osb));

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto leave;
	}

	logical_start = OCFS2_I(inode)->ip_clusters;

restart_all:
	/* The on-disk and in-memory cluster counts must agree on every pass. */
	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

	/* Blocks people in read/write from reading our allocation
	 * until we're done changing it. We depend on i_mutex to block
	 * other extend/truncate calls while we're here. Ordering wrt
	 * start_trans is important here -- always do it before! */
	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	drop_alloc_sem = 1;

	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
				       &meta_ac);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* Keep the cleanup at 'leave' from committing an error pointer. */
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	/* Reserve a write to the file entry early on - that way, if we
	 * run out of credits in the allocation path, we can still
	 * update i_size. */
	status = ocfs2_journal_access(handle, inode, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = OCFS2_I(inode)->ip_clusters;

	status = ocfs2_do_extend_allocation(osb,
					    inode,
					    &logical_start,
					    clusters_to_add,
					    bh,
					    handle,
					    data_ac,
					    meta_ac,
					    &why);
	/* -EAGAIN is not treated as fatal here; 'why' is examined below. */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Subtract whatever this pass managed to add from the request. */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			/* Picked up after the cleanup at 'leave'. */
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    fe,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
	     le32_to_cpu(fe->i_clusters),
	     (unsigned long long)le64_to_cpu(fe->i_size));
	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));

leave:
	/*
	 * Everything acquired since restart_all is released here; each
	 * variable is reset so a jump back to restart_all starts clean.
	 */
	if (drop_alloc_sem) {
		up_write(&OCFS2_I(inode)->ip_alloc_sem);
		drop_alloc_sem = 0;
	}
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	/* Only restart when the pass itself finished without error. */
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}
	if (bh) {
		brelse(bh);
		bh = NULL;
	}

	mlog_exit(status);
	return status;
}
/*
 * Give @count bits starting at @start_bit in the block group at
 * @bg_blkno back to the suballocator described by @alloc_bh.
 *
 * Expects the suballoc inode to already be locked.
 *
 * The bits are cleared in the group bitmap, then the chain record's
 * free count is raised and the dinode's used count lowered by @count,
 * all under @handle.  Returns 0 or a negative errno.
 */
static int ocfs2_free_suballoc_bits(handle_t *handle,
				    struct inode *alloc_inode,
				    struct buffer_head *alloc_bh,
				    unsigned int start_bit,
				    u64 bg_blkno,
				    unsigned int count)
{
	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
	struct ocfs2_group_desc *group;
	struct buffer_head *group_bh = NULL;
	int rc = 0;

	mlog_entry_void();

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
		rc = -EIO;
		goto done;
	}
	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));

	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
	     (unsigned long long)bg_blkno, start_bit);

	rc = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
			      alloc_inode);
	if (rc < 0) {
		mlog_errno(rc);
		goto done;
	}

	group = (struct ocfs2_group_desc *) group_bh->b_data;
	rc = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
	if (rc) {
		mlog_errno(rc);
		goto done;
	}
	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));

	rc = ocfs2_block_group_clear_bits(handle, alloc_inode,
					  group, group_bh,
					  start_bit, count);
	if (rc < 0) {
		mlog_errno(rc);
		goto done;
	}

	rc = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
				  OCFS2_JOURNAL_ACCESS_WRITE);
	if (rc < 0) {
		mlog_errno(rc);
		goto done;
	}

	/* Account for the freed bits in the chain record and the dinode. */
	le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
		     count);
	fe->id1.bitmap1.i_used =
		cpu_to_le32(le32_to_cpu(fe->id1.bitmap1.i_used) - count);

	rc = ocfs2_journal_dirty(handle, alloc_bh);
	if (rc < 0)
		mlog_errno(rc);

done:
	if (group_bh)
		brelse(group_bh);

	mlog_exit(rc);
	return rc;
}
/*
 * Walk chain ac->ac_chain of the allocator in @ac looking for a block
 * group that ac->ac_group_search() can satisfy, and claim bits from it.
 *
 * On success *bit_off / *num_bits describe the claimed range, *bg_blkno
 * is the block number of the group used and *bits_left is that group's
 * bg_free_bits_count.  Returns 0 or a negative errno; -ENOSPC means no
 * group in the chain had room.
 */
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
			      handle_t *handle,
			      u32 bits_wanted,
			      u32 min_bits,
			      u16 *bit_off,
			      unsigned int *num_bits,
			      u64 *bg_blkno,
			      u16 *bits_left)
{
	struct inode *alloc_inode = ac->ac_inode;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
	struct ocfs2_chain_list *cl =
		(struct ocfs2_chain_list *) &di->id2.i_chain;
	struct buffer_head *cur_bh = NULL;
	struct buffer_head *prev_bh = NULL;
	struct ocfs2_group_desc *gd;
	u64 next_blkno;
	u16 chain = ac->ac_chain;
	u16 found;
	int status;

	mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
	     bits_wanted, chain,
	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);

	status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
				  le64_to_cpu(cl->cl_recs[chain].c_blkno),
				  &cur_bh, OCFS2_BH_CACHED, alloc_inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	gd = (struct ocfs2_group_desc *) cur_bh->b_data;
	status = ocfs2_check_group_descriptor(alloc_inode->i_sb, di, gd);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/* For now the chain search is simplistic: take the first group
	 * that has any empty bits. */
	for (;;) {
		status = ac->ac_group_search(alloc_inode, cur_bh,
					     bits_wanted, min_bits,
					     bit_off, &found);
		if (status != -ENOSPC)
			break;
		if (!gd->bg_next_group)
			break;

		/* Remember only the group immediately before the next one. */
		if (prev_bh) {
			brelse(prev_bh);
			prev_bh = NULL;
		}
		next_blkno = le64_to_cpu(gd->bg_next_group);
		prev_bh = cur_bh;
		cur_bh = NULL;

		status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
					  next_blkno, &cur_bh,
					  OCFS2_BH_CACHED, alloc_inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		gd = (struct ocfs2_group_desc *) cur_bh->b_data;
		status = ocfs2_check_group_descriptor(alloc_inode->i_sb,
						      di, gd);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
	}
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
	     found, (unsigned long long)le64_to_cpu(gd->bg_blkno));

	*num_bits = found;

	BUG_ON(*num_bits == 0);

	/*
	 * We keep the previously read group descriptor around.  When a
	 * target is found after reading more than X descriptors, and
	 * the target is reasonably empty, it is relinked to the top of
	 * its chain.
	 *
	 * That costs 0 extra block reads and sends just one more block
	 * to the transaction, yet the next searcher has a much easier
	 * time.
	 *
	 * This has to happen *after* working out how many bits are
	 * being taken from the target group.
	 */
	if (ac->ac_allow_chain_relink &&
	    (prev_bh) &&
	    (ocfs2_block_group_reasonably_empty(gd, *num_bits))) {
		status = ocfs2_relink_block_group(handle, alloc_inode,
						  ac->ac_bh, cur_bh,
						  prev_bh, chain);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Now claim the bits: update the dinode and chain list first,
	 * then the group itself. */
	status = ocfs2_journal_access(handle, alloc_inode, ac->ac_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	di->id1.bitmap1.i_used =
		cpu_to_le32(*num_bits + le32_to_cpu(di->id1.bitmap1.i_used));
	le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));

	status = ocfs2_journal_dirty(handle, ac->ac_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_block_group_set_bits(handle, alloc_inode, gd, cur_bh,
					    *bit_off, *num_bits);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
	     (unsigned long long)le64_to_cpu(di->i_blkno));

	*bg_blkno = le64_to_cpu(gd->bg_blkno);
	*bits_left = le16_to_cpu(gd->bg_free_bits_count);

bail:
	if (cur_bh)
		brelse(cur_bh);
	if (prev_bh)
		brelse(prev_bh);

	mlog_exit(status);
	return status;
}
/*
 * Find the leaf containing the interval we want.  While we're on our
 * way down the tree, fill in every record we see at any depth, because
 * we might want it later.
 *
 * Note that this code is run without ip_lock.  That's because it
 * sleeps while reading.  If someone is also filling the extent list at
 * the same time we are, we might have to restart.
 *
 * @el is the extent list to start from; [@cpos, @cpos + @clusters) is
 * the interval being looked up.  Returns 0 on success, -EBADR for a
 * record running past ip_clusters, for duplicate records or for a
 * missing record, -ESRCH when the interval straddles a record, -EIO for
 * an invalid extent block, or an error from the block read or the map
 * insert.  Any extent block buffer read along the way is released
 * before returning, on every path.
 */
static int ocfs2_extent_map_find_leaf(struct inode *inode,
				      u32 cpos, u32 clusters,
				      struct ocfs2_extent_list *el)
{
	int i, ret;
	struct buffer_head *eb_bh = NULL;
	u64 blkno;
	u32 rec_end;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;

	/*
	 * The bh data containing the el cannot change here, because
	 * we hold alloc_sem.  So we can do this without other
	 * locks.
	 */
	while (el->l_tree_depth) {
		blkno = 0;
		for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
			rec = &el->l_recs[i];
			rec_end = (le32_to_cpu(rec->e_cpos) +
				   le32_to_cpu(rec->e_clusters));

			ret = -EBADR;
			if (rec_end > OCFS2_I(inode)->ip_clusters) {
				mlog_errno(ret);
				ocfs2_error(inode->i_sb,
					    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
					    i,
					    (unsigned long long)le64_to_cpu(rec->e_blkno),
					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
					    OCFS2_I(inode)->ip_clusters);
				goto out_free;
			}

			/* Record lies entirely before the interval: cache it. */
			if (rec_end <= cpos) {
				ret = ocfs2_extent_map_insert(inode, rec,
						le16_to_cpu(el->l_tree_depth));
				if (ret && (ret != -EEXIST)) {
					mlog_errno(ret);
					goto out_free;
				}
				continue;
			}
			/* Record lies entirely after the interval: cache it. */
			if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
				ret = ocfs2_extent_map_insert(inode, rec,
						le16_to_cpu(el->l_tree_depth));
				if (ret && (ret != -EEXIST)) {
					mlog_errno(ret);
					goto out_free;
				}
				continue;
			}

			/*
			 * We've found a record that matches our
			 * interval.  We don't insert it because we're
			 * about to traverse it.
			 */

			/* Check to see if we're straddling */
			ret = -ESRCH;
			if (!ocfs2_extent_rec_contains_clusters(rec,
								cpos,
								clusters)) {
				mlog_errno(ret);
				goto out_free;
			}

			/*
			 * If we've already found a record, the el has
			 * two records covering the same interval.
			 * EEEK!
			 */
			ret = -EBADR;
			if (blkno) {
				mlog_errno(ret);
				ocfs2_error(inode->i_sb,
					    "Multiple extents for (cpos = %u, clusters = %u) on inode %llu; e_blkno %llu and rec %d at e_blkno %llu\n",
					    cpos, clusters,
					    (unsigned long long)OCFS2_I(inode)->ip_blkno,
					    (unsigned long long)blkno, i,
					    (unsigned long long)le64_to_cpu(rec->e_blkno));
				goto out_free;
			}

			blkno = le64_to_cpu(rec->e_blkno);
		}

		/*
		 * We don't support holes, and we're still up
		 * in the branches, so we'd better have found someone
		 */
		ret = -EBADR;
		if (!blkno) {
			ocfs2_error(inode->i_sb,
				    "No record found for (cpos = %u, clusters = %u) on inode %llu\n",
				    cpos, clusters,
				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
			mlog_errno(ret);
			goto out_free;
		}

		/* Done with the previous level's block before reading the next. */
		if (eb_bh) {
			brelse(eb_bh);
			eb_bh = NULL;
		}
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       blkno, &eb_bh, OCFS2_BH_CACHED,
				       inode);
		if (ret) {
			mlog_errno(ret);
			goto out_free;
		}
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			ret = -EIO;
			goto out_free;
		}
		el = &eb->h_list;
	}

	BUG_ON(el->l_tree_depth);

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
		    OCFS2_I(inode)->ip_clusters) {
			ret = -EBADR;
			mlog_errno(ret);
			ocfs2_error(inode->i_sb,
				    "Extent %d at e_blkno %llu of inode %llu goes past ip_clusters of %u\n",
				    i,
				    (unsigned long long)le64_to_cpu(rec->e_blkno),
				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
				    OCFS2_I(inode)->ip_clusters);
			/*
			 * Leave through out_free: el may live inside eb_bh
			 * at this point, and a bare return would leak that
			 * buffer reference.
			 */
			goto out_free;
		}

		ret = ocfs2_extent_map_insert(inode, rec,
					      le16_to_cpu(el->l_tree_depth));
		if (ret && (ret != -EEXIST)) {
			mlog_errno(ret);
			goto out_free;
		}
	}

	ret = 0;

out_free:
	if (eb_bh)
		brelse(eb_bh);

	return ret;
}
/*
 * This lookup actually will read from disk.  It has one invariant:
 * It will never re-traverse blocks.  This means that all inserts should
 * be new regions or more granular regions (both allowed by insert).
 *
 * Looks up [@cpos, @cpos + @clusters) in the inode's extent map.  A
 * cached leaf entry (tree depth 0) is returned straight away.
 * Otherwise the extent list to descend from is read -- from the extent
 * block named by the cached entry, or from the inode's dinode when
 * nothing is cached -- and ocfs2_extent_map_find_leaf() fills in the map
 * before the lookup is repeated.
 *
 * Returns 0 with *ret_ent set, -EIO for an invalid extent block or
 * dinode, -ESRCH when the second lookup still finds nothing, or an error
 * from the block read or the leaf search.
 */
static int ocfs2_extent_map_lookup_read(struct inode *inode,
					u32 cpos,
					u32 clusters,
					struct ocfs2_extent_map_entry **ret_ent)
{
	int ret;
	u64 blkno;
	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
	struct ocfs2_extent_map_entry *ent;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_list *el;

	spin_lock(&OCFS2_I(inode)->ip_lock);
	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
	if (ent) {
		if (!ent->e_tree_depth) {
			spin_unlock(&OCFS2_I(inode)->ip_lock);
			*ret_ent = ent;
			return 0;
		}
		/* Interior entry: descend from the extent block it names. */
		blkno = le64_to_cpu(ent->e_rec.e_blkno);
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
				       OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			if (bh)
				brelse(bh);
			return ret;
		}
		eb = (struct ocfs2_extent_block *)bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			brelse(bh);
			return -EIO;
		}
		el = &eb->h_list;
	} else {
		spin_unlock(&OCFS2_I(inode)->ip_lock);

		/* Nothing cached: start from the root list in the dinode. */
		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       OCFS2_I(inode)->ip_blkno, &bh,
				       OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			if (bh)
				brelse(bh);
			return ret;
		}
		di = (struct ocfs2_dinode *)bh->b_data;
		if (!OCFS2_IS_VALID_DINODE(di)) {
			/*
			 * di points into bh->b_data, so report the bad
			 * dinode before dropping the buffer reference,
			 * matching the extent-block branch above.
			 */
			OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
			brelse(bh);
			return -EIO;
		}
		el = &di->id2.i_list;
	}

	ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
	brelse(bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
	if (!ent) {
		ret = -ESRCH;
		mlog_errno(ret);
		return ret;
	}

	/* FIXME: Make sure this isn't a corruption */
	BUG_ON(ent->e_tree_depth);

	*ret_ent = ent;

	return 0;
}
int ocfs2_load_local_alloc(struct ocfs2_super *osb) { int status = 0; struct ocfs2_dinode *alloc = NULL; struct buffer_head *alloc_bh = NULL; u32 num_used; struct inode *inode = NULL; struct ocfs2_local_alloc *la; mlog_entry_void(); /* read the alloc off disk */ inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &alloc_bh, 0, inode); if (status < 0) { mlog_errno(status); goto bail; } alloc = (struct ocfs2_dinode *) alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); if (!(le32_to_cpu(alloc->i_flags) & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { mlog(ML_ERROR, "Invalid local alloc inode, %"MLFu64"\n", OCFS2_I(inode)->ip_blkno); status = -EINVAL; goto bail; } if ((la->la_size == 0) || (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", le16_to_cpu(la->la_size)); status = -EINVAL; goto bail; } /* do a little verification. */ num_used = ocfs2_local_alloc_count_bits(alloc); /* hopefully the local alloc has always been recovered before * we load it. */ if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total || la->la_bm_off) mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED; bail: if (status < 0) if (alloc_bh) brelse(alloc_bh); if (inode) iput(inode); mlog_exit(status); return status; }