static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, int type, u32 slot, u64 *last_alloc_group, int flags) { int status; u32 bits_wanted = ac->ac_bits_wanted; struct inode *alloc_inode; struct buffer_head *bh = NULL; struct ocfs2_dinode *fe; u32 free_bits; alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); if (!alloc_inode) { mlog_errno(-EINVAL); return -EINVAL; } mutex_lock(&alloc_inode->i_mutex); status = ocfs2_inode_lock(alloc_inode, &bh, 1); if (status < 0) { mutex_unlock(&alloc_inode->i_mutex); iput(alloc_inode); mlog_errno(status); return status; } ac->ac_inode = alloc_inode; ac->ac_alloc_slot = slot; fe = (struct ocfs2_dinode *) bh->b_data; /* The bh was validated by the inode read inside * ocfs2_inode_lock(). Any corruption is a code bug. */ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { status = ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno)); goto bail; } free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - le32_to_cpu(fe->id1.bitmap1.i_used); if (bits_wanted > free_bits) { /* cluster bitmap never grows */ if (ocfs2_is_cluster_bitmap(alloc_inode)) { trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted, free_bits); status = -ENOSPC; goto bail; } if (!(flags & ALLOC_NEW_GROUP)) { trace_ocfs2_reserve_suballoc_bits_no_new_group( slot, bits_wanted, free_bits); status = -ENOSPC; goto bail; } status = ocfs2_block_group_alloc(osb, alloc_inode, bh, ac->ac_max_block, last_alloc_group, flags); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } atomic_inc(&osb->alloc_stats.bg_extends); /* You should never ask for this much metadata */ BUG_ON(bits_wanted > (le32_to_cpu(fe->id1.bitmap1.i_total) - le32_to_cpu(fe->id1.bitmap1.i_used))); } get_bh(bh); ac->ac_bh = bh; bail: brelse(bh); if (status) mlog_errno(status); return status; }
/* * Initialize the ACLs of a new inode. If parent directory has default ACL, * then clone to new inode. Called from ocfs2_mknod. */ int ocfs2_init_acl(handle_t *handle, struct inode *inode, struct inode *dir, struct buffer_head *di_bh, struct buffer_head *dir_bh, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; mode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, dir_bh); if (IS_ERR(acl)) return PTR_ERR(acl); } if (!acl) { mode = inode->i_mode & ~current_umask(); ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret) { mlog_errno(ret); goto cleanup; } } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { struct posix_acl *clone; if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_DEFAULT, acl, meta_ac, data_ac); if (ret) goto cleanup; } clone = posix_acl_clone(acl, GFP_NOFS); ret = -ENOMEM; if (!clone) goto cleanup; mode = inode->i_mode; ret = posix_acl_create_masq(clone, &mode); if (ret >= 0) { ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret2) { mlog_errno(ret2); ret = ret2; goto cleanup; } if (ret > 0) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_ACCESS, clone, meta_ac, data_ac); } } posix_acl_release(clone); } cleanup: posix_acl_release(acl); return ret; }
/* Add a new group descriptor to global_bitmap. */ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) { int ret; handle_t *handle; struct buffer_head *main_bm_bh = NULL; struct inode *main_bm_inode = NULL; struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group = NULL; struct ocfs2_chain_list *cl; struct ocfs2_chain_rec *cr; u16 cl_bpc; mlog_entry_void(); if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { ret = -EINVAL; mlog_errno(ret); goto out; } mutex_lock(&main_bm_inode->i_mutex); ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (ret < 0) { mlog_errno(ret); goto out_mutex; } fe = (struct ocfs2_dinode *)main_bm_bh->b_data; if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small." " Force to do offline resize."); ret = -EINVAL; goto out_unlock; } ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh); if (ret < 0) { mlog(ML_ERROR, "Can't read the group descriptor # %llu " "from the device.", (unsigned long long)input->group); goto out_unlock; } ocfs2_set_new_buffer_uptodate(inode, group_bh); ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); if (ret) { mlog_errno(ret); goto out_unlock; } mlog(0, "Add a new group %llu in chain = %u, length = %u\n", (unsigned long long)input->group, input->chain, input->clusters); handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; goto out_unlock; } cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); cl = &fe->id2.i_chain; cr = &cl->cl_recs[input->chain]; ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; } group = (struct ocfs2_group_desc *)group_bh->b_data; group->bg_next_group = cr->c_blkno; ret = ocfs2_journal_dirty(handle, group_bh); if (ret < 0) { mlog_errno(ret); goto out_commit; } ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_commit; } if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) { le16_add_cpu(&cl->cl_next_free_rec, 1); memset(cr, 0, sizeof(struct ocfs2_chain_rec)); } cr->c_blkno = cpu_to_le64(input->group); le32_add_cpu(&cr->c_total, input->clusters * cl_bpc); le32_add_cpu(&cr->c_free, input->frees * cl_bpc); le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc); le32_add_cpu(&fe->id1.bitmap1.i_used, (input->clusters - input->frees) * cl_bpc); le32_add_cpu(&fe->i_clusters, input->clusters); ocfs2_journal_dirty(handle, main_bm_bh); spin_lock(&OCFS2_I(main_bm_inode)->ip_lock); OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits); spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock); i_size_write(main_bm_inode, le64_to_cpu(fe->i_size)); ocfs2_update_super_and_backups(main_bm_inode, input->clusters); out_commit: ocfs2_commit_trans(osb, handle); out_unlock: brelse(group_bh); brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: mutex_unlock(&main_bm_inode->i_mutex); iput(main_bm_inode); out: mlog_exit_void(); return ret; }
static int ocfs2_remove_inode(struct inode *inode, struct buffer_head *di_bh, struct inode *orphan_dir_inode, struct buffer_head *orphan_dir_bh) { int status; struct inode *inode_alloc_inode = NULL; struct buffer_head *inode_alloc_bh = NULL; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; inode_alloc_inode = ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(di->i_suballoc_slot)); if (!inode_alloc_inode) { status = -EEXIST; mlog_errno(status); goto bail; } mutex_lock(&inode_alloc_inode->i_mutex); status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); if (status < 0) { mutex_unlock(&inode_alloc_inode->i_mutex); mlog_errno(status); goto bail; } handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + ocfs2_quota_trans_credits(inode->i_sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto bail_unlock; } if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, orphan_dir_bh); if (status < 0) { mlog_errno(status); goto bail_commit; } } /* set the inodes dtime */ status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail_commit; } di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); ocfs2_journal_dirty(handle, di_bh); ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); dquot_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); if (status < 0) mlog_errno(status); bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: ocfs2_inode_unlock(inode_alloc_inode, 1); mutex_unlock(&inode_alloc_inode->i_mutex); brelse(inode_alloc_bh); bail: iput(inode_alloc_inode); return status; }
/* Query the cluster to determine whether we should wipe an inode from * disk or not. * * Requires the inode to have the cluster lock. */ static int ocfs2_query_inode_wipe(struct inode *inode, struct buffer_head *di_bh, int *wipe) { int status = 0, reason = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di; *wipe = 0; trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno, inode->i_nlink); /* While we were waiting for the cluster lock in * ocfs2_delete_inode, another node might have asked to delete * the inode. Recheck our flags to catch this. */ if (!ocfs2_inode_is_valid_to_delete(inode)) { reason = 1; goto bail; } /* Now that we have an up to date inode, we can double check * the link count. */ if (inode->i_nlink) goto bail; /* Do some basic inode verification... */ di = (struct ocfs2_dinode *) di_bh->b_data; if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { /* * Inodes in the orphan dir must have ORPHANED_FL. The only * inodes that come back out of the orphan dir are reflink * targets. A reflink target may be moved out of the orphan * dir between the time we scan the directory and the time we * process it. This would lead to HAS_REFCOUNT_FL being set but * ORPHANED_FL not. */ if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { reason = 2; goto bail; } /* for lack of a better error? */ status = -EEXIST; mlog(ML_ERROR, "Inode %llu (on-disk %llu) not orphaned! " "Disk flags 0x%x, inode flags 0x%x\n", (unsigned long long)oi->ip_blkno, (unsigned long long)le64_to_cpu(di->i_blkno), le32_to_cpu(di->i_flags), oi->ip_flags); goto bail; } /* has someone already deleted us?! baaad... */ if (di->i_dtime) { status = -EEXIST; mlog_errno(status); goto bail; } /* * This is how ocfs2 determines whether an inode is still live * within the cluster. Every node takes a shared read lock on * the inode open lock in ocfs2_read_locked_inode(). When we * get to ->delete_inode(), each node tries to convert it's * lock to an exclusive. Trylocks are serialized by the inode * meta data lock. If the upconvert succeeds, we know the inode * is no longer live and can be deleted. * * Though we call this with the meta data lock held, the * trylock keeps us from ABBA deadlock. */ status = ocfs2_try_open_lock(inode, 1); if (status == -EAGAIN) { status = 0; reason = 3; goto bail; } if (status < 0) { mlog_errno(status); goto bail; } *wipe = 1; trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot)); bail: trace_ocfs2_query_inode_wipe_end(status, reason); return status; }
/* * sync the local alloc to main bitmap. * * assumes you've already locked the main bitmap -- the bitmap inode * passed is used for caching. */ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_dinode *alloc, struct inode *main_bm_inode, struct buffer_head *main_bm_bh) { int status = 0; int bit_off, left, count, start; u64 la_start_blk; u64 blkno; void *bitmap; struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); trace_ocfs2_sync_local_to_main( le32_to_cpu(alloc->id1.bitmap1.i_total), le32_to_cpu(alloc->id1.bitmap1.i_used)); if (!alloc->id1.bitmap1.i_total) { goto bail; } if (le32_to_cpu(alloc->id1.bitmap1.i_used) == le32_to_cpu(alloc->id1.bitmap1.i_total)) { goto bail; } la_start_blk = ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(la->la_bm_off)); bitmap = la->la_bitmap; start = count = bit_off = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) != -1) { if ((bit_off < left) && (bit_off == start)) { count++; start++; continue; } if (count) { blkno = la_start_blk + ocfs2_clusters_to_blocks(osb->sb, start - count); trace_ocfs2_sync_local_to_main_free( count, start - count, (unsigned long long)la_start_blk, (unsigned long long)blkno); status = ocfs2_release_clusters(handle, main_bm_inode, main_bm_bh, blkno, count); if (status < 0) { mlog_errno(status); goto bail; } } if (bit_off >= left) break; count = 1; start = bit_off + 1; } bail: if (status) mlog_errno(status); return status; }
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, int sysfile_type) { struct inode *inode = NULL; struct super_block *sb = osb->sb; struct ocfs2_find_inode_args args; journal_t *journal = OCFS2_SB(sb)->journal->j_journal; trace_ocfs2_iget_begin((unsigned long long)blkno, flags, sysfile_type); /* Ok. By now we've either got the offsets passed to us by the * caller, or we just pulled them off the bh. Lets do some * sanity checks to make sure they're OK. */ if (blkno == 0) { inode = ERR_PTR(-EINVAL); mlog_errno(PTR_ERR(inode)); goto bail; } args.fi_blkno = blkno; args.fi_flags = flags; args.fi_ino = ino_from_blkno(sb, blkno); args.fi_sysfile_type = sysfile_type; inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, ocfs2_init_locked_inode, &args); /* inode was *not* in the inode cache. 2.6.x requires * us to do our own read_inode call and unlock it * afterwards. */ if (inode == NULL) { inode = ERR_PTR(-ENOMEM); mlog_errno(PTR_ERR(inode)); goto bail; } trace_ocfs2_iget5_locked(inode->i_state); if (inode->i_state & I_NEW) { ocfs2_read_locked_inode(inode, &args); unlock_new_inode(inode); } if (is_bad_inode(inode)) { iput(inode); inode = ERR_PTR(-ESTALE); goto bail; } /* * Set transaction id's of transactions that have to be committed * to finish f[data]sync. We set them to currently running transaction * as we cannot be sure that the inode or some of its metadata isn't * part of the transaction - the inode could have been reclaimed and * now it is reread from disk. */ if (journal) { transaction_t *transaction; tid_t tid; struct ocfs2_inode_info *oi = OCFS2_I(inode); read_lock(&journal->j_state_lock); if (journal->j_running_transaction) transaction = journal->j_running_transaction; else transaction = journal->j_committing_transaction; if (transaction) tid = transaction->t_tid; else tid = journal->j_commit_sequence; read_unlock(&journal->j_state_lock); oi->i_sync_tid = tid; oi->i_datasync_tid = tid; } bail: if (!IS_ERR(inode)) { trace_ocfs2_iget_end(inode, (unsigned long long)OCFS2_I(inode)->ip_blkno); } return inode; }
/* * expects the suballoc inode to already be locked. */ static int ocfs2_free_suballoc_bits(handle_t *handle, struct inode *alloc_inode, struct buffer_head *alloc_bh, unsigned int start_bit, u64 bg_blkno, unsigned int count) { int status = 0; u32 tmp_used; struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group; mlog_entry_void(); if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); status = -EIO; goto bail; } BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, alloc_inode); if (status < 0) { mlog_errno(status); goto bail; } group = (struct ocfs2_group_desc *) group_bh->b_data; status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); if (status) { mlog_errno(status); goto bail; } BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); status = ocfs2_block_group_clear_bits(handle, alloc_inode, group, group_bh, start_bit, count); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, count); tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); status = ocfs2_journal_dirty(handle, alloc_bh); if (status < 0) { mlog_errno(status); goto bail; } bail: if (group_bh) brelse(group_bh); mlog_exit(status); return status; }
/* * We expect the block group allocator to already be locked. */ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct inode *alloc_inode, struct buffer_head *bh) { int status, credits; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; struct ocfs2_chain_list *cl; struct ocfs2_alloc_context *ac = NULL; handle_t *handle = NULL; u32 bit_off, num_bits; u16 alloc_rec; u64 bg_blkno; struct buffer_head *bg_bh = NULL; struct ocfs2_group_desc *bg; BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); mlog_entry_void(); cl = &fe->id2.i_chain; status = ocfs2_reserve_clusters(osb, le16_to_cpu(cl->cl_cpg), &ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } credits = ocfs2_calc_group_alloc_credits(osb->sb, le16_to_cpu(cl->cl_cpg)); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; mlog_errno(status); goto bail; } status = ocfs2_claim_clusters(osb, handle, ac, le16_to_cpu(cl->cl_cpg), &bit_off, &num_bits); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } alloc_rec = ocfs2_find_smallest_chain(cl); /* setup the group */ bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); mlog(0, "new descriptor, record %u, at block %llu\n", alloc_rec, (unsigned long long)bg_blkno); bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { status = -EIO; mlog_errno(status); goto bail; } ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh, bg_blkno, alloc_rec, cl); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) bg_bh->b_data; status = ocfs2_journal_access(handle, alloc_inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) le16_add_cpu(&cl->cl_next_free_rec, 1); le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); status = ocfs2_journal_dirty(handle, bh); if (status < 0) { mlog_errno(status); goto bail; } spin_lock(&OCFS2_I(alloc_inode)->ip_lock); OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, le32_to_cpu(fe->i_clusters))); spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); status = 0; bail: if (handle) ocfs2_commit_trans(osb, handle); if (ac) ocfs2_free_alloc_context(ac); if (bg_bh) brelse(bg_bh); mlog_exit(status); return status; }
/* will give out up to bits_wanted contiguous bits. */ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, u64 *bg_blkno) { int status; u16 victim, i; u16 bits_left = 0; u64 hint_blkno = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; mlog_entry_void(); BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); BUG_ON(!ac->ac_bh); fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); status = -EIO; goto bail; } if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " "bits but only %u total.", (unsigned long long)le64_to_cpu(fe->i_blkno), le32_to_cpu(fe->id1.bitmap1.i_used), le32_to_cpu(fe->id1.bitmap1.i_total)); status = -EIO; goto bail; } if (hint_blkno) { /* Attempt to short-circuit the usual search mechanism * by jumping straight to the most recently used * allocation group. This helps us mantain some * contiguousness across allocations. */ status = ocfs2_search_one_group(ac, handle, bits_wanted, min_bits, bit_off, num_bits, hint_blkno, &bits_left); if (!status) { /* Be careful to update *bg_blkno here as the * caller is expecting it to be filled in, and * ocfs2_search_one_group() won't do that for * us. */ *bg_blkno = hint_blkno; goto set_hint; } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; } } cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, num_bits, bg_blkno, &bits_left); if (!status) goto set_hint; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; } mlog(0, "Search of victim chain %u came up with nothing, " "trying all chains now.\n", victim); /* If we didn't pick a good victim, then just default to * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ ac->ac_allow_chain_relink = 0; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; if (!cl->cl_recs[i].c_free) continue; ac->ac_chain = i; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, num_bits, bg_blkno, &bits_left); if (!status) break; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; } } set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to * yield a suitable extent, then we reset the last * group hint so as to not waste a disk read */ if (bits_left < min_bits) ac->ac_last_group = 0; else ac->ac_last_group = *bg_blkno; } bail: mlog_exit(status); return status; }
/* * min_bits - minimum contiguous chunk from this total allocation we * can handle. set to what we asked for originally for a full * contig. allocation, set to '1' to indicate we can deal with extents * of any size. */ int ocfs2_claim_clusters(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac, u32 min_clusters, u32 *cluster_start, u32 *num_clusters) { int status; unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); BUG_ON(!ac); BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL && ac->ac_which != OCFS2_AC_USE_MAIN); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { status = ocfs2_claim_local_alloc_bits(osb, handle, ac, bits_wanted, cluster_start, num_clusters); if (!status) atomic_inc(&osb->alloc_stats.local_data); } else { if (min_clusters > (osb->bitmap_cpg - 1)) { /* The only paths asking for contiguousness * should know about this already. */ mlog(ML_ERROR, "minimum allocation requested exceeds " "group bitmap size!"); status = -ENOSPC; goto bail; } /* clamp the current request down to a realistic size. */ if (bits_wanted > (osb->bitmap_cpg - 1)) bits_wanted = osb->bitmap_cpg - 1; status = ocfs2_claim_suballoc_bits(osb, ac, handle, bits_wanted, min_clusters, &bg_bit_off, num_clusters, &bg_blkno); if (!status) { *cluster_start = ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, bg_blkno, bg_bit_off); atomic_inc(&osb->alloc_stats.bitmap_data); } } if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } ac->ac_bits_given += *num_clusters; bail: mlog_exit(status); return status; }
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, handle_t *handle, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, u64 *bg_blkno, u16 *bits_left) { int status; u16 chain, tmp_bits; u32 tmp_used; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; struct buffer_head *prev_group_bh = NULL; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; struct ocfs2_group_desc *bg; chain = ac->ac_chain; mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), le64_to_cpu(cl->cl_recs[chain].c_blkno), &group_bh, OCFS2_BH_CACHED, alloc_inode); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); if (status) { mlog_errno(status); goto bail; } status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, bit_off, &tmp_bits)) == -ENOSPC) { if (!bg->bg_next_group) break; if (prev_group_bh) { brelse(prev_group_bh); prev_group_bh = NULL; } next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), next_group, &group_bh, OCFS2_BH_CACHED, alloc_inode); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); if (status) { mlog_errno(status); goto bail; } } if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); *num_bits = tmp_bits; BUG_ON(*num_bits == 0); /* * Keep track of previous block descriptor read. When * we find a target, if we have read more than X * number of descriptors, and the target is reasonably * empty, relink him to top of his chain. * * We've read 0 extra blocks and only send one more to * the transaction, yet the next guy to search has a * much easier time. * * Do this *after* figuring out how many bits we're taking out * of our target group. */ if (ac->ac_allow_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, ac->ac_bh, group_bh, prev_group_bh, chain); if (status < 0) { mlog_errno(status); goto bail; } } /* Ok, claim our bits now: set the info on dinode, chainlist * and then the group */ status = ocfs2_journal_access(handle, alloc_inode, ac->ac_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); status = ocfs2_journal_dirty(handle, ac->ac_bh); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_block_group_set_bits(handle, alloc_inode, bg, group_bh, *bit_off, *num_bits); if (status < 0) { mlog_errno(status); goto bail; } mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, (unsigned long long)le64_to_cpu(fe->i_blkno)); *bg_blkno = le64_to_cpu(bg->bg_blkno); *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: if (group_bh) brelse(group_bh); if (prev_group_bh) brelse(prev_group_bh); mlog_exit(status); return status; }
static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, struct ocfs2_slot_info *si) { int status = 0; u64 blkno; unsigned long long blocks, bytes; unsigned int i; struct buffer_head *bh; status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes); if (status) goto bail; blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes); BUG_ON(blocks > UINT_MAX); si->si_blocks = blocks; if (!si->si_blocks) goto bail; if (si->si_extended) si->si_slots_per_block = (osb->sb->s_blocksize / sizeof(struct ocfs2_extended_slot)); else si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16); /* The size checks above should ensure this */ BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); mlog(0, "Slot map needs %u buffers for %llu bytes\n", si->si_blocks, bytes); si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, GFP_KERNEL); if (!si->si_bh) { status = -ENOMEM; mlog_errno(status); goto bail; } for (i = 0; i < si->si_blocks; i++) { status = ocfs2_extent_map_get_blocks(si->si_inode, i, &blkno, NULL, NULL); if (status < 0) { mlog_errno(status); goto bail; } mlog(0, "Reading slot map block %u at %llu\n", i, (unsigned long long)blkno); bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; } si->si_bh[i] = bh; } bail: return status; }
int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, int blocks, struct ocfs2_alloc_context **ac) { int status; int slot = ocfs2_get_meta_steal_slot(osb); *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); if (!(*ac)) { status = -ENOMEM; mlog_errno(status); goto bail; } (*ac)->ac_bits_wanted = blocks; (*ac)->ac_which = OCFS2_AC_USE_META; (*ac)->ac_group_search = ocfs2_block_group_search; if (slot != OCFS2_INVALID_SLOT && atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL) goto extent_steal; atomic_set(&osb->s_num_meta_stolen, 0); status = ocfs2_reserve_suballoc_bits(osb, (*ac), EXTENT_ALLOC_SYSTEM_INODE, (u32)osb->slot_num, NULL, ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP); if (status >= 0) { status = 0; if (slot != OCFS2_INVALID_SLOT) ocfs2_init_meta_steal_slot(osb); goto bail; } else if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; } ocfs2_free_ac_resource(*ac); extent_steal: status = ocfs2_steal_meta(osb, *ac); atomic_inc(&osb->s_num_meta_stolen); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } status = 0; bail: if ((status < 0) && *ac) { ocfs2_free_alloc_context(*ac); *ac = NULL; } if (status) mlog_errno(status); return status; }
/* * We want to free the bitmap bits outside of any recovery context as * we'll need a cluster lock to do so, but we must clear the local * alloc before giving up the recovered nodes journal. To solve this, * we kmalloc a copy of the local alloc before it's change for the * caller to process with ocfs2_complete_local_alloc_recovery */ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, int slot_num, struct ocfs2_dinode **alloc_copy) { int status = 0; struct buffer_head *alloc_bh = NULL; struct inode *inode = NULL; struct ocfs2_dinode *alloc; trace_ocfs2_begin_local_alloc_recovery(slot_num); *alloc_copy = NULL; inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } mutex_lock(&inode->i_mutex); status = ocfs2_read_inode_block_full(inode, &alloc_bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; } *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); if (!(*alloc_copy)) { status = -ENOMEM; goto bail; } memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); alloc = (struct ocfs2_dinode *) alloc_bh->b_data; ocfs2_clear_local_alloc(alloc); ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); if (status < 0) mlog_errno(status); bail: if (status < 0) { kfree(*alloc_copy); *alloc_copy = NULL; } brelse(alloc_bh); if (inode) { mutex_unlock(&inode->i_mutex); iput(inode); } if (status) mlog_errno(status); return status; }
static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, int type, u32 slot) { int status; u32 bits_wanted = ac->ac_bits_wanted; struct inode *alloc_inode; struct buffer_head *bh = NULL; struct ocfs2_dinode *fe; u32 free_bits; mlog_entry_void(); alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); if (!alloc_inode) { mlog_errno(-EINVAL); return -EINVAL; } mutex_lock(&alloc_inode->i_mutex); status = ocfs2_meta_lock(alloc_inode, &bh, 1); if (status < 0) { mutex_unlock(&alloc_inode->i_mutex); iput(alloc_inode); mlog_errno(status); return status; } ac->ac_inode = alloc_inode; fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); status = -EIO; goto bail; } if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", (unsigned long long)le64_to_cpu(fe->i_blkno)); status = -EIO; goto bail; } free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - le32_to_cpu(fe->id1.bitmap1.i_used); if (bits_wanted > free_bits) { /* cluster bitmap never grows */ if (ocfs2_is_cluster_bitmap(alloc_inode)) { mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", bits_wanted, free_bits); status = -ENOSPC; goto bail; } status = ocfs2_block_group_alloc(osb, alloc_inode, bh); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } atomic_inc(&osb->alloc_stats.bg_extends); /* You should never ask for this much metadata */ BUG_ON(bits_wanted > (le32_to_cpu(fe->id1.bitmap1.i_total) - le32_to_cpu(fe->id1.bitmap1.i_used))); } get_bh(bh); ac->ac_bh = bh; bail: if (bh) brelse(bh); mlog_exit(status); return status; }
/* * make sure we've got at least bits_wanted contiguous bits in the * local alloc. You lose them when you drop i_mutex. * * We will add ourselves to the transaction passed in, but may start * our own in order to shift windows. */ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, u32 bits_wanted, struct ocfs2_alloc_context *ac) { int status; struct ocfs2_dinode *alloc; struct inode *local_alloc_inode; unsigned int free_bits; BUG_ON(!ac); local_alloc_inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!local_alloc_inode) { status = -ENOENT; mlog_errno(status); goto bail; } mutex_lock(&local_alloc_inode->i_mutex); /* * We must double check state and allocator bits because * another process may have changed them while holding i_mutex. */ spin_lock(&osb->osb_lock); if (!ocfs2_la_state_enabled(osb) || (bits_wanted > osb->local_alloc_bits)) { spin_unlock(&osb->osb_lock); status = -ENOSPC; goto bail; } spin_unlock(&osb->osb_lock); alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; #ifdef CONFIG_OCFS2_DEBUG_FS if (le32_to_cpu(alloc->id1.bitmap1.i_used) != ocfs2_local_alloc_count_bits(alloc)) { ocfs2_error(osb->sb, "local alloc inode %llu says it has " "%u free bits, but a count shows %u", (unsigned long long)le64_to_cpu(alloc->i_blkno), le32_to_cpu(alloc->id1.bitmap1.i_used), ocfs2_local_alloc_count_bits(alloc)); status = -EIO; goto bail; } #endif free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - le32_to_cpu(alloc->id1.bitmap1.i_used); if (bits_wanted > free_bits) { /* uhoh, window change time. */ status = ocfs2_local_alloc_slide_window(osb, local_alloc_inode); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } /* * Under certain conditions, the window slide code * might have reduced the number of bits available or * disabled the the local alloc entirely. Re-check * here and return -ENOSPC if necessary. */ status = -ENOSPC; if (!ocfs2_la_state_enabled(osb)) goto bail; free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - le32_to_cpu(alloc->id1.bitmap1.i_used); if (bits_wanted > free_bits) goto bail; } ac->ac_inode = local_alloc_inode; /* We should never use localalloc from another slot */ ac->ac_alloc_slot = osb->slot_num; ac->ac_which = OCFS2_AC_USE_LOCAL; get_bh(osb->local_alloc_bh); ac->ac_bh = osb->local_alloc_bh; status = 0; bail: if (status < 0 && local_alloc_inode) { mutex_unlock(&local_alloc_inode->i_mutex); iput(local_alloc_inode); } trace_ocfs2_reserve_local_alloc_bits( (unsigned long long)ac->ac_max_block, bits_wanted, osb->slot_num, status); if (status) mlog_errno(status); return status; }
static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, struct buffer_head *bg_bh, struct buffer_head *prev_bg_bh, u16 chain) { int status; /* there is a really tiny chance the journal calls could fail, * but we wouldn't want inconsistent blocks in *any* case. */ u64 fe_ptr, bg_ptr, prev_bg_ptr; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); status = -EIO; goto out; } if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); status = -EIO; goto out; } if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); status = -EIO; goto out; } mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno), chain, (unsigned long long)le64_to_cpu(bg->bg_blkno), (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); bg_ptr = le64_to_cpu(bg->bg_next_group); prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; } prev_bg->bg_next_group = bg->bg_next_group; status = ocfs2_journal_dirty(handle, prev_bg_bh); if (status < 0) { mlog_errno(status); goto out_rollback; } status = ocfs2_journal_access(handle, alloc_inode, bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; } bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; status = ocfs2_journal_dirty(handle, bg_bh); if (status < 0) { mlog_errno(status); goto out_rollback; } status = ocfs2_journal_access(handle, alloc_inode, fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_rollback; } fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; status = ocfs2_journal_dirty(handle, fe_bh); if (status < 0) { mlog_errno(status); goto out_rollback; } status = 0; out_rollback: if (status < 0) { fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); bg->bg_next_group = cpu_to_le64(bg_ptr); prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); } out: mlog_exit(status); return status; }
static void ocfs2_clear_inode(struct inode *inode) { int status; struct ocfs2_inode_info *oi = OCFS2_I(inode); clear_inode(inode); trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, inode->i_nlink); mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, "Inode=%lu\n", inode->i_ino); dquot_drop(inode); /* To preven remote deletes we hold open lock before, now it * is time to unlock PR and EX open locks. */ ocfs2_open_unlock(inode); /* Do these before all the other work so that we don't bounce * the downconvert thread while waiting to destroy the locks. */ ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, &oi->ip_la_data_resv); ocfs2_resv_init_once(&oi->ip_la_data_resv); /* We very well may get a clear_inode before all an inodes * metadata has hit disk. Of course, we can't drop any cluster * locks until the journal has finished with it. The only * exception here are successfully wiped inodes - their * metadata can now be considered to be part of the system * inodes from which it came. */ if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED)) ocfs2_checkpoint_inode(inode); mlog_bug_on_msg(!list_empty(&oi->ip_io_markers), "Clear inode of %llu, inode has io markers\n", (unsigned long long)oi->ip_blkno); ocfs2_extent_map_trunc(inode, 0); status = ocfs2_drop_inode_locks(inode); if (status < 0) mlog_errno(status); ocfs2_lock_res_free(&oi->ip_rw_lockres); ocfs2_lock_res_free(&oi->ip_inode_lockres); ocfs2_lock_res_free(&oi->ip_open_lockres); ocfs2_metadata_cache_exit(INODE_CACHE(inode)); mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached, "Clear inode of %llu, inode has %u cache items\n", (unsigned long long)oi->ip_blkno, INODE_CACHE(inode)->ci_num_cached); mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE), "Clear inode of %llu, inode has a bad flag\n", (unsigned long long)oi->ip_blkno); mlog_bug_on_msg(spin_is_locked(&oi->ip_lock), "Clear inode of %llu, inode is locked\n", (unsigned long long)oi->ip_blkno); mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex), "Clear inode of %llu, io_mutex is locked\n", (unsigned long long)oi->ip_blkno); mutex_unlock(&oi->ip_io_mutex); /* * down_trylock() returns 0, down_write_trylock() returns 1 * kernel 1, world 0 */ mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem), "Clear inode of %llu, alloc_sem is locked\n", (unsigned long long)oi->ip_blkno); up_write(&oi->ip_alloc_sem); mlog_bug_on_msg(oi->ip_open_count, "Clear inode of %llu has open count %d\n", (unsigned long long)oi->ip_blkno, oi->ip_open_count); /* Clear all other flags. */ oi->ip_flags = 0; oi->ip_dir_start_lookup = 0; oi->ip_blkno = 0ULL; /* * ip_jinode is used to track txns against this inode. We ensure that * the journal is flushed before journal shutdown. Thus it is safe to * have inodes get cleaned up after journal shutdown. */ jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, &oi->ip_jinode); }
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, struct buffer_head *bhs[], int flags) { int status = 0; int i, ignore_cache = 0; struct buffer_head *bh; mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", inode, (unsigned long long)block, nr, flags); BUG_ON(!inode); BUG_ON((flags & OCFS2_BH_READAHEAD) && (flags & OCFS2_BH_IGNORE_CACHE)); if (bhs == NULL) { status = -EINVAL; mlog_errno(status); goto bail; } if (nr < 0) { mlog(ML_ERROR, "asked to read %d blocks!\n", nr); status = -EINVAL; mlog_errno(status); goto bail; } if (nr == 0) { mlog(ML_BH_IO, "No buffers will be read!\n"); status = 0; goto bail; } mutex_lock(&OCFS2_I(inode)->ip_io_mutex); for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(inode->i_sb, block++); if (bhs[i] == NULL) { mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); status = -EIO; mlog_errno(status); goto bail; } } bh = bhs[i]; ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); /* There are three read-ahead cases here which we need to * be concerned with. All three assume a buffer has * previously been submitted with OCFS2_BH_READAHEAD * and it hasn't yet completed I/O. * * 1) The current request is sync to disk. This rarely * happens these days, and never when performance * matters - the code can just wait on the buffer * lock and re-submit. * * 2) The current request is cached, but not * readahead. ocfs2_buffer_uptodate() will return * false anyway, so we'll wind up waiting on the * buffer lock to do I/O. We re-check the request * with after getting the lock to avoid a re-submit. * * 3) The current request is readahead (and so must * also be a caching one). We short circuit if the * buffer is locked (under I/O) and if it's in the * uptodate cache. The re-check from #2 catches the * case that the previous read-ahead completes just * before our is-it-in-flight check. */ if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { mlog(ML_UPTODATE, "bh (%llu), inode %llu not uptodate\n", (unsigned long long)bh->b_blocknr, (unsigned long long)OCFS2_I(inode)->ip_blkno); /* We're using ignore_cache here to say * "go to disk" */ ignore_cache = 1; } /* XXX: Can we ever get this and *not* have the cached * flag set? */ if (buffer_jbd(bh)) { if (ignore_cache) mlog(ML_BH_IO, "trying to sync read a jbd " "managed bh (blocknr = %llu)\n", (unsigned long long)bh->b_blocknr); continue; } if (ignore_cache) { if (buffer_dirty(bh)) { /* This should probably be a BUG, or * at least return an error. */ mlog(ML_BH_IO, "asking me to sync read a dirty " "buffer! (blocknr = %llu)\n", (unsigned long long)bh->b_blocknr); continue; } /* A read-ahead request was made - if the * buffer is already under read-ahead from a * previously submitted request than we are * done here. */ if ((flags & OCFS2_BH_READAHEAD) && ocfs2_buffer_read_ahead(inode, bh)) continue; lock_buffer(bh); if (buffer_jbd(bh)) { #ifdef CATCH_BH_JBD_RACES mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); #else unlock_buffer(bh); continue; #endif } /* Re-check ocfs2_buffer_uptodate() as a * previously read-ahead buffer may have * completed I/O while we were waiting for the * buffer lock. */ if (!(flags & OCFS2_BH_IGNORE_CACHE) && !(flags & OCFS2_BH_READAHEAD) && ocfs2_buffer_uptodate(inode, bh)) { unlock_buffer(bh); continue; } clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; } } status = 0; for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; if (!(flags & OCFS2_BH_READAHEAD)) { /* We know this can't have changed as we hold the * inode sem. Avoid doing any work on the bh if the * journal has it. */ if (!buffer_jbd(bh)) wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* Status won't be cleared from here on out, * so we can safely record this and loop back * to cleanup the other buffers. Don't need to * remove the clustered uptodate information * for this bh as it's not marked locally * uptodate. */ status = -EIO; put_bh(bh); bhs[i] = NULL; continue; } } /* Always set the buffer in the cache, even if it was * a forced read, or read-ahead which hasn't yet * completed. */ ocfs2_set_buffer_uptodate(inode, bh); } mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", (unsigned long long)block, nr, ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", flags); bail: mlog_exit(status); return status; }
static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args) { struct super_block *sb; struct ocfs2_super *osb; struct ocfs2_dinode *fe; struct buffer_head *bh = NULL; int status, can_lock; u32 generation = 0; status = -EINVAL; if (inode == NULL || inode->i_sb == NULL) { mlog(ML_ERROR, "bad inode\n"); return status; } sb = inode->i_sb; osb = OCFS2_SB(sb); if (!args) { mlog(ML_ERROR, "bad inode args\n"); make_bad_inode(inode); return status; } /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. * * Generally, OCFS2 never trusts the contents of an inode * unless it's holding a cluster lock, so taking it here isn't * a correctness issue as much as it is a performance * improvement. * * There are three times when taking the lock is not a good idea: * * 1) During startup, before we have initialized the DLM. * * 2) If we are reading certain system files which never get * cluster locks (local alloc, truncate log). * * 3) If the process doing the iget() is responsible for * orphan dir recovery. We're holding the orphan dir lock and * can get into a deadlock with another process on another * node in ->delete_inode(). * * #1 and #2 can be simply solved by never taking the lock * here for system files (which are the only type we read * during mount). It's a heavier approach, but our main * concern is user-accessible files anyway. * * #3 works itself out because we'll eventually take the * cluster lock before trusting anything anyway. */ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) && !ocfs2_mount_local(osb); trace_ocfs2_read_locked_inode( (unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock); /* * To maintain backwards compatibility with older versions of * ocfs2-tools, we still store the generation value for system * files. The only ones that actually matter to userspace are * the journals, but it's easier and inexpensive to just flag * all system files similarly. */ if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) generation = osb->fs_generation; ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, OCFS2_LOCK_TYPE_META, generation, inode); ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, OCFS2_LOCK_TYPE_OPEN, 0, inode); if (can_lock) { status = ocfs2_open_lock(inode); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } status = ocfs2_inode_lock(inode, NULL, 0); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } } if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { status = ocfs2_try_open_lock(inode, 0); if (status) { make_bad_inode(inode); return status; } } if (can_lock) { status = ocfs2_read_inode_block_full(inode, &bh, OCFS2_BH_IGNORE_CACHE); } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); /* * If buffer is in jbd, then its checksum may not have been * computed as yet. */ if (!status && !buffer_jbd(bh)) status = ocfs2_validate_inode_block(osb->sb, bh); } if (status < 0) { mlog_errno(status); goto bail; } status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; /* * This is a code bug. Right now the caller needs to * understand whether it is asking for a system file inode or * not so the proper lock names can be built. */ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), "Inode %llu: system file state is ambigous\n", (unsigned long long)args->fi_blkno); if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); ocfs2_populate_inode(inode, fe, 0); BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); status = 0; bail: if (can_lock) ocfs2_inode_unlock(inode, 0); if (status < 0) make_bad_inode(inode); if (args && bh) brelse(bh); return status; }
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]) { int status = 0; unsigned int i; struct buffer_head *bh; if (!nr) { mlog(ML_BH_IO, "No buffers will be read!\n"); goto bail; } for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { status = -EIO; mlog_errno(status); goto bail; } } bh = bhs[i]; if (buffer_jbd(bh)) { mlog(ML_ERROR, "trying to sync read a jbd " "managed bh (blocknr = %llu), skipping\n", (unsigned long long)bh->b_blocknr); continue; } if (buffer_dirty(bh)) { /* This should probably be a BUG, or * at least return an error. */ mlog(ML_ERROR, "trying to sync read a dirty " "buffer! (blocknr = %llu), skipping\n", (unsigned long long)bh->b_blocknr); continue; } lock_buffer(bh); if (buffer_jbd(bh)) { mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); } clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); } for (i = nr; i > 0; i--) { bh = bhs[i - 1]; if (buffer_jbd(bh)) { mlog(ML_ERROR, "the journal got the buffer while it was " "locked for io! (blocknr = %llu)\n", (unsigned long long)bh->b_blocknr); BUG(); } wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* Status won't be cleared from here on out, * so we can safely record this and loop back * to cleanup the other buffers. */ status = -EIO; put_bh(bh); bhs[i - 1] = NULL; } } bail: return status; }
static int ocfs2_wipe_inode(struct inode *inode, struct buffer_head *di_bh) { int status, orphaned_slot = -1; struct inode *orphan_dir_inode = NULL; struct buffer_head *orphan_dir_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { orphaned_slot = le16_to_cpu(di->i_orphaned_slot); status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); if (status) return status; orphan_dir_inode = ocfs2_get_system_file_inode(osb, ORPHAN_DIR_SYSTEM_INODE, orphaned_slot); if (!orphan_dir_inode) { status = -EEXIST; mlog_errno(status); goto bail; } /* Lock the orphan dir. The lock will be held for the entire * delete_inode operation. We do this now to avoid races with * recovery completion on other nodes. */ mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); if (status < 0) { mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto bail; } } /* we do this while holding the orphan dir lock because we * don't want recovery being run from another node to try an * inode delete underneath us -- this will result in two nodes * truncating the same file! */ status = ocfs2_truncate_for_delete(osb, inode, di_bh); if (status < 0) { mlog_errno(status); goto bail_unlock_dir; } /* Remove any dir index tree */ if (S_ISDIR(inode->i_mode)) { status = ocfs2_dx_dir_truncate(inode, di_bh); if (status) { mlog_errno(status); goto bail_unlock_dir; } } /*Free extended attribute resources associated with this inode.*/ status = ocfs2_xattr_remove(inode, di_bh); if (status < 0) { mlog_errno(status); goto bail_unlock_dir; } status = ocfs2_remove_refcount_tree(inode, di_bh); if (status < 0) { mlog_errno(status); goto bail_unlock_dir; } status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, orphan_dir_bh); if (status < 0) mlog_errno(status); bail_unlock_dir: if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR) return status; ocfs2_inode_unlock(orphan_dir_inode, 1); mutex_unlock(&orphan_dir_inode->i_mutex); brelse(orphan_dir_bh); bail: iput(orphan_dir_inode); ocfs2_signal_wipe_completion(osb, orphaned_slot); return status; }
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, struct page *page) { int ret; struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); unsigned int len = PAGE_CACHE_SIZE; pgoff_t last_index; struct page *locked_page = NULL; void *fsdata; loff_t size = i_size_read(inode); /* * Another node might have truncated while we were waiting on * cluster locks. */ last_index = size >> PAGE_CACHE_SHIFT; if (page->index > last_index) { ret = -EINVAL; goto out; } /* * The i_size check above doesn't catch the case where nodes * truncated and then re-extended the file. We'll re-check the * page mapping after taking the page lock inside of * ocfs2_write_begin_nolock(). */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) { /* * the page has been umapped in ocfs2_data_downconvert_worker. * So return 0 here and let VFS retry. */ ret = 0; goto out; } /* * Call ocfs2_write_begin() and ocfs2_write_end() to take * advantage of the allocation code there. We pass a write * length of the whole page (chopped to i_size) to make sure * the whole thing is allocated. * * Since we know the page is up to date, we don't have to * worry about ocfs2_write_begin() skipping some buffer reads * because the "write" would invalidate their data. */ if (page->index == last_index) len = size & ~PAGE_CACHE_MASK; ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, &fsdata, di_bh, page); if (ret) { if (ret != -ENOSPC) mlog_errno(ret); goto out; } ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, fsdata); if (ret < 0) { mlog_errno(ret); goto out; } BUG_ON(ret != len); ret = 0; out: return ret; }
static void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t oldset; struct buffer_head *di_bh = NULL; trace_ocfs2_delete_inode(inode->i_ino, (unsigned long long)OCFS2_I(inode)->ip_blkno, is_bad_inode(inode)); /* When we fail in read_inode() we mark inode as bad. The second test * catches the case when inode allocation fails before allocating * a block for inode. */ if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) goto bail; dquot_initialize(inode); if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most * likely be a no-op anyway) */ ocfs2_cleanup_delete_inode(inode, 0); goto bail; } /* We want to block signals in delete_inode as the lock and * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned * forever. */ ocfs2_block_signals(&oldset); /* * Synchronize us against ocfs2_get_dentry. We take this in * shared mode so that all nodes can still concurrently * process deletes. */ status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0); if (status < 0) { mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unblock; } /* Lock down the inode. This gives us an up to date view of * it's metadata (for verification), and allows us to * serialize delete_inode on multiple nodes. * * Even though we might be doing a truncate, we don't take the * allocation lock here as it won't be needed - nobody will * have the file open. */ status = ocfs2_inode_lock(inode, &di_bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unlock_nfs_sync; } /* Query the cluster. This will be the final decision made * before we go ahead and wipe the inode. */ status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); if (!wipe || status < 0) { /* Error and remote inode busy both mean we won't be * removing the inode, so they take almost the same * path. */ if (status < 0) mlog_errno(status); /* Someone in the cluster has disallowed a wipe of * this inode, or it was never completely * orphaned. Write out the pages and exit now. */ ocfs2_cleanup_delete_inode(inode, 1); goto bail_unlock_inode; } ocfs2_cleanup_delete_inode(inode, 0); status = ocfs2_wipe_inode(inode, di_bh); if (status < 0) { if (status != -EDEADLK) mlog_errno(status); goto bail_unlock_inode; } /* * Mark the inode as successfully deleted. * * This is important for ocfs2_clear_inode() as it will check * this flag and skip any checkpointing work * * ocfs2_stuff_meta_lvb() also uses this flag to invalidate * the LVB for other nodes. */ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; bail_unlock_inode: ocfs2_inode_unlock(inode, 1); brelse(di_bh); bail_unlock_nfs_sync: ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); bail_unblock: ocfs2_unblock_signals(&oldset); bail: return; }
int ocfs2_load_local_alloc(struct ocfs2_super *osb) { int status = 0; struct ocfs2_dinode *alloc = NULL; struct buffer_head *alloc_bh = NULL; u32 num_used; struct inode *inode = NULL; struct ocfs2_local_alloc *la; if (osb->local_alloc_bits == 0) goto bail; if (osb->local_alloc_bits >= osb->bitmap_cpg) { mlog(ML_NOTICE, "Requested local alloc window %d is larger " "than max possible %u. Using defaults.\n", osb->local_alloc_bits, (osb->bitmap_cpg - 1)); osb->local_alloc_bits = ocfs2_megabytes_to_clusters(osb->sb, ocfs2_la_default_mb(osb)); } /* read the alloc off disk */ inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!inode) { status = -EINVAL; mlog_errno(status); goto bail; } status = ocfs2_read_inode_block_full(inode, &alloc_bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; } alloc = (struct ocfs2_dinode *) alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); if (!(le32_to_cpu(alloc->i_flags) & (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); status = -EINVAL; goto bail; } if ((la->la_size == 0) || (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", le16_to_cpu(la->la_size)); status = -EINVAL; goto bail; } /* do a little verification. */ num_used = ocfs2_local_alloc_count_bits(alloc); /* hopefully the local alloc has always been recovered before * we load it. */ if (num_used || alloc->id1.bitmap1.i_used || alloc->id1.bitmap1.i_total || la->la_bm_off) mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); osb->local_alloc_bh = alloc_bh; osb->local_alloc_state = OCFS2_LA_ENABLED; bail: if (status < 0) brelse(alloc_bh); if (inode) iput(inode); trace_ocfs2_load_local_alloc(osb->local_alloc_bits); if (status) mlog_errno(status); return status; }
/* * Extend the filesystem to the new number of clusters specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. */ int ocfs2_group_extend(struct inode * inode, int new_clusters) { int ret; handle_t *handle; struct buffer_head *main_bm_bh = NULL; struct buffer_head *group_bh = NULL; struct inode *main_bm_inode = NULL; struct ocfs2_dinode *fe = NULL; struct ocfs2_group_desc *group = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u16 cl_bpc; u32 first_new_cluster; u64 lgd_blkno; mlog_entry_void(); if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; if (new_clusters < 0) return -EINVAL; else if (new_clusters == 0) return 0; main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { ret = -EINVAL; mlog_errno(ret); goto out; } mutex_lock(&main_bm_inode->i_mutex); ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (ret < 0) { mlog_errno(ret); goto out_mutex; } fe = (struct ocfs2_dinode *)main_bm_bh->b_data; /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(), * so any corruption is a code bug. */ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small. " "Force to do offline resize."); ret = -EINVAL; goto out_unlock; } first_new_cluster = le32_to_cpu(fe->i_clusters); lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno, &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } group = (struct ocfs2_group_desc *)group_bh->b_data; cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { ret = -EINVAL; goto out_unlock; } mlog(0, "extend the last group at %llu, new clusters = %d\n", (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; goto out_unlock; } /* update the last group descriptor and inode. */ ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, main_bm_bh, group_bh, first_new_cluster, new_clusters); if (ret) { mlog_errno(ret); goto out_commit; } ocfs2_update_super_and_backups(main_bm_inode, new_clusters); out_commit: ocfs2_commit_trans(osb, handle); out_unlock: brelse(group_bh); brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: mutex_unlock(&main_bm_inode->i_mutex); iput(main_bm_inode); out: mlog_exit_void(); return ret; }
/* * return any unused bits to the bitmap and write out a clean * local_alloc. * * local_alloc_bh is optional. If not passed, we will simply use the * one off osb. If you do pass it however, be warned that it *will* be * returned brelse'd and NULL'd out.*/ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) { int status; handle_t *handle; struct inode *local_alloc_inode = NULL; struct buffer_head *bh = NULL; struct buffer_head *main_bm_bh = NULL; struct inode *main_bm_inode = NULL; struct ocfs2_dinode *alloc_copy = NULL; struct ocfs2_dinode *alloc = NULL; cancel_delayed_work(&osb->la_enable_wq); flush_workqueue(ocfs2_wq); if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; local_alloc_inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); if (!local_alloc_inode) { status = -ENOENT; mlog_errno(status); goto out; } osb->local_alloc_state = OCFS2_LA_DISABLED; ocfs2_resmap_uninit(&osb->osb_la_resmap); main_bm_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); if (!main_bm_inode) { status = -EINVAL; mlog_errno(status); goto out; } mutex_lock(&main_bm_inode->i_mutex); status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); if (status < 0) { mlog_errno(status); goto out_mutex; } /* WINDOW_MOVE_CREDITS is a bit heavy... */ handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); handle = NULL; goto out_unlock; } bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; alloc_copy = kmalloc(bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; } memcpy(alloc_copy, alloc, bh->b_size); status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out_commit; } ocfs2_clear_local_alloc(alloc); ocfs2_journal_dirty(handle, bh); brelse(bh); osb->local_alloc_bh = NULL; osb->local_alloc_state = OCFS2_LA_UNUSED; status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, main_bm_inode, main_bm_bh); if (status < 0) mlog_errno(status); out_commit: ocfs2_commit_trans(osb, handle); out_unlock: brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); out_mutex: mutex_unlock(&main_bm_inode->i_mutex); iput(main_bm_inode); out: if (local_alloc_inode) iput(local_alloc_inode); kfree(alloc_copy); }
static int ocfs2_update_last_group_and_inode(handle_t *handle, struct inode *bm_inode, struct buffer_head *bm_bh, struct buffer_head *group_bh, u32 first_new_cluster, int new_clusters) { int ret = 0; struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb); struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data; struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct ocfs2_chain_rec *cr; struct ocfs2_group_desc *group; u16 chain, num_bits, backups = 0; u16 cl_bpc = le16_to_cpu(cl->cl_bpc); u16 cl_cpg = le16_to_cpu(cl->cl_cpg); mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", new_clusters, first_new_cluster); ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out; } group = (struct ocfs2_group_desc *)group_bh->b_data; /* update the group first. */ num_bits = new_clusters * cl_bpc; le16_add_cpu(&group->bg_bits, num_bits); le16_add_cpu(&group->bg_free_bits_count, num_bits); /* * check whether there are some new backup superblocks exist in * this group and update the group bitmap accordingly. */ if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB)) { backups = ocfs2_calc_new_backup_super(bm_inode, group, new_clusters, first_new_cluster, cl_cpg, 1); le16_add_cpu(&group->bg_free_bits_count, -1 * backups); } ret = ocfs2_journal_dirty(handle, group_bh); if (ret < 0) { mlog_errno(ret); goto out_rollback; } /* update the inode accordingly. */ ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); goto out_rollback; } chain = le16_to_cpu(group->bg_chain); cr = (&cl->cl_recs[chain]); le32_add_cpu(&cr->c_total, num_bits); le32_add_cpu(&cr->c_free, num_bits); le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits); le32_add_cpu(&fe->i_clusters, new_clusters); if (backups) { le32_add_cpu(&cr->c_free, -1 * backups); le32_add_cpu(&fe->id1.bitmap1.i_used, backups); } spin_lock(&OCFS2_I(bm_inode)->ip_lock); OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); spin_unlock(&OCFS2_I(bm_inode)->ip_lock); i_size_write(bm_inode, le64_to_cpu(fe->i_size)); ocfs2_journal_dirty(handle, bm_bh); out_rollback: if (ret < 0) { ocfs2_calc_new_backup_super(bm_inode, group, new_clusters, first_new_cluster, cl_cpg, 0); le16_add_cpu(&group->bg_free_bits_count, backups); le16_add_cpu(&group->bg_bits, -1 * num_bits); le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); } out: mlog_exit(ret); return ret; }
/* * We expect the block group allocator to already be locked. */ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, struct inode *alloc_inode, struct buffer_head *bh, u64 max_block, u64 *last_alloc_group, int flags) { int status, credits; struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; struct ocfs2_chain_list *cl; struct ocfs2_alloc_context *ac = NULL; handle_t *handle = NULL; u16 alloc_rec; struct buffer_head *bg_bh = NULL; struct ocfs2_group_desc *bg; BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); cl = &fe->id2.i_chain; status = ocfs2_reserve_clusters_with_limit(osb, le16_to_cpu(cl->cl_cpg), max_block, flags, &ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); goto bail; } credits = ocfs2_calc_group_alloc_credits(osb->sb, le16_to_cpu(cl->cl_cpg)); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; mlog_errno(status); goto bail; } if (last_alloc_group && *last_alloc_group != 0) { trace_ocfs2_block_group_alloc( (unsigned long long)*last_alloc_group); ac->ac_last_group = *last_alloc_group; } bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC)) bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; if (status != -ENOSPC) mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) bg_bh->b_data; status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } alloc_rec = le16_to_cpu(bg->bg_chain); le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno; if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) le16_add_cpu(&cl->cl_next_free_rec, 1); le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - le16_to_cpu(bg->bg_free_bits_count)); le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); ocfs2_journal_dirty(handle, bh); spin_lock(&OCFS2_I(alloc_inode)->ip_lock); OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, le32_to_cpu(fe->i_clusters))); spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); status = 0; /* save the new last alloc group so that the caller can cache it. */ if (last_alloc_group) *last_alloc_group = ac->ac_last_group; bail: if (handle) ocfs2_commit_trans(osb, handle); if (ac) ocfs2_free_alloc_context(ac); brelse(bg_bh); if (status) mlog_errno(status); return status; }