static void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t oldset; struct buffer_head *di_bh = NULL; trace_ocfs2_delete_inode(inode->i_ino, (unsigned long long)OCFS2_I(inode)->ip_blkno, is_bad_inode(inode)); /* When we fail in read_inode() we mark inode as bad. The second test * catches the case when inode allocation fails before allocating * a block for inode. */ if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) goto bail; dquot_initialize(inode); if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most * likely be a no-op anyway) */ ocfs2_cleanup_delete_inode(inode, 0); goto bail; } /* We want to block signals in delete_inode as the lock and * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned * forever. */ ocfs2_block_signals(&oldset); /* * Synchronize us against ocfs2_get_dentry. We take this in * shared mode so that all nodes can still concurrently * process deletes. */ status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0); if (status < 0) { mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unblock; } /* Lock down the inode. This gives us an up to date view of * it's metadata (for verification), and allows us to * serialize delete_inode on multiple nodes. * * Even though we might be doing a truncate, we don't take the * allocation lock here as it won't be needed - nobody will * have the file open. */ status = ocfs2_inode_lock(inode, &di_bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unlock_nfs_sync; } /* Query the cluster. This will be the final decision made * before we go ahead and wipe the inode. */ status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); if (!wipe || status < 0) { /* Error and remote inode busy both mean we won't be * removing the inode, so they take almost the same * path. */ if (status < 0) mlog_errno(status); /* Someone in the cluster has disallowed a wipe of * this inode, or it was never completely * orphaned. Write out the pages and exit now. */ ocfs2_cleanup_delete_inode(inode, 1); goto bail_unlock_inode; } ocfs2_cleanup_delete_inode(inode, 0); status = ocfs2_wipe_inode(inode, di_bh); if (status < 0) { if (status != -EDEADLK) mlog_errno(status); goto bail_unlock_inode; } /* * Mark the inode as successfully deleted. * * This is important for ocfs2_clear_inode() as it will check * this flag and skip any checkpointing work * * ocfs2_stuff_meta_lvb() also uses this flag to invalidate * the LVB for other nodes. */ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; bail_unlock_inode: ocfs2_inode_unlock(inode, 1); brelse(di_bh); bail_unlock_nfs_sync: ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); bail_unblock: ocfs2_unblock_signals(&oldset); bail: return; }
void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t blocked, oldset; struct buffer_head *di_bh = NULL; mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); if (is_bad_inode(inode)) { mlog(0, "Skipping delete of bad inode\n"); goto bail; } if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most * likely be a no-op anyway) */ ocfs2_cleanup_delete_inode(inode, 0); goto bail; } /* We want to block signals in delete_inode as the lock and * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned * forever. */ sigfillset(&blocked); status = sigprocmask(SIG_BLOCK, &blocked, &oldset); if (status < 0) { mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 1); goto bail; } /* Lock down the inode. This gives us an up to date view of * it's metadata (for verification), and allows us to * serialize delete_inode votes. * * Even though we might be doing a truncate, we don't take the * allocation lock here as it won't be needed - nobody will * have the file open. */ status = ocfs2_meta_lock(inode, &di_bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unblock; } /* Query the cluster. This will be the final decision made * before we go ahead and wipe the inode. */ status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); if (!wipe || status < 0) { /* Error and inode busy vote both mean we won't be * removing the inode, so they take almost the same * path. */ if (status < 0) mlog_errno(status); /* Someone in the cluster has voted to not wipe this * inode, or it was never completely orphaned. Write * out the pages and exit now. */ ocfs2_cleanup_delete_inode(inode, 1); goto bail_unlock_inode; } ocfs2_cleanup_delete_inode(inode, 0); status = ocfs2_wipe_inode(inode, di_bh); if (status < 0) { if (status != -EDEADLK) mlog_errno(status); goto bail_unlock_inode; } /* * Mark the inode as successfully deleted. * * This is important for ocfs2_clear_inode() as it will check * this flag and skip any checkpointing work * * ocfs2_stuff_meta_lvb() also uses this flag to invalidate * the LVB for other nodes. */ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; bail_unlock_inode: ocfs2_meta_unlock(inode, 1); brelse(di_bh); bail_unblock: status = sigprocmask(SIG_SETMASK, &oldset, NULL); if (status < 0) mlog_errno(status); bail: clear_inode(inode); mlog_exit_void(); }
/* Query the cluster to determine whether we should wipe an inode from * disk or not. * * Requires the inode to have the cluster lock. */ static int ocfs2_query_inode_wipe(struct inode *inode, struct buffer_head *di_bh, int *wipe) { int status = 0, reason = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di; *wipe = 0; trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno, inode->i_nlink); /* While we were waiting for the cluster lock in * ocfs2_delete_inode, another node might have asked to delete * the inode. Recheck our flags to catch this. */ if (!ocfs2_inode_is_valid_to_delete(inode)) { reason = 1; goto bail; } /* Now that we have an up to date inode, we can double check * the link count. */ if (inode->i_nlink) goto bail; /* Do some basic inode verification... */ di = (struct ocfs2_dinode *) di_bh->b_data; if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { /* * Inodes in the orphan dir must have ORPHANED_FL. The only * inodes that come back out of the orphan dir are reflink * targets. A reflink target may be moved out of the orphan * dir between the time we scan the directory and the time we * process it. This would lead to HAS_REFCOUNT_FL being set but * ORPHANED_FL not. */ if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { reason = 2; goto bail; } /* for lack of a better error? */ status = -EEXIST; mlog(ML_ERROR, "Inode %llu (on-disk %llu) not orphaned! " "Disk flags 0x%x, inode flags 0x%x\n", (unsigned long long)oi->ip_blkno, (unsigned long long)le64_to_cpu(di->i_blkno), le32_to_cpu(di->i_flags), oi->ip_flags); goto bail; } /* has someone already deleted us?! baaad... */ if (di->i_dtime) { status = -EEXIST; mlog_errno(status); goto bail; } /* * This is how ocfs2 determines whether an inode is still live * within the cluster. Every node takes a shared read lock on * the inode open lock in ocfs2_read_locked_inode(). When we * get to ->delete_inode(), each node tries to convert it's * lock to an exclusive. Trylocks are serialized by the inode * meta data lock. If the upconvert succeeds, we know the inode * is no longer live and can be deleted. * * Though we call this with the meta data lock held, the * trylock keeps us from ABBA deadlock. */ status = ocfs2_try_open_lock(inode, 1); if (status == -EAGAIN) { status = 0; reason = 3; goto bail; } if (status < 0) { mlog_errno(status); goto bail; } *wipe = 1; trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot)); bail: trace_ocfs2_query_inode_wipe_end(status, reason); return status; }
/* Query the cluster to determine whether we should wipe an inode from * disk or not. * * Requires the inode to have the cluster lock. */ static int ocfs2_query_inode_wipe(struct inode *inode, struct buffer_head *di_bh, int *wipe) { int status = 0; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di; *wipe = 0; /* While we were waiting for the cluster lock in * ocfs2_delete_inode, another node might have asked to delete * the inode. Recheck our flags to catch this. */ if (!ocfs2_inode_is_valid_to_delete(inode)) { mlog(0, "Skipping delete of %llu because flags changed\n", (unsigned long long)oi->ip_blkno); goto bail; } /* Now that we have an up to date inode, we can double check * the link count. */ if (inode->i_nlink) { mlog(0, "Skipping delete of %llu because nlink = %u\n", (unsigned long long)oi->ip_blkno, inode->i_nlink); goto bail; } /* Do some basic inode verification... */ di = (struct ocfs2_dinode *) di_bh->b_data; if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { /* for lack of a better error? */ status = -EEXIST; mlog(ML_ERROR, "Inode %llu (on-disk %llu) not orphaned! " "Disk flags 0x%x, inode flags 0x%x\n", (unsigned long long)oi->ip_blkno, (unsigned long long)le64_to_cpu(di->i_blkno), le32_to_cpu(di->i_flags), oi->ip_flags); goto bail; } /* has someone already deleted us?! baaad... */ if (di->i_dtime) { status = -EEXIST; mlog_errno(status); goto bail; } /* * This is how ocfs2 determines whether an inode is still live * within the cluster. Every node takes a shared read lock on * the inode open lock in ocfs2_read_locked_inode(). When we * get to ->delete_inode(), each node tries to convert it's * lock to an exclusive. Trylocks are serialized by the inode * meta data lock. If the upconvert suceeds, we know the inode * is no longer live and can be deleted. * * Though we call this with the meta data lock held, the * trylock keeps us from ABBA deadlock. */ status = ocfs2_try_open_lock(inode, 1); if (status == -EAGAIN) { status = 0; mlog(0, "Skipping delete of %llu because it is in use on " "other nodes\n", (unsigned long long)oi->ip_blkno); goto bail; } if (status < 0) { mlog_errno(status); goto bail; } *wipe = 1; mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n", (unsigned long long)oi->ip_blkno, le16_to_cpu(di->i_orphaned_slot)); bail: return status; }