/* * This is called to fill in the vector of log iovecs for the given inode * log item. It fills the first item with an inode log format structure, * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. */ STATIC void xfs_inode_item_format( struct xfs_log_item *lip, struct xfs_log_vec *lv) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; ASSERT(ip->i_d.di_version > 1); ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; ilf->ilf_blkno = ip->i_imap.im_blkno; ilf->ilf_len = ip->i_imap.im_len; ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); if (XFS_IFORK_Q(ip)) { xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); } /* update the format with the exact fields we actually logged */ ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); }
int xfs_check_acl(struct inode *inode, int mask, unsigned int flags) { struct xfs_inode *ip; struct posix_acl *acl; int error = -EAGAIN; ip = XFS_I(inode); trace_xfs_check_acl(ip); /* * If there is no attribute fork no ACL exists on this inode and * we can skip the whole exercise. */ if (!XFS_IFORK_Q(ip)) return -EAGAIN; if (flags & IPERM_FLAG_RCU) { if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) return -ECHILD; return -EAGAIN; } acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); } return error; }
int xfs_check_acl(struct inode *inode, int mask) { struct xfs_inode *ip = XFS_I(inode); struct posix_acl *acl; int error = -EAGAIN; xfs_itrace_entry(ip); /* * If there is no attribute fork no ACL exists on this inode and * we can skip the whole exercise. */ if (!XFS_IFORK_Q(ip)) return -EAGAIN; acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); } return error; }
/* * Initialize the Linux inode. * * When reading existing inodes from disk this is called directly from xfs_iget, * when creating a new inode it is called from xfs_ialloc after setting up the * inode. These callers have different criteria for clearing XFS_INEW, so leave * it up to the caller to deal with unlocking the inode appropriately. */ void xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ hlist_add_fake(&inode->i_hash); inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: inode->i_rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: inode->i_rdev = 0; break; } i_size_write(inode, ip->i_d.di_size); xfs_diflags_to_iflags(inode, ip); if (S_ISDIR(inode->i_mode)) { lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); ip->d_ops = ip->i_mount->m_dir_inode_ops; } else { ip->d_ops = ip->i_mount->m_nondir_inode_ops; lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); } /* * Ensure all page cache allocations are done from GFP_NOFS context to * prevent direct reclaim recursion back into the filesystem and blowing * stacks or deadlocking. */ gfp_mask = mapping_gfp_mask(inode->i_mapping); mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ if (!XFS_IFORK_Q(ip)) { inode_has_no_xattr(inode); cache_no_acl(inode); } }
/* * Writes a modified inode's changes out to the inode's on disk home. * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel. */ int libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp) { xfs_inode_log_item_t *iip; xfs_dinode_t *dip; xfs_mount_t *mp; ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); ASSERT(ip->i_d.di_version > 1); iip = ip->i_itemp; mp = ip->i_mount; /* set *dip = inode's place in the buffer */ dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC); if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) ); } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) || (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) ); } ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks); ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize); /* bump the change count on v3 inodes */ if (ip->i_d.di_version == 3) ip->i_d.di_changecount++; /* * Copy the dirty parts of the inode into the on-disk * inode. We always copy out the core of the inode, * because if the inode is dirty at all the core must * be. */ xfs_dinode_to_disk(dip, &ip->i_d); xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); if (XFS_IFORK_Q(ip)) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); /* update the lsn in the on disk inode if required */ if (ip->i_d.di_version == 3) dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn); /* generate the checksum. */ xfs_dinode_calc_crc(mp, dip); return 0; }
int xfs_inode_hasattr( struct xfs_inode *ip) { if (!XFS_IFORK_Q(ip) || (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_anextents == 0)) return 0; return 1; }
/* * This returns the number of iovecs needed to log the given inode item. * * We need one iovec for the inode log format structure, one for the * inode core, and possibly one for the inode data/extents/b-tree root * and one for the inode attribute data/extents/b-tree root. */ STATIC void xfs_inode_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; *nvecs += 2; *nbytes += sizeof(struct xfs_inode_log_format) + xfs_log_dinode_size(ip->i_d.di_version); xfs_inode_item_data_fork_size(iip, nvecs, nbytes); if (XFS_IFORK_Q(ip)) xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); }
STATIC int xfs_check_acl( struct inode *inode, int mask) { struct xfs_inode *ip = XFS_I(inode); int error; xfs_itrace_entry(ip); if (XFS_IFORK_Q(ip)) { error = xfs_acl_iaccess(ip, mask, NULL); if (error != -1) return -error; } return -EAGAIN; }
int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; int ilf_fields, tilf_fields; static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; int error = 0; int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; char locked = 0; mp = ip->i_mount; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto error0; } sbp = &sxp->sx_stat; /* * we have to do two separate lock calls here to keep lockdep * happy. If we try to get all the locks in one call, lock will * report false positives when we drop the ILOCK and regain them * below. */ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); locked = 1; /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto error0; } if (VN_CACHED(VFS_I(tip)) != 0) { xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) goto error0; } /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto error0; } /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { xfs_fs_cmn_err(CE_NOTE, mp, "%s: inode 0x%llx format is incompatible for exchanging.", __FILE__, ip->i_ino); goto error0; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { error = XFS_ERROR(EBUSY); goto error0; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto error0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ xfs_tosspages(ip, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0))) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); locked = 0; goto error0; } xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) { xfs_trans_cancel(tp, 0); goto error0; } } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) { xfs_trans_cancel(tp, 0); goto error0; } } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; *tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = *tempifp; /* struct copy */ /* * Fix the in-memory data fork values that are dependent on the fork * offset in the inode. We can't assume they remain the same as attr2 * has dynamic fork offsets. */ ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / (uint)sizeof(xfs_bmbt_rec_t); tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / (uint)sizeof(xfs_bmbt_rec_t); /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; ilf_fields = XFS_ILOG_CORE; switch(ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } ilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ilf_fields |= XFS_ILOG_DBROOT; break; } tilf_fields = XFS_ILOG_CORE; switch(tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } tilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: tilf_fields |= XFS_ILOG_DBROOT; break; } IHOLD(ip); xfs_trans_ijoin(tp, ip, lock_flags); IHOLD(tip); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); xfs_trans_log_inode(tp, tip, tilf_fields); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(tp); } error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); locked = 0; error0: if (locked) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); } if (tempifp != NULL) kmem_free(tempifp); return error; }
int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_da_args args; struct xfs_bmap_free flist; struct xfs_trans_res tres; xfs_fsblock_t firstblock; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, committed, local; XFS_STATS_INC(xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.value = value; args.valuelen = valuelen; args.firstblock = &firstblock; args.flist = &flist; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); error = xfs_qm_dqattach(dp, 0); if (error) return error; /* * If the inode doesn't have an attribute fork, add one. * (inode must not be locked when we call this routine) */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); if (error) return error; } /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off * this one via the trans_dup() call. All transactions will contain * the inode, and the inode will always be marked with trans_ihold(). * Since the inode will be locked in all transactions, we must log * the inode in every transaction to let it float upward through * the log. */ args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_reserve(args.trans, &tres, args.total, 0); if (error) { xfs_trans_cancel(args.trans); return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans); return error; } xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(&args); /* * Try to add the attr to the attribute list in * the inode. */ error = xfs_attr_shortform_addname(&args); if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ ASSERT(args.trans != NULL); /* * If this is a synchronous mount, make sure that * the transaction goes to disk before returning * to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; } /* * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ xfs_bmap_init(args.flist, args.firstblock); error = xfs_attr_shortform_to_leaf(&args); if (!error) { error = xfs_bmap_finish(&args.trans, args.flist, &committed); } if (error) { ASSERT(committed); args.trans = NULL; xfs_bmap_cancel(&flist); goto out; } /* * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ if (committed) xfs_trans_ijoin(args.trans, dp, 0); /* * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf. */ error = xfs_trans_roll(&args.trans, dp); if (error) goto out; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); else error = xfs_attr_node_addname(&args); if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
STATIC void xfs_inode_item_format( struct xfs_log_item *lip, struct xfs_log_iovec *vecp) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; vecp->i_addr = &ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; /* */ mp = ip->i_mount; ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); ip->i_d.di_onlink = ip->i_d.di_nlink; } else { /* */ ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } } switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_d.di_nextents > 0 && ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); ASSERT(iip->ili_extents_buf == NULL); #ifdef XFS_NATIVE_HOST if (ip->i_d.di_nextents == ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { /* */ vecp->i_addr = ip->i_df.if_u1.if_extents; vecp->i_len = ip->i_df.if_bytes; vecp->i_type = XLOG_REG_TYPE_IEXT; } else #endif { xfs_inode_item_format_extents(ip, vecp, XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; vecp++; nvecs++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); #ifdef XFS_TRANS_DEBUG if (iip->ili_root_size > 0) { ASSERT(iip->ili_root_size == ip->i_df.if_broot_bytes); ASSERT(memcmp(iip->ili_orig_root, ip->i_df.if_broot, iip->ili_root_size) == 0); } else { ASSERT(ip->i_df.if_broot_bytes == 0); } #endif iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); vecp->i_addr = ip->i_df.if_u1.if_data; /* */ data_bytes = roundup(ip->i_df.if_bytes, 4); ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } break; case XFS_DINODE_FMT_DEV: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_UUID); if (iip->ili_fields & XFS_ILOG_DEV) { iip->ili_format.ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; } break; case XFS_DINODE_FMT_UUID: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV); if (iip->ili_fields & XFS_ILOG_UUID) { iip->ili_format.ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; } break; default: ASSERT(0); break; } /* */ if (!XFS_IFORK_Q(ip)) { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); goto out; } switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_d.di_anextents > 0 && ip->i_afp->if_bytes > 0) { ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); #ifdef XFS_NATIVE_HOST /* */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); xfs_inode_item_format_extents(ip, vecp, XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_afp->if_broot_bytes > 0) { ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { ASSERT(ip->i_afp->if_u1.if_data != NULL); vecp->i_addr = ip->i_afp->if_u1.if_data; /* */ data_bytes = roundup(ip->i_afp->if_bytes, 4); ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } break; default: ASSERT(0); break; } out: /* */ iip->ili_format.ilf_fields = XFS_ILOG_CORE | (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); iip->ili_format.ilf_size = nvecs; }
/* * Get inode's extents as described in bmv, and format for output. * Calls formatter to fill the user's buffer until all extents * are mapped, until the passed-in bmv->bmv_count slots have * been filled, or until the formatter short-circuits the loop, * if it is tracking filled-in extents on its own. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, struct getbmapx *bmv, /* user bmap structure */ xfs_bmap_format_t formatter, /* format to user */ void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ int error = 0; /* return value */ __int64_t fixlen; /* length for -1 case */ int i; /* extent number */ int lock; /* lock state */ xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ int cur_ext = 0; mp = ip->i_mount; iflags = bmv->bmv_iflags; whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; if (whichfork == XFS_ATTR_FORK) { if (XFS_IFORK_Q(ip)) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) return XFS_ERROR(EINVAL); } else if (unlikely( ip->i_d.di_aformat != 0 && ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } prealloced = 0; fixlen = 1LL << 32; } else { if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) return XFS_ERROR(EINVAL); if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; fixlen = mp->m_super->s_maxbytes; } else { prealloced = 0; fixlen = XFS_ISIZE(ip); } } if (bmv->bmv_length == -1) { fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); bmv->bmv_length = max_t(__int64_t, fixlen - bmv->bmv_offset, 0); } else if (bmv->bmv_length == 0) { bmv->bmv_entries = 0; return 0; } else if (bmv->bmv_length < 0) { return XFS_ERROR(EINVAL); } nex = bmv->bmv_count - 1; if (nex <= 0) return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) return XFS_ERROR(ENOMEM); out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); if (!out) return XFS_ERROR(ENOMEM); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK) { if (!(iflags & BMV_IF_DELALLOC) && (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; /* * Even after flushing the inode, there can still be * delalloc blocks on the inode beyond EOF due to * speculative preallocation. These are not removed * until the release function is called or the inode * is inactivated. Hence we cannot assert here that * ip->i_delayed_blks == 0. */ } lock = xfs_ilock_data_map_shared(ip); } else { lock = xfs_ilock_attr_map_shared(ip); } /* * Don't let nex be bigger than the number of extents * we can have assuming alternating holes and real extents. */ if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; bmapi_flags = xfs_bmapi_aflag(whichfork); if (!(iflags & BMV_IF_PREALLOC)) bmapi_flags |= XFS_BMAPI_IGSTATE; /* * Allocate enough space to handle "subnex" maps at a time. */ error = ENOMEM; subnex = 16; map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) goto out_unlock_ilock; bmv->bmv_entries = 0; if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { error = 0; goto out_free_map; } nexleft = nex; do { nmap = (nexleft > subnex) ? subnex : nexleft; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); if (error) goto out_free_map; ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; else if (map[i].br_startblock == DELAYSTARTBLOCK) out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; out[cur_ext].bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out[cur_ext].bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); out[cur_ext].bmv_unused1 = 0; out[cur_ext].bmv_unused2 = 0; /* * delayed allocation extents that start beyond EOF can * occur due to speculative EOF allocation when the * delalloc extent is larger than the largest freespace * extent at conversion time. These extents cannot be * converted by data writeback, so can exist here even * if we are not supposed to be finding delalloc * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ out[cur_ext].bmv_oflags |= BMV_OF_LAST; goto out_free_map; } if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], prealloced, bmvend, map[i].br_startblock)) goto out_free_map; bmv->bmv_offset = out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); /* * In case we don't want to return the hole, * don't increase cur_ext so that we can reuse * it in the next loop. */ if ((iflags & BMV_IF_NO_HOLES) && map[i].br_startblock == HOLESTARTBLOCK) { memset(&out[cur_ext], 0, sizeof(out[cur_ext])); continue; } nexleft--; bmv->bmv_entries++; cur_ext++; } } while (nmap && nexleft && bmv->bmv_length); out_free_map: kmem_free(map); out_unlock_ilock: xfs_iunlock(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); for (i = 0; i < cur_ext; i++) { int full = 0; /* user array is full */ /* format results & advance arg */ error = formatter(&arg, &out[i], &full); if (error || full) break; } kmem_free(out); return error; }
/* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode * log format structure, the second with the on-disk inode structure, * and a possible third and/or fourth with the inode data/extents/b-tree * root and inode attributes data/extents/b-tree root. */ STATIC void xfs_inode_item_format( struct xfs_log_item *lip, struct xfs_log_iovec *vecp) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; /* * Clear i_update_core if the timestamps (or any other * non-transactional modification) need flushing/logging * and we're about to log them with the rest of the core. * * This is the same logic as xfs_iflush() but this code can't * run at the same time as xfs_iflush because we're in commit * processing here and so we have the inode lock held in * exclusive mode. Although it doesn't really matter * for the timestamps if both routines were to grab the * timestamps or not. That would be ok. * * We clear i_update_core before copying out the data. * This is for coordination with our timestamp updates * that don't hold the inode lock. They will always * update the timestamps BEFORE setting i_update_core, * so if we clear i_update_core after they set it we * are guaranteed to see their updates to the timestamps * either here. Likewise, if they set it after we clear it * here, we'll see it either on the next commit of this * inode or the next time the inode gets flushed via * xfs_iflush(). This depends on strongly ordered memory * semantics, but we have that. We use the SYNCHRONIZE * macro to make sure that the compiler does not reorder * the i_update_core access below the data copy below. */ if (ip->i_update_core) { ip->i_update_core = 0; SYNCHRONIZE(); } /* * Make sure to get the latest timestamps from the Linux inode. */ xfs_synchronize_times(ip); vecp->i_addr = &ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; /* * If this is really an old format inode, then we need to * log it as such. This means that we have to copy the link * count from the new field to the old. We don't have to worry * about the new fields, because nothing trusts them as long as * the old inode version number is there. If the superblock already * has a new version number, then we don't bother converting back. */ mp = ip->i_mount; ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); ip->i_d.di_onlink = ip->i_d.di_nlink; } else { /* * The superblock version has already been bumped, * so just make the conversion to the new inode * format permanent. */ ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } } switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_d.di_nextents > 0); ASSERT(iip->ili_extents_buf == NULL); ASSERT((ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) > 0); #ifdef XFS_NATIVE_HOST if (ip->i_d.di_nextents == ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { /* * There are no delayed allocation * extents, so just point to the * real extents array. */ vecp->i_addr = ip->i_df.if_u1.if_extents; vecp->i_len = ip->i_df.if_bytes; vecp->i_type = XLOG_REG_TYPE_IEXT; } else #endif { xfs_inode_item_format_extents(ip, vecp, XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; vecp++; nvecs++; } break; case XFS_DINODE_FMT_BTREE: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { ASSERT(ip->i_df.if_broot_bytes > 0); ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } break; case XFS_DINODE_FMT_LOCAL: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); vecp->i_addr = ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } break; case XFS_DINODE_FMT_DEV: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DDATA | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { iip->ili_format.ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; } break; case XFS_DINODE_FMT_UUID: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DDATA | XFS_ILOG_DEV))); if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { iip->ili_format.ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; } break; default: ASSERT(0); break; } /* * If there are no attributes associated with the file, * then we're done. * Assert that no attribute-related log flags are set. */ if (!XFS_IFORK_Q(ip)) { ASSERT(nvecs == lip->li_desc->lid_size); iip->ili_format.ilf_size = nvecs; ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); return; } switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { #ifdef DEBUG int nrecs = ip->i_afp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(nrecs > 0); ASSERT(nrecs == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_extents != NULL); ASSERT(ip->i_d.di_anextents > 0); #endif #ifdef XFS_NATIVE_HOST /* * There are not delayed allocation extents * for attributes, so just point at the array. */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); xfs_inode_item_format_extents(ip, vecp, XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; } break; case XFS_DINODE_FMT_BTREE: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { ASSERT(ip->i_afp->if_broot_bytes > 0); ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; } break; case XFS_DINODE_FMT_LOCAL: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_data != NULL); vecp->i_addr = ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } break; default: ASSERT(0); break; } ASSERT(nvecs == lip->li_desc->lid_size); iip->ili_format.ilf_size = nvecs; }
int xfs_ifork_q(xfs_inode_t *ip) { return XFS_IFORK_Q(ip); }
int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_trans_res tres; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, local; XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.value = value; args.valuelen = valuelen; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); error = xfs_qm_dqattach(dp); if (error) return error; /* * If the inode doesn't have an attribute fork, add one. * (inode must not be locked when we call this routine) */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); if (error) return error; } tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ error = xfs_trans_alloc(mp, &tres, args.total, 0, rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); if (error) return error; xfs_ilock(dp, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans); return error; } xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(&args); /* * Try to add the attr to the attribute list in * the inode. */ error = xfs_attr_shortform_addname(&args); if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ ASSERT(args.trans != NULL); /* * If this is a synchronous mount, make sure that * the transaction goes to disk before returning * to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; } /* * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out; /* * Prevent the leaf buffer from being unlocked so that a * concurrent AIL push cannot grab the half-baked leaf * buffer and run into problems with the write verifier. */ xfs_trans_bhold(args.trans, leaf_bp); error = xfs_defer_finish(&args.trans); if (error) goto out; /* * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf, which * means that we have to hold & join the leaf buffer here too. */ error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; xfs_trans_bjoin(args.trans, leaf_bp); leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); else error = xfs_attr_node_addname(&args); if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (leaf_bp) xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
STATIC uint xfs_inode_item_size( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; uint nvecs = 2; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_d.di_nextents > 0 && ip->i_df.if_bytes > 0) nvecs++; break; case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) nvecs++; break; case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) nvecs++; break; case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_UUID: break; default: ASSERT(0); break; } if (!XFS_IFORK_Q(ip)) return nvecs; /* */ switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_d.di_anextents > 0 && ip->i_afp->if_bytes > 0) nvecs++; break; case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_afp->if_broot_bytes > 0) nvecs++; break; case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) nvecs++; break; default: ASSERT(0); break; } return nvecs; }
/* * xfs_attr_inactive kills all traces of an attribute fork on an inode. It * removes both the on-disk and in-memory inode fork. Note that this also has to * handle the condition of inodes without attributes but with an attribute fork * configured, so we can't use xfs_inode_hasattr() here. * * The in-memory attribute fork is removed even on error. */ int xfs_attr_inactive( struct xfs_inode *dp) { struct xfs_trans *trans; struct xfs_mount *mp; int cancel_flags = 0; int lock_mode = XFS_ILOCK_SHARED; int error = 0; mp = dp->i_mount; ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); xfs_ilock(dp, lock_mode); if (!XFS_IFORK_Q(dp)) goto out_destroy_fork; xfs_iunlock(dp, lock_mode); /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off * this one via the trans_dup() call. All transactions will contain * the inode, and the inode will always be marked with trans_ihold(). * Since the inode will be locked in all transactions, we must log * the inode in every transaction to let it float upward through * the log. */ lock_mode = 0; trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); if (error) goto out_cancel; lock_mode = XFS_ILOCK_EXCL; cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; xfs_ilock(dp, lock_mode); if (!XFS_IFORK_Q(dp)) goto out_cancel; /* * No need to make quota reservations here. We expect to release some * blocks, not allocate, in the common case. */ xfs_trans_ijoin(trans, dp, 0); /* * Invalidate and truncate the attribute fork extents. Make sure the * fork actually has attributes as otherwise the invalidation has no * blocks to read and returns an error. In this case, just do the fork * removal below. */ if (xfs_inode_hasattr(dp) && dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel; } /* Reset the attribute fork - this also destroys the in-core fork */ xfs_attr_fork_remove(dp, trans); error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, lock_mode); return error; out_cancel: xfs_trans_cancel(trans, cancel_flags); out_destroy_fork: /* kill the in-core attr fork before we drop the inode lock */ if (dp->i_afp) xfs_idestroy_fork(dp, XFS_ATTR_FORK); if (lock_mode) xfs_iunlock(dp, lock_mode); return error; }
/* * Writes a modified inode's changes out to the inode's on disk home. * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel. */ int libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp) { xfs_inode_log_item_t *iip; xfs_dinode_t *dip; xfs_mount_t *mp; ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); iip = ip->i_itemp; mp = ip->i_mount; /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); #ifdef DEBUG ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC); if ((ip->i_d.di_mode & IFMT) == IFREG) { ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) ); } else if ((ip->i_d.di_mode & IFMT) == IFDIR) { ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) || (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) ); } ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks); ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize); #endif /* * Copy the dirty parts of the inode into the on-disk * inode. We always copy out the core of the inode, * because if the inode is dirty at all the core must * be. */ xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1, ARCH_CONVERT); /* * If this is really an old format inode and the superblock version * has not been updated to support only new format inodes, then * convert back to the old inode format. If the superblock version * has been updated, then make the conversion permanent. */ ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || XFS_SB_VERSION_HASNLINK(&mp->m_sb)); if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { /* * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, ip->i_d.di_nlink); } else { /* * The superblock version has already been bumped, * so just make the conversion to the new inode * format permanent. */ ip->i_d.di_version = XFS_DINODE_VERSION_2; INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2); ip->i_d.di_onlink = 0; INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT); bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad)); bzero(&(dip->di_core.di_pad[0]), sizeof(dip->di_core.di_pad)); ASSERT(ip->i_d.di_projid == 0); } } if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) return EFSCORRUPTED; if (XFS_IFORK_Q(ip)) { /* The only error from xfs_iflush_fork is on the data fork. */ xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); } return 0; }
/* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode * log format structure, the second with the on-disk inode structure, * and a possible third and/or fourth with the inode data/extents/b-tree * root and inode attributes data/extents/b-tree root. */ STATIC void xfs_inode_item_format( xfs_inode_log_item_t *iip, xfs_log_iovec_t *log_vector) { uint nvecs; xfs_log_iovec_t *vecp; xfs_inode_t *ip; size_t data_bytes; xfs_bmbt_rec_t *ext_buffer; int nrecs; xfs_mount_t *mp; ip = iip->ili_inode; vecp = log_vector; vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); vecp++; nvecs = 1; /* * Clear i_update_core if the timestamps (or any other * non-transactional modification) need flushing/logging * and we're about to log them with the rest of the core. * * This is the same logic as xfs_iflush() but this code can't * run at the same time as xfs_iflush because we're in commit * processing here and so we have the inode lock held in * exclusive mode. Although it doesn't really matter * for the timestamps if both routines were to grab the * timestamps or not. That would be ok. * * We clear i_update_core before copying out the data. * This is for coordination with our timestamp updates * that don't hold the inode lock. They will always * update the timestamps BEFORE setting i_update_core, * so if we clear i_update_core after they set it we * are guaranteed to see their updates to the timestamps * either here. Likewise, if they set it after we clear it * here, we'll see it either on the next commit of this * inode or the next time the inode gets flushed via * xfs_iflush(). This depends on strongly ordered memory * semantics, but we have that. We use the SYNCHRONIZE * macro to make sure that the compiler does not reorder * the i_update_core access below the data copy below. */ if (ip->i_update_core) { ip->i_update_core = 0; SYNCHRONIZE(); } /* * We don't have to worry about re-ordering here because * the update_size field is protected by the inode lock * and we have that held in exclusive mode. */ if (ip->i_update_size) ip->i_update_size = 0; vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(xfs_dinode_core_t); vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; /* * If this is really an old format inode, then we need to * log it as such. This means that we have to copy the link * count from the new field to the old. We don't have to worry * about the new fields, because nothing trusts them as long as * the old inode version number is there. If the superblock already * has a new version number, then we don't bother converting back. */ mp = ip->i_mount; ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || XFS_SB_VERSION_HASNLINK(&mp->m_sb)); if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { /* * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); ip->i_d.di_onlink = ip->i_d.di_nlink; } else { /* * The superblock version has already been bumped, * so just make the conversion to the new inode * format permanent. */ ip->i_d.di_version = XFS_DINODE_VERSION_2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } } switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) { ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_d.di_nextents > 0); ASSERT(iip->ili_extents_buf == NULL); nrecs = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(nrecs > 0); #if __BYTE_ORDER == __BIG_ENDIAN if (nrecs == ip->i_d.di_nextents) { /* * There are no delayed allocation * extents, so just point to the * real extents array. */ vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; } else #endif { /* * There are delayed allocation extents * in the inode, or we need to convert * the extents to on disk format. * Use xfs_iextents_copy() * to copy only the real extents into * a separate buffer. We'll free the * buffer in the unlock routine. */ ext_buffer = kmem_alloc(ip->i_df.if_bytes, KM_SLEEP); iip->ili_extents_buf = ext_buffer; vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; vecp++; nvecs++; } break; case XFS_DINODE_FMT_BTREE: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) { ASSERT(ip->i_df.if_broot_bytes > 0); ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } break; case XFS_DINODE_FMT_LOCAL: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) { ASSERT(ip->i_df.if_bytes > 0); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } break; case XFS_DINODE_FMT_DEV: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DDATA | XFS_ILOG_UUID))); if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { iip->ili_format.ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; } break; case XFS_DINODE_FMT_UUID: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DDATA | XFS_ILOG_DEV))); if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { iip->ili_format.ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; } break; default: ASSERT(0); break; } /* * If there are no attributes associated with the file, * then we're done. * Assert that no attribute-related log flags are set. */ if (!XFS_IFORK_Q(ip)) { ASSERT(nvecs == iip->ili_item.li_desc->lid_size); iip->ili_format.ilf_size = nvecs; ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); return; } switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_ABROOT))); if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) { ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_extents != NULL); ASSERT(ip->i_d.di_anextents > 0); #ifdef DEBUG nrecs = ip->i_afp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); #endif ASSERT(nrecs > 0); ASSERT(nrecs == ip->i_d.di_anextents); #if __BYTE_ORDER == __BIG_ENDIAN /* * There are not delayed allocation extents * for attributes, so just point at the array. */ vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents); vecp->i_len = ip->i_afp->if_bytes; #else ASSERT(iip->ili_aextents_buf == NULL); /* * Need to endian flip before logging */ ext_buffer = kmem_alloc(ip->i_afp->if_bytes, KM_SLEEP); iip->ili_aextents_buf = ext_buffer; vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; } break; case XFS_DINODE_FMT_BTREE: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ADATA | XFS_ILOG_AEXT))); if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) { ASSERT(ip->i_afp->if_broot_bytes > 0); ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; } break; case XFS_DINODE_FMT_LOCAL: ASSERT(!(iip->ili_format.ilf_fields & (XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) { ASSERT(ip->i_afp->if_bytes > 0); ASSERT(ip->i_afp->if_u1.if_data != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } break; default: ASSERT(0); break; } ASSERT(nvecs == iip->ili_item.li_desc->lid_size); iip->ili_format.ilf_size = nvecs; }
void xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; inode_sb_list_add(inode); hlist_add_fake(&inode->i_hash); inode->i_mode = ip->i_d.di_mode; set_nlink(inode, ip->i_d.di_nlink); inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: inode->i_rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: inode->i_rdev = 0; break; } inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; if (!(ip->i_df.if_flags & XFS_IFINLINE)) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: inode->i_op = &xfs_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); break; } if (!XFS_IFORK_Q(ip)) { inode_has_no_xattr(inode); cache_no_acl(inode); } xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(inode); }
int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; int src_log_flags, target_log_flags; int error = 0; int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto out; } /* * we have to do two separate lock calls here to keep lockdep * happy. If we try to get all the locks in one call, lock will * report false positives when we drop the ILOCK and regain them * below. */ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto out_unlock; } error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); if (error) goto out_unlock; truncate_pagecache_range(VFS_I(tip), 0, -1); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto out_unlock; } trace_xfs_swap_extent_before(ip, 0); trace_xfs_swap_extent_before(tip, 1); /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", __func__, ip->i_ino); goto out_unlock; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { error = XFS_ERROR(EBUSY); goto out_unlock; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto out_unlock; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ truncate_pagecache_range(VFS_I(ip), 0, -1); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); goto out; } xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) goto out_trans_cancel; } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. We have to do this as we are demand paging the btree * buffers, and so the validation done on read will expect the owner * field to be correctly set. Once we change the owners, we can swap the * inode forks. * * Note the trickiness in setting the log flags - we set the owner log * flag on the opposite inode (i.e. the inode we are setting the new * owner to be) because once we swap the forks and log that, log * recovery is going to see the fork as owned by the swapped inode, * not the pre-swapped inodes. */ src_log_flags = XFS_ILOG_CORE; target_log_flags = XFS_ILOG_CORE; if (ip->i_d.di_version == 3 && ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { target_log_flags |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, tip->i_ino, NULL); if (error) goto out_trans_cancel; } if (tip->i_d.di_version == 3 && tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { src_log_flags |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, ip->i_ino, NULL); if (error) goto out_trans_cancel; } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; *tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = *tempifp; /* struct copy */ /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; /* * The extents in the source inode could still contain speculative * preallocation beyond EOF (e.g. the file is open but not modified * while defrag is in progress). In that case, we need to copy over the * number of delalloc blocks the data fork in the source inode is * tracking beyond EOF so that when the fork is truncated away when the * temporary inode is unlinked we don't underrun the i_delayed_blks * counter on that inode. */ ASSERT(tip->i_delayed_blks == 0); tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } src_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ASSERT(ip->i_d.di_version < 3 || (src_log_flags & XFS_ILOG_DOWNER)); src_log_flags |= XFS_ILOG_DBROOT; break; } switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } target_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: target_log_flags |= XFS_ILOG_DBROOT; ASSERT(tip->i_d.di_version < 3 || (target_log_flags & XFS_ILOG_DOWNER)); break; } xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); out: kmem_free(tempifp); return error; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); goto out; out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; }
/* * Initialize the Linux inode, set up the operation vectors and * unlock the inode. * * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. * * We are always called with an uninitialised linux inode here. * We need to initialise the necessary fields and take a reference * on it. */ void xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ hlist_add_fake(&inode->i_hash); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; inode->i_uid = ip->i_d.di_uid; inode->i_gid = ip->i_d.di_gid; switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: inode->i_rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: inode->i_rdev = 0; break; } inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; if (!(ip->i_df.if_flags & XFS_IFINLINE)) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: inode->i_op = &xfs_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); break; } /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ if (!XFS_IFORK_Q(ip)) { inode_has_no_xattr(inode); cache_no_acl(inode); } xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(inode); }
/* * Initialize the Linux inode, set up the operation vectors and * unlock the inode. * * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. * * We are always called with an uninitialised linux inode here. * We need to initialise the necessary fields and take a reference * on it. */ void xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ hlist_add_fake(&inode->i_hash); inode->i_mode = ip->i_d.di_mode; set_nlink(inode, ip->i_d.di_nlink); inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: inode->i_rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: inode->i_rdev = 0; break; } inode->i_generation = ip->i_d.di_gen; i_size_write(inode, ip->i_d.di_size); inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); ip->d_ops = ip->i_mount->m_nondir_inode_ops; lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; ip->d_ops = ip->i_mount->m_dir_inode_ops; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; if (!(ip->i_df.if_flags & XFS_IFINLINE)) inode->i_mapping->a_ops = &xfs_address_space_operations; break; default: inode->i_op = &xfs_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); break; } /* * Ensure all page cache allocations are done from GFP_NOFS context to * prevent direct reclaim recursion back into the filesystem and blowing * stacks or deadlocking. */ gfp_mask = mapping_gfp_mask(inode->i_mapping); mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ if (!XFS_IFORK_Q(ip)) { inode_has_no_xattr(inode); cache_no_acl(inode); } xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(inode); }
/* * Syssgi interface for swapext */ int xfs_swapext( xfs_swapext_t __user *sxp) { xfs_swapext_t sx; xfs_inode_t *ip=NULL, *tip=NULL, *ips[2]; xfs_trans_t *tp; xfs_mount_t *mp; xfs_bstat_t *sbp; struct file *fp = NULL, *tfp = NULL; vnode_t *vp, *tvp; bhv_desc_t *bdp, *tbdp; vn_bhv_head_t *bhp, *tbhp; uint lock_flags=0; int ilf_fields, tilf_fields; int error = 0; xfs_ifork_t tempif, *ifp, *tifp; __uint64_t tmp; int aforkblks = 0; int taforkblks = 0; int locked = 0; if (copy_from_user(&sx, sxp, sizeof(sx))) return XFS_ERROR(EFAULT); /* Pull information for the target fd */ if (((fp = fget((int)sx.sx_fdtarget)) == NULL) || ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } bhp = VN_BHV_HEAD(vp); bdp = vn_bhv_lookup(bhp, &xfs_vnodeops); if (bdp == NULL) { error = XFS_ERROR(EBADF); goto error0; } else { ip = XFS_BHVTOI(bdp); } if (((tfp = fget((int)sx.sx_fdtmp)) == NULL) || ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } tbhp = VN_BHV_HEAD(tvp); tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops); if (tbdp == NULL) { error = XFS_ERROR(EBADF); goto error0; } else { tip = XFS_BHVTOI(tbdp); } if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); goto error0; } if (ip->i_ino == tip->i_ino) { error = XFS_ERROR(EINVAL); goto error0; } mp = ip->i_mount; sbp = &sx.sx_stat; if (XFS_FORCED_SHUTDOWN(mp)) { error = XFS_ERROR(EIO); goto error0; } locked = 1; /* Lock in i_ino order */ if (ip->i_ino < tip->i_ino) { ips[0] = ip; ips[1] = tip; } else { ips[0] = tip; ips[1] = ip; } lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; xfs_lock_inodes(ips, 2, 0, lock_flags); /* Check permissions */ error = xfs_iaccess(ip, S_IWUSR, NULL); if (error) goto error0; error = xfs_iaccess(tip, S_IWUSR, NULL); if (error) goto error0; /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify both files are either real-time or non-realtime */ if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { error = XFS_ERROR(EINVAL); goto error0; } /* Should never get a local format */ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { error = XFS_ERROR(EINVAL); goto error0; } if (VN_CACHED(tvp) != 0) xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore), (xfs_off_t)0, 0, 0); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(tvp) != 0) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify all data are being swapped */ if (sx.sx_offset != 0 || sx.sx_length != ip->i_d.di_size || sx.sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto error0; } /* * If the target has extended attributes, the tmp file * must also in order to ensure the correct data fork * format. */ if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { error = XFS_ERROR(EINVAL); goto error0; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { error = XFS_ERROR(EBUSY); goto error0; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * VOP_READ (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(vp)) { error = XFS_ERROR(EBUSY); goto error0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0))) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); return error; } xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); xfs_trans_cancel(tp, 0); return error; } } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); xfs_trans_cancel(tp, 0); return error; } } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; tempif = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = tempif; /* struct copy */ /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; ilf_fields = XFS_ILOG_CORE; switch(ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } ilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ilf_fields |= XFS_ILOG_DBROOT; break; } tilf_fields = XFS_ILOG_CORE; switch(tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } tilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: tilf_fields |= XFS_ILOG_DBROOT; break; } /* * Increment vnode ref counts since xfs_trans_commit & * xfs_trans_cancel will both unlock the inodes and * decrement the associated ref counts. */ VN_HOLD(vp); VN_HOLD(tvp); xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); xfs_trans_log_inode(tp, tip, tilf_fields); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(tp); } error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL); fput(fp); fput(tfp); return error; error0: if (locked) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); } if (fp != NULL) fput(fp); if (tfp != NULL) fput(tfp); return error; }
/* * This returns the number of iovecs needed to log the given inode item. * * We need one iovec for the inode log format structure, one for the * inode core, and possibly one for the inode data/extents/b-tree root * and one for the inode attribute data/extents/b-tree root. */ STATIC uint xfs_inode_item_size( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; uint nvecs = 2; /* * Only log the data/extents/b-tree root if there is something * left to log. */ iip->ili_format.ilf_fields |= XFS_ILOG_CORE; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: iip->ili_format.ilf_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) && (ip->i_d.di_nextents > 0) && (ip->i_df.if_bytes > 0)) { ASSERT(ip->i_df.if_u1.if_extents != NULL); nvecs++; } else { iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT; } break; case XFS_DINODE_FMT_BTREE: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); iip->ili_format.ilf_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) && (ip->i_df.if_broot_bytes > 0)) { ASSERT(ip->i_df.if_broot != NULL); nvecs++; } else { ASSERT(!(iip->ili_format.ilf_fields & XFS_ILOG_DBROOT)); #ifdef XFS_TRANS_DEBUG if (iip->ili_root_size > 0) { ASSERT(iip->ili_root_size == ip->i_df.if_broot_bytes); ASSERT(memcmp(iip->ili_orig_root, ip->i_df.if_broot, iip->ili_root_size) == 0); } else { ASSERT(ip->i_df.if_broot_bytes == 0); } #endif iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_format.ilf_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) && (ip->i_df.if_bytes > 0)) { ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); nvecs++; } else { iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA; } break; case XFS_DINODE_FMT_DEV: iip->ili_format.ilf_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_UUID); break; case XFS_DINODE_FMT_UUID: iip->ili_format.ilf_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV); break; default: ASSERT(0); break; } /* * If there are no attributes associated with this file, * then there cannot be anything more to log. * Clear all attribute-related log flags. */ if (!XFS_IFORK_Q(ip)) { iip->ili_format.ilf_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); return nvecs; } /* * Log any necessary attribute data. */ switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: iip->ili_format.ilf_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) && (ip->i_d.di_anextents > 0) && (ip->i_afp->if_bytes > 0)) { ASSERT(ip->i_afp->if_u1.if_extents != NULL); nvecs++; } else { iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_format.ilf_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) && (ip->i_afp->if_broot_bytes > 0)) { ASSERT(ip->i_afp->if_broot != NULL); nvecs++; } else { iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_format.ilf_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) && (ip->i_afp->if_bytes > 0)) { ASSERT(ip->i_afp->if_u1.if_data != NULL); nvecs++; } else { iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA; } break; default: ASSERT(0); break; } return nvecs; }
/* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode * log format structure, the second with the on-disk inode structure, * and a possible third and/or fourth with the inode data/extents/b-tree * root and inode attributes data/extents/b-tree root. */ STATIC void xfs_inode_item_format( struct xfs_log_item *lip, struct xfs_log_iovec *vecp) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; vecp->i_addr = &ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; /* * If this is really an old format inode, then we need to * log it as such. This means that we have to copy the link * count from the new field to the old. We don't have to worry * about the new fields, because nothing trusts them as long as * the old inode version number is there. If the superblock already * has a new version number, then we don't bother converting back. */ mp = ip->i_mount; ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); ip->i_d.di_onlink = ip->i_d.di_nlink; } else { /* * The superblock version has already been bumped, * so just make the conversion to the new inode * format permanent. */ ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } } switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_d.di_nextents > 0 && ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); ASSERT(iip->ili_extents_buf == NULL); #ifdef XFS_NATIVE_HOST if (ip->i_d.di_nextents == ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { /* * There are no delayed allocation * extents, so just point to the * real extents array. */ vecp->i_addr = ip->i_df.if_u1.if_extents; vecp->i_len = ip->i_df.if_bytes; vecp->i_type = XLOG_REG_TYPE_IEXT; } else #endif { xfs_inode_item_format_extents(ip, vecp, XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; vecp++; nvecs++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); #ifdef XFS_TRANS_DEBUG if (iip->ili_root_size > 0) { ASSERT(iip->ili_root_size == ip->i_df.if_broot_bytes); ASSERT(memcmp(iip->ili_orig_root, ip->i_df.if_broot, iip->ili_root_size) == 0); } else { ASSERT(ip->i_df.if_broot_bytes == 0); } #endif iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); vecp->i_addr = ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } break; case XFS_DINODE_FMT_DEV: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_UUID); if (iip->ili_fields & XFS_ILOG_DEV) { iip->ili_format.ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; } break; case XFS_DINODE_FMT_UUID: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV); if (iip->ili_fields & XFS_ILOG_UUID) { iip->ili_format.ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; } break; default: ASSERT(0); break; } /* * If there are no attributes associated with the file, then we're done. */ if (!XFS_IFORK_Q(ip)) { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); goto out; } switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_d.di_anextents > 0 && ip->i_afp->if_bytes > 0) { ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); #ifdef XFS_NATIVE_HOST /* * There are not delayed allocation extents * for attributes, so just point at the array. */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); xfs_inode_item_format_extents(ip, vecp, XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_afp->if_broot_bytes > 0) { ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { ASSERT(ip->i_afp->if_u1.if_data != NULL); vecp->i_addr = ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } break; default: ASSERT(0); break; } out: /* * Now update the log format that goes out to disk from the in-core * values. We always write the inode core to make the arithmetic * games in recovery easier, which isn't a big deal as just about any * transaction would dirty it anyway. */ iip->ili_format.ilf_fields = XFS_ILOG_CORE | (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); iip->ili_format.ilf_size = nvecs; }