Exemple #1
0
void
zfs_rmnode(znode_t *zp)
{
    zfs_sb_t	*zsb = ZTOZSB(zp);
    objset_t	*os = zsb->z_os;
    znode_t		*xzp = NULL;
    dmu_tx_t	*tx;
    uint64_t	acl_obj;
    uint64_t	xattr_obj;
    uint64_t	count;
    int		error;

    ASSERT(zp->z_links == 0);
    ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

    /*
     * If this is an attribute directory, purge its contents.
     */
    if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
        error = zap_count(os, zp->z_id, &count);
        if (error) {
            zfs_znode_dmu_fini(zp);
            return;
        }

        if (count > 0) {
            taskq_t *taskq;

            /*
             * There are still directory entries in this xattr
             * directory.  Let zfs_unlinked_drain() deal with
             * them to avoid deadlocking this process in the
             * zfs_purgedir()->zfs_zget()->ilookup() callpath
             * on the xattr inode's I_FREEING bit.
             */
            taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
            taskq_dispatch(taskq, (task_func_t *)
                           zfs_unlinked_drain, zsb, TQ_SLEEP);

            zfs_znode_dmu_fini(zp);
            return;
        }
    }

    /*
     * Free up all the data in the file.
     */
    error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
    if (error) {
        /*
         * Not enough space.  Leave the file in the unlinked set.
         */
        zfs_znode_dmu_fini(zp);
        return;
    }

    /*
     * If the file has extended attributes, we're going to unlink
     * the xattr dir.
     */
    error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
                      &xattr_obj, sizeof (xattr_obj));
    if (error == 0 && xattr_obj) {
        error = zfs_zget(zsb, xattr_obj, &xzp);
        ASSERT(error == 0);
    }

    acl_obj = zfs_external_acl(zp);

    /*
     * Set up the final transaction.
     */
    tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
    dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
    if (xzp) {
        dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
        dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
    }
    if (acl_obj)
        dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

    zfs_sa_upgrade_txholds(tx, zp);
    error = dmu_tx_assign(tx, TXG_WAIT);
    if (error) {
        /*
         * Not enough space to delete the file.  Leave it in the
         * unlinked set, leaking it until the fs is remounted (at
         * which point we'll call zfs_unlinked_drain() to process it).
         */
        dmu_tx_abort(tx);
        zfs_znode_dmu_fini(zp);
        goto out;
    }

    if (xzp) {
        ASSERT(error == 0);
        mutex_enter(&xzp->z_lock);
        xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
        xzp->z_links = 0;	/* no more links to it */
        VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
                              &xzp->z_links, sizeof (xzp->z_links), tx));
        mutex_exit(&xzp->z_lock);
        zfs_unlinked_add(xzp, tx);
    }

    /* Remove this znode from the unlinked set */
    VERIFY3U(0, ==,
             zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));

    zfs_znode_delete(zp, tx);

    dmu_tx_commit(tx);
out:
    if (xzp)
        iput(ZTOI(xzp));
}
Exemple #2
0
/*
 * Link zp into dl.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
    znode_t *dzp = dl->dl_dzp;
    zfs_sb_t *zsb = ZTOZSB(zp);
    uint64_t value;
    int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
    sa_bulk_attr_t bulk[5];
    uint64_t mtime[2], ctime[2];
    int count = 0;
    int error;

    mutex_enter(&zp->z_lock);

    if (!(flag & ZRENAMING)) {
        if (zp->z_unlinked) {	/* no new links to unlinked zp */
            ASSERT(!(flag & (ZNEW | ZEXISTS)));
            mutex_exit(&zp->z_lock);
            return (ENOENT);
        }
        zp->z_links++;
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
                         &zp->z_links, sizeof (zp->z_links));

    }
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
                     &dzp->z_id, sizeof (dzp->z_id));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
                     &zp->z_pflags, sizeof (zp->z_pflags));

    if (!(flag & ZNEW)) {
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
                         ctime, sizeof (ctime));
        zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
                                ctime, B_TRUE);
    }
    error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
    ASSERT(error == 0);

    mutex_exit(&zp->z_lock);

    mutex_enter(&dzp->z_lock);
    dzp->z_size++;
    dzp->z_links += zp_is_dir;
    count = 0;
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
                     &dzp->z_size, sizeof (dzp->z_size));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
                     &dzp->z_links, sizeof (dzp->z_links));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
                     mtime, sizeof (mtime));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
                     ctime, sizeof (ctime));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
                     &dzp->z_pflags, sizeof (dzp->z_pflags));
    zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
    error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
    ASSERT(error == 0);
    mutex_exit(&dzp->z_lock);

    value = zfs_dirent(zp, zp->z_mode);
    error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
                    8, 1, &value, tx);
    ASSERT(error == 0);

    return (0);
}
Exemple #3
0
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
	dmu_buf_t *db = sa_get_db(hdl);
	znode_t *zp = sa_get_userdata(hdl);
	zfs_sb_t *zsb = ZTOZSB(zp);
	int count = 0;
	sa_bulk_attr_t *bulk, *sa_attrs;
	zfs_acl_locator_cb_t locate = { 0 };
	uint64_t uid, gid, mode, rdev, xattr, parent;
	uint64_t crtime[2], mtime[2], ctime[2];
	zfs_acl_phys_t znode_acl;
	char scanstamp[AV_SCANSTAMP_SZ];
	boolean_t drop_lock = B_FALSE;

	/*
	 * No upgrade if ACL isn't cached
	 * since we won't know which locks are held
	 * and ready the ACL would require special "locked"
	 * interfaces that would be messy
	 */
	if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode))
		return;

	/*
	 * If the z_lock is held and we aren't the owner
	 * the just return since we don't want to deadlock
	 * trying to update the status of z_is_sa.  This
	 * file can then be upgraded at a later time.
	 *
	 * Otherwise, we know we are doing the
	 * sa_update() that caused us to enter this function.
	 */
	if (mutex_owner(&zp->z_lock) != curthread) {
		if (mutex_tryenter(&zp->z_lock) == 0)
			return;
		else
			drop_lock = B_TRUE;
	}

	/* First do a bulk query of the attributes that aren't cached */
	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL,
	    &znode_acl, 88);

	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) {
		kmem_free(bulk, sizeof (sa_bulk_attr_t) * 20);
		goto done;
	}

	/*
	 * While the order here doesn't matter its best to try and organize
	 * it is such a way to pick up an already existing layout number
	 */
	count = 0;
	sa_attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zsb), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zsb),
	    NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zsb),
	    NULL, &parent, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zsb), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zsb), NULL,
	    zp->z_atime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zsb), NULL,
	    &mtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zsb), NULL,
	    &ctime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL,
	    &crtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL,
	    &zp->z_links, 8);
	if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL,
		    &rdev, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zsb), NULL,
	    &zp->z_acl_cached->z_acl_count, 8);

	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
		zfs_acl_xform(zp, zp->z_acl_cached, CRED());

	locate.cb_aclp = zp->z_acl_cached;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zsb),
	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

	if (xattr)
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zsb),
		    NULL, &xattr, 8);

	/* if scanstamp then add scanstamp */

	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
		abd_copy_to_buf_off(scanstamp, db->db_data,
		    AV_SCANSTAMP_SZ, ZFS_OLD_ZNODE_PHYS_SIZE);
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zsb),
		    NULL, scanstamp, AV_SCANSTAMP_SZ);
		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
	}

	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
	    count, tx) == 0);
	if (znode_acl.z_acl_extern_obj)
		VERIFY(0 == dmu_object_free(zsb->z_os,
		    znode_acl.z_acl_extern_obj, tx));

	zp->z_is_sa = B_TRUE;
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * 20);
	kmem_free(bulk, sizeof (sa_bulk_attr_t) * 20);
done:
	if (drop_lock)
		mutex_exit(&zp->z_lock);
}
Exemple #4
0
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *      direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *      realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
                int flag, int *direntflags, pathname_t *realpnp)
{
    zfs_sb_t	*zsb = ZTOZSB(dzp);
    zfs_dirlock_t	*dl;
    boolean_t	update;
    boolean_t	exact;
    uint64_t	zoid;
#ifdef HAVE_DNLC
    vnode_t		*vp = NULL;
#endif /* HAVE_DNLC */
    int		error = 0;
    int		cmpflags;

    *zpp = NULL;
    *dlpp = NULL;

    /*
     * Verify that we are not trying to lock '.', '..', or '.zfs'
     */
    if ((name[0] == '.' &&
            (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
            (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
        return (EEXIST);

    /*
     * Case sensitivity and normalization preferences are set when
     * the file system is created.  These are stored in the
     * zsb->z_case and zsb->z_norm fields.  These choices
     * affect what vnodes can be cached in the DNLC, how we
     * perform zap lookups, and the "width" of our dirlocks.
     *
     * A normal dirlock locks a single name.  Note that with
     * normalization a name can be composed multiple ways, but
     * when normalized, these names all compare equal.  A wide
     * dirlock locks multiple names.  We need these when the file
     * system is supporting mixed-mode access.  It is sometimes
     * necessary to lock all case permutations of file name at
     * once so that simultaneous case-insensitive/case-sensitive
     * behaves as rationally as possible.
     */

    /*
     * Decide if exact matches should be requested when performing
     * a zap lookup on file systems supporting case-insensitive
     * access.
     */
    exact =
        ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
        ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

    /*
     * Only look in or update the DNLC if we are looking for the
     * name on a file system that does not require normalization
     * or case folding.  We can also look there if we happen to be
     * on a non-normalizing, mixed sensitivity file system IF we
     * are looking for the exact name.
     *
     * Maybe can add TO-UPPERed version of name to dnlc in ci-only
     * case for performance improvement?
     */
    update = !zsb->z_norm ||
             ((zsb->z_case == ZFS_CASE_MIXED) &&
              !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

    /*
     * ZRENAMING indicates we are in a situation where we should
     * take narrow locks regardless of the file system's
     * preferences for normalizing and case folding.  This will
     * prevent us deadlocking trying to grab the same wide lock
     * twice if the two names happen to be case-insensitive
     * matches.
     */
    if (flag & ZRENAMING)
        cmpflags = 0;
    else
        cmpflags = zsb->z_norm;

    /*
     * Wait until there are no locks on this name.
     *
     * Don't grab the the lock if it is already held. However, cannot
     * have both ZSHARED and ZHAVELOCK together.
     */
    ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
    if (!(flag & ZHAVELOCK))
        rw_enter(&dzp->z_name_lock, RW_READER);

    mutex_enter(&dzp->z_lock);
    for (;;) {
        if (dzp->z_unlinked) {
            mutex_exit(&dzp->z_lock);
            if (!(flag & ZHAVELOCK))
                rw_exit(&dzp->z_name_lock);
            return (ENOENT);
        }
        for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
            if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
                           U8_UNICODE_LATEST, &error) == 0) || error != 0)
                break;
        }
        if (error != 0) {
            mutex_exit(&dzp->z_lock);
            if (!(flag & ZHAVELOCK))
                rw_exit(&dzp->z_name_lock);
            return (ENOENT);
        }
        if (dl == NULL)	{
            /*
             * Allocate a new dirlock and add it to the list.
             */
            dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
            cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
            dl->dl_name = name;
            dl->dl_sharecnt = 0;
            dl->dl_namelock = 0;
            dl->dl_namesize = 0;
            dl->dl_dzp = dzp;
            dl->dl_next = dzp->z_dirlocks;
            dzp->z_dirlocks = dl;
            break;
        }
        if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
            break;
        cv_wait(&dl->dl_cv, &dzp->z_lock);
    }

    /*
     * If the z_name_lock was NOT held for this dirlock record it.
     */
    if (flag & ZHAVELOCK)
        dl->dl_namelock = 1;

    if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
        /*
         * We're the second shared reference to dl.  Make a copy of
         * dl_name in case the first thread goes away before we do.
         * Note that we initialize the new name before storing its
         * pointer into dl_name, because the first thread may load
         * dl->dl_name at any time.  He'll either see the old value,
         * which is his, or the new shared copy; either is OK.
         */
        dl->dl_namesize = strlen(dl->dl_name) + 1;
        name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
        bcopy(dl->dl_name, name, dl->dl_namesize);
        dl->dl_name = name;
    }

    mutex_exit(&dzp->z_lock);

    /*
     * We have a dirlock on the name.  (Note that it is the dirlock,
     * not the dzp's z_lock, that protects the name in the zap object.)
     * See if there's an object by this name; if so, put a hold on it.
     */
    if (flag & ZXATTR) {
        error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
                          sizeof (zoid));
        if (error == 0)
            error = (zoid == 0 ? ENOENT : 0);
    } else {
#ifdef HAVE_DNLC
        if (update)
            vp = dnlc_lookup(ZTOI(dzp), name);
        if (vp == DNLC_NO_VNODE) {
            iput(vp);
            error = ENOENT;
        } else if (vp) {
            if (flag & ZNEW) {
                zfs_dirent_unlock(dl);
                iput(vp);
                return (EEXIST);
            }
            *dlpp = dl;
            *zpp = VTOZ(vp);
            return (0);
        } else {
            error = zfs_match_find(zsb, dzp, name, exact,
                                   update, direntflags, realpnp, &zoid);
        }
#else
        error = zfs_match_find(zsb, dzp, name, exact,
                               update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
    }
    if (error) {
        if (error != ENOENT || (flag & ZEXISTS)) {
            zfs_dirent_unlock(dl);
            return (error);
        }
    } else {
        if (flag & ZNEW) {
            zfs_dirent_unlock(dl);
            return (EEXIST);
        }
        error = zfs_zget(zsb, zoid, zpp);
        if (error) {
            zfs_dirent_unlock(dl);
            return (error);
        }
#ifdef HAVE_DNLC
        if (!(flag & ZXATTR) && update)
            dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
    }

    *dlpp = dl;

    return (0);
}
Exemple #5
0
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		err;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	*sa_attrs;
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	if (zsb->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (S_ISDIR(vap->va_mode)) {
		if (zsb->z_replay) {
			err = zap_create_claim_norm(zsb->z_os, obj,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
			ASSERT0(err);
		} else {
			obj = zap_create_norm(zsb->z_os,
			    zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zsb->z_replay) {
			err = dmu_object_claim(zsb->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
			ASSERT0(err);
		} else {
			obj = dmu_object_alloc(zsb->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	ZFS_OBJ_HOLD_ENTER(zsb, obj);
	VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zsb->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (S_ISDIR(vap->va_mode)) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
		rdev = vap->va_rdev;

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be deterimed when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	if (vap->va_mask & ATTR_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & ATTR_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */
	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
		    NULL, &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
		    NULL, &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
		    NULL, &rdev, 8);
	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
		    ZTOI(dzp));
		VERIFY(*zpp != NULL);
		VERIFY(dzp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
		ASSERT0(err);
	}
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
	ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
Exemple #6
0
/*
 * Reopen zfs_sb_t and release VFS ops.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;

	ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os);
	if (err) {
		zsb->z_os = NULL;
	} else {
		znode_t *zp;
		uint64_t sa_obj = 0;

		err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj);

		if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA)
			goto bail;


		if ((err = sa_setup(zsb->z_os, sa_obj,
		    zfs_attr_table,  ZPL_END, &zsb->z_attr_table)) != 0)
			goto bail;

		VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
		zsb->z_rollback_time = jiffies;

		/*
		 * Attempt to re-establish all the active inodes with their
		 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
		 * and mark it stale.  This prevents a collision if a new
		 * inode/object is created which must use the same inode
		 * number.  The stale inode will be be released when the
		 * VFS prunes the dentry holding the remaining references
		 * on the stale inode.
		 */
		mutex_enter(&zsb->z_znodes_lock);
		for (zp = list_head(&zsb->z_all_znodes); zp;
		    zp = list_next(&zsb->z_all_znodes, zp)) {
			err2 = zfs_rezget(zp);
			if (err2) {
				remove_inode_hash(ZTOI(zp));
				zp->z_is_stale = B_TRUE;
			}
		}
		mutex_exit(&zsb->z_znodes_lock);
	}

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrw_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfs_sb_t or, setup the
		 * sa framework, force unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
Exemple #7
0
int
zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
{
	zfs_sb_t	*zsb = sb->s_fs_info;
	znode_t		*zp;
	uint64_t	object = 0;
	uint64_t	fid_gen = 0;
	uint64_t	gen_mask;
	uint64_t	zp_gen;
	int		i, err;

	*ipp = NULL;

	ZFS_ENTER(zsb);

	if (fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
		uint64_t	objsetid = 0;
		uint64_t	setgen = 0;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zsb);

		err = zfsctl_lookup_objset(sb, objsetid, &zsb);
		if (err)
			return (SET_ERROR(EINVAL));

		ZFS_ENTER(zsb);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t	*zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zsb);
		return (SET_ERROR(EINVAL));
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*ipp = zsb->z_ctldir;
		ASSERT(*ipp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
			    0, kcred, NULL, NULL) == 0);
		} else {
			igrab(*ipp);
		}
		ZFS_EXIT(zsb);
		return (0);
	}

	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
	if ((err = zfs_zget(zsb, object, &zp))) {
		ZFS_EXIT(zsb);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
		    fid_gen);
		iput(ZTOI(zp));
		ZFS_EXIT(zsb);
		return (SET_ERROR(EINVAL));
	}

	*ipp = ZTOI(zp);
	if (*ipp)
		zfs_inode_update(ITOZ(*ipp));

	ZFS_EXIT(zsb);
	return (0);
}
Exemple #8
0
/*
 * Free space in a file
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of range
 *		len	- end of range (0 => EOF)
 *		flag	- current file open mode flags.
 *		log	- TRUE if this action should be logged
 *
 * 	RETURN:	0 on success, error code on failure
 */
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
	struct inode *ip = ZTOI(zp);
	dmu_tx_t *tx;
	zfs_sb_t *zsb = ZTOZSB(zp);
	zilog_t *zilog = zsb->z_log;
	uint64_t mode;
	uint64_t mtime[2], ctime[2];
	sa_bulk_attr_t bulk[3];
	int count = 0;
	int error;

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode,
	    sizeof (mode))) != 0)
		return (error);

	if (off > zp->z_size) {
		error =  zfs_extend(zp, off+len);
		if (error == 0 && log)
			goto log;
		else
			return (error);
	}

	/*
	 * Check for any locks in the region to be freed.
	 */
	if (ip->i_flock && mandatory_lock(ip)) {
		uint64_t length = (len ? len : zp->z_size - off);
		if (!lock_may_write(ip, off, length))
			return (SET_ERROR(EAGAIN));
	}

	if (len == 0) {
		error = zfs_trunc(zp, off);
	} else {
		if ((error = zfs_free_range(zp, off, len)) == 0 &&
		    off + len > zp->z_size)
			error = zfs_extend(zp, off+len);
	}
	if (error || !log)
		return (error);
log:
	tx = dmu_tx_create(zsb->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
	    NULL, &zp->z_pflags, 8);
	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);

	dmu_tx_commit(tx);
	zfs_inode_update(zp);
	return (0);
}
Exemple #9
0
/*
 * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
 * TK_MKXATTR transactions.
 *
 * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
 * domain information appended prior to the name.  In this case the
 * uid/gid in the log record will be a log centric FUID.
 *
 * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
 * may contain attributes, ACL and optional fuid information.
 *
 * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
 * and ACL and normal users/groups in the ACEs.
 *
 * There may be an optional xvattr attribute information similar
 * to zfs_log_setattr.
 *
 * Also, after the file name "domain" strings may be appended.
 */
void
zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
    zfs_fuid_info_t *fuidp, vattr_t *vap)
{
	itx_t *itx;
	lr_create_t *lr;
	lr_acl_create_t *lracl;
	size_t aclsize = 0;
	size_t xvatsize = 0;
	size_t txsize;
	xvattr_t *xvap = (xvattr_t *)vap;
	void *end;
	size_t lrsize;
	size_t namesize = strlen(name) + 1;
	size_t fuidsz = 0;

	if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
		return;

	/*
	 * If we have FUIDs present then add in space for
	 * domains and ACE fuid's if any.
	 */
	if (fuidp) {
		fuidsz += fuidp->z_domain_str_sz;
		fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t);
	}

	if (vap->va_mask & ATTR_XVATTR)
		xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize);

	if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
	    (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR ||
	    (int)txtype == TX_MKXATTR) {
		txsize = sizeof (*lr) + namesize + fuidsz + xvatsize;
		lrsize = sizeof (*lr);
	} else {
		txsize =
		    sizeof (lr_acl_create_t) + namesize + fuidsz +
		    ZIL_ACE_LENGTH(aclsize) + xvatsize;
		lrsize = sizeof (lr_acl_create_t);
	}

	itx = zil_itx_create(txtype, txsize);

	lr = (lr_create_t *)&itx->itx_lr;
	lr->lr_doid = dzp->z_id;
	lr->lr_foid = zp->z_id;
	/* Store dnode slot count in 8 bits above object id. */
	LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
	lr->lr_mode = zp->z_mode;
	if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOI(zp)->i_uid))) {
		lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOI(zp)->i_uid);
	} else {
		lr->lr_uid = fuidp->z_fuid_owner;
	}
	if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOI(zp)->i_gid))) {
		lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOI(zp)->i_gid);
	} else {
		lr->lr_gid = fuidp->z_fuid_group;
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
	    sizeof (uint64_t));
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
	    lr->lr_crtime, sizeof (uint64_t) * 2);

	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev,
	    sizeof (lr->lr_rdev)) != 0)
		lr->lr_rdev = 0;

	/*
	 * Fill in xvattr info if any
	 */
	if (vap->va_mask & ATTR_XVATTR) {
		zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
		end = (caddr_t)lr + lrsize + xvatsize;
	} else {
		end = (caddr_t)lr + lrsize;
	}

	/* Now fill in any ACL info */

	if (vsecp) {
		lracl = (lr_acl_create_t *)&itx->itx_lr;
		lracl->lr_aclcnt = vsecp->vsa_aclcnt;
		lracl->lr_acl_bytes = aclsize;
		lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0;
		lracl->lr_fuidcnt  = fuidp ? fuidp->z_fuid_cnt : 0;
		if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS)
			lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags;
		else
			lracl->lr_acl_flags = 0;

		bcopy(vsecp->vsa_aclentp, end, aclsize);
		end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize);
	}

	/* drop in FUID info */
	if (fuidp) {
		end = zfs_log_fuid_ids(fuidp, end);
		end = zfs_log_fuid_domains(fuidp, end);
	}
	/*
	 * Now place file name in log record
	 */
	bcopy(name, end, namesize);

	zil_itx_assign(zilog, itx, tx);
}
Exemple #10
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;

	*zpp = NULL;

again:
	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		zp = sa_get_userdata(hdl);


		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			/*
			 * If igrab() returns NULL the VFS has independently
			 * determined the inode should be evicted and has
			 * called iput_final() to start the eviction process.
			 * The SA handle is still valid but because the VFS
			 * requires that the eviction succeed we must drop
			 * our locks and references to allow the eviction to
			 * complete.  The zfs_zget() may then be retried.
			 *
			 * This unlikely case could be optimized by registering
			 * a sops->drop_inode() callback.  The callback would
			 * need to detect the active SA hold thereby informing
			 * the VFS that this inode should not be evicted.
			 */
			if (igrab(ZTOI(zp)) == NULL) {
				mutex_exit(&zp->z_lock);
				sa_buf_rele(db, NULL);
				ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
				goto again;
			}
			*zpp = zp;
			err = 0;
		}
		mutex_exit(&zp->z_lock);
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		return (err);
	}
Exemple #11
0
static int
zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
{
	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
	znode_t	*zp;
	int error;
	uint64_t eod, offset, length;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) {
		/*
		 * As we can log writes out of order, it's possible the
		 * file has been removed. In this case just drop the write
		 * and return success.
		 */
		if (error == ENOENT)
			error = 0;
		return (error);
	}

	offset = lr->lr_offset;
	length = lr->lr_length;
	eod = offset + length;	/* end of data for this write */

	/*
	 * This may be a write from a dmu_sync() for a whole block,
	 * and may extend beyond the current end of the file.
	 * We can't just replay what was written for this TX_WRITE as
	 * a future TX_WRITE2 may extend the eof and the data for that
	 * write needs to be there. So we write the whole block and
	 * reduce the eof. This needs to be done within the single dmu
	 * transaction created within vn_rdwr -> zfs_write. So a possible
	 * new end of file is passed through in zsb->z_replay_eof
	 */

	zsb->z_replay_eof = 0; /* 0 means don't change end of file */

	/* If it's a dmu_sync() block, write the whole block */
	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
		if (length < blocksize) {
			offset -= offset % blocksize;
			length = blocksize;
		}
		if (zp->z_size < eod)
			zsb->z_replay_eof = eod;
	}

	error = zpl_write_common(ZTOI(zp), data, length, offset,
	    UIO_SYSSPACE, 0, kcred);
	if (error) {
		if (error < 0)
			error = -error;
		else
			error = EIO; /* Short write */
	}

	iput(ZTOI(zp));
	zsb->z_replay_eof = 0;	/* safety */

	return (error);
}
Exemple #12
0
static int
zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	char *link;			/* symlink content follows name */
	znode_t *dzp;
	struct inode *ip = NULL;
	xvattr_t xva;
	int vflg = 0;
	size_t lrsize = sizeof (lr_create_t);
	lr_attr_t *lrattr;
	void *start;
	size_t xvatlen;
	uint64_t txtype;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}


	if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic zfs_create()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto out;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;

	/*
	 * Symlinks don't have fuid info, and CIFS never creates
	 * symlinks.
	 *
	 * The _ATTR versions will grab the fuid info in their subcases.
	 */
	if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
	    (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
	    (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
		start = (lr + 1);
		zsb->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
	}

	switch (txtype) {
	case TX_CREATE_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zsb->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_CREATE:
		if (name == NULL)
			name = (char *)start;

		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
		    0, 0, &ip, kcred, vflg, NULL);
		break;
	case TX_MKDIR_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zsb->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_MKDIR:
		if (name == NULL)
			name = (char *)(lr + 1);

		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
		    &ip, kcred, vflg, NULL);
		break;
	case TX_MKXATTR:
		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred);
		break;
	case TX_SYMLINK:
		name = (char *)(lr + 1);
		link = name + strlen(name) + 1;
		error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr,
		    link, &ip, kcred, vflg);
		break;
	default:
		error = ENOTSUP;
	}

out:
	if (error == 0 && ip != NULL)
		iput(ip);

	iput(ZTOI(dzp));

	if (zsb->z_fuid_replay)
		zfs_fuid_info_free(zsb->z_fuid_replay);
	zsb->z_fuid_replay = NULL;
	return (error);
}
Exemple #13
0
/*
 * Replay file create with optional ACL, xvattr information as well
 * as option FUID information.
 */
static int
zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	lr_create_t *lr = (lr_create_t *)lracl;
	znode_t *dzp;
	struct inode *ip = NULL;
	xvattr_t xva;
	int vflg = 0;
	vsecattr_t vsec = { 0 };
	lr_attr_t *lrattr;
	void *aclstart;
	void *fuidstart;
	size_t xvatlen = 0;
	uint64_t txtype;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lracl, sizeof (*lracl));
		if (txtype == TX_CREATE_ACL_ATTR ||
		    txtype == TX_MKDIR_ACL_ATTR) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			zfs_replay_swap_attrs(lrattr);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		}

		aclstart = (caddr_t)(lracl + 1) + xvatlen;
		zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
		/* swap fuids */
		if (lracl->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)aclstart +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
			    lracl->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic zfs_create()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto bail;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;
	switch (txtype) {
	case TX_CREATE_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_CREATE_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			xva.xva_vattr.va_mask |= ATTR_XVATTR;
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zsb->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zsb->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}

		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
		    0, 0, &ip, kcred, vflg, &vsec);
		break;
	case TX_MKDIR_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_MKDIR_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zsb->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zsb->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}
		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
		    &ip, kcred, vflg, &vsec);
		break;
	default:
		error = ENOTSUP;
	}

bail:
	if (error == 0 && ip != NULL)
		iput(ip);

	iput(ZTOI(dzp));

	if (zsb->z_fuid_replay)
		zfs_fuid_info_free(zsb->z_fuid_replay);
	zsb->z_fuid_replay = NULL;

	return (error);
}
Exemple #14
0
/*
 * Unlink zp from dl, and mark zp for deletion if this was the last link.
 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 * and it's the caller's job to do it.
 */
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
                 boolean_t *unlinkedp)
{
    znode_t *dzp = dl->dl_dzp;
    zfs_sb_t *zsb = ZTOZSB(dzp);
    int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
    boolean_t unlinked = B_FALSE;
    sa_bulk_attr_t bulk[5];
    uint64_t mtime[2], ctime[2];
    int count = 0;
    int error;

#ifdef HAVE_DNLC
    dnlc_remove(ZTOI(dzp), dl->dl_name);
#endif /* HAVE_DNLC */

    if (!(flag & ZRENAMING)) {
        mutex_enter(&zp->z_lock);

        if (zp_is_dir && !zfs_dirempty(zp)) {
            mutex_exit(&zp->z_lock);
            return (EEXIST);
        }

        /*
         * If we get here, we are going to try to remove the object.
         * First try removing the name from the directory; if that
         * fails, return the error.
         */
        error = zfs_dropname(dl, zp, dzp, tx, flag);
        if (error != 0) {
            mutex_exit(&zp->z_lock);
            return (error);
        }

        if (zp->z_links <= zp_is_dir) {
            zfs_panic_recover("zfs: link count on %lu is %u, "
                              "should be at least %u", zp->z_id,
                              (int)zp->z_links, zp_is_dir + 1);
            zp->z_links = zp_is_dir + 1;
        }
        if (--zp->z_links == zp_is_dir) {
            zp->z_unlinked = B_TRUE;
            zp->z_links = 0;
            unlinked = B_TRUE;
        } else {
            SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
                             NULL, &ctime, sizeof (ctime));
            SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
                             NULL, &zp->z_pflags, sizeof (zp->z_pflags));
            zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
                                    B_TRUE);
        }
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
                         NULL, &zp->z_links, sizeof (zp->z_links));
        error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
        count = 0;
        ASSERT(error == 0);
        mutex_exit(&zp->z_lock);
    } else {
        error = zfs_dropname(dl, zp, dzp, tx, flag);
        if (error != 0)
            return (error);
    }

    mutex_enter(&dzp->z_lock);
    dzp->z_size--;		/* one dirent removed */
    dzp->z_links -= zp_is_dir;	/* ".." link from zp */
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
                     NULL, &dzp->z_links, sizeof (dzp->z_links));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
                     NULL, &dzp->z_size, sizeof (dzp->z_size));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
                     NULL, ctime, sizeof (ctime));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
                     NULL, mtime, sizeof (mtime));
    SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
                     NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
    zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
    error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
    ASSERT(error == 0);
    mutex_exit(&dzp->z_lock);

    if (unlinkedp != NULL)
        *unlinkedp = unlinked;
    else if (unlinked)
        zfs_unlinked_add(zp, tx);

    return (0);
}
Exemple #15
0
/*
 * Reopen zfs_sb_t and release VFS ops.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;
	znode_t *zp;
	uint64_t sa_obj = 0;

	ASSERT(RRM_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os));
	VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb);
	VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset));
	dmu_objset_rele(zsb->z_os, zsb);

	/*
	 * Make sure version hasn't changed
	 */

	err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
	    &zsb->z_version);

	if (err)
		goto bail;

	err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
	    ZFS_SA_ATTRS, 8, 1, &sa_obj);

	if (err && zsb->z_version >= ZPL_VERSION_SA)
		goto bail;

	if ((err = sa_setup(zsb->z_os, sa_obj,
	    zfs_attr_table,  ZPL_END, &zsb->z_attr_table)) != 0)
		goto bail;

	if (zsb->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(zsb->z_os,
		    zfs_sa_upgrade);

	VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);

	zfs_set_fuid_feature(zsb);
	zsb->z_rollback_time = jiffies;

	/*
	 * Attempt to re-establish all the active inodes with their
	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
	 * and mark it stale.  This prevents a collision if a new
	 * inode/object is created which must use the same inode
	 * number.  The stale inode will be be released when the
	 * VFS prunes the dentry holding the remaining references
	 * on the stale inode.
	 */
	mutex_enter(&zsb->z_znodes_lock);
	for (zp = list_head(&zsb->z_all_znodes); zp;
	    zp = list_next(&zsb->z_all_znodes, zp)) {
		err2 = zfs_rezget(zp);
		if (err2) {
			remove_inode_hash(ZTOI(zp));
			zp->z_is_stale = B_TRUE;
		}
	}
	mutex_exit(&zsb->z_znodes_lock);

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrm_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
Exemple #16
0
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
{
    zfs_sb_t *zsb = ZTOZSB(zp);
    znode_t *xzp;
    dmu_tx_t *tx;
    int error;
    zfs_acl_ids_t acl_ids;
    boolean_t fuid_dirtied;
#ifdef DEBUG
    uint64_t parent;
#endif

    *xipp = NULL;

    if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
        return (error);

    if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
                                    &acl_ids)) != 0)
        return (error);
    if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
        zfs_acl_ids_free(&acl_ids);
        return (EDQUOT);
    }

top:
    tx = dmu_tx_create(zsb->z_os);
    dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
                          ZFS_SA_BASE_ATTR_SIZE);
    dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
    dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
    fuid_dirtied = zsb->z_fuid_dirty;
    if (fuid_dirtied)
        zfs_fuid_txhold(zsb, tx);
    error = dmu_tx_assign(tx, TXG_NOWAIT);
    if (error) {
        if (error == ERESTART) {
            dmu_tx_wait(tx);
            dmu_tx_abort(tx);
            goto top;
        }
        zfs_acl_ids_free(&acl_ids);
        dmu_tx_abort(tx);
        return (error);
    }
    zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);

    if (fuid_dirtied)
        zfs_fuid_sync(zsb, tx);

#ifdef DEBUG
    error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
                      &parent, sizeof (parent));
    ASSERT(error == 0 && parent == zp->z_id);
#endif

    VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
                          sizeof (xzp->z_id), tx));

    (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
                          xzp, "", NULL, acl_ids.z_fuidp, vap);

    zfs_acl_ids_free(&acl_ids);
    dmu_tx_commit(tx);

    *xipp = ZTOI(xzp);

    return (0);
}
Exemple #17
0
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	uint64_t	xattr_obj;
	uint64_t	links;
	int		error;

	ASSERT(ZTOI(zp)->i_nlink == 0);
	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);

			return;
		}
	}

	/*
	 * Free up all the data in the file.  We don't do this for directories
	 * because we need truncate and remove to be in the same tx, like in
	 * zfs_znode_delete(). Otherwise, if we crash here we'll end up with
	 * an inconsistent truncated zap object in the delete queue.  Note a
	 * truncated file is harmless since it only contains user data.
	 */
	if (S_ISREG(ZTOI(zp)->i_mode)) {
		error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
		if (error) {
			/*
			 * Not enough space or we were interrupted by unmount.
			 * Leave the file in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			return;
		}
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zfs_external_acl(zp);

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	zfs_sa_upgrade_txholds(tx, zp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		goto out;
	}

	if (xzp) {
		ASSERT(error == 0);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		clear_nlink(ZTOI(xzp));		/* no more links to it */
		links = 0;
		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
		    &links, sizeof (links), tx));
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		zfs_iput_async(ZTOI(xzp));
}
Exemple #18
0
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
	struct super_block *sb;
	zfs_sb_t	*zsb;
	uint64_t	moid, obj, sa_obj, version;
	uint64_t	sense = ZFS_CASE_SENSITIVE;
	uint64_t	norm = 0;
	nvpair_t	*elem;
	int		error;
	int		i;
	znode_t		*rootzp = NULL;
	vattr_t		vattr;
	znode_t		*zp;
	zfs_acl_ids_t	acl_ids;

	/*
	 * First attempt to create master node.
	 */
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/*
	 * Set starting attributes.
	 */
	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		char *name;

		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
		VERIFY(nvpair_value_uint64(elem, &val) == 0);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			if (val < version)
				version = val;
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		ASSERT(error == 0);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT(version != 0);
	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);

	/*
	 * Create zap object used for SA attribute registration
	 */

	if (version >= ZPL_VERSION_SA) {
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);
		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT(error == 0);
	} else {
		sa_obj = 0;
	}
	/*
	 * Create a delete queue.
	 */
	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
	ASSERT(error == 0);

	/*
	 * Create root znode.  Create minimal znode/inode/zsb/sb
	 * to allow zfs_mknode to work.
	 */
	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
	rootzp->z_moved = 0;
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;
	rootzp->z_is_sa = USE_SA(version, os);

	zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG);
	zsb->z_os = os;
	zsb->z_parent = zsb;
	zsb->z_version = version;
	zsb->z_use_fuids = USE_FUIDS(version, os);
	zsb->z_use_sa = USE_SA(version, os);
	zsb->z_norm = norm;

	sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE);
	sb->s_fs_info = zsb;

	ZTOI(rootzp)->i_sb = sb;

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zsb->z_attr_table);

	ASSERT(error == 0);

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zsb->z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zsb->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids));
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT(error == 0);
	zfs_acl_ids_free(&acl_ids);

	atomic_set(&ZTOI(rootzp)->i_count, 0);
	sa_handle_destroy(rootzp->z_sa_hdl);
	kmem_cache_free(znode_cache, rootzp);

	/*
	 * Create shares directory
	 */
	error = zfs_create_share_dir(zsb, tx);
	ASSERT(error == 0);

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zsb->z_hold_mtx[i]);

	kmem_free(sb, sizeof (struct super_block));
	kmem_free(zsb, sizeof (zfs_sb_t));
}
Exemple #19
0
/*
 * Link zp into dl.  Can only fail if zp has been unlinked.
 */
int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
	znode_t *dzp = dl->dl_dzp;
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	uint64_t value;
	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
	sa_bulk_attr_t bulk[5];
	uint64_t mtime[2], ctime[2];
	uint64_t links;
	int count = 0;
	int error;

	mutex_enter(&zp->z_lock);

	if (!(flag & ZRENAMING)) {
		if (zp->z_unlinked) {	/* no new links to unlinked zp */
			ASSERT(!(flag & (ZNEW | ZEXISTS)));
			mutex_exit(&zp->z_lock);
			return (SET_ERROR(ENOENT));
		}
		if (!(flag & ZNEW)) {
			/*
			 * ZNEW nodes come from zfs_mknode() where the link
			 * count has already been initialised
			 */
			inc_nlink(ZTOI(zp));
			links = ZTOI(zp)->i_nlink;
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
			    NULL, &links, sizeof (links));
		}
	}
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
	    &dzp->z_id, sizeof (dzp->z_id));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (!(flag & ZNEW)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
		    ctime);
	}
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);

	mutex_exit(&zp->z_lock);

	mutex_enter(&dzp->z_lock);
	dzp->z_size++;
	if (zp_is_dir)
		inc_nlink(ZTOI(dzp));
	links = ZTOI(dzp)->i_nlink;
	count = 0;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &dzp->z_size, sizeof (dzp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &links, sizeof (links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    mtime, sizeof (mtime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    ctime, sizeof (ctime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &dzp->z_pflags, sizeof (dzp->z_pflags));
	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
	ASSERT(error == 0);
	mutex_exit(&dzp->z_lock);

	value = zfs_dirent(zp, zp->z_mode);
	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
	    8, 1, &value, tx);
	ASSERT(error == 0);

	return (0);
}
Exemple #20
0
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	int err;
	sa_handle_t	*hdl;
	struct inode	*ip;

	*zpp = NULL;

again:
	ip = ilookup(zsb->z_sb, obj_num);

	ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

	err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (SET_ERROR(EINVAL));
	}

	hdl = dmu_buf_get_user(db);
	if (hdl != NULL) {
		if (ip == NULL) {
			/*
			 * ilookup returned NULL, which means
			 * the znode is dying - but the SA handle isn't
			 * quite dead yet, we need to drop any locks
			 * we're holding, re-schedule the task and try again.
			 */
			sa_buf_rele(db, NULL);
			ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

			schedule();
			goto again;
		}

		zp = sa_get_userdata(hdl);

		/*
		 * Since "SA" does immediate eviction we
		 * should never find a sa handle that doesn't
		 * know about the znode.
		 */

		ASSERT3P(zp, !=, NULL);

		mutex_enter(&zp->z_lock);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = SET_ERROR(ENOENT);
		} else {
			igrab(ZTOI(zp));
			*zpp = zp;
			err = 0;
		}
		sa_buf_rele(db, NULL);
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
		iput(ip);
		return (err);
	}

	ASSERT3P(ip, ==, NULL);

	/*
	 * Not found create new znode/vnode but only if file exists.
	 *
	 * There is a small window where zfs_vget() could
	 * find this object while a file create is still in
	 * progress.  This is checked for in zfs_znode_alloc()
	 *
	 * if zfs_znode_alloc() fails it will drop the hold on the
	 * bonus buffer.
	 */
	zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, obj_num, NULL, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
	return (err);
}
Exemple #21
0
/*
 * Teardown the zfs_sb_t.
 *
 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
int
zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's super block as the
		 * parent filesystem and all of its snapshots have their
		 * inode's super block set to the parent's filesystem's
		 * super block.  Note,  'z_parent' is self referential
		 * for non-snapshots.
		 */
		shrink_dcache_sb(zsb->z_parent->z_sb);
	}

	/*
	 * If someone has not already unmounted this file system,
	 * drain the iput_taskq to ensure all active references to the
	 * zfs_sb_t have been handled only then can it be safely destroyed.
	 */
	if (zsb->z_os)
		taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zsb->z_log) {
		zil_close(zsb->z_log);
		zsb->z_log = NULL;
	}

	rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) {
		rw_exit(&zsb->z_teardown_inactive_lock);
		rrw_exit(&zsb->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no VFS ops active, and any new VFS ops
	 * will fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zsb->z_znodes_lock);
	for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
	    zp = list_next(&zsb->z_all_znodes, zp)) {
		if (zp->z_sa_hdl) {
			ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zsb->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new VFS ops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other VFS ops will fail with EIO.
	 */
	if (unmounting) {
		zsb->z_unmounted = B_TRUE;
		rrw_exit(&zsb->z_teardown_lock, FTAG);
		rw_exit(&zsb->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zsb, so just return as the properties had already been
	 *
	 * unregistered and cached data had been evicted before.
	 */
	if (zsb->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zsb);

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
	    !zfs_is_readonly(zsb))
		txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
	(void) dmu_objset_evict_dbufs(zsb->z_os);

	return (0);
}