static uint64_t
zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
{
	struct dmu_tx *tx;
	uint64_t obj = 0ULL;
	int rc;

	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
	rc = dmu_tx_assign(tx, TXG_WAIT);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_tx_assign() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (obj);
	}

	obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
	rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_object_set_blocksize() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (obj);
	}

	dmu_tx_commit(tx);

	return (obj);
}
static int
zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
{
	struct dmu_tx *tx;
	int rc;

	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
	rc = dmu_tx_assign(tx, TXG_WAIT);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_tx_assign() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (rc);
	}

	rc = dmu_object_free(os, obj, tx);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_object_free() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (rc);
	}

	dmu_tx_commit(tx);

	return (0);
}
static int
zfs_vfs_sync(struct mount *mp, __unused int waitfor,
    __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);

	ZFS_ENTER(zfsvfs);

	/*
	 * Mac OS X needs a file system modify time.
	 *
	 * We use the mtime of the "com.apple.system.mtime"
	 * extended attribute, which is associated with the
	 * file system root directory.
	 *
	 * Here we sync any mtime changes to this attribute.
	 */
	if (zfsvfs->z_mtime_vp != NULL) {
		timestruc_t mtime;
		znode_t *zp;
top:
		zp = VTOZ(zfsvfs->z_mtime_vp);
		ZFS_TIME_DECODE(&mtime, zp->z_phys->zp_mtime);
		if (zfsvfs->z_last_mtime_synced < mtime.tv_sec) {
			dmu_tx_t *tx;
			int error;

			tx = dmu_tx_create(zfsvfs->z_os);
			dmu_tx_hold_bonus(tx, zp->z_id);
			error = dmu_tx_assign(tx, zfsvfs->z_assign);
			if (error) {
				if (error == ERESTART &&
				    zfsvfs->z_assign == TXG_NOWAIT) {
					dmu_tx_wait(tx);
					dmu_tx_abort(tx);
					goto top;
				}
				dmu_tx_abort(tx);
			} else {
				dmu_buf_will_dirty(zp->z_dbuf, tx);
				dmu_tx_commit(tx);
				zfsvfs->z_last_mtime_synced = mtime.tv_sec;
			}
		}
	}

	if (zfsvfs->z_log != NULL)
		zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
	else
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);

	ZFS_EXIT(zfsvfs);

	return (0);
}
/*
 * TX_WRITE2 records are only generated when dmu_sync() returns EALREADY,
 * meaning the pool block is already being synced. So now that we always write
 * out full blocks, all we have to do is expand the EOF if
 * the file is grown.
 */
static int
zfs_replay_write2(zfsvfs_t *zfsvfs, void *data, boolean_t byteswap)
{
#ifndef TODO_OSV
	kprintf("TX_WRITE2\n");
	return EOPNOTSUPP;
#else
	lr_write_t *lr = data;
	znode_t *zp;
	int error;
	uint64_t end;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
		return (error);

top:
	end = lr->lr_offset + lr->lr_length;
	if (end > zp->z_size) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		zp->z_size = end;
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			VN_RELE(ZTOV(zp));
			if (error == ERESTART) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			return (error);
		}
		(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
		    (void *)&zp->z_size, sizeof (uint64_t), tx);

		/* Ensure the replayed seq is updated */
		(void) zil_replaying(zfsvfs->z_log, tx);

		dmu_tx_commit(tx);
	}

	VN_RELE(ZTOV(zp));

	return (error);
#endif
}
int
zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type,
    const char *domain, uint64_t rid, uint64_t quota)
{
	char buf[32];
	int err;
	dmu_tx_t *tx;
	uint64_t *objp;
	boolean_t fuid_dirtied;

	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
		return (SET_ERROR(EINVAL));

	if (zsb->z_version < ZPL_VERSION_USERSPACE)
		return (SET_ERROR(ENOTSUP));

	objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj :
	    &zsb->z_groupquota_obj;

	err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE);
	if (err)
		return (err);
	fuid_dirtied = zsb->z_fuid_dirty;

	tx = dmu_tx_create(zsb->z_os);
	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
	if (*objp == 0) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    zfs_userquota_prop_prefixes[type]);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zsb, tx);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	mutex_enter(&zsb->z_lock);
	if (*objp == 0) {
		*objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA,
		    DMU_OT_NONE, 0, tx);
		VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ,
		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
	}
	mutex_exit(&zsb->z_lock);

	if (quota == 0) {
		err = zap_remove(zsb->z_os, *objp, buf, tx);
		if (err == ENOENT)
			err = 0;
	} else {
		err = zap_update(zsb->z_os, *objp, buf, 8, 1, &quota, tx);
	}
	ASSERT(err == 0);
	if (fuid_dirtied)
		zfs_fuid_sync(zsb, tx);
	dmu_tx_commit(tx);
	return (err);
}
int
spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
	int err = 0;
	dmu_tx_t *tx;
	nvlist_t *nvarg;

	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY ||
	    !spa_writeable(spa))
		return (SET_ERROR(EINVAL));

	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP));
	if (spa_history_zone() != NULL) {
		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
		    spa_history_zone());
	}
	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));

	/* Kick this off asynchronously; errors are ignored. */
	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
	    nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
	dmu_tx_commit(tx);

	/* spa_history_log_sync will free nvl */
	return (err);
}
void
spa_history_log_internal(spa_t *spa, const char *operation,
    dmu_tx_t *tx, const char *fmt, ...)
{
	dmu_tx_t *htx = tx;
	va_list adx;
	nvlist_t *nvl;

	/* create a tx if we didn't get one */
	if (tx == NULL) {
		htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
		if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
			dmu_tx_abort(htx);
			return;
		}
	}

	va_start(adx, fmt);
	VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE));
	log_internal(nvl, operation, spa, htx, fmt, adx);
	va_end(adx);

	/* if we didn't get a tx from the caller, commit the one we made */
	if (tx == NULL)
		dmu_tx_commit(htx);
}
/*
 * Ensure the zap is flushed then inform the VFS of the capacity change.
 */
static int
zvol_update_volsize(uint64_t volsize, objset_t *os)
{
	dmu_tx_t *tx;
	int error;
	uint64_t txg;

	ASSERT(MUTEX_HELD(&zvol_state_lock));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (SET_ERROR(error));
	}
	txg = dmu_tx_get_txg(tx);

	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
	dmu_tx_commit(tx);

	txg_wait_synced(dmu_objset_pool(os), txg);

	if (error == 0)
		error = dmu_free_long_range(os,
		    ZVOL_OBJ, volsize, DMU_OBJECT_END);

	return (error);
}
/*
 * Replay a TX_WRITE ZIL transaction that didn't get committed
 * after a system failure.
 */
static int
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
{
	objset_t *os = zv->zv_objset;
	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
	uint64_t off = lr->lr_offset;
	uint64_t len = lr->lr_length;
	dmu_tx_t *tx;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
	} else {
		dmu_write(os, ZVOL_OBJ, off, len, data, tx);
		dmu_tx_commit(tx);
	}

	return (SET_ERROR(error));
}
/*
 * Write out a history event.
 */
int
spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
{
	history_arg_t *ha;
	int err = 0;
	dmu_tx_t *tx;

	ASSERT(what != LOG_INTERNAL);

	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	ha->ha_history_str = strdup(history_str);
	ha->ha_zone = strdup(spa_history_zone());
	ha->ha_log_type = what;
	ha->ha_uid = crgetuid(CRED());

	/* Kick this off asynchronously; errors are ignored. */
	dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, spa_history_log_sync,
	    spa, ha, 0, tx);
	dmu_tx_commit(tx);

	/* spa_history_log_sync will free ha and strings */
	return (err);
}
/*
 * Delete the entire contents of a directory. Return a count
 * of the number of entries that could not be deleted. If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 * so there is no need to lock its entries before deletion.
 * Also, it assumes the directory contains *only* regular
 * files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zfs_dirlock_t dl;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

		/*
		 * ASSERT((ZTOV(xzp)->v_type == VREG) ||
		 *     (ZTOV(xzp)->v_type == VLNK));
		 */

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			//VN_RELE(ZTOV(xzp)); // async
			VN_RELE_ASYNC(ZTOV(xzp),
			    dsl_pool_vnrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)));
			skipped += 1;
			continue;
		}
		bzero(&dl, sizeof (dl));
		dl.dl_dzp = dzp;
		dl.dl_name = zap.za_name;

		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		//VN_RELE(ZTOV(xzp)); // async
		VN_RELE_ASYNC(ZTOV(xzp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
/*
 * TX_WRITE2 records are only generated when dmu_sync() returns EALREADY,
 * meaning the pool block is already being synced. So now that we always write
 * out full blocks, all we have to do is expand the EOF if
 * the file is grown.
 */
static int
zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_write_t *lr = arg2;
	znode_t *zp;
	int error;
	uint64_t end;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
		return (error);

top:
	end = lr->lr_offset + lr->lr_length;
	if (end > zp->z_size) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		zp->z_size = end;
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			iput(ZTOI(zp));
			if (error == ERESTART) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			return (error);
		}
		(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
		    (void *)&zp->z_size, sizeof (uint64_t), tx);

		/* Ensure the replayed seq is updated */
		(void) zil_replaying(zfsvfs->z_log, tx);

		dmu_tx_commit(tx);
	}

	iput(ZTOI(zp));

	return (error);
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;
	zfs_fuid_info_t *fuidp = NULL;

	*xvpp = NULL;

	/*
	 * In FreeBSD, access checking for creating an EA is being done
	 * in zfs_setextattr().
	 */
#ifndef __FreeBSD__
	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
		return (error);
#endif

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
		if (zfsvfs->z_fuid_obj == 0) {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		} else {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		}
	}
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}

	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xzp->z_id;
	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, fuidp, vap);
	if (fuidp)
		zfs_fuid_info_free(fuidp);
	dmu_tx_commit(tx);
	*xvpp = ZTOV(xzp);

	return (0);
}
int
zfs_sa_set_xattr(znode_t *zp)
{
	zfs_sb_t *zsb = ZTOZSB(zp);
	dmu_tx_t *tx;
	char *obj;
	size_t size;
	int error;

	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
	ASSERT(zp->z_xattr_cached);
	ASSERT(zp->z_is_sa);

	error = nvlist_size(zp->z_xattr_cached, &size, NV_ENCODE_XDR);
	if (error)
		goto out;

	obj = zio_buf_alloc(size);

	error = nvlist_pack(zp->z_xattr_cached, &obj, &size,
	    NV_ENCODE_XDR, KM_SLEEP);
	if (error)
		goto out_free;

	tx = dmu_tx_create(zsb->z_os);
	dmu_tx_hold_sa_create(tx, size);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
	} else {
		error = sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
		    obj, size, tx);
		if (error)
			dmu_tx_abort(tx);
		else
			dmu_tx_commit(tx);
	}
out_free:
	zio_buf_free(obj, size);
out:
	return (error);
}
static int
zvol_write(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	int error = 0;
	dmu_tx_t *tx;
	rl_t *rl;
	uio_t uio;

	if (bio->bi_rw & VDEV_REQ_FLUSH)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);

	/*
	 * Some requests are just for flush and nothing else.
	 */
	if (size == 0)
		goto out;

	uio.uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
	uio.uio_skip = BIO_BI_SKIP(bio);
	uio.uio_resid = size;
	uio.uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
	uio.uio_loffset = offset;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = UIO_BVEC;

	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);

	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);

	/* This will only fail for ENOSPC */
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		goto out;
	}

	error = dmu_write_uio(zv->zv_objset, ZVOL_OBJ, &uio, size, tx);
	if (error == 0)
		zvol_log_write(zv, tx, offset, size,
		    !!(bio->bi_rw & VDEV_REQ_FUA));

	dmu_tx_commit(tx);
	zfs_range_unlock(rl);

	if ((bio->bi_rw & VDEV_REQ_FUA) ||
	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);

out:
	return (error);
}
/*
 * Delete the entire contents of a directory. Return a count
 * of the number of entries that could not be deleted. If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 * so there is no need to lock its entries before deletion.
 * Also, it assumes the directory contains *only* regular
 * files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
	zfs_dirlock_t dl;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

		ASSERT(S_ISREG(ZTOI(xzp)->i_mode) ||
		    S_ISLNK(ZTOI(xzp)->i_mode));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);
		dmu_tx_mark_netfree(tx);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			zfs_iput_async(ZTOI(xzp));
			skipped += 1;
			continue;
		}
		bzero(&dl, sizeof (dl));
		dl.dl_dzp = dzp;
		dl.dl_name = zap.za_name;

		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		zfs_iput_async(ZTOI(xzp));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;

	*xvpp = NULL;

	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
		return (error);

	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
	    &acl_ids)) != 0)
		return (error);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		return (EDQUOT);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		if (error == ERESTART)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}
	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xzp->z_id;

	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	*xvpp = ZTOV(xzp);

	return (0);
}
int
zvol_set_volsize(zfs_cmd_t *zc)
{
	zvol_state_t *zv;
	dev_t dev = zc->zc_dev;
	dmu_tx_t *tx;
	int error;
	dmu_object_info_t doi;

	mutex_enter(&zvol_state_lock);

	if ((zv = zvol_minor_lookup(zc->zc_name)) == NULL) {
		mutex_exit(&zvol_state_lock);
		return (ENXIO);
	}

	if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 ||
	    (error = zvol_check_volsize(zc, doi.doi_data_block_size)) != 0) {
		mutex_exit(&zvol_state_lock);
		return (error);
	}

	if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) {
		mutex_exit(&zvol_state_lock);
		return (EROFS);
	}

	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_hold_free(tx, ZVOL_OBJ, zc->zc_volsize, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		mutex_exit(&zvol_state_lock);
		return (error);
	}

	error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1,
	    &zc->zc_volsize, tx);
	if (error == 0) {
		error = dmu_free_range(zv->zv_objset, ZVOL_OBJ,
		    zc->zc_volsize, DMU_OBJECT_END, tx);
	}

	dmu_tx_commit(tx);

	if (error == 0) {
		zv->zv_volsize = zc->zc_volsize;
		zvol_size_changed(zv, dev);
	}

	mutex_exit(&zvol_state_lock);

	return (error);
}
/*
 * Delete the entire contents of a directory. Return a count
 * of the number of entries that could not be deleted. If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 * so there is no need to lock its entries before deletion.
 * Also, it assumes the directory contains *only* regular
 * files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zfs_dirlock_t dl;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			VN_RELE(ZTOV(xzp));
			skipped += 1;
			continue;
		}
		bzero(&dl, sizeof (dl));
		dl.dl_dzp = dzp;
		dl.dl_name = zap.za_name;

		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		VN_RELE(ZTOV(xzp));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
/*
 * Common write path running under the zvol taskq context. This function
 * is responsible for copying the request structure data into the DMU and
 * signaling the request queue with the result of the copy.
 */
static void
zvol_write(void *arg)
{
	struct request *req = (struct request *)arg;
	struct request_queue *q = req->q;
	zvol_state_t *zv = q->queuedata;
	fstrans_cookie_t cookie = spl_fstrans_mark();
	uint64_t offset = blk_rq_pos(req) << 9;
	uint64_t size = blk_rq_bytes(req);
	int error = 0;
	dmu_tx_t *tx;
	rl_t *rl;

	if (req->cmd_flags & VDEV_REQ_FLUSH)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);

	/*
	 * Some requests are just for flush and nothing else.
	 */
	if (size == 0) {
		error = 0;
		goto out;
	}

	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);

	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);

	/* This will only fail for ENOSPC */
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		goto out;
	}

	error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
	if (error == 0)
		zvol_log_write(zv, tx, offset, size,
		    req->cmd_flags & VDEV_REQ_FUA);

	dmu_tx_commit(tx);
	zfs_range_unlock(rl);

	if ((req->cmd_flags & VDEV_REQ_FUA) ||
	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);

out:
	blk_end_request(req, -error, size);
	spl_fstrans_unmark(cookie);
}
int
zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
{
	int error;
	objset_t *os = zsb->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zsb->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zsb->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);
	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY(0 == sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	/*
	 * Excerpt trimmed here: the upstream function also logs the upgrade
	 * to pool history before committing.  The transaction is committed
	 * in all cases once the ZAP updates are in place.
	 */
	dmu_tx_commit(tx);

	zsb->z_version = newvers;

	return (0);
}
/*
 * Delete the entire contents of a directory. Return a count
 * of the number of entries that could not be deleted. If we encounter
 * an error, return a count of at least one so that the directory stays
 * in the unlinked set.
 *
 * NOTE: this function assumes that the directory is inactive,
 * so there is no need to lock its entries before deletion.
 * Also, it assumes the directory contains *only* regular
 * files.
 */
static int
zfs_purgedir(znode_t *dzp)
{
	zap_cursor_t zc;
	zap_attribute_t zap;
	znode_t *xzp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	int skipped = 0;
	int error;

	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
	    zap_cursor_advance(&zc)) {
		error = zfs_zget(zfsvfs,
		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
		if (error) {
			skipped += 1;
			continue;
		}

		vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
		ASSERT((ZTOV(xzp)->v_type == VREG) ||
		    (ZTOV(xzp)->v_type == VLNK));

		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
		/* Is this really needed ? */
		zfs_sa_upgrade_txholds(tx, xzp);
		dmu_tx_mark_netfree(tx);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			vput(ZTOV(xzp));
			skipped += 1;
			continue;
		}

		error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
		if (error)
			skipped += 1;
		dmu_tx_commit(tx);

		vput(ZTOV(xzp));
	}
	zap_cursor_fini(&zc);
	if (error != ENOENT)
		skipped += 1;

	return (skipped);
}
static uint64_t
zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
{
	struct dmu_tx *tx;
	uint64_t obj = 0ULL;
	uint64_t blksize = run_args->block_size;
	int rc;

	if (blksize < SPA_MINBLOCKSIZE ||
	    blksize > spa_maxblocksize(dmu_objset_spa(os)) ||
	    !ISP2(blksize)) {
		zpios_print(run_args->file,
		    "invalid block size for pool: %d\n", (int)blksize);
		return (obj);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
	rc = dmu_tx_assign(tx, TXG_WAIT);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_tx_assign() failed: %d\n", rc);
		dmu_tx_abort(tx);
		return (obj);
	}

	obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
	rc = dmu_object_set_blocksize(os, obj, blksize, 0, tx);
	if (rc) {
		zpios_print(run_args->file,
		    "dmu_object_set_blocksize to %d failed: %d\n",
		    (int)blksize, rc);
		dmu_tx_abort(tx);
		return (obj);
	}

	dmu_tx_commit(tx);

	return (obj);
}
static int
zvol_discard(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t start = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	uint64_t end = start + size;
	int error;
	rl_t *rl;
	dmu_tx_t *tx;

	ASSERT(zv && zv->zv_open_count > 0);

	if (end > zv->zv_volsize)
		return (SET_ERROR(EIO));

	/*
	 * Align the request to volume block boundaries when REQ_SECURE is
	 * available, but not requested. If we don't, then this will force
	 * dnode_free_range() to zero out the unaligned parts, which is slow
	 * (read-modify-write) and useless since we are not freeing any space
	 * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
	 * 2.6.35) will not receive this optimization.
	 */
#ifdef REQ_SECURE
	if (!(bio->bi_rw & REQ_SECURE)) {
		start = P2ROUNDUP(start, zv->zv_volblocksize);
		end = P2ALIGN(end, zv->zv_volblocksize);
		size = end - start;
	}
#endif

	if (start >= end)
		return (0);

	rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);

	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
	} else {
		zvol_log_truncate(zv, tx, start, size, B_TRUE);
		dmu_tx_commit(tx);
		error = dmu_free_long_range(zv->zv_objset,
		    ZVOL_OBJ, start, size);
	}

	zfs_range_unlock(rl);

	return (error);
}
static int
zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
    uint64_t offset, uint64_t size, const void *buf)
{
	struct dmu_tx *tx;
	int rc, how = TXG_WAIT;
//	int flags = 0;

	if (run_args->flags & DMU_WRITE_NOWAIT)
		how = TXG_NOWAIT;

	while (1) {
		tx = dmu_tx_create(os);
		dmu_tx_hold_write(tx, object, offset, size);
		rc = dmu_tx_assign(tx, how);

		if (rc) {
			if (rc == ERESTART && how == TXG_NOWAIT) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				continue;
			}
			zpios_print(run_args->file,
			    "Error in dmu_tx_assign(), %d", rc);
			dmu_tx_abort(tx);
			return (rc);
		}
		break;
	}

//	if (run_args->flags & DMU_WRITE_ZC)
//		flags |= DMU_WRITE_ZEROCOPY;

	dmu_write(os, object, offset, size, buf, tx);
	dmu_tx_commit(tx);

	return (0);
}
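/*
 * The retry loop above is the canonical way to drive dmu_tx_assign() with
 * TXG_NOWAIT: on ERESTART the caller must dmu_tx_wait(), dmu_tx_abort() the
 * old transaction, and rebuild it before trying again.  The sketch below
 * factors that pattern out of zpios_dmu_write().  It is a minimal,
 * hypothetical helper (the name zpios_tx_assign_retry is not part of the
 * sources above) and assumes only the dmu_tx_* calls already used in this
 * listing.
 */
static int
zpios_tx_assign_retry(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, int how, dmu_tx_t **txp)
{
	dmu_tx_t *tx;
	int rc;

	while (1) {
		/* A tx may not be reused after abort; rebuild it each pass. */
		tx = dmu_tx_create(os);
		dmu_tx_hold_write(tx, object, offset, size);
		rc = dmu_tx_assign(tx, how);
		if (rc == 0) {
			*txp = tx;
			return (0);
		}
		if (rc == ERESTART && how == TXG_NOWAIT) {
			/* Wait for the next txg, drop this tx, and retry. */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			continue;
		}
		/* Hard failure (e.g. ENOSPC): abort and report. */
		dmu_tx_abort(tx);
		return (rc);
	}
}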
/*
 * Ensure the zap is flushed then inform the VFS of the capacity change.
 */
static int
zvol_update_volsize(zvol_state_t *zv, uint64_t volsize, objset_t *os)
{
	struct block_device *bdev;
	dmu_tx_t *tx;
	int error;

	ASSERT(MUTEX_HELD(&zvol_state_lock));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
	dmu_tx_commit(tx);

	if (error)
		return (error);

	error = dmu_free_long_range(os,
	    ZVOL_OBJ, volsize, DMU_OBJECT_END);
	if (error)
		return (error);

	bdev = bdget_disk(zv->zv_disk, 0);
	if (!bdev)
		return (EIO);

	/*
	 * 2.6.28 API change
	 * Added check_disk_size_change() helper function.
	 */
#ifdef HAVE_CHECK_DISK_SIZE_CHANGE
	set_capacity(zv->zv_disk, volsize >> 9);
	zv->zv_volsize = volsize;
	check_disk_size_change(zv->zv_disk, bdev);
#else
	zv->zv_volsize = volsize;
	zv->zv_changed = 1;
	(void) check_disk_change(bdev);
#endif /* HAVE_CHECK_DISK_SIZE_CHANGE */

	bdput(bdev);

	return (0);
}
int
zfs_set_version(const char *name, uint64_t newvers)
{
	int error;
	objset_t *os;
	dmu_tx_t *tx;
	uint64_t curvers;

	/*
	 * XXX for now, require that the filesystem be unmounted.  Would
	 * be nice to find the zfsvfs_t and just update that if
	 * possible.
	 */

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os);
	if (error)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &curvers);
	if (error)
		goto out;
	if (newvers < curvers) {
		error = EINVAL;
		goto out;
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}
	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
	    &newvers, tx);

	spa_history_internal_log(LOG_DS_UPGRADE,
	    dmu_objset_spa(os), tx, CRED(),
	    "oldver=%llu newver=%llu dataset = %llu",
	    curvers, newvers, dmu_objset_id(os));

	dmu_tx_commit(tx);

out:
	dmu_objset_close(os);
	return (error);
}
static int
zvol_discard(struct bio *bio)
{
	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
	uint64_t start = BIO_BI_SECTOR(bio) << 9;
	uint64_t size = BIO_BI_SIZE(bio);
	uint64_t end = start + size;
	int error;
	rl_t *rl;
	dmu_tx_t *tx;

	ASSERT(zv && zv->zv_open_count > 0);

	if (end > zv->zv_volsize)
		return (SET_ERROR(EIO));

	/*
	 * Align the request to volume block boundaries when a secure erase is
	 * not required. This will prevent dnode_free_range() from zeroing out
	 * the unaligned parts which is slow (read-modify-write) and useless
	 * since we are not freeing any space by doing so.
	 */
	if (!bio_is_secure_erase(bio)) {
		start = P2ROUNDUP(start, zv->zv_volblocksize);
		end = P2ALIGN(end, zv->zv_volblocksize);
		size = end - start;
	}

	if (start >= end)
		return (0);

	rl = zfs_range_lock(&zv->zv_range_lock, start, size, RL_WRITER);

	tx = dmu_tx_create(zv->zv_objset);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
	} else {
		zvol_log_truncate(zv, tx, start, size, B_TRUE);
		dmu_tx_commit(tx);
		error = dmu_free_long_range(zv->zv_objset,
		    ZVOL_OBJ, start, size);
	}

	zfs_range_unlock(rl);

	return (error);
}
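/*
 * Worked example of the discard alignment used above, with illustrative
 * numbers only (an 8 KiB zv_volblocksize is assumed here):
 *
 *	start = P2ROUNDUP(4096, 8192)  = 8192
 *	end   = P2ALIGN(20480, 8192)   = 16384
 *	size  = end - start            = 8192
 *
 * Only the single block fully covered by the request is freed; the partial
 * head and tail are left untouched rather than being zeroed with a
 * read-modify-write.  If the request covers no whole block, start >= end
 * and the discard becomes a no-op.
 */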
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os->os))
		return (ENOTSUP);
	if (dmu_objset_is_snapshot(os))
		return (EINVAL);

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (EINTR);

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr) {
			dmu_tx_abort(tx);
			continue;
		}
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	znode_t *xzp;
	dmu_tx_t *tx;
	int error;
	zfs_fuid_info_t *fuidp = NULL;

	*xvpp = NULL;

	if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
		return (error);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	if (zfsvfs->z_fuid_obj == 0) {
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
	} else {
		dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
		dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
		    SPA_MAXBLOCKSIZE);
	}
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT)
			dmu_tx_wait(tx);
		dmu_tx_abort(tx);
		return (error);
	}

	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
	dmu_buf_will_dirty(zp->z_dbuf, tx);
	zp->z_phys->zp_xattr = xzp->z_id;
	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
	    xzp, "", NULL, fuidp, vap);
	if (fuidp)
		zfs_fuid_info_free(fuidp);
	dmu_tx_commit(tx);
	*xvpp = ZTOV(xzp);

	return (0);
}