/*
 * Pass-through message handler for the write-side stream
 *
 * Requires Lock (( M: Mandatory, P: Prohibited, A: Allowed ))
 *  -. uinst_t->lock   : M [RW_READER or RW_WRITER]
 *  -. uinst_t->u_lock : P
 *  -. uinst_t->l_lock : P
 *  -. uinst_t->c_lock : P
 */
int
oplmsu_wcmn_through_hndl(queue_t *q, mblk_t *mp, int pri_flag, krw_t rw)
{
	queue_t	*usr_queue = NULL, *dst_queue = NULL;
	ctrl_t	*ctrl;

	ASSERT(RW_LOCK_HELD(&oplmsu_uinst->lock));

	mutex_enter(&oplmsu_uinst->c_lock);
	if ((ctrl = oplmsu_uinst->user_ctrl) != NULL) {
		usr_queue = ctrl->queue;
		mutex_exit(&oplmsu_uinst->c_lock);
	} else {
		mutex_exit(&oplmsu_uinst->c_lock);
		if (mp->b_datap->db_type == M_IOCTL) {
			rw_exit(&oplmsu_uinst->lock);
			oplmsu_iocack(q, mp, ENODEV);
			rw_enter(&oplmsu_uinst->lock, rw);
		} else {
			freemsg(mp);
		}
		return (SUCCESS);
	}

	if (oplmsu_uinst->lower_queue != NULL) {
		dst_queue = WR(oplmsu_uinst->lower_queue);
	} else {
		cmn_err(CE_WARN, "!oplmsu: through-lwq: "
		    "Active path doesn't exist");

		if (mp->b_datap->db_type == M_IOCTL) {
			rw_exit(&oplmsu_uinst->lock);
			oplmsu_iocack(q, mp, ENODEV);
			rw_enter(&oplmsu_uinst->lock, rw);
		} else {
			freemsg(mp);
		}
		return (SUCCESS);
	}

	if ((usr_queue == WR(q)) || (usr_queue == RD(q))) {
		if (pri_flag == MSU_HIGH) {
			putq(dst_queue, mp);
		} else {
			if (canput(dst_queue)) {
				putq(dst_queue, mp);
			} else {
				oplmsu_wcmn_norm_putbq(WR(q), mp, dst_queue);
				return (FAILURE);
			}
		}
	} else {
		cmn_err(CE_WARN, "oplmsu: through-lwq: "
		    "Inappropriate message for this node");

		if (mp->b_datap->db_type == M_IOCTL) {
			rw_exit(&oplmsu_uinst->lock);
			oplmsu_iocack(q, mp, ENODEV);
			rw_enter(&oplmsu_uinst->lock, rw);
		} else {
			freemsg(mp);
		}
	}
	return (SUCCESS);
}
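A pattern worth noting in oplmsu_wcmn_through_hndl() is that oplmsu_uinst->lock is dropped around the blocking oplmsu_iocack() call and then re-taken in whatever mode the caller originally held (the krw_t rw argument). The following is a minimal user-space sketch of that hand-off using POSIX rwlocks; the names and the blocking callback are illustrative assumptions, not part of the driver.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t g_lock = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Stand-in for a call (like oplmsu_iocack) that may block and therefore
 * must not be made while g_lock is held.
 */
static void
blocking_reply(int err)
{
	printf("replying with error %d\n", err);
}

/*
 * Caller holds g_lock as reader or writer; 'want_write' records which,
 * mirroring the krw_t argument in the driver code.
 */
static void
reply_with_lock_dropped(int err, int want_write)
{
	pthread_rwlock_unlock(&g_lock);		/* drop before blocking */
	blocking_reply(err);
	if (want_write)				/* reacquire in the original mode */
		pthread_rwlock_wrlock(&g_lock);
	else
		pthread_rwlock_rdlock(&g_lock);
}

int
main(void)
{
	pthread_rwlock_rdlock(&g_lock);
	reply_with_lock_dropped(ENODEV, 0);
	pthread_rwlock_unlock(&g_lock);
	return (0);
}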
Example #2
File: dmu_objset.c  Project: Bingfeng/zfs
void
dmu_objset_evict(objset_t *os)
{
	int t;

	dsl_dataset_t *ds = os->os_dsl_dataset;

	for (t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!dsl_dataset_is_snapshot(ds)) {
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
			    checksum_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
			    compression_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_COPIES),
			    copies_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    dedup_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
			    logbias_changed_cb, os));
			VERIFY0(dsl_prop_unregister(ds,
			    zfs_prop_to_name(ZFS_PROP_SYNC),
			    sync_changed_cb, os));
		}
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os));
		VERIFY0(dsl_prop_unregister(ds,
		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	dmu_objset_evict_dbufs(os);

	dnode_special_close(&os->os_meta_dnode);
	if (DMU_USERUSED_DNODE(os)) {
		dnode_special_close(&os->os_userused_dnode);
		dnode_special_close(&os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));

	/*
	 * This is a barrier to prevent the objset from going away in
	 * dnode_move() until we can safely ensure that the objset is still in
	 * use. We consider the objset valid before the barrier and invalid
	 * after the barrier.
	 */
	rw_enter(&os_lock, RW_READER);
	rw_exit(&os_lock);

	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	kmem_free(os, sizeof (objset_t));
}
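The empty rw_enter()/rw_exit() pair near the end of dmu_objset_evict() is purely a barrier: taking os_lock as a reader cannot complete while dnode_move() holds it in the opposite mode, so once the pair returns no mover can still be referencing the objset. A rough user-space sketch of the same idea (names assumed, lock direction simplified):

#include <pthread.h>

static pthread_rwlock_t move_lock = PTHREAD_RWLOCK_INITIALIZER;

/* The "mover" side examines the object only while holding the lock. */
static void
mover_touch_object(void *obj)
{
	pthread_rwlock_wrlock(&move_lock);
	/* ... inspect obj, decide whether it may be relocated ... */
	(void) obj;
	pthread_rwlock_unlock(&move_lock);
}

/*
 * Called before freeing the object: an empty enter/exit acts as a barrier
 * that waits out any mover currently inside the lock.
 */
static void
wait_for_movers(void)
{
	pthread_rwlock_rdlock(&move_lock);
	pthread_rwlock_unlock(&move_lock);
	/* After this point no mover can still be looking at the object. */
}

int
main(void)
{
	int obj = 0;

	mover_touch_object(&obj);
	wait_for_movers();
	return (0);
}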
Example #3
static void
devvt_cleandir(struct vnode *dvp, struct cred *cred)
{
	struct sdev_node *sdvp = VTOSDEV(dvp);
	struct sdev_node *dv, *next = NULL;
	int min, cnt;
	char found = 0;

	mutex_enter(&vc_lock);
	cnt = VC_INSTANCES_COUNT;
	mutex_exit(&vc_lock);

/* We have to fool warlock this way, otherwise it will complain */
#ifndef	__lock_lint
	if (rw_tryupgrade(&sdvp->sdev_contents) == 0) {
		rw_exit(&sdvp->sdev_contents);
		rw_enter(&sdvp->sdev_contents, RW_WRITER);
	}
#else
	rw_enter(&sdvp->sdev_contents, RW_WRITER);
#endif

	/* 1.  prune invalid nodes and rebuild stale symlinks */
	devvt_prunedir(sdvp);

	/* 2. create missing nodes */
	for (min = 0; min < cnt; min++) {
		char nm[16];

		if (vt_minor_valid(min) == B_FALSE)
			continue;

		(void) snprintf(nm, sizeof (nm), "%d", min);
		found = 0;
		for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {
			next = SDEV_NEXT_ENTRY(sdvp, dv);

			/* validate only ready nodes */
			if (dv->sdev_state != SDEV_READY)
				continue;
			if (strcmp(nm, dv->sdev_name) == 0) {
				found = 1;
				break;
			}
		}
		if (!found) {
			devvt_create_snode(sdvp, nm, cred, SDEV_VATTR);
		}
	}

	/* 3. create active link node and console user link node */
	found = 0;
	for (dv = SDEV_FIRST_ENTRY(sdvp); dv; dv = next) {
		next = SDEV_NEXT_ENTRY(sdvp, dv);

		/* validate only ready nodes */
		if (dv->sdev_state != SDEV_READY)
			continue;
		if (strcmp(dv->sdev_name, DEVVT_ACTIVE_NAME) == 0)
			found |= 0x01;
		if (strcmp(dv->sdev_name, DEVVT_CONSUSER_NAME) == 0)
			found |= 0x02;

		if ((found & 0x01) && (found & 0x02))
			break;
	}
	if (!(found & 0x01))
		devvt_create_snode(sdvp, DEVVT_ACTIVE_NAME, cred, SDEV_VLINK);
	if (!(found & 0x02))
		devvt_create_snode(sdvp, DEVVT_CONSUSER_NAME, cred, SDEV_VLINK);

#ifndef	__lock_lint
	rw_downgrade(&sdvp->sdev_contents);
#else
	rw_exit(&sdvp->sdev_contents);
#endif
}
Example #4
void
memlist_read_lock(void)
{
	rw_enter(&memlists_lock, RW_READER);
}
Example #5
/* ARGSUSED */
int
mac_register(mac_register_t *mregp, mac_handle_t *mhp)
{
	mac_impl_t		*mip;
	mactype_t		*mtype;
	int			err = EINVAL;
	struct devnames		*dnp = NULL;
	uint_t			instance;
	boolean_t		style1_created = B_FALSE;
	boolean_t		style2_created = B_FALSE;
	char			*driver;
	minor_t			minor = 0;

	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
		return (EINVAL);

	/* Find the required MAC-Type plugin. */
	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
		return (EINVAL);

	/* Create a mac_impl_t to represent this MAC. */
	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);

	/*
	 * The mac is not ready for open yet.
	 */
	mip->mi_state_flags |= MIS_DISABLED;

	/*
	 * When a mac is registered, the m_instance field can be set to:
	 *
	 *  0:	Get the mac's instance number from m_dip.
	 *	This is usually used for physical device dips.
	 *
	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
	 *	For example, when an aggregation is created with the key option,
	 *	"key" will be used as the instance number.
	 *
	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
	 *	This is often used when a MAC of a virtual link is registered
	 *	(e.g., aggregation when "key" is not specified, or vnic).
	 *
	 * Note that the instance number is used to derive the mi_minor field
	 * of mac_impl_t, which will then be used to derive the name of kstats
	 * and the devfs nodes.  The first 2 cases are needed to preserve
	 * backward compatibility.
	 */
	switch (mregp->m_instance) {
	case 0:
		instance = ddi_get_instance(mregp->m_dip);
		break;
	case ((uint_t)-1):
		minor = mac_minor_hold(B_TRUE);
		if (minor == 0) {
			err = ENOSPC;
			goto fail;
		}
		instance = minor - 1;
		break;
	default:
		instance = mregp->m_instance;
		if (instance >= MAC_MAX_MINOR) {
			err = EINVAL;
			goto fail;
		}
		break;
	}

	mip->mi_minor = (minor_t)(instance + 1);
	mip->mi_dip = mregp->m_dip;
	mip->mi_clients_list = NULL;
	mip->mi_nclients = 0;

	/* Set the default IEEE Port VLAN Identifier */
	mip->mi_pvid = 1;

	/* Default bridge link learning protection values */
	mip->mi_llimit = 1000;
	mip->mi_ldecay = 200;

	driver = (char *)ddi_driver_name(mip->mi_dip);

	/* Construct the MAC name as <drvname><instance> */
	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
	    driver, instance);

	mip->mi_driver = mregp->m_driver;

	mip->mi_type = mtype;
	mip->mi_margin = mregp->m_margin;
	mip->mi_info.mi_media = mtype->mt_type;
	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
	if (mregp->m_max_sdu <= mregp->m_min_sdu)
		goto fail;
	if (mregp->m_multicast_sdu == 0)
		mregp->m_multicast_sdu = mregp->m_max_sdu;
	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
	    mregp->m_multicast_sdu > mregp->m_max_sdu)
		goto fail;
	mip->mi_sdu_min = mregp->m_min_sdu;
	mip->mi_sdu_max = mregp->m_max_sdu;
	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
	/*
	 * If the media supports a broadcast address, cache a pointer to it
	 * in the mac_info_t so that upper layers can use it.
	 */
	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;

	mip->mi_v12n_level = mregp->m_v12n;

	/*
	 * Copy the unicast source address into the mac_info_t, but only if
	 * the MAC-Type defines a non-zero address length.  We need to
	 * handle MAC-Types that have an address length of 0
	 * (point-to-point protocol MACs for example).
	 */
	if (mip->mi_type->mt_addr_length > 0) {
		if (mregp->m_src_addr == NULL)
			goto fail;
		mip->mi_info.mi_unicst_addr =
		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);

		/*
		 * Copy the fixed 'factory' MAC address from the immutable
		 * info.  This is taken to be the MAC address currently in
		 * use.
		 */
		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
		    mip->mi_type->mt_addr_length);

		/*
		 * At this point, we should set up the classification
		 * rules etc but we delay it till mac_open() so that
		 * the resource discovery has taken place and we
		 * know someone wants to use the device. Otherwise
		 * memory gets allocated for Rx ring structures even
		 * during probe.
		 */

		/* Copy the destination address if one is provided. */
		if (mregp->m_dst_addr != NULL) {
			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
			    mip->mi_type->mt_addr_length);
			mip->mi_dstaddr_set = B_TRUE;
		}
	} else if (mregp->m_src_addr != NULL) {
		goto fail;
	}

	/*
	 * The format of the m_pdata is specific to the plugin.  It is
	 * passed in as an argument to all of the plugin callbacks.  The
	 * driver can update this information by calling
	 * mac_pdata_update().
	 */
	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
		/*
		 * Verify if the supplied plugin data is valid.  Note that
		 * even if the caller passed in a NULL pointer as plugin data,
		 * we still need to verify if that's valid as the plugin may
		 * require plugin data to function.
		 */
		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
		    mregp->m_pdata_size)) {
			goto fail;
		}
		if (mregp->m_pdata != NULL) {
			mip->mi_pdata =
			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
			bcopy(mregp->m_pdata, mip->mi_pdata,
			    mregp->m_pdata_size);
			mip->mi_pdata_size = mregp->m_pdata_size;
		}
	} else if (mregp->m_pdata != NULL) {
		/*
		 * The caller supplied non-NULL plugin data, but the plugin
		 * does not recognize plugin data.
		 */
		err = EINVAL;
		goto fail;
	}

	/*
	 * Register the private properties.
	 */
	mac_register_priv_prop(mip, mregp->m_priv_props);

	/*
	 * Stash the driver callbacks into the mac_impl_t, but first sanity
	 * check to make sure all mandatory callbacks are set.
	 */
	if (mregp->m_callbacks->mc_getstat == NULL ||
	    mregp->m_callbacks->mc_start == NULL ||
	    mregp->m_callbacks->mc_stop == NULL ||
	    mregp->m_callbacks->mc_setpromisc == NULL ||
	    mregp->m_callbacks->mc_multicst == NULL) {
		goto fail;
	}
	mip->mi_callbacks = mregp->m_callbacks;

	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
	    &mip->mi_capab_legacy)) {
		mip->mi_state_flags |= MIS_LEGACY;
		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
	} else {
		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
		    mip->mi_minor);
	}

	/*
	 * Allocate a notification thread.  thread_create() blocks for memory
	 * if needed; it never fails.
	 */
	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
	    mip, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * Initialize the capabilities
	 */

	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));

	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
		mip->mi_state_flags |= MIS_IS_VNIC;

	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
		mip->mi_state_flags |= MIS_IS_AGGR;

	mac_addr_factory_init(mip);

	/*
	 * Enforce the registered virtualization level.
	 */
	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
			goto fail;

		/*
		 * The driver needs to register at least rx rings for this
		 * virtualization level.
		 */
		if (mip->mi_rx_groups == NULL)
			goto fail;
	}

	/*
	 * The driver must set mc_unicst entry point to NULL when it advertises
	 * CAP_RINGS for rx groups.
	 */
	if (mip->mi_rx_groups != NULL) {
		if (mregp->m_callbacks->mc_unicst != NULL)
			goto fail;
	} else {
		if (mregp->m_callbacks->mc_unicst == NULL)
			goto fail;
	}

	/*
	 * Initialize MAC addresses. Must be called after mac_init_rings().
	 */
	mac_init_macaddr(mip);

	mip->mi_share_capab.ms_snum = 0;
	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
		    &mip->mi_share_capab);
	}

	/*
	 * Initialize the kstats for this device.
	 */
	mac_driver_stat_create(mip);

	/* Zero out any properties. */
	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));

	if (mip->mi_minor <= MAC_MAX_MINOR) {
		/* Create a style-2 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
			goto fail;
		style2_created = B_TRUE;

		/* Create a style-1 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
			goto fail;
		style1_created = B_TRUE;
	}

	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	if (mod_hash_insert(i_mac_impl_hash,
	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
		rw_exit(&i_mac_impl_lock);
		err = EEXIST;
		goto fail;
	}

	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
	    (mac_impl_t *), mip);

	/*
	 * Mark the MAC to be ready for open.
	 */
	mip->mi_state_flags &= ~MIS_DISABLED;
	rw_exit(&i_mac_impl_lock);

	atomic_inc_32(&i_mac_impl_count);

	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
	*mhp = (mac_handle_t)mip;
	return (0);

fail:
	if (style1_created)
		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);

	if (style2_created)
		ddi_remove_minor_node(mip->mi_dip, driver);

	mac_addr_factory_fini(mip);

	/* Clean up registered MAC addresses */
	mac_fini_macaddr(mip);

	/* Clean up registered rings */
	mac_free_rings(mip, MAC_RING_TYPE_RX);
	mac_free_rings(mip, MAC_RING_TYPE_TX);

	/* Clean up notification thread */
	if (mip->mi_notify_thread != NULL)
		i_mac_notify_exit(mip);

	if (mip->mi_info.mi_unicst_addr != NULL) {
		kmem_free(mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_info.mi_unicst_addr = NULL;
	}

	mac_driver_stat_delete(mip);

	if (mip->mi_type != NULL) {
		atomic_dec_32(&mip->mi_type->mt_ref);
		mip->mi_type = NULL;
	}

	if (mip->mi_pdata != NULL) {
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
		mip->mi_pdata = NULL;
		mip->mi_pdata_size = 0;
	}

	if (minor != 0) {
		ASSERT(minor > MAC_MAX_MINOR);
		mac_minor_rele(minor);
	}

	mip->mi_state_flags = 0;
	mac_unregister_priv_prop(mip);

	/*
	 * Clear the state before destroying the mac_impl_t
	 */
	mip->mi_state_flags = 0;

	kmem_cache_free(i_mac_impl_cachep, mip);
	return (err);
}
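mac_register() uses the common kernel idiom of a single fail: label that unwinds only the resources created so far, tracked by flags such as style1_created/style2_created. A small self-contained illustration of that structure, with hypothetical resource names:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct widget {
	char *buf_a;
	char *buf_b;
};

/*
 * Allocate two buffers for a widget.  On any failure, jump to 'fail' and
 * release only what has already been set up -- the same shape as the
 * staged cleanup in mac_register().
 */
static int
widget_init(struct widget *w, size_t sz_a, size_t sz_b)
{
	int err;

	w->buf_a = NULL;
	w->buf_b = NULL;

	if (sz_a == 0 || sz_b == 0) {
		err = EINVAL;
		goto fail;
	}
	if ((w->buf_a = malloc(sz_a)) == NULL) {
		err = ENOMEM;
		goto fail;
	}
	if ((w->buf_b = malloc(sz_b)) == NULL) {
		err = ENOMEM;
		goto fail;
	}
	return (0);

fail:
	/* Each cleanup step only touches what was actually created. */
	free(w->buf_a);		/* free(NULL) is a no-op */
	w->buf_a = NULL;
	return (err);
}

int
main(void)
{
	struct widget w;

	if (widget_init(&w, 16, 0) != 0)
		printf("widget_init failed as expected\n");
	if (widget_init(&w, 16, 32) == 0) {
		free(w.buf_a);
		free(w.buf_b);
	}
	return (0);
}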
Example #6
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
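The validity test at the top of zfs_znode_move() relies on alignment: a live zfsvfs_t pointer has its low bits clear, while a freed slot is tagged (or filled by kmem's debug patterns) so that at least one low bit is set. A tiny stand-alone sketch of that low-bit tagging trick; the macro bodies here are illustrative, not copied from zfs_znode.h:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * A pointer is "valid" here if it is non-NULL and its two low bits are
 * clear, which is guaranteed for any naturally aligned allocation.
 */
#define	POINTER_IS_VALID(p)	((p) != NULL && (((uintptr_t)(p)) & 3) == 0)
#define	POINTER_INVALIDATE(pp)	(*(pp) = (void *)(((uintptr_t)*(pp)) | 1))

int
main(void)
{
	void *owner = malloc(64);	/* aligned, low bits clear */

	printf("before invalidate: valid=%d\n", POINTER_IS_VALID(owner));
	POINTER_INVALIDATE(&owner);	/* mark stale instead of NULLing */
	printf("after invalidate:  valid=%d\n", POINTER_IS_VALID(owner));

	/* Clear the tag bit again before actually freeing the memory. */
	free((void *)(((uintptr_t)owner) & ~(uintptr_t)1));
	return (0);
}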
Example #7
/*
 * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 * in dzp's directory zap object.  As long as you hold a dirlock, you can
 * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 * can change the zap entry for (i.e. link or unlink) this name.
 *
 * Input arguments:
 *	dzp	- znode for directory
 *	name	- name of entry to lock
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *      direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *      realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..'.
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
 */
int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
    int flag, int *direntflags, pathname_t *realpnp)
{
	zfs_sb_t	*zsb = ZTOZSB(dzp);
	zfs_dirlock_t	*dl;
	boolean_t	update;
	boolean_t	exact;
	uint64_t	zoid;
#ifdef HAVE_DNLC
	vnode_t		*vp = NULL;
#endif /* HAVE_DNLC */
	int		error = 0;
	int		cmpflags;

	*zpp = NULL;
	*dlpp = NULL;

	/*
	 * Verify that we are not trying to lock '.', '..', or '.zfs'
	 */
	if ((name[0] == '.' &&
	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
		return (SET_ERROR(EEXIST));

	/*
	 * Case sensitivity and normalization preferences are set when
	 * the file system is created.  These are stored in the
	 * zsb->z_case and zsb->z_norm fields.  These choices
	 * affect what vnodes can be cached in the DNLC, how we
	 * perform zap lookups, and the "width" of our dirlocks.
	 *
	 * A normal dirlock locks a single name.  Note that with
	 * normalization a name can be composed multiple ways, but
	 * when normalized, these names all compare equal.  A wide
	 * dirlock locks multiple names.  We need these when the file
	 * system is supporting mixed-mode access.  It is sometimes
	 * necessary to lock all case permutations of file name at
	 * once so that simultaneous case-insensitive/case-sensitive
	 * behaves as rationally as possible.
	 */

	/*
	 * Decide if exact matches should be requested when performing
	 * a zap lookup on file systems supporting case-insensitive
	 * access.
	 */
	exact =
	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));

	/*
	 * Only look in or update the DNLC if we are looking for the
	 * name on a file system that does not require normalization
	 * or case folding.  We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name.
	 *
	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
	 * case for performance improvement?
	 */
	update = !zsb->z_norm ||
	    ((zsb->z_case == ZFS_CASE_MIXED) &&
	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));

	/*
	 * ZRENAMING indicates we are in a situation where we should
	 * take narrow locks regardless of the file system's
	 * preferences for normalizing and case folding.  This will
	 * prevent us deadlocking trying to grab the same wide lock
	 * twice if the two names happen to be case-insensitive
	 * matches.
	 */
	if (flag & ZRENAMING)
		cmpflags = 0;
	else
		cmpflags = zsb->z_norm;

	/*
	 * Wait until there are no locks on this name.
	 *
	 * Don't grab the lock if it is already held. However, we cannot
	 * have both ZSHARED and ZHAVELOCK together.
	 */
	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
	if (!(flag & ZHAVELOCK))
		rw_enter(&dzp->z_name_lock, RW_READER);

	mutex_enter(&dzp->z_lock);
	for (;;) {
		if (dzp->z_unlinked) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
				break;
		}
		if (error != 0) {
			mutex_exit(&dzp->z_lock);
			if (!(flag & ZHAVELOCK))
				rw_exit(&dzp->z_name_lock);
			return (SET_ERROR(ENOENT));
		}
		if (dl == NULL)	{
			/*
			 * Allocate a new dirlock and add it to the list.
			 */
			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
			dl->dl_name = name;
			dl->dl_sharecnt = 0;
			dl->dl_namelock = 0;
			dl->dl_namesize = 0;
			dl->dl_dzp = dzp;
			dl->dl_next = dzp->z_dirlocks;
			dzp->z_dirlocks = dl;
			break;
		}
		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
			break;
		cv_wait(&dl->dl_cv, &dzp->z_lock);
	}

	/*
	 * If the z_name_lock was NOT held for this dirlock record it.
	 */
	if (flag & ZHAVELOCK)
		dl->dl_namelock = 1;

	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		/*
		 * We're the second shared reference to dl.  Make a copy of
		 * dl_name in case the first thread goes away before we do.
		 * Note that we initialize the new name before storing its
		 * pointer into dl_name, because the first thread may load
		 * dl->dl_name at any time.  He'll either see the old value,
		 * which is his, or the new shared copy; either is OK.
		 */
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
		bcopy(dl->dl_name, name, dl->dl_namesize);
		dl->dl_name = name;
	}

	mutex_exit(&dzp->z_lock);

	/*
	 * We have a dirlock on the name.  (Note that it is the dirlock,
	 * not the dzp's z_lock, that protects the name in the zap object.)
	 * See if there's an object by this name; if so, put a hold on it.
	 */
	if (flag & ZXATTR) {
		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
		    sizeof (zoid));
		if (error == 0)
			error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
	} else {
#ifdef HAVE_DNLC
		if (update)
			vp = dnlc_lookup(ZTOI(dzp), name);
		if (vp == DNLC_NO_VNODE) {
			iput(vp);
			error = SET_ERROR(ENOENT);
		} else if (vp) {
			if (flag & ZNEW) {
				zfs_dirent_unlock(dl);
				iput(vp);
				return (SET_ERROR(EEXIST));
			}
			*dlpp = dl;
			*zpp = VTOZ(vp);
			return (0);
		} else {
			error = zfs_match_find(zsb, dzp, name, exact,
			    update, direntflags, realpnp, &zoid);
		}
#else
		error = zfs_match_find(zsb, dzp, name, exact,
		    update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
	}
	if (error) {
		if (error != ENOENT || (flag & ZEXISTS)) {
			zfs_dirent_unlock(dl);
			return (error);
		}
	} else {
		if (flag & ZNEW) {
			zfs_dirent_unlock(dl);
			return (SET_ERROR(EEXIST));
		}
		error = zfs_zget(zsb, zoid, zpp);
		if (error) {
			zfs_dirent_unlock(dl);
			return (error);
		}
#ifdef HAVE_DNLC
		if (!(flag & ZXATTR) && update)
			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
#endif /* HAVE_DNLC */
	}

	*dlpp = dl;

	return (0);
}
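The core of zfs_dirent_lock() is a per-directory list of name locks protected by z_lock: search the list for the name, sleep on the lock's condition variable if someone else holds it, otherwise insert a new entry. The sketch below is a simplified user-space analogue (no ZSHARED, wide locks, or normalization), with assumed names:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct name_lock {
	struct name_lock *nl_next;
	const char *nl_name;
};

static pthread_mutex_t dir_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t dir_cv = PTHREAD_COND_INITIALIZER;
static struct name_lock *dir_locks;	/* head of the per-directory list */

/* Block until no other thread holds a lock on 'name', then record ours. */
static struct name_lock *
name_lock_enter(const char *name)
{
	struct name_lock *nl;

	pthread_mutex_lock(&dir_mutex);
	for (;;) {
		for (nl = dir_locks; nl != NULL; nl = nl->nl_next)
			if (strcmp(nl->nl_name, name) == 0)
				break;
		if (nl == NULL)
			break;			/* name is free: claim it */
		pthread_cond_wait(&dir_cv, &dir_mutex);
	}
	nl = malloc(sizeof (*nl));
	nl->nl_name = name;
	nl->nl_next = dir_locks;
	dir_locks = nl;
	pthread_mutex_unlock(&dir_mutex);
	return (nl);
}

static void
name_lock_exit(struct name_lock *done)
{
	struct name_lock **nlp;

	pthread_mutex_lock(&dir_mutex);
	for (nlp = &dir_locks; *nlp != done; nlp = &(*nlp)->nl_next)
		;
	*nlp = done->nl_next;			/* unlink our entry */
	pthread_cond_broadcast(&dir_cv);	/* wake waiters on any name */
	pthread_mutex_unlock(&dir_mutex);
	free(done);
}

int
main(void)
{
	struct name_lock *nl = name_lock_enter("foo");

	name_lock_exit(nl);
	return (0);
}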
Example #8
int
ud_dirrename(struct ud_inode *sdp, struct ud_inode *sip,
	struct ud_inode *tdp, struct ud_inode *tip, char *namep,
	uint8_t *buf, struct slot *slotp, struct cred *cr)
{
	int32_t error = 0, doingdirectory;
	struct file_id *fid;

	ud_printf("ud_dirrename\n");
	ASSERT(sdp->i_udf != NULL);
	ASSERT(MUTEX_HELD(&sdp->i_udf->udf_rename_lck));
	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
	ASSERT(buf);
	ASSERT(slotp->ep);

	fid = slotp->ep;

	/*
	 * Short circuit rename of something to itself.
	 */
	if (sip->i_icb_lbano == tip->i_icb_lbano) {
		return (ESAME);		/* special KLUDGE error code */
	}
	/*
	 * Everything is protected under the vfs_rename_lock so the ordering
	 * of i_contents locks doesn't matter here.
	 */
	rw_enter(&sip->i_contents, RW_READER);
	rw_enter(&tip->i_contents, RW_READER);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if ((ITOV(tip)->v_vfsp != ITOV(tdp)->v_vfsp) ||
	    (ITOV(tip)->v_vfsp != ITOV(sip)->v_vfsp)) {
		error = EXDEV;		/* XXX archaic */
		goto out;
	}

	/*
	 * Must have write permission to rewrite target entry.
	 */
	if ((error = ud_iaccess(tdp, IWRITE, cr)) != 0 ||
	    (error = ud_sticky_remove_access(tdp, tip, cr)) != 0)
		goto out;

	/*
	 * Ensure source and target are compatible (both directories
	 * or both not directories).  If target is a directory it must
	 * be empty and have no links to it; in addition it must not
	 * be a mount point, and both the source and target must be
	 * writable.
	 */
	doingdirectory = (sip->i_type == VDIR);
	if (tip->i_type == VDIR) {
		if (!doingdirectory) {
			error = EISDIR;
			goto out;
		}
		/*
		 * vn_vfswlock will prevent mounts from using the directory
		 * until we are done.
		 */
		if (vn_vfswlock(ITOV(tip))) {
			error = EBUSY;
			goto out;
		}
		if (vn_mountedvfs(ITOV(tip)) != NULL) {
			vn_vfsunlock(ITOV(tip));
			error = EBUSY;
			goto out;
		}
		if (!ud_dirempty(tip, tdp->i_uniqid, cr) || tip->i_nlink > 2) {
			vn_vfsunlock(ITOV(tip));
			error = EEXIST;	/* SIGH should be ENOTEMPTY */
			goto out;
		}
	} else if (doingdirectory) {
		error = ENOTDIR;
		goto out;
	}

	/*
	 * Rewrite the inode pointer for target name entry
	 * from the target inode (ip) to the source inode (sip).
	 * This prevents the target entry from disappearing
	 * during a crash. Mark the directory inode to reflect the changes.
	 */
	dnlc_remove(ITOV(tdp), namep);
	fid->fid_icb.lad_ext_prn = SWAP_16(sip->i_icb_prn);
	fid->fid_icb.lad_ext_loc = SWAP_32(sip->i_icb_block);
	dnlc_enter(ITOV(tdp), namep, ITOV(sip));

	ud_make_tag(tdp->i_udf, &fid->fid_tag, UD_FILE_ID_DESC,
			SWAP_32(fid->fid_tag.tag_loc), FID_LEN(fid));

	error = ud_write_fid(tdp, slotp, buf);

	if (error) {
		if (doingdirectory) {
			vn_vfsunlock(ITOV(tip));
		}
		goto out;
	}

	/*
	 * Upgrade to write lock on tip
	 */
	rw_exit(&tip->i_contents);
	rw_enter(&tip->i_contents, RW_WRITER);

	mutex_enter(&tdp->i_tlock);
	tdp->i_flag |= IUPD|ICHG;
	mutex_exit(&tdp->i_tlock);
	/*
	 * Decrement the link count of the target inode.
	 * Fix the ".." entry in sip to point to dp.
	 * This is done after the new entry is on the disk.
	 */
	tip->i_nlink--;
	mutex_enter(&tip->i_tlock);
	tip->i_flag |= ICHG;
	mutex_exit(&tip->i_tlock);

	if (doingdirectory) {
		/*
		 * The entry for tip no longer exists so I can unlock the
		 * vfslock.
		 */
		vn_vfsunlock(ITOV(tip));
		/*
		 * Decrement target link count once more if it was a directory.
		 */
		if (tip->i_nlink != 0) {
			cmn_err(CE_WARN,
			"ud_direnter: target directory link count != 0");
			rw_exit(&tip->i_contents);
			rw_exit(&sip->i_contents);
			return (EINVAL);
		}
		/*
		 * Renaming a directory with the parent different
		 * requires that ".." be rewritten.  The window is
		 * still there for ".." to be inconsistent, but this
		 * is unavoidable, and a lot shorter than when it was
		 * done in a user process.  We decrement the link
		 * count in the new parent as appropriate to reflect
		 * the just-removed target.  If the parent is the
		 * same, this is appropriate since the original
		 * directory is going away.  If the new parent is
		 * different, dirfixdotdot() will bump the link count
		 * back.
		 */
		tdp->i_nlink--;
		mutex_enter(&tdp->i_tlock);
		tdp->i_flag |= ICHG;
		mutex_exit(&tdp->i_tlock);
		ITIMES_NOLOCK(tdp);
		if (sdp != tdp) {
			rw_exit(&tip->i_contents);
			rw_exit(&sip->i_contents);
			error = ud_dirfixdotdot(sip, sdp, tdp);
			return (error);
		}
	}

out:
	rw_exit(&tip->i_contents);
	rw_exit(&sip->i_contents);
	return (error);
}
Example #9
/*
 * Fix the FID_PARENT entry of the child directory so that it points
 * to the new parent directory instead of the old one.  Routine
 * assumes that dp is a directory and that all the inodes are on
 * the same file system.
 */
int
ud_dirfixdotdot(struct ud_inode *dp,
	struct ud_inode *opdp, struct ud_inode *npdp)
{
	int32_t err = 0;
	struct fbuf *fbp;
	struct file_id *fid;
	uint32_t loc, dummy, tbno;

	ud_printf("ud_dirfixdotdot\n");

	ASSERT(opdp->i_type == VDIR);
	ASSERT(npdp->i_type == VDIR);

	ASSERT(RW_WRITE_HELD(&npdp->i_rwlock));

	err = fbread(ITOV(dp), (offset_t)0,
			dp->i_udf->udf_lbsize, S_WRITE, &fbp);

	if (err || dp->i_nlink == 0 ||
		dp->i_size < sizeof (struct file_id)) {
		goto bad;
	}

	if ((err = ud_ip_off2bno(dp, 0, &tbno)) != 0) {
		goto bad;
	}

	fid = (struct file_id *)fbp->fb_addr;
	if ((ud_verify_tag_and_desc(&fid->fid_tag, UD_FILE_ID_DESC,
	    tbno,
	    1, dp->i_udf->udf_lbsize) != 0) ||
	    ((fid->fid_flags & (FID_DIR | FID_PARENT)) !=
	    (FID_DIR | FID_PARENT))) {
		err = ENOTDIR;
		goto bad;
	}

	loc = ud_xlate_to_daddr(dp->i_udf,
		SWAP_16(fid->fid_icb.lad_ext_prn),
		SWAP_32(fid->fid_icb.lad_ext_loc), 1, &dummy);
	ASSERT(dummy == 1);
	if (loc == npdp->i_icb_lbano) {
		goto bad;
	}

	/*
	 * Increment the link count in the new parent inode and force it out.
	 */
	if (npdp->i_nlink == MAXLINK) {
		err = EMLINK;
		goto bad;
	}

	npdp->i_nlink++;
	mutex_enter(&npdp->i_tlock);
	npdp->i_flag |= ICHG;
	mutex_exit(&npdp->i_tlock);
	ud_iupdat(npdp, 1);

	/*
	 * Rewrite the child FID_PARENT entry and force it out.
	 */
	dnlc_remove(ITOV(dp), "..");
	fid->fid_icb.lad_ext_loc = SWAP_32(npdp->i_icb_block);
	fid->fid_icb.lad_ext_prn = SWAP_16(npdp->i_icb_prn);
	ud_make_tag(npdp->i_udf, &fid->fid_tag,
		UD_FILE_ID_DESC, tbno, FID_LEN(fid));
	dnlc_enter(ITOV(dp), "..", ITOV(npdp));

	err = ud_fbwrite(fbp, dp);
	fbp = NULL;
	if (err != 0) {
		goto bad;
	}

	/*
	 * Decrement the link count of the old parent inode and force
	 * it out.  If opdp is NULL, then this is a new directory link;
	 * it has no parent, so we need not do anything.
	 */
	if (opdp != NULL) {
		rw_enter(&opdp->i_contents, RW_WRITER);
		if (opdp->i_nlink != 0) {
			opdp->i_nlink--;
			mutex_enter(&opdp->i_tlock);
			opdp->i_flag |= ICHG;
			mutex_exit(&opdp->i_tlock);
			ud_iupdat(opdp, 1);
		}
		rw_exit(&opdp->i_contents);
	}
	return (0);

bad:
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}
	return (err);
}
Example #10
int
ud_dirlook(struct ud_inode *dip,
	char *namep, struct ud_inode **ipp, struct cred *cr, int32_t skipdnlc)
{
	struct udf_vfs *udf_vfsp;
	int32_t error = 0, namelen, adhoc_search;
	u_offset_t offset, adhoc_offset, dirsize, end;
	struct vnode *dvp, *vp;
	struct fbuf *fbp;
	struct file_id *fid;
	uint8_t *fname, dummy[3];
	int32_t id_len, doingchk;
	uint32_t old_loc;
	uint16_t old_prn;

	uint8_t *dname;
	uint8_t *buf = NULL;

	ud_printf("ud_dirlook\n");

	udf_vfsp = dip->i_udf;

restart:
	doingchk = 0;
	old_prn = 0xFFFF;
	old_loc = 0;
	dvp = ITOV(dip);
	/*
	 * Check accessibility of directory.
	 */
	if (dip->i_type != VDIR) {
		return (ENOTDIR);
	}
	if (error = ud_iaccess(dip, IEXEC, cr)) {
		return (error);
	}

	/*
	 * Null component name is synonym for directory being searched.
	 */
	if (*namep == '\0') {
		VN_HOLD(dvp);
		*ipp = dip;
		return (0);
	}
	namelen = strlen(namep);
	if ((namelen == 1) &&
		(namep[0] == '.') && (namep[1] == '\0')) {
		/* Current directory */
		VN_HOLD(dvp);
		*ipp = dip;
		dnlc_enter(dvp, namep, ITOV(*ipp));
		return (0);
	}

	if ((!skipdnlc) && (vp = dnlc_lookup(dvp, namep))) {
		/* vp is already held from dnlc_lookup */

		*ipp = VTOI(vp);
		return (0);
	}

	dname = kmem_zalloc(1024, KM_SLEEP);
	buf = kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	/*
	 * Read lock the inode we are searching.  You will notice that we
	 * didn't hold the read lock while searching the dnlc.  This means
	 * that the entry could now be in the dnlc.  This doesn't cause any
	 * problems because dnlc_enter won't add an entry if it is already
	 * there.
	 */
	rw_enter(&dip->i_rwlock, RW_READER);

	/*
	 * Take care to look at dip->i_diroff only once, as it
	 * may be changing due to other threads/cpus.
	 */

recheck:
	offset = dip->i_diroff;
	end = dirsize = dip->i_size;

	if (offset > dirsize) {
		offset = 0;
	}
	adhoc_offset = offset;
	adhoc_search = (offset == 0) ? 1 : 2;

	fbp = NULL;

	while (adhoc_search--) {
		while (offset < end) {
			error = ud_get_next_fid(dip, &fbp,
					offset, &fid, &fname, buf);
			if (error != 0) {
				break;
			}
			if ((fid->fid_flags & FID_DELETED) == 0) {
				if (fid->fid_flags & FID_PARENT) {
					id_len = 2;
					fname = dummy;
					dummy[0] = '.';
					dummy[1] = '.';
					dummy[2] = '\0';
				} else {
					if ((error = ud_uncompress(
						fid->fid_idlen, &id_len,
						fname, dname)) != 0) {
						break;
					}
					fname = (uint8_t *)dname;
					fname[id_len] = '\0';
				}
				if ((namelen == id_len) &&
					(strncmp(namep, (caddr_t)fname,
							namelen) == 0)) {
					uint32_t loc;
					uint16_t prn;


					loc = SWAP_32(fid->fid_icb.lad_ext_loc);
					prn = SWAP_16(fid->fid_icb.lad_ext_prn);
					dip->i_diroff = offset +
							FID_LEN(fid);

					if (doingchk) {
						if ((loc == old_loc) &&
							(prn == old_prn)) {
							goto checkok;
						} else {
							if (fbp != NULL) {
								fbrelse(fbp,
								S_READ);
								fbp = NULL;
							}
							VN_RELE(ITOV(*ipp));
							rw_exit(&dip->i_rwlock);
							goto restart;
						}
						/* NOTREACHED */
					}

					if (namelen == 2 &&
						fname[0] == '.' &&
						fname[1] == '.') {

						struct timespec32 omtime;

						omtime = dip->i_mtime;
						rw_exit(&dip->i_rwlock);

						error = ud_iget(dip->i_vfs, prn,
							loc, ipp, NULL, cr);

						rw_enter(&dip->i_rwlock,
							RW_READER);

						if (error) {
							goto done;
						}

						if ((omtime.tv_sec !=
							dip->i_mtime.tv_sec) ||
							(omtime.tv_nsec !=
							dip->i_mtime.tv_nsec)) {

							doingchk = 1;
							old_prn = prn;
							old_loc = loc;
							dip->i_diroff = 0;
							if (fbp != NULL) {
								fbrelse(fbp,
								S_READ);
								fbp = NULL;
							}
							goto recheck;
						}
					} else {

						error = ud_iget(dip->i_vfs, prn,
							loc, ipp, NULL, cr);
					}
checkok:
					if (error == 0) {
						dnlc_enter(dvp, namep,
							ITOV(*ipp));
					}
					goto done;
				}
			}
			offset += FID_LEN(fid);
		}
		if (fbp != NULL) {
			fbrelse(fbp, S_READ);
			fbp = NULL;
		}
		end = adhoc_offset;
		offset = 0;
	}
	error = ENOENT;
done:
	kmem_free(buf, udf_vfsp->udf_lbsize);
	kmem_free(dname, 1024);
	if (fbp != NULL) {
		fbrelse(fbp, S_READ);
	}
	rw_exit(&dip->i_rwlock);
	return (error);
}
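ud_dirlook() resumes scanning at the cached i_diroff and, if it started mid-directory, makes a second pass that wraps around to the beginning (the adhoc_search/adhoc_offset logic). The same two-pass search over a plain array, as a runnable sketch:

#include <stddef.h>
#include <stdio.h>

/*
 * Search 'n' entries for 'want', starting at the cached position '*hint'
 * and wrapping around once.  Returns the index, or -1 if not found, and
 * updates *hint so the next lookup resumes just past the hit.
 */
static int
cached_search(const int *entries, size_t n, int want, size_t *hint)
{
	size_t offset = (*hint > n) ? 0 : *hint;
	size_t adhoc_offset = offset;
	size_t end = n;
	int passes = (offset == 0) ? 1 : 2;

	while (passes--) {
		for (; offset < end; offset++) {
			if (entries[offset] == want) {
				*hint = offset + 1;
				return ((int)offset);
			}
		}
		end = adhoc_offset;	/* second pass: wrap to the start */
		offset = 0;
	}
	return (-1);
}

int
main(void)
{
	int dir[] = { 7, 3, 9, 4, 1 };
	size_t hint = 3;

	printf("found 9 at %d\n", cached_search(dir, 5, 9, &hint));
	printf("found 7 at %d\n", cached_search(dir, 5, 7, &hint));
	printf("missing 8 -> %d\n", cached_search(dir, 5, 8, &hint));
	return (0);
}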
Example #11
/* ARGSUSED2 */
int
ud_dirmakedirect(struct ud_inode *ip,
	struct ud_inode *dp, struct cred *cr)
{
	int32_t err;
	uint32_t blkno, size, parent_len, tbno;
	struct fbuf *fbp;
	struct file_id *fid;
	struct icb_ext *iext;

	ud_printf("ud_dirmakedirect\n");

	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	parent_len = sizeof (struct file_id);

	if ((ip->i_desc_type != ICB_FLAG_ONE_AD) ||
		(parent_len > ip->i_max_emb)) {
		ASSERT(ip->i_ext);
		/*
		 * Allocate space for the directory we're creating.
		 */
		if ((err = ud_alloc_space(ip->i_vfs, ip->i_icb_prn,
				0, 1, &blkno, &size, 0, 0)) != 0) {
			return (err);
		}
		/*
		 * Initialize with the size of a directory containing
		 * just the parent entry.
		 */
		ip->i_size = sizeof (struct file_id);
		ip->i_flag |= IUPD|ICHG|IATTCHG;
		iext = ip->i_ext;
		iext->ib_prn = ip->i_icb_prn;
		iext->ib_block = blkno;
		iext->ib_count = ip->i_size;
		iext->ib_offset = 0;
		ip->i_ext_used = 1;
	} else {
		ip->i_size = sizeof (struct file_id);
		ip->i_flag |= IUPD|ICHG|IATTCHG;
	}

	ITIMES_NOLOCK(ip);

	/*
	 * Update the dp link count and write out the change.
	 * This reflects the ".." entry we'll soon write.
	 */
	if (dp->i_nlink == MAXLINK) {
		return (EMLINK);
	}
	dp->i_nlink++;
	dp->i_flag |= ICHG;
	ud_iupdat(dp, 1);

	/*
	 * Initialize directory with ".."
	 * Since the parent directory is locked, we don't have to
	 * worry about anything changing when we drop the write
	 * lock on (ip).
	 */
	rw_exit(&ip->i_contents);
	if ((err = fbread(ITOV(ip), (offset_t)0,
			ip->i_udf->udf_lbsize, S_WRITE, &fbp)) != 0) {
		rw_enter(&ip->i_contents, RW_WRITER);
		return (err);
	}

	bzero(fbp->fb_addr, ip->i_udf->udf_lbsize);

	fid = (struct file_id *)fbp->fb_addr;
	fid->fid_ver = SWAP_16(1);
	fid->fid_flags = FID_DIR | FID_PARENT;
	fid->fid_icb.lad_ext_len = SWAP_32(dp->i_udf->udf_lbsize);
	fid->fid_icb.lad_ext_loc = SWAP_32(dp->i_icb_block);
	fid->fid_icb.lad_ext_prn = SWAP_16(dp->i_icb_prn);

	/*
	 * fid_idlen, fid_iulen and fid_spec are zero
	 * due to bzero above
	 */

	if ((err = ud_ip_off2bno(ip, 0, &tbno)) == 0) {
		ud_make_tag(ip->i_udf, &fid->fid_tag,
			UD_FILE_ID_DESC, tbno, FID_LEN(fid));
	}

	err = ud_fbwrite(fbp, ip);
	rw_enter(&ip->i_contents, RW_WRITER);

	return (err);
}
Example #12
int
ud_dircheckpath(int32_t blkno,
	struct ud_inode *target, struct cred *cr)
{
	int32_t err = 0;
	struct vfs *vfsp;
	struct udf_vfs *udf_vfsp;
	struct fbuf *fbp;
	struct file_id *fid;
	struct ud_inode *ip, *tip;
	uint16_t prn;
	uint32_t lbno, dummy, tbno;
	daddr_t parent_icb_loc;

	ud_printf("ud_dircheckpath\n");

	udf_vfsp = target->i_udf;
	ip = target;

	ASSERT(udf_vfsp != NULL);
	ASSERT(MUTEX_HELD(&target->i_udf->udf_rename_lck));
	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (ip->i_icb_lbano == blkno) {
		err = EINVAL;
		goto out;
	}
	if (ip->i_icb_lbano == udf_vfsp->udf_root_blkno) {
		goto out;
	}

	/*
	 * Search back through the directory tree, using the PARENT entries.
	 * Fail any attempt to move a directory into an ancestor directory.
	 */
	for (;;) {
		if ((err = fbread(ITOV(ip), 0,
			udf_vfsp->udf_lbsize, S_READ, &fbp)) != 0) {
			break;
		}

		if ((err = ud_ip_off2bno(ip, 0, &tbno)) != 0) {
			break;
		}
		fid = (struct file_id *)fbp->fb_addr;
		/* Is this a valid file identifier? */
		if (ud_verify_tag_and_desc(&fid->fid_tag,
		    UD_FILE_ID_DESC,
		    tbno,
		    1, udf_vfsp->udf_lbsize) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_DELETED) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_PARENT) == 0) {
			/*
			 * This cannot happen unless something is grossly
			 * wrong; the first entry has to be the parent.
			 */
			break;
		}
		prn = SWAP_16(fid->fid_icb.lad_ext_prn);
		lbno = SWAP_32(fid->fid_icb.lad_ext_loc);
		parent_icb_loc = ud_xlate_to_daddr(udf_vfsp,
				prn, lbno, 1, &dummy);
		ASSERT(dummy == 1);
		if (parent_icb_loc == blkno) {
			err = EINVAL;
			break;
		}
		vfsp = ip->i_vfs;
		udf_vfsp = ip->i_udf;
		if (parent_icb_loc == udf_vfsp->udf_root_blkno) {
			break;
		}
		if (fbp != NULL) {
			fbrelse(fbp, S_OTHER);
			fbp = NULL;
		}
		if (ip != target) {
			rw_exit(&ip->i_rwlock);
			VN_RELE(ITOV(ip));
		}

		/*
		 * Race to get the inode.
		 */
		if (err = ud_iget(vfsp, prn, lbno, &tip, NULL, cr)) {
			ip = NULL;
			break;
		}
		ip = tip;
		rw_enter(&ip->i_rwlock, RW_READER);
	}
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}
out:
	if (ip) {
		if (ip != target) {
			rw_exit(&ip->i_rwlock);
			VN_RELE(ITOV(ip));
		}
	}
	return (err);
}
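ud_dircheckpath() walks the parent chain of the target via the FID_PARENT entries and fails with EINVAL if it reaches the source, which is what prevents renaming a directory into its own subtree. A minimal in-memory version of that ancestry check, using explicit parent pointers instead of on-disk blocks:

#include <errno.h>
#include <stdio.h>

struct dir {
	struct dir *d_parent;	/* NULL for the root */
	const char *d_name;
};

/*
 * Walk target's ".." chain toward the root; refuse the rename if we meet
 * 'source' on the way, since that would move a directory underneath itself.
 */
static int
check_path(const struct dir *source, const struct dir *target)
{
	const struct dir *d;

	if (source == target)
		return (EINVAL);
	for (d = target; d != NULL; d = d->d_parent)
		if (d == source)
			return (EINVAL);
	return (0);
}

int
main(void)
{
	struct dir root = { NULL, "/" };
	struct dir a = { &root, "a" };
	struct dir b = { &a, "b" };

	printf("move a under b: %d (EINVAL=%d)\n", check_path(&a, &b), EINVAL);
	printf("move b under root: %d\n", check_path(&b, &root));
	return (0);
}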
Example #13
/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet. Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 * 	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}
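The compressed estimate in cpr_statefile_ok() is size = ksize * COMPRESS_PERCENT / INTEGRAL + raw_data + nback * 10 / UCOMP_RATE. A small worked example of that arithmetic; only UCOMP_RATE = 20 comes from the code above, the other constants and sizes below are assumed placeholders rather than the values from cpr.h:

#include <stdio.h>

int
main(void)
{
	const unsigned long long COMPRESS_PERCENT = 40;	/* assumed */
	const unsigned long long INTEGRAL = 100;	/* assumed */
	const unsigned long long UCOMP_RATE = 20;	/* comp. ratio * 10 */

	unsigned long long ksize = 512ULL << 20;	/* kernel pages, bytes */
	unsigned long long raw_data = 8ULL << 20;	/* cpd_t + bitmap overhead */
	unsigned long long nback = 256ULL << 20;	/* pages short of swap, bytes */

	unsigned long long size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
	    raw_data + ((nback * 10) / UCOMP_RATE);

	printf("estimated statefile size: %llu MB\n", size >> 20);
	return (0);
}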
Example #14
/*
 * Restart queuing of high-priority messages on the read-side stream
 * after a flow-control failure
 *
 * Requires Lock (( M: Mandatory, P: Prohibited, A: Allowed ))
 *  -. uinst_t->lock   : P
 *  -. uinst_t->u_lock : P
 *  -. uinst_t->l_lock : P
 *  -. uinst_t->c_lock : P
 */
void
oplmsu_rcmn_high_qenable(queue_t *q)
{
	mblk_t		*mp;
	struct iocblk	*iocp = NULL;
	lpath_t		*lpath;
	int		rval;

	rw_enter(&oplmsu_uinst->lock, RW_READER);

	for (;;) {	/* Handle high priority message */
		mutex_enter(&oplmsu_uinst->l_lock);
		lpath = (lpath_t *)q->q_ptr;
		if ((mp = lpath->first_lpri_hi) == NULL) {
			mutex_exit(&oplmsu_uinst->l_lock);
			break;
		}

		if (mp->b_next == NULL) {
			lpath->first_lpri_hi = NULL;
			lpath->last_lpri_hi = NULL;
		} else {
			lpath->first_lpri_hi = mp->b_next;
			mp->b_next->b_prev = NULL;
			mp->b_next = NULL;
		}
		mp->b_prev = NULL;
		mutex_exit(&oplmsu_uinst->l_lock);

		rval = SUCCESS;
		switch (mp->b_datap->db_type) {
		case M_IOCACK :		/* FALLTHRU */
		case M_IOCNAK :
			iocp = (struct iocblk *)mp->b_rptr;
			switch (iocp->ioc_cmd) {
			case TCSETS :		/* FALLTHRU */
			case TCSETSW :		/* FALLTHRU */
			case TCSETSF :		/* FALLTHRU */
			case TIOCMSET :		/* FALLTHRU */
			case TIOCSPPS :		/* FALLTHRU */
			case TIOCSWINSZ :	/* FALLTHRU */
			case TIOCSSOFTCAR :
				rw_exit(&oplmsu_uinst->lock);
				rval = oplmsu_lrioctl_termios(q, mp);
				rw_enter(&oplmsu_uinst->lock, RW_WRITER);
				break;

			default :
				rval = oplmsu_rcmn_through_hndl(
				    q, mp, MSU_HIGH);
				if (rval == FAILURE) {
					rw_exit(&oplmsu_uinst->lock);
					return;
				}
			}
			break;

		case M_ERROR :
			rw_exit(&oplmsu_uinst->lock);
			rval = oplmsu_lrmsg_error(q, mp);
			rw_enter(&oplmsu_uinst->lock, RW_WRITER);
			break;

		case M_FLUSH :
			oplmsu_rcmn_flush_hndl(q, mp);
			break;

		default :
			rval = oplmsu_rcmn_through_hndl(q, mp, MSU_HIGH);
			if (rval == FAILURE) {
				rw_exit(&oplmsu_uinst->lock);
				return;
			}
		}

		if (rval == FAILURE) {
			break;
		}
	}

	rw_exit(&oplmsu_uinst->lock);
	qenable(q);	/* Enable lower read queue */
}
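oplmsu_rcmn_high_qenable() repeatedly detaches the head of the first_lpri_hi/last_lpri_hi doubly linked queue under l_lock. The unlink sequence on its own, as a runnable sketch:

#include <stddef.h>
#include <stdio.h>

struct msg {
	struct msg *b_next;
	struct msg *b_prev;
	int payload;
};

/*
 * Detach the head of a doubly linked queue kept as (first, last) pointers,
 * using the same unlink steps applied to first_lpri_hi/last_lpri_hi above.
 */
static struct msg *
dequeue_head(struct msg **first, struct msg **last)
{
	struct msg *mp = *first;

	if (mp == NULL)
		return (NULL);
	if (mp->b_next == NULL) {		/* queue becomes empty */
		*first = NULL;
		*last = NULL;
	} else {
		*first = mp->b_next;
		mp->b_next->b_prev = NULL;
		mp->b_next = NULL;
	}
	mp->b_prev = NULL;
	return (mp);
}

int
main(void)
{
	struct msg a = { NULL, NULL, 1 }, b = { NULL, NULL, 2 };
	struct msg *first = &a, *last = &b;
	struct msg *mp;

	a.b_next = &b;
	b.b_prev = &a;

	while ((mp = dequeue_head(&first, &last)) != NULL)
		printf("dequeued %d\n", mp->payload);
	return (0);
}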
Example #15
/*ARGSUSED*/
static int
rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	rds_t	*rds;
	int	ret;

	if (is_system_labeled()) {
		/*
		 * RDS socket is not supported on labeled systems
		 */
		return (ESOCKTNOSUPPORT);
	}

	/* Open the transport driver if IB HW is present */
	rw_enter(&rds_transport_lock, RW_READER);
	if (rds_transport_handle == NULL) {
		rw_exit(&rds_transport_lock);
		ret = rds_open_transport_driver();
		rw_enter(&rds_transport_lock, RW_READER);

		if (ret != 0) {
			/* Transport driver failed to load */
			rw_exit(&rds_transport_lock);
			return (ret);
		}
	}
	rw_exit(&rds_transport_lock);

	if (sflag == MODOPEN) {
		return (EINVAL);
	}

	/* Reopen not supported */
	if (q->q_ptr != NULL) {
		dprint(2, ("%s: Reopen is not supported: %p", LABEL, q->q_ptr));
		return (0);
	}

	rds = rds_create(q, credp);
	if (rds == NULL) {
		dprint(2, ("%s: rds_create failed", LABEL));
		return (0);
	}

	q->q_ptr = WR(q)->q_ptr = rds;
	rds->rds_state = TS_UNBND;
	rds->rds_family = AF_INET_OFFLOAD;

	q->q_hiwat = rds_recv_hiwat;
	q->q_lowat = rds_recv_lowat;

	qprocson(q);

	WR(q)->q_hiwat = rds_xmit_hiwat;
	WR(q)->q_lowat = rds_xmit_lowat;

	/* Set the Stream head watermarks */
	(void) proto_set_rx_hiwat(q, NULL, rds_recv_hiwat);
	(void) proto_set_rx_lowat(q, NULL, rds_recv_lowat);

	return (0);
}
Example #16
int
ud_direnter(
	struct ud_inode *tdp,
	char *namep,
	enum de_op op,
	struct ud_inode *sdp,
	struct ud_inode *sip,
	struct vattr *vap,
	struct ud_inode **ipp,
	struct cred *cr,
	caller_context_t *ctp)
{
	struct udf_vfs *udf_vfsp;
	struct ud_inode *tip;
	struct slot slot;
	int32_t namlen, err;
	char *s;

	uint8_t *buf = NULL;

	ud_printf("ud_direnter\n");

	udf_vfsp = tdp->i_udf;
	/* don't allow '/' characters in pathname component */
	for (s = namep, namlen = 0; *s; s++, namlen++) {
		if (*s == '/') {
			return (EACCES);
		}
	}

	if (namlen == 0) {
		cmn_err(CE_WARN, "name length == 0 in ud_direnter");
		return (EINVAL);
	}

	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
	/*
	 * If the name is "." or ".." and this is a create, look it up
	 * and return EEXIST.  Renaming or linking TO "." or ".." is forbidden.
	 */
	if (namep[0] == '.' &&
	    (namlen == 1 || (namlen == 2 && namep[1] == '.'))) {
		if (op == DE_RENAME) {
			return (EINVAL);	/* *SIGH* should be ENOTEMPTY */
		}
		if (ipp) {
			/*
			 * ud_dirlook will acquire the i_rwlock
			 */
			rw_exit(&tdp->i_rwlock);
			if (err = ud_dirlook(tdp, namep, ipp, cr, 0)) {
				rw_enter(&tdp->i_rwlock, RW_WRITER);
				return (err);
			}
			rw_enter(&tdp->i_rwlock, RW_WRITER);
		}
		return (EEXIST);
	}

	tip = NULL;
	slot.status = NONE;
	slot.offset = 0;
	slot.size = 0;
	slot.fbp = NULL;
	slot.ep = NULL;
	slot.endoff = 0;

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.  If not, we
	 * increment the link count and force the inode to disk to make sure
	 * that it is there before any directory entry that points to it.
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		rw_enter(&sip->i_contents, RW_WRITER);
		if (sip->i_nlink == 0) {
			rw_exit(&sip->i_contents);
			return (ENOENT);
		}
		if (sip->i_nlink == MAXLINK) {
			rw_exit(&sip->i_contents);
			return (EMLINK);
		}

		sip->i_nlink++;
		mutex_enter(&sip->i_tlock);
		sip->i_flag |= ICHG;
		mutex_exit(&sip->i_tlock);
		ud_iupdat(sip, 1);
		rw_exit(&sip->i_contents);
	}
	/*
	 * If target directory has not been removed, then we can consider
	 * allowing file to be created.
	 */
	if (tdp->i_nlink == 0) {
		err = ENOENT;
		goto out2;
	}
	/*
	 * Check accessibility of directory.
	 */
	if (tdp->i_type != VDIR) {
		err = ENOTDIR;
		goto out2;
	}
	/*
	 * Execute access is required to search the directory.
	 */
	if (err = ud_iaccess(tdp, IEXEC, cr)) {
		goto out2;
	}
	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.  Also the user must have
	 * write permission in the source so as to be able to
	 * change "..".
	 */
	if (op == DE_RENAME) {
		if (sip == tdp) {
			err = EINVAL;
			goto out2;
		}
		rw_enter(&sip->i_contents, RW_READER);
		if ((sip->i_type == VDIR) && (sdp != tdp)) {
			uint32_t blkno;

			if ((err = ud_iaccess(sip, IWRITE, cr))) {
				rw_exit(&sip->i_contents);
				goto out2;
			}
			blkno = sip->i_icb_lbano;
			rw_exit(&sip->i_contents);
			if ((err = ud_dircheckpath(blkno, tdp, cr))) {
				goto out2;
			}
		} else {
			rw_exit(&sip->i_contents);
		}
	}

	/*
	 * Search for the entry. Return VN_HELD tip if found.
	 */
	buf = kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);
	rw_enter(&tdp->i_contents, RW_WRITER);
	if (err = ud_dircheckforname(tdp,
			namep, namlen, &slot, &tip, buf, cr)) {
		goto out;
	}
	if (tip) {
		switch (op) {
			case DE_CREATE :
			case DE_MKDIR :
				if (ipp) {
					*ipp = tip;
					err = EEXIST;
				} else {
					VN_RELE(ITOV(tip));
				}
				break;
			case DE_RENAME :
				err = ud_dirrename(sdp, sip, tdp, tip,
						namep, buf, &slot, cr);
				/*
				 * We used to VN_RELE() here, but this
				 * was moved down so that we could send
				 * a vnevent after the locks were dropped.
				 */
				break;
			case DE_LINK :
				/*
				 * Can't link to an existing file.
				 */
				VN_RELE(ITOV(tip));
				err = EEXIST;
				break;
		}
	} else {
		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (err = ud_iaccess(tdp, IWRITE, cr)) {
			goto out;
		}
		if ((op == DE_CREATE) || (op == DE_MKDIR)) {
			/*
			 * Make new inode and directory entry as required.
			 */
			if (err = ud_dirmakeinode(tdp, &sip, vap, op, cr))
				goto out;
		}
		if (err = ud_diraddentry(tdp, namep, op,
		    namlen, &slot, sip, sdp, cr)) {
			if ((op == DE_CREATE) || (op == DE_MKDIR)) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&sip->i_contents, RW_WRITER);
				if (sip->i_type == VDIR) {
					tdp->i_nlink--;
				}
				sip->i_nlink = 0;
				mutex_enter(&sip->i_tlock);
				sip->i_flag |= ICHG;
				mutex_exit(&sip->i_tlock);
				rw_exit(&sip->i_contents);
				VN_RELE(ITOV(sip));
				sip = NULL;
			}
		} else if (ipp) {
			*ipp = sip;
		} else if ((op == DE_CREATE) || (op == DE_MKDIR)) {
			VN_RELE(ITOV(sip));
		}
	}
out:
	if (buf != NULL) {
		kmem_free(buf, udf_vfsp->udf_lbsize);
	}
	if (slot.fbp) {
		fbrelse(slot.fbp, S_OTHER);
	}
	rw_exit(&tdp->i_contents);

	if (op == DE_RENAME) {
		/*
		 * If it's all good, send events after locks are dropped
		 * but before vnodes are released.
		 */
		if (err == 0) {
			if (tip) {
				vnevent_rename_dest(ITOV(tip), ITOV(tdp),
				    namep, ctp);
			}

			if (sdp != tdp) {
				vnevent_rename_dest_dir(ITOV(tdp), ctp);
			}
		}

		/*
		 * The following VN_RELE() was moved from the
		 * DE_RENAME case above
		 */
		if (tip) {
			VN_RELE(ITOV(tip));
		}
	}

out2:
	if (err && ((op == DE_LINK) || (op == DE_RENAME))) {
		/*
		 * Undo bumped link count.
		 */
		rw_enter(&sip->i_contents, RW_WRITER);
		sip->i_nlink--;
		rw_exit(&sip->i_contents);

		mutex_enter(&sip->i_tlock);
		sip->i_flag |= ICHG;
		mutex_exit(&sip->i_tlock);
	}
	return (err);
}
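The checks at the top of ud_direnter() reject any component containing '/', an empty name, and (for the create/link/rename cases) the special names "." and "..". The same validation as a stand-alone function, returning the matching error codes:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Validate a pathname component before a directory entry is created:
 * EACCES for embedded '/', EINVAL for an empty name, EEXIST for "."/"..".
 */
static int
check_component(const char *namep)
{
	size_t namlen = 0;
	const char *s;

	for (s = namep; *s != '\0'; s++, namlen++)
		if (*s == '/')
			return (EACCES);
	if (namlen == 0)
		return (EINVAL);
	if (namep[0] == '.' &&
	    (namlen == 1 || (namlen == 2 && namep[1] == '.')))
		return (EEXIST);
	return (0);
}

int
main(void)
{
	const char *names[] = { "file.txt", "a/b", "", ".", ".." };
	size_t i;

	for (i = 0; i < sizeof (names) / sizeof (names[0]); i++)
		printf("\"%s\" -> %d\n", names[i], check_component(names[i]));
	return (0);
}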
Example #17
void
dmu_objset_evict(objset_t *os)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	int t;

	for (t = 0; t < TXG_SIZE; t++)
		ASSERT(!dmu_objset_is_dirty(os, t));

	if (ds) {
		if (!dsl_dataset_is_snapshot(ds)) {
			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
			    checksum_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "compression",
			    compression_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "copies",
			    copies_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "dedup",
			    dedup_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
			    logbias_changed_cb, os));
			VERIFY(0 == dsl_prop_unregister(ds, "sync",
			    sync_changed_cb, os));
		}
		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
		    primary_cache_changed_cb, os));
		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
		    secondary_cache_changed_cb, os));
	}

	if (os->os_sa)
		sa_tear_down(os);

	/*
	 * We should need only a single pass over the dnode list, since
	 * nothing can be added to the list at this point.
	 */
	(void) dmu_objset_evict_dbufs(os);

	dnode_special_close(&os->os_meta_dnode);
	if (DMU_USERUSED_DNODE(os)) {
		dnode_special_close(&os->os_userused_dnode);
		dnode_special_close(&os->os_groupused_dnode);
	}
	zil_free(os->os_zil);

	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);

	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);

	/*
	 * This is a barrier to prevent the objset from going away in
	 * dnode_move() until we can safely ensure that the objset is still in
	 * use. We consider the objset valid before the barrier and invalid
	 * after the barrier.
	 */
	rw_enter(&os_lock, RW_READER);
	rw_exit(&os_lock);

	mutex_destroy(&os->os_lock);
	mutex_destroy(&os->os_obj_lock);
	mutex_destroy(&os->os_user_ptr_lock);
	kmem_free(os, sizeof (objset_t));
}
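The rw_enter()/rw_exit() pair at the end of dmu_objset_evict() is a barrier: taking and immediately dropping the lock as a reader simply waits out any writer that is mid-critical-section. Below is a small user-space sketch of the same idiom using POSIX rwlocks; the scenario and names are invented.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t obj_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Hypothetical writer that holds the lock while relocating an object. */
static void *
mover(void *arg)
{
	(void) arg;
	pthread_rwlock_wrlock(&obj_lock);
	usleep(100 * 1000);		/* pretend to move the object */
	pthread_rwlock_unlock(&obj_lock);
	return (NULL);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, mover, NULL);
	usleep(10 * 1000);		/* let the writer get in first */

	/*
	 * Barrier: acquire and release the lock as a reader. This cannot
	 * complete until the in-flight writer is done, just like the
	 * rw_enter()/rw_exit() pair above.
	 */
	pthread_rwlock_rdlock(&obj_lock);
	pthread_rwlock_unlock(&obj_lock);

	printf("any in-flight writer has drained; safe to free\n");
	pthread_join(tid, NULL);
	return (0);
}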
Example #18
/*
 * Locking i_contents in this
 * function seems to be really weird
 */
int
ud_dirremove(
	struct ud_inode *dp,
	char *namep,
	struct ud_inode *oip,
	struct vnode *cdir,
	enum dr_op op,
	struct cred *cr,
	caller_context_t *ctp)
{
	struct udf_vfs *udf_vfsp;
	int32_t namelen, err = 0;
	struct slot slot;
	struct ud_inode *ip;
	mode_t mode;
	struct file_id *fid;
	uint8_t *buf = NULL;
	uint32_t tbno;

	ud_printf("ud_dirremove\n");

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	udf_vfsp = dp->i_udf;
	namelen = (int)strlen(namep);
	if (namelen == 0) {
		cmn_err(CE_WARN, "name length == 0 in ud_dirremove");
		return (EINVAL);
	}

	/*
	 * return err when removing . and ..
	 */
	if (namep[0] == '.') {
		if (namelen == 1) {
			return (EINVAL);
		} else if (namelen == 2 && namep[1] == '.') {
			return (EEXIST);	/* SIGH should be ENOTEMPTY */
		}
	}

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

	/*
	 * Check accessibility of directory.
	 */
	if (dp->i_type != VDIR) {
		return (ENOTDIR);
	}

	ip = NULL;
	slot.status = FOUND;	/* don't need to look for empty slot */
	slot.offset = 0;
	slot.size = 0;
	slot.fbp = NULL;
	slot.ep = NULL;
	slot.endoff = 0;
	/*
	 * Execute access is required to search the directory.
	 * Access for write is interpreted as allowing
	 * deletion of files in the directory.
	 */
	if (err = ud_iaccess(dp, IEXEC|IWRITE, cr)) {
		return (err);
	}

	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	rw_enter(&dp->i_contents, RW_WRITER);

	if (err = ud_dircheckforname(dp,
			namep, namelen, &slot, &ip, buf, cr)) {
		goto out_novfs;
	}
	if (ip == NULL) {
		err = ENOENT;
		goto out_novfs;
	}
	if (oip && oip != ip) {
		err = ENOENT;
		goto out_novfs;
	}

	if ((mode = ip->i_type) == VDIR) {
		/*
		 * vn_vfswlock() prevents races between mount and rmdir.
		 */
		if (vn_vfswlock(ITOV(ip))) {
			err = EBUSY;
			goto out_novfs;
		}
		if (vn_mountedvfs(ITOV(ip)) != NULL && op != DR_RENAME) {
			err = EBUSY;
			goto out;
		}
		/*
		 * If we are removing a directory, get a lock on it.
		 * If the directory is empty, it will stay empty until
		 * we can remove it.
		 */
		rw_enter(&ip->i_rwlock, RW_READER);
	}
	/* We must be holding i_contents */
	rw_enter(&ip->i_contents, RW_READER);

	if (err = ud_sticky_remove_access(dp, ip, cr)) {
		rw_exit(&ip->i_contents);
		if (mode == VDIR) {
			rw_exit(&ip->i_rwlock);
		}
		goto out;
	}
	if (op == DR_RMDIR) {
		/*
		 * For rmdir(2), some special checks are required.
		 * (a) Don't remove any alias of the parent (e.g. ".").
		 * (b) Don't remove the current directory.
		 * (c) Make sure the entry is (still) a directory.
		 * (d) Make sure the directory is empty.
		 */

		if (dp == ip || ITOV(ip) == cdir) {
			err = EINVAL;
		} else if (ip->i_type != VDIR) {
			err = ENOTDIR;
		} else if ((ip->i_nlink != 1) ||
			(!ud_dirempty(ip, dp->i_uniqid, cr))) {
			/*
			 * Directories do not have an
			 * entry for "." so only one link
			 * will be there
			 */
			err = EEXIST;	/* SIGH should be ENOTEMPTY */
		}
		if (err) {
			rw_exit(&ip->i_contents);
			if (mode == VDIR) {
				rw_exit(&ip->i_rwlock);
			}
			goto out;
		}
	} else if (op == DR_REMOVE)  {
		/*
		 * unlink(2) requires a different check: allow only
		 * privileged processes to unlink a directory.
		 */
		struct vnode *vp = ITOV(ip);

		if (vp->v_type == VDIR &&
		    secpolicy_fs_linkdir(cr, vp->v_vfsp)) {
			err = EPERM;
			rw_exit(&ip->i_contents);
			rw_exit(&ip->i_rwlock);
			goto out;
		}
	}
	rw_exit(&ip->i_contents);

	/*
	 * Remove the cache'd entry, if any.
	 */
	dnlc_remove(ITOV(dp), namep);

	/*
	 * We could collapse all the deleted directory entries into one
	 * big entry here, but it is better to defer that until the next
	 * directory entry is created, where it can be done more
	 * efficiently.
	 */
	fid = slot.ep;

	/*
	 * If this is the last entry
	 * just truncate the file instead
	 * of marking it deleted
	 */
	if ((slot.offset + FID_LEN(fid)) == dp->i_size) {
		fbrelse(slot.fbp, S_OTHER);
		if ((err = ud_itrunc(dp, slot.offset, 0, cr)) != 0) {
			goto out;
		}
	} else {
		fid->fid_flags |= FID_DELETED;

		if ((err = ud_ip_off2bno(dp, slot.offset, &tbno)) != 0) {
			goto out;
		}

		ud_make_tag(dp->i_udf, &fid->fid_tag,
			UD_FILE_ID_DESC, tbno, FID_LEN(fid));

		err = ud_write_fid(dp, &slot, buf);
	}

	slot.fbp = NULL;

	/*
	 * If we were removing a directory, it is 'gone' now so we can
	 * unlock it.
	 */
	if (mode == VDIR) {
		rw_exit(&ip->i_rwlock);
	}

	mutex_enter(&dp->i_tlock);
	dp->i_flag |= IUPD|ICHG;
	mutex_exit(&dp->i_tlock);
	mutex_enter(&ip->i_tlock);
	ip->i_flag |= ICHG;
	mutex_exit(&ip->i_tlock);

	if (err != 0) {
		goto out;
	}

	rw_enter(&ip->i_contents, RW_WRITER);

	/*
	 * Now dispose of the inode.
	 */
	if (ip->i_nlink > 0) {
		if ((op == DR_RMDIR) && (ip->i_type == VDIR)) {
			/*
			 * Decrement by 1 because there is no "."
			 * Clear the inode, but there may be other hard
			 * links so don't free the inode.
			 * Decrement the dp linkcount because we're
			 * trashing the ".." entry.
			 */
			ip->i_nlink--;
			dp->i_nlink--;
			dnlc_remove(ITOV(ip), ".");
			dnlc_remove(ITOV(ip), "..");
/*
 *			(void) ud_itrunc(ip, 0, 0, cr);
 */
		} else {
			ip->i_nlink--;
		}
	}
	ITIMES_NOLOCK(dp);
	ITIMES_NOLOCK(ip);
	rw_exit(&ip->i_contents);
out:
	if (mode == VDIR) {
		vn_vfsunlock(ITOV(ip));
	}
out_novfs:
	ASSERT(RW_WRITE_HELD(&dp->i_contents));

	if (slot.fbp != NULL) {
		fbrelse(slot.fbp, S_OTHER);
	}
	rw_exit(&dp->i_contents);

	if (ip) {
		/*
		 * If no errors, send any events after locks are dropped,
		 * but before the VN_RELE().
		 */
		if (err == 0) {
			if (op == DR_REMOVE) {
				vnevent_remove(ITOV(ip), ITOV(dp), namep, ctp);
			} else if (op == DR_RMDIR) {
				vnevent_rmdir(ITOV(ip), ITOV(dp), namep, ctp);
			}
		}
		VN_RELE(ITOV(ip));
	}

	kmem_free(buf, udf_vfsp->udf_lbsize);
	return (err);
}
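The removal above either truncates the directory (when the victim is the last entry, i.e. slot.offset + FID_LEN(fid) equals dp->i_size) or merely flags the entry FID_DELETED, leaving compaction for later. Here is a stand-alone sketch of that decision; the structure below is an invented stand-in for the UDF file-identifier layout, not the real one.

#include <stdint.h>
#include <stdio.h>

struct fake_fid {
	uint16_t	len;		/* total length of this entry, bytes */
	int		deleted;	/* stands in for FID_DELETED */
};

/*
 * Returns 1 if the directory was truncated, 0 if the entry was only
 * marked deleted in place.
 */
static int
remove_entry(struct fake_fid *fid, uint64_t slot_offset, uint64_t *dir_size)
{
	if (slot_offset + fid->len == *dir_size) {
		/* Last entry: shrink the directory instead of leaving a hole. */
		*dir_size = slot_offset;
		return (1);
	}
	/* Interior entry: leave a tombstone to be compacted later. */
	fid->deleted = 1;
	return (0);
}

int
main(void)
{
	struct fake_fid a = { 40, 0 }, b = { 40, 0 };
	uint64_t dir_size = 80;

	printf("interior: truncated=%d\n", remove_entry(&a, 0, &dir_size));
	printf("last:     truncated=%d, new size=%llu\n",
	    remove_entry(&b, 40, &dir_size), (unsigned long long)dir_size);
	return (0);
}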
Example #19
/*
 * Notify registered targets except 'self' about register value change
 */
static void
s1394_cmp_notify_reg_change(s1394_hal_t *hal, t1394_cmp_reg_t reg,
    s1394_target_t *self)
{
	s1394_target_t	*target;
	s1394_fa_target_t *fat;
	uint_t		saved_gen;
	int		num_retries = 0;
	void		(*cb)(opaque_t, t1394_cmp_reg_t);
	opaque_t	arg;

	TNF_PROBE_0_DEBUG(s1394_cmp_notify_reg_change_enter,
	    S1394_TNF_SL_CMP_STACK, "");

	rw_enter(&hal->target_list_rwlock, RW_READER);

start:
	target = hal->hal_fa[S1394_FA_TYPE_CMP].fal_head;

	for (; target; target = fat->fat_next) {
		fat = &target->target_fa[S1394_FA_TYPE_CMP];

		/*
		 * even if the target list changes when the lock is dropped,
		 * comparing with self is safe because the target should
		 * not unregister until all CMP operations are completed
		 */
		if (target == self) {
			continue;
		}

		cb = fat->fat_u.cmp.cm_evts.cmp_reg_change;
		if (cb == NULL) {
			continue;
		}
		arg = fat->fat_u.cmp.cm_evts.cmp_arg;

		saved_gen = s1394_fa_list_gen(hal, S1394_FA_TYPE_CMP);

		rw_exit(&hal->target_list_rwlock);
		cb(arg, reg);
		rw_enter(&hal->target_list_rwlock, RW_READER);

		/*
		 * The list could have changed while the lock was dropped.
		 * In that case, start all over again, because missing a
		 * register change can have more serious consequences for a
		 * target than receiving the same notification more than once.
		 */
		if (saved_gen != s1394_fa_list_gen(hal, S1394_FA_TYPE_CMP)) {
			TNF_PROBE_2(s1394_cmp_notify_reg_change_error,
			    S1394_TNF_SL_CMP_ERROR, "",
			    tnf_string, msg, "list gen changed",
			    tnf_opaque, num_retries, num_retries);
			if (++num_retries <= s1394_cmp_notify_retry_cnt) {
				goto start;
			} else {
				break;
			}
		}
	}

	rw_exit(&hal->target_list_rwlock);

	TNF_PROBE_0_DEBUG(s1394_cmp_notify_reg_change_exit,
	    S1394_TNF_SL_CMP_STACK, "");
}
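The loop above drops the target-list lock around each callback and relies on a list generation counter to notice concurrent changes, restarting (a bounded number of times) rather than risk skipping a target. Below is a user-space sketch of that generation-check-and-retry pattern; the names and the injected list change are made up.

#include <stdio.h>

#define	MAX_RETRIES	3

/* Hypothetical generation counter bumped whenever the list changes. */
static unsigned int list_generation;

/*
 * Stand-in for one callback invocation with the lock dropped; pretend
 * some other thread modifies the list during the very first call.
 */
static void
run_callback(int idx)
{
	static int injected;

	printf("notify target %d\n", idx);
	if (!injected) {
		injected = 1;
		list_generation++;
	}
}

int
main(void)
{
	int retries = 0;
	int i;

restart:
	for (i = 0; i < 3; i++) {
		unsigned int saved = list_generation;

		run_callback(i);		/* lock would be dropped here */
		if (saved != list_generation) {
			/* List changed while unlocked: start over, up to a budget. */
			if (++retries <= MAX_RETRIES)
				goto restart;
			break;
		}
	}
	printf("finished after %d restart(s)\n", retries);
	return (0);
}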
Example #20
/*
 * sync out AVL trees to persistent storage.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	nvlist_t *nvp;
	nvlist_t **fuids;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	fuid_domain_t *domnode;
	int numnodes;
	int i;

	if (!zfsvfs->z_fuid_dirty) {
		return;
	}

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * First, see if the table needs to be created.
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
	    domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, numnodes) == 0);
	for (i = 0; i != numnodes; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, numnodes * sizeof (void *));
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
}
Example #21
void
memsegs_lock(int writer)
{
	rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
}
Example #22
/*
 * Find all 'allow' permissions from a given point and then continue
 * traversing up to the root.
 *
 * This function constructs an nvlist of nvlists: each setpoint is an
 * nvlist of whokeys, and each whokey maps to an nvlist of the
 * individual user/group/everyone/create permissions.
 *
 * The nvlist will look like this.
 *
 * { source fsname -> { whokeys { permissions,...}, ...}}
 *
 * The fsname nvpairs will be arranged in a bottom up order.  For example,
 * if we have the following structure a/b/c then the nvpairs for the fsnames
 * will be ordered a/b/c, a/b, a.
 */
int
dsl_deleg_get(const char *ddname, nvlist_t **nvp)
{
	dsl_dir_t *dd, *startdd;
	dsl_pool_t *dp;
	int error;
	objset_t *mos;

	error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
	if (error)
		return (error);

	dp = startdd->dd_pool;
	mos = dp->dp_meta_objset;

	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
		zap_cursor_t basezc;
		zap_attribute_t baseza;
		nvlist_t *sp_nvp;
		uint64_t n;
		char source[MAXNAMELEN];

		if (dd->dd_phys->dd_deleg_zapobj &&
		    (zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
		    &n) == 0) && n) {
			VERIFY(nvlist_alloc(&sp_nvp,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		} else {
			continue;
		}

		for (zap_cursor_init(&basezc, mos,
		    dd->dd_phys->dd_deleg_zapobj);
		    zap_cursor_retrieve(&basezc, &baseza) == 0;
		    zap_cursor_advance(&basezc)) {
			zap_cursor_t zc;
			zap_attribute_t za;
			nvlist_t *perms_nvp;

			ASSERT(baseza.za_integer_length == 8);
			ASSERT(baseza.za_num_integers == 1);

			VERIFY(nvlist_alloc(&perms_nvp,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			for (zap_cursor_init(&zc, mos, baseza.za_first_integer);
			    zap_cursor_retrieve(&zc, &za) == 0;
			    zap_cursor_advance(&zc)) {
				VERIFY(nvlist_add_boolean(perms_nvp,
				    za.za_name) == 0);
			}
			zap_cursor_fini(&zc);
			VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name,
			    perms_nvp) == 0);
			nvlist_free(perms_nvp);
		}

		zap_cursor_fini(&basezc);

		dsl_dir_name(dd, source);
		VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
		nvlist_free(sp_nvp);
	}
	rw_exit(&dp->dp_config_rwlock);

	dsl_dir_close(startdd, FTAG);
	return (0);
}
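As the block comment notes, the fsname nvpairs come out bottom-up because the loop starts at the requested directory and follows dd_parent toward the root. A tiny sketch of that traversal order using an invented parent-linked node type:

#include <stdio.h>

struct dir_node {
	const char	*name;		/* full dataset name */
	struct dir_node	*parent;
};

int
main(void)
{
	struct dir_node a = { "a", NULL };
	struct dir_node ab = { "a/b", &a };
	struct dir_node abc = { "a/b/c", &ab };
	struct dir_node *dd;

	/*
	 * Walking from the starting directory toward the root yields the
	 * bottom-up order described above: a/b/c, a/b, a.
	 */
	for (dd = &abc; dd != NULL; dd = dd->parent)
		printf("%s\n", dd->name);
	return (0);
}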
Example #23
void
memlist_write_lock(void)
{
	rw_enter(&memlists_lock, RW_WRITER);
}
Example #24
/*
 * Check if user has requested permission.
 */
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	void *cookie;
	int	error;
	char	checkflag;
	objset_t *mos;
	avl_tree_t permsets;
	perm_set_t *setnode;

	error = dsl_dataset_hold(dsname, FTAG, &ds);
	if (error)
		return (error);

	dp = ds->ds_dir->dd_pool;
	mos = dp->dp_meta_objset;

	if (dsl_delegation_on(mos) == B_FALSE) {
		dsl_dataset_rele(ds, FTAG);
		return (ECANCELED);
	}

	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS) {
		dsl_dataset_rele(ds, FTAG);
		return (EPERM);
	}

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * Snapshots are treated as descendants only;
		 * local permissions do not apply.
		 */
		checkflag = ZFS_DELEG_DESCENDENT;
	} else {
		checkflag = ZFS_DELEG_LOCAL;
	}

	avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
	    offsetof(perm_set_t, p_node));

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
	    checkflag = ZFS_DELEG_DESCENDENT) {
		uint64_t zapobj;
		boolean_t expanded;

		/*
		 * If not in global zone then make sure
		 * the zoned property is set
		 */
		if (!INGLOBALZONE(curproc)) {
			uint64_t zoned;

			if (dsl_prop_get_dd(dd,
			    zfs_prop_to_name(ZFS_PROP_ZONED),
			    8, 1, &zoned, NULL, B_FALSE) != 0)
				break;
			if (!zoned)
				break;
		}
		zapobj = dd->dd_phys->dd_deleg_zapobj;

		if (zapobj == 0)
			continue;

		dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr);
again:
		expanded = B_FALSE;
		for (setnode = avl_first(&permsets); setnode;
		    setnode = AVL_NEXT(&permsets, setnode)) {
			if (setnode->p_matched == B_TRUE)
				continue;

			/* See if this set directly grants this permission */
			error = dsl_check_access(mos, zapobj,
			    ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm);
			if (error == 0)
				goto success;
			if (error == EPERM)
				setnode->p_matched = B_TRUE;

			/* See if this set includes other sets */
			error = dsl_load_sets(mos, zapobj,
			    ZFS_DELEG_NAMED_SET_SETS, 0,
			    setnode->p_setname, &permsets);
			if (error == 0)
				setnode->p_matched = expanded = B_TRUE;
		}
		/*
		 * If we expanded any sets, that will define more sets,
		 * which we need to check.
		 */
		if (expanded)
			goto again;

		error = dsl_check_user_access(mos, zapobj, perm, checkflag, cr);
		if (error == 0)
			goto success;
	}
	error = EPERM;
success:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dataset_rele(ds, FTAG);

	cookie = NULL;
	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
		kmem_free(setnode, sizeof (perm_set_t));

	return (error);
}
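The again: loop above keeps rescanning the collected permission sets until no set expands into further sets, i.e. it runs set expansion to a fixpoint before the final direct user/group check. Here is a compact sketch of that fixpoint idea over a hard-coded "set includes set" table; everything in it is invented for illustration.

#include <stdio.h>

#define	MAX_SETS	8

/* Hypothetical inclusion edges: expanding set i also pulls in set incl[i]. */
static int incl[MAX_SETS] = { 1, 2, -1, -1, -1, -1, -1, -1 };

int
main(void)
{
	int present[MAX_SETS] = { 0 };
	int matched[MAX_SETS] = { 0 };
	int expanded, i;

	present[0] = 1;			/* the set named directly for this user */

again:
	expanded = 0;
	for (i = 0; i < MAX_SETS; i++) {
		if (!present[i] || matched[i])
			continue;
		matched[i] = 1;
		if (incl[i] >= 0 && !present[incl[i]]) {
			present[incl[i]] = 1;	/* this set includes another set */
			expanded = 1;
		}
	}
	/* Anything newly pulled in may include further sets; rescan. */
	if (expanded)
		goto again;

	for (i = 0; i < MAX_SETS; i++)
		if (present[i])
			printf("set %d checked\n", i);
	return (0);
}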
Example #25
/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_ring_t		*mr = (mac_ring_t *)mrh;
	mac_soft_ring_set_t 	*mac_srs;
	mblk_t			*bp = mp_chain;
	boolean_t		hw_classified = B_FALSE;

	/*
	 * If there are any promiscuous mode callbacks defined for
	 * this MAC, pass them a copy if appropriate.
	 */
	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp_chain, NULL);

	if (mr != NULL) {
		/*
		 * If the SRS teardown has started, just return. The 'mr'
		 * continues to be valid until the driver unregisters the mac.
		 * Hardware classified packets will not make their way up
		 * beyond this point once the teardown has started. The driver
		 * is never passed a pointer to a flow entry or SRS or any
		 * structure that can be freed much before mac_unregister.
		 */
		mutex_enter(&mr->mr_lock);
		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
			mutex_exit(&mr->mr_lock);
			freemsgchain(mp_chain);
			return;
		}
		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
			hw_classified = B_TRUE;
			MR_REFHOLD_LOCKED(mr);
		}
		mutex_exit(&mr->mr_lock);

		/*
		 * We check if an SRS is controlling this ring.
		 * If so, we can directly call the srs_lower_proc
		 * routine otherwise we need to go through mac_rx_classify
		 * to reach the right place.
		 */
		if (hw_classified) {
			mac_srs = mr->mr_srs;
			/*
			 * This is supposed to be the fast path.
			 * All packets received through here were steered by
			 * the hardware classifier, and share the same
			 * MAC header info.
			 */
			mac_srs->srs_rx.sr_lower_proc(mh,
			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
			MR_REFRELE(mr);
			return;
		}
		/* We'll fall through to software classification */
	} else {
		flow_entry_t *flent;
		int err;

		rw_enter(&mip->mi_rw_lock, RW_READER);
		if (mip->mi_single_active_client != NULL) {
			flent = mip->mi_single_active_client->mci_flent_list;
			FLOW_TRY_REFHOLD(flent, err);
			rw_exit(&mip->mi_rw_lock);
			if (err == 0) {
				(flent->fe_cb_fn)(flent->fe_cb_arg1,
				    flent->fe_cb_arg2, mp_chain, B_FALSE);
				FLOW_REFRELE(flent);
				return;
			}
		} else {
			rw_exit(&mip->mi_rw_lock);
		}
	}

	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
			return;
	}

	freemsgchain(bp);
}
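The single-active-client fast path above shows a common shape: take the table lock as a reader, try to grab a reference on the flow entry, drop the lock, and only then run the (possibly slow) callback, releasing the reference afterwards. Below is a user-space sketch of that hold-then-unlock-then-call pattern; the flow structure and helpers are invented, not the real FLOW_TRY_REFHOLD/FLOW_REFRELE macros.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct flow {
	atomic_int	refs;
	int		condemned;	/* set when the flow is being torn down */
};

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Take a reference while the table lock is held; fail if condemned. */
static int
flow_try_refhold(struct flow *f)
{
	if (f->condemned)
		return (-1);
	atomic_fetch_add(&f->refs, 1);
	return (0);
}

static void
deliver(struct flow *f)
{
	printf("delivering chain, refs=%d\n", atomic_load(&f->refs));
}

int
main(void)
{
	struct flow f = { 1, 0 };

	pthread_rwlock_rdlock(&table_lock);
	if (flow_try_refhold(&f) == 0) {
		/* Drop the lock before the callback, as mac_rx_common() does. */
		pthread_rwlock_unlock(&table_lock);
		deliver(&f);
		atomic_fetch_sub(&f.refs, 1);	/* release the hold */
	} else {
		pthread_rwlock_unlock(&table_lock);
	}
	return (0);
}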
Example #26
File: zpl_xattr.c  Project: vpsfreecz/zfs
static int
zpl_xattr_set(struct inode *ip, const char *name, const void *value,
    size_t size, int flags)
{
	znode_t *zp = ITOZ(ip);
	zfs_sb_t *zsb = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int where;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	rrm_enter_read(&(zsb)->z_teardown_lock, FTAG);
	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);

	/*
	 * Before setting the xattr check to see if it already exists.
	 * This is done to ensure the following optional flags are honored.
	 *
	 *   XATTR_CREATE: fail if xattr already exists
	 *   XATTR_REPLACE: fail if xattr does not exist
	 *
	 * We also want to know if it resides in sa or dir, so we can make
	 * sure we don't end up with duplicate in both places.
	 */
	error = __zpl_xattr_where(ip, name, &where, cr);
	if (error < 0) {
		if (error != -ENODATA)
			goto out;
		if (flags & XATTR_REPLACE)
			goto out;

		/* The xattr to be removed does not exist; nothing to do. */
		error = 0;
		if (value == NULL)
			goto out;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto out;
	}

	/* Preferentially store the xattr as a SA for better performance */
	if (zsb->z_use_sa && zp->z_is_sa &&
	    (zsb->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir.
			 */
			if (where & XATTR_IN_DIR)
				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
			goto out;
		}
	}

	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
	/*
	 * Successfully put into dir, we need to clear the one in SA.
	 */
	if (error == 0 && (where & XATTR_IN_SA))
		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
out:
	rw_exit(&ITOZ(ip)->z_xattr_lock);
	rrm_exit(&(zsb)->z_teardown_lock, FTAG);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}
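The existence pre-check in zpl_xattr_set() is what gives XATTR_CREATE and XATTR_REPLACE their documented behaviour. A stand-alone sketch of just that decision table follows, assuming the Linux <sys/xattr.h> flag constants; the 'exists'/'removing' inputs stand in for the __zpl_xattr_where() lookup and a NULL value.

#include <errno.h>
#include <stdio.h>
#include <sys/xattr.h>		/* XATTR_CREATE, XATTR_REPLACE */

/*
 * Returns a negative errno if the request must fail, 0 if it is a
 * harmless no-op, and 1 if the set/remove should proceed.
 */
static int
check_set(int exists, int removing, int flags)
{
	if (!exists) {
		if (flags & XATTR_REPLACE)
			return (-ENODATA);	/* nothing to replace */
		if (removing)
			return (0);		/* removing a missing xattr */
	} else if (flags & XATTR_CREATE) {
		return (-EEXIST);		/* refuse to overwrite */
	}
	return (1);
}

int
main(void)
{
	printf("create over existing: %d\n", check_set(1, 0, XATTR_CREATE));
	printf("replace missing:      %d\n", check_set(0, 0, XATTR_REPLACE));
	printf("plain set, missing:   %d\n", check_set(0, 0, 0));
	return (0);
}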
/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (i.e. online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);

#ifdef __FreeBSD__
		/*
		 * Some znodes might not be fully reclaimed, wait for them.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
			    "zteardown", 0);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
#endif
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}
Example #28
int
rxi_GetIFInfo()
{
    int i = 0;
    int different = 0;
#ifndef AFS_SUN510_ENV
    ill_t *ill;
    ipif_t *ipif;
#endif
    int rxmtu, maxmtu;
    int mtus[ADDRSPERSITE];
    afs_uint32 addrs[ADDRSPERSITE];
    afs_uint32 ifinaddr;

    memset(mtus, 0, sizeof(mtus));
    memset(addrs, 0, sizeof(addrs));

#ifdef AFS_SUN510_ENV
    (void) rw_enter(&afsifinfo_lock, RW_READER);

    for (i = 0; (i < ADDRSPERSITE) && (afsifinfo[i].ipaddr != 0); i++) {

             /* Ignore addresses which are down.. */
            if (!(afsifinfo[i].flags & IFF_UP))
                continue;

            /* Compute the Rx interface MTU */
	    rxmtu = (afsifinfo[i].mtu - RX_IPUDP_SIZE);

	    ifinaddr = afsifinfo[i].ipaddr;
	    if (myNetAddrs[i] != ifinaddr)
		different++;

	    /* Copy interface MTU and address; adjust maxmtu */
	    mtus[i] = rxmtu;
	    rxmtu = rxi_AdjustIfMTU(rxmtu);
	    maxmtu = rxmtu * rxi_nRecvFrags +
	        ((rxi_nRecvFrags - 1) * UDP_HDR_SIZE);
	    maxmtu = rxi_AdjustMaxMTU(rxmtu, maxmtu);
	    addrs[i] = ifinaddr;

	    if (!rx_IsLoopbackAddr(ifinaddr) && maxmtu > rx_maxReceiveSize) {
		rx_maxReceiveSize = MIN(RX_MAX_PACKET_SIZE, maxmtu);
		rx_maxReceiveSize =
		    MIN(rx_maxReceiveSize, rx_maxReceiveSizeUser);
	    }
            
    }
    
    (void) rw_exit(&afsifinfo_lock);

    rx_maxJumboRecvSize =
	RX_HEADER_SIZE + rxi_nDgramPackets * RX_JUMBOBUFFERSIZE +
	(rxi_nDgramPackets - 1) * RX_JUMBOHEADERSIZE;
    rx_maxJumboRecvSize = MAX(rx_maxJumboRecvSize, rx_maxReceiveSize);

    if (different) {
	int j;

	for (j = 0; j < i; j++) {
	    myNetMTUs[j] = mtus[j];
	    myNetAddrs[j] = addrs[j];
	}
    }

    return different;
}
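The per-interface arithmetic above strips the IP/UDP overhead from the interface MTU and then scales by the number of receive fragments plus per-fragment header overhead. A small sketch of that calculation follows, with the overheads passed in as parameters, since the real RX_IPUDP_SIZE and UDP_HDR_SIZE values come from Rx headers not shown here (and the rxi_AdjustIfMTU/rxi_AdjustMaxMTU clamps are omitted).

#include <stdio.h>

/*
 * Largest datagram we could accept for an interface MTU, a fragment
 * count, and the per-packet/per-fragment header overheads.
 */
static int
max_receive(int if_mtu, int nfrags, int ipudp_size, int udp_hdr)
{
	int rxmtu = if_mtu - ipudp_size;	/* payload per packet */

	return (rxmtu * nfrags + (nfrags - 1) * udp_hdr);
}

int
main(void)
{
	/* Illustrative numbers only: 1500-byte MTU, 4 fragments, 28-byte overheads. */
	printf("maxmtu = %d\n", max_receive(1500, 4, 28, 28));
	return (0);
}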
Example #29
/*
 * bufcall() and timeout() callback entry for read/write stream
 *
 * Requires Lock (( M: Mandatory, P: Prohibited, A: Allowed ))
 *  -. uinst_t->lock   : P
 *  -. uinst_t->u_lock : P
 *  -. uinst_t->l_lock : P
 *  -. uinst_t->c_lock : P
 */
void
oplmsu_cmn_bufcb(void *arg)
{
	struct buf_tbl	*buftbl = arg;
	lpath_t		*lpath;
	ctrl_t		*ctrl;
	queue_t		*q;
	int		lq_flag = 0;

	rw_enter(&oplmsu_uinst->lock, RW_WRITER);
	mutex_enter(&oplmsu_uinst->l_lock);

	lpath = oplmsu_uinst->first_lpath;
	while (lpath) {
		if ((buftbl == lpath->rbuftbl) &&
		    (buftbl->rw_flag == MSU_READ_SIDE)) {
			if ((lpath->rbuf_id == 0) && (lpath->rtout_id == 0)) {
				mutex_exit(&oplmsu_uinst->l_lock);
				rw_exit(&oplmsu_uinst->lock);
			} else {
				q = lpath->rbuftbl->q;
				lpath->rbuftbl->q = NULL;
				lpath->rbuftbl->rw_flag = UNDEFINED;

				if (lpath->rbuf_id) {
					lpath->rbuf_id = 0;
				} else {
					lpath->rtout_id = 0;
				}
				mutex_exit(&oplmsu_uinst->l_lock);

				if (oplmsu_queue_flag == 1) {
					lq_flag = 1;
					oplmsu_queue_flag = 0;
				}

				rw_exit(&oplmsu_uinst->lock);
				oplmsu_rcmn_high_qenable(q);

				if (lq_flag == 1) {
					rw_enter(&oplmsu_uinst->lock,
					    RW_WRITER);
					oplmsu_queue_flag = 1;
					rw_exit(&oplmsu_uinst->lock);
				}
			}
			return;
		}
		lpath = lpath->l_next;
	}
	mutex_exit(&oplmsu_uinst->l_lock);

	mutex_enter(&oplmsu_uinst->c_lock);
	if ((ctrl = oplmsu_uinst->user_ctrl) != NULL) {
		if ((buftbl == ctrl->wbuftbl) &&
		    (buftbl->rw_flag == MSU_WRITE_SIDE)) {
			oplmsu_wbufcb_posthndl(ctrl);
			mutex_exit(&oplmsu_uinst->c_lock);
			rw_exit(&oplmsu_uinst->lock);
			return;
		}
	}

	if ((ctrl = oplmsu_uinst->meta_ctrl) != NULL) {
		if ((buftbl == ctrl->wbuftbl) &&
		    (buftbl->rw_flag == MSU_WRITE_SIDE)) {
			oplmsu_wbufcb_posthndl(ctrl);
			mutex_exit(&oplmsu_uinst->c_lock);
			rw_exit(&oplmsu_uinst->lock);
			return;
		}
	}
	mutex_exit(&oplmsu_uinst->c_lock);
	rw_exit(&oplmsu_uinst->lock);
}
Example #30
/*
 * srpt_ch_srp_cmd()
 */
static int
srpt_ch_srp_cmd(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_cmd_req_t		*cmd = (srp_cmd_req_t *)iu->iu_buf;
	srp_indirect_desc_t	*i_desc;
	uint_t			i_di_cnt;
	uint_t			i_do_cnt;
	uint8_t			do_fmt;
	uint8_t			di_fmt;
	uint32_t		*cur_desc_off;
	int			i;
	ibt_status_t		status;
	uint8_t			addlen;


	DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, cmd);
	iu->iu_ch  = ch;
	iu->iu_tag = cmd->cr_tag;

	/*
	 * The SRP specification and SAM require support for bi-directional
	 * data transfer, so we create a single buffer descriptor list in the
	 * IU buffer that covers both the data-in and data-out buffers.
	 * In practice we will just see unidirectional transfers with either
	 * data-in or data out descriptors.  If we were to take that as fact,
	 * we could reduce overhead slightly.
	 */

	/*
	 * additional length is a 6-bit number in 4-byte words, so multiply by 4
	 * to get bytes.
	 */
	addlen = cmd->cr_add_cdb_len & 0x3f;	/* mask off 6 bits */

	cur_desc_off = (uint32_t *)(void *)&cmd->cr_add_data;
	cur_desc_off  += addlen;		/* 32-bit arithmetic */
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = (srp_direct_desc_t *)(void *)cur_desc_off;

	/*
	 * Examine buffer description for Data In (i.e. data flows
	 * to the initiator).
	 */
	i_do_cnt = i_di_cnt = 0;
	di_fmt = cmd->cr_buf_fmt >> 4;
	if (di_fmt == SRP_DATA_DESC_DIRECT) {
		iu->iu_num_rdescs = 1;
		cur_desc_off = (uint32_t *)(void *)&iu->iu_rdescs[1];
	} else if (di_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)iu->iu_rdescs;
		i_di_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if (i_di_cnt > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_di_cnt(%d), cr_dicnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_di_cnt, cmd->cr_dicnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], iu->iu_rdescs,
		    sizeof (srp_direct_desc_t) * i_di_cnt);
		iu->iu_num_rdescs += i_di_cnt;
		cur_desc_off = (uint32_t *)(void *)&i_desc->id_desc[i_di_cnt];
	}

	/*
	 * Examine buffer description for Data Out (i.e. data flows
	 * from the initiator).
	 */
	do_fmt = cmd->cr_buf_fmt & 0x0F;
	if (do_fmt == SRP_DATA_DESC_DIRECT) {
		if (di_fmt == SRP_DATA_DESC_DIRECT) {
			bcopy(cur_desc_off, &iu->iu_rdescs[iu->iu_num_rdescs],
			    sizeof (srp_direct_desc_t));
		}
		iu->iu_num_rdescs++;
	} else if (do_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)cur_desc_off;
		i_do_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if ((i_di_cnt + i_do_cnt) > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_do_cnt(%d), cr_docnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_do_cnt, cmd->cr_docnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], &iu->iu_rdescs[iu->iu_num_rdescs],
		    sizeof (srp_direct_desc_t) * i_do_cnt);
		iu->iu_num_rdescs += i_do_cnt;
	}

	iu->iu_tot_xfer_len = 0;
	for (i = 0; i < iu->iu_num_rdescs; i++) {
		iu->iu_rdescs[i].dd_vaddr = b2h64(iu->iu_rdescs[i].dd_vaddr);
		iu->iu_rdescs[i].dd_hdl   = b2h32(iu->iu_rdescs[i].dd_hdl);
		iu->iu_rdescs[i].dd_len   = b2h32(iu->iu_rdescs[i].dd_len);
		iu->iu_tot_xfer_len += iu->iu_rdescs[i].dd_len;
	}

#ifdef DEBUG
	if (srpt_errlevel >= SRPT_LOG_L4) {
		SRPT_DPRINTF_L4("ch_srp_cmd, iu->iu_tot_xfer_len (%d)",
		    iu->iu_tot_xfer_len);
		for (i = 0; i < iu->iu_num_rdescs; i++) {
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_vaddr"
			    " (0x%08llx)",
			    i, (u_longlong_t)iu->iu_rdescs[i].dd_vaddr);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_hdl"
			    " (0x%08x)", i, iu->iu_rdescs[i].dd_hdl);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_len (%d)",
			    i, iu->iu_rdescs[i].dd_len);
		}
		SRPT_DPRINTF_L4("ch_srp_cmd, LUN (0x%08lx)",
		    (unsigned long int) *((uint64_t *)(void *) cmd->cr_lun));
	}
#endif
	rw_enter(&ch->ch_rwlock, RW_READER);

	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until STMF
	 * is notified the task is done (from a lport perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, cmd->cr_lun,
	    SRP_CDB_SIZE + (addlen * 4), 0);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate, return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_cmd, SCSI task allocation failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_cmd, error(%d) posting error"
			    " response", status);
			return (1);
		} else {
			return (0);
		}
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;

	if (di_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_WRITE_DATA;
	}
	if (do_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_READ_DATA;
	}

	switch (cmd->cr_task_attr) {
	case SRP_TSK_ATTR_QTYPE_SIMPLE:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_SIMPLE_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_HEAD_OF_Q:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_HEAD_OF_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ORDERED:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ACA_Q_TAG:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ACA;
		break;

	default:
		SRPT_DPRINTF_L2("ch_srp_cmd, reserved task attr (%d)",
		    cmd->cr_task_attr);
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;
	}
	iu->iu_stmf_task->task_additional_flags = 0;
	iu->iu_stmf_task->task_priority		= 0;
	iu->iu_stmf_task->task_mgmt_function    = TM_NONE;
	iu->iu_stmf_task->task_max_nbufs	= STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = iu->iu_tot_xfer_len;
	iu->iu_stmf_task->task_csn_size		= 0;

	bcopy(cmd->cr_cdb, iu->iu_stmf_task->task_cdb,
	    SRP_CDB_SIZE);
	if (addlen != 0) {
		bcopy(&cmd->cr_add_data,
		    iu->iu_stmf_task->task_cdb + SRP_CDB_SIZE,
		    addlen * 4);
	}

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	DTRACE_SRP_3(scsi__command, srpt_channel_t, iu->iu_ch,
	    scsi_task_t, iu->iu_stmf_task, srp_cmd_req_t, cmd);
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_cmd, new task (%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}
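The additional-CDB handling at the top of srpt_ch_srp_cmd() relies on the rule stated in its comment: cr_add_cdb_len is a 6-bit count of 4-byte words, so masking with 0x3f gives words, multiplying by 4 gives bytes, and adding the count to a uint32_t pointer advances exactly that many words. A tiny sketch of the arithmetic; the buffer layout and field name are simplified stand-ins for srp_cmd_req_t.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Pretend IU buffer: additional CDB words followed by descriptors. */
	uint32_t add_data[8] = { 0 };
	uint8_t cr_add_cdb_len = 0xC3;		/* upper bits reserved */
	uint8_t addlen;
	uint32_t *cur_desc_off;

	addlen = cr_add_cdb_len & 0x3f;		/* 6-bit count of 4-byte words */
	cur_desc_off = add_data;
	cur_desc_off += addlen;			/* 32-bit word arithmetic */

	printf("additional CDB: %u words (%u bytes)\n",
	    (unsigned int)addlen, (unsigned int)(addlen * 4));
	printf("descriptors start %td bytes into the buffer\n",
	    (char *)cur_desc_off - (char *)add_data);
	return (0);
}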