Example #1
/* ARGSUSED */
static int
setcond(caddr_t data)
{
	int auditstate;
	au_kcontext_t *kctx;

	if (!(audit_policy & AUDIT_PERZONE) && (!INGLOBALZONE(curproc)))
		return (EINVAL);

	kctx = GET_KCTX_NGZ;

	if (copyin(data, &auditstate, sizeof (int)))
		return (EFAULT);

	switch (auditstate) {
	case AUC_AUDITING:		/* Turn auditing on */
		if (audit_active == C2AUDIT_UNLOADED)
			audit_init_module();
		kctx->auk_auditstate = AUC_AUDITING;
		if (!(audit_policy & AUDIT_PERZONE) && INGLOBALZONE(curproc))
			set_all_zone_usr_proc_sys(ALL_ZONES);
		else
			set_all_zone_usr_proc_sys(curproc->p_zone->zone_id);
		break;

	case AUC_NOAUDIT:		/* Turn auditing off */
		if (kctx->auk_auditstate == AUC_NOAUDIT)
			break;
		kctx->auk_auditstate = AUC_NOAUDIT;

		/* clear out the audit queue */

		mutex_enter(&(kctx->auk_queue.lock));
		if (kctx->auk_queue.wt_block)
			cv_broadcast(&(kctx->auk_queue.write_cv));

		/* unblock au_output_thread */
		cv_broadcast(&(kctx->auk_queue.read_cv));

		mutex_exit(&(kctx->auk_queue.lock));
		break;

	default:
		return (EINVAL);
	}

	return (0);
}
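Note: the kernel setcond() above is the backend of the auditon(2) system call (the A_SETCOND command). As a rough user-space illustration, a privileged program can query the matching state with A_GETCOND; this is a minimal sketch, assuming the BSM headers and an enabled audit facility:

#include <sys/types.h>
#include <bsm/audit.h>
#include <stdio.h>

int
main(void)
{
	int cond;

	/* Query the current audit condition; the kernel getcond()/setcond() back this. */
	if (auditon(A_GETCOND, (caddr_t)&cond, sizeof (cond)) == -1) {
		perror("auditon(A_GETCOND)");
		return (1);
	}
	printf("audit condition: %s\n",
	    cond == AUC_AUDITING ? "auditing" : "not auditing");
	return (0);
}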
Example #2
/*
 * Called from start_init_common(), to set init's core file path and content.
 */
void
init_core(void)
{
	struct core_globals *cg;

	/*
	 * The first time we hit this, in the global zone, we have to
	 * initialize the zsd key.
	 */
	if (INGLOBALZONE(curproc)) {
		zone_key_create(&core_zone_key, core_init_zone, NULL,
		    core_free_zone);
	}

	/*
	 * zone_key_create will have called core_init_zone for the
	 * global zone, which sets up the default path and content
	 * variables.
	 */
	VERIFY((cg = zone_getspecific(core_zone_key, curproc->p_zone)) != NULL);

	corectl_path_hold(cg->core_default_path);
	corectl_content_hold(cg->core_default_content);

	curproc->p_corefile = cg->core_default_path;
	curproc->p_content = cg->core_default_content;
}
Example #3
/*
 * Transfer specified CPUs between processor sets.
 */
int
pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids)
{
	struct cpu *cpu;
	int ret = 0;
	int id;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	if (size == 0 || size > max_ncpus)	/* quick sanity check */
		return (EINVAL);

	mutex_enter(&cpu_lock);
	for (id = 0; id < size; id++) {
		if ((cpu = cpu_get((processorid_t)ids[id])) == NULL ||
		    cpupart_query_cpu(cpu) != src) {
			ret = EINVAL;
			break;
		}
		if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0)
			break;
	}
	mutex_exit(&cpu_lock);
	if (ret == 0)
		pool_pset_mod = gethrtime();
	return (ret);
}
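Note: pool_pset_xtransfer() is the pools-backed path for moving CPUs between processor sets. For comparison, when the pools facility is not enabled, a privileged user-space program can perform a similar move through pset_create(3C)/pset_assign(3C); the sketch below is illustrative only and the CPU id is hypothetical:

#include <sys/pset.h>
#include <sys/processor.h>
#include <stdio.h>

int
main(void)
{
	psetid_t pset, opset;
	processorid_t cpu = 1;		/* hypothetical CPU id */

	if (pset_create(&pset) != 0) {
		perror("pset_create");
		return (1);
	}
	/* Move the CPU into the new set; its previous set is returned in opset. */
	if (pset_assign(pset, cpu, &opset) != 0) {
		perror("pset_assign");
		(void) pset_destroy(pset);
		return (1);
	}
	printf("cpu %d moved from pset %d to pset %d\n",
	    (int)cpu, (int)opset, (int)pset);
	(void) pset_destroy(pset);
	return (0);
}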
Example #4
/*
 * Enable processor set plugin.
 */
int
pool_pset_enable(void)
{
	int error;
	nvlist_t *props;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));
	/*
	 * Can't enable pools if there are existing cpu partitions.
	 */
	mutex_enter(&cpu_lock);
	if (cp_numparts > 1) {
		mutex_exit(&cpu_lock);
		return (EEXIST);
	}

	/*
	 * We want to switch things such that everything that was tagged with
	 * the special ALL_ZONES token now is explicitly visible to all zones:
	 * first add individual zones to the visibility list then remove the
	 * special "ALL_ZONES" token.  There must only be the default pset
	 * (PS_NONE) active if pools are being enabled, so we only need to
	 * deal with it.
	 *
	 * We want to make pool_pset_enabled() start returning B_TRUE before
	 * we call any of the visibility update functions.
	 */
	global_zone->zone_psetid = PS_NONE;
	/*
	 * We need to explicitly handle the global zone since
	 * zone_pset_set() won't modify it.
	 */
	pool_pset_visibility_add(PS_NONE, global_zone);
	/*
	 * A NULL argument means the ALL_ZONES token.
	 */
	pool_pset_visibility_remove(PS_NONE, NULL);
	error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE);
	ASSERT(error == 0);

	/*
	 * It is safe to drop cpu_lock here.  We're still
	 * holding pool_lock so no new cpu partitions can
	 * be created while we're here.
	 */
	mutex_exit(&cpu_lock);
	(void) nvlist_alloc(&pool_pset_default->pset_props,
	    NV_UNIQUE_NAME, KM_SLEEP);
	props = pool_pset_default->pset_props;
	(void) nvlist_add_string(props, "pset.name", "pset_default");
	(void) nvlist_add_string(props, "pset.comment", "");
	(void) nvlist_add_int64(props, "pset.sys_id", PS_NONE);
	(void) nvlist_add_string(props, "pset.units", "population");
	(void) nvlist_add_byte(props, "pset.default", 1);
	(void) nvlist_add_uint64(props, "pset.max", 65536);
	(void) nvlist_add_uint64(props, "pset.min", 1);
	pool_pset_mod = pool_cpu_mod = gethrtime();
	return (0);
}
Example #5
/* ARGSUSED */
static int
pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	processorid_t cpuid = id;
	struct setup_arg sarg;
	int error;
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(INGLOBALZONE(curproc));

	if (!pool_pset_enabled())
		return (0);
	if (what != CPU_CONFIG && what != CPU_UNCONFIG &&
	    what != CPU_ON && what != CPU_OFF &&
	    what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT)
		return (0);
	c = cpu_get(cpuid);
	ASSERT(c != NULL);
	sarg.psetid = cpupart_query_cpu(c);
	sarg.cpu = c;
	sarg.what = what;

	error = zone_walk(pool_pset_setup_cb, &sarg);
	ASSERT(error == 0);
	return (0);
}
Example #6
/*
 * Sigh.  Inside a local zone, we don't have access to /etc/zfs/zpool.cache,
 * and we don't want to allow the local zone to see all the pools anyway.
 * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
 * information for all pools visible within the zone.
 */
nvlist_t *
spa_all_configs(uint64_t *generation)
{
	nvlist_t *pools;
	spa_t *spa = NULL;

	if (*generation == spa_config_generation)
		return (NULL);

	pools = fnvlist_alloc();

	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		if (INGLOBALZONE(curproc) ||
		    zone_dataset_visible(spa_name(spa), NULL)) {
			mutex_enter(&spa->spa_props_lock);
			fnvlist_add_nvlist(pools, spa_name(spa),
			    spa->spa_config);
			mutex_exit(&spa->spa_props_lock);
		}
	}
	*generation = spa_config_generation;
	mutex_exit(&spa_namespace_lock);

	return (pools);
}
Example #7
/*
 * Sigh.  Inside a local zone, we don't have access to /etc/zfs/zpool.cache,
 * and we don't want to allow the local zone to see all the pools anyway.
 * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
 * information for all pools visible within the zone.
 */
nvlist_t *
spa_all_configs(uint64_t *generation)
{
	nvlist_t *pools;
	spa_t *spa;

	if (*generation == spa_config_generation)
		return (NULL);

	VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		if (INGLOBALZONE(curproc) ||
		    zone_dataset_visible(spa_name(spa), NULL)) {
			mutex_enter(&spa->spa_config_cache_lock);
			VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
			    spa->spa_config) == 0);
			mutex_exit(&spa->spa_config_cache_lock);
		}
	}
	mutex_exit(&spa_namespace_lock);

	*generation = spa_config_generation;

	return (pools);
}
Example #8
static char *
spa_history_zone(void)
{
#ifdef _KERNEL
	if (INGLOBALZONE(curproc))
		return (NULL);
	return (curproc->p_zone->zone_name);
#else
	return (NULL);
#endif
}
Example #9
File: task.c  Project: bahamas10/openzfs
task_t *
task_hold_by_id(taskid_t id)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (task_hold_by_id_zone(id, zoneid));
}
Example #10
/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
			return (NULL);
	return (cp);
}
Example #11
/*
 * Set the global and local policy flags
 *
 * The global flags only make sense from the global zone;
 * the local flags depend on the AUDIT_PERZONE policy:
 * if the perzone policy is set, then policy is set separately
 * per zone, else held only in the global zone.
 *
 * The initial value of a local zone's policy flag is determined
 * by the value of the global zone's flags at the time the
 * local zone is created.
 *
 * While auditconfig(1M) allows setting and unsetting policies one bit
 * at a time, the mask passed in from auditconfig() is created by a
 * syscall to getpolicy and then modified based on the auditconfig()
 * cmd line, so the input policy value is used to replace the existing
 * policy.
 */
static int
setpolicy(caddr_t data)
{
	uint32_t	policy;
	au_kcontext_t	*kctx;

	if (copyin(data, &policy, sizeof (policy)))
		return (EFAULT);

	kctx = GET_KCTX_NGZ;

	if (INGLOBALZONE(curproc)) {
		if (policy & ~(AUDIT_GLOBAL | AUDIT_LOCAL))
			return (EINVAL);

		audit_policy = policy & AUDIT_GLOBAL;
	} else {
		if (!(audit_policy & AUDIT_PERZONE))
			return (EINVAL);

		if (policy & ~AUDIT_LOCAL)	/* global bits are a no-no */
			return (EINVAL);
	}
	kctx->auk_policy = policy & AUDIT_LOCAL;

	/*
	 * auk_current_vp is NULL before auditd starts (or during early
	 * auditd startup) or if auditd is halted; in either case,
	 * notification of a policy change is not needed, since auditd
	 * reads policy as it comes up.  The error return from au_doormsg()
	 * is ignored to avoid a race condition -- for example if auditd
	 * segv's, the audit state may be "auditing" but the door may
	 * be closed.  Returning an error if the door is open makes it
	 * impossible for Greenline to restart auditd.
	 */
	if (kctx->auk_current_vp != NULL)
		(void) au_doormsg(kctx, AU_DBUF_POLICY, &policy);

	/*
	 * Wake up anyone who might have blocked on full audit
	 * partitions. audit daemons need to set AUDIT_FULL when no
	 * space so we can tell if we should start dropping records.
	 */
	mutex_enter(&(kctx->auk_queue.lock));

	if ((policy & (AUDIT_CNT | AUDIT_SCNT) &&
	    (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater)))
		cv_broadcast(&(kctx->auk_queue.write_cv));

	mutex_exit(&(kctx->auk_queue.lock));

	return (0);
}
Example #12
/*
 * Disable processor set plugin.
 */
int
pool_pset_disable(void)
{
	processorid_t cpuid;
	cpu_t *cpu;
	int error;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&cpu_lock);
	if (cp_numparts > 1) {	/* make sure only default pset is left */
		mutex_exit(&cpu_lock);
		return (EBUSY);
	}
	/*
	 * Remove all non-system CPU and processor set properties
	 */
	for (cpuid = 0; cpuid < NCPU; cpuid++) {
		if ((cpu = cpu_get(cpuid)) == NULL)
			continue;
		if (cpu->cpu_props != NULL) {
			(void) nvlist_free(cpu->cpu_props);
			cpu->cpu_props = NULL;
		}
	}

	/*
	 * We want to switch things such that everything is now visible
	 * to ALL_ZONES: first add the special "ALL_ZONES" token to the
	 * visibility list then remove individual zones.  There must
	 * only be the default pset active if pools are being disabled,
	 * so we only need to deal with it.
	 */
	error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL);
	ASSERT(error == 0);
	pool_pset_visibility_add(PS_NONE, NULL);
	pool_pset_visibility_remove(PS_NONE, global_zone);
	/*
	 * pool_pset_enabled() will henceforth return B_FALSE.
	 */
	global_zone->zone_psetid = ZONE_PS_INVAL;
	mutex_exit(&cpu_lock);
	if (pool_pset_default->pset_props != NULL) {
		nvlist_free(pool_pset_default->pset_props);
		pool_pset_default->pset_props = NULL;
	}
	return (0);
}
Example #13
static int
setkmask(caddr_t data)
{
	au_mask_t	mask;
	au_kcontext_t	*kctx;

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);

	kctx = GET_KCTX_NGZ;

	if (copyin(data, &mask, sizeof (au_mask_t)))
		return (EFAULT);

	kctx->auk_info.ai_namask = mask;
	return (0);
}
Example #14
void
clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
    struct cred *cred)
{
	struct cku_private *p = htop(h);
	rdma_registry_t *rp;

	ASSERT(INGLOBALZONE(curproc));
	/*
	 * Find underlying RDMATF plugin
	 */
	p->cku_rd_mod = NULL;
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
	while (rp != NULL) {
		if (strcmp(rp->r_mod->rdma_api, proto))
			rp = rp->r_next;
		else {
			p->cku_rd_mod = rp->r_mod;
			p->cku_rd_handle = handle;
			break;
		}

	}
	rw_exit(&rdma_lock);

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
	p->cku_xid = 0;

	if (p->cku_addr.maxlen < raddr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
		p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = raddr->maxlen;
	}

	p->cku_srcaddr.len = 0;

	p->cku_addr.len = raddr->len;
	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
	h->cl_ops = &rdma_clnt_ops;
}
Example #15
/*
 * Put new CPU property.
 * Handle special case of "cpu.status".
 */
int
pool_cpu_propput(processorid_t cpuid, nvpair_t *pair)
{
	int ret = 0;
	cpu_t *cpu;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	if (nvpair_type(pair) == DATA_TYPE_STRING &&
	    strcmp(nvpair_name(pair), "cpu.status") == 0) {
		char *val;
		int status;
		int old_status;
		(void) nvpair_value_string(pair, &val);
		if (strcmp(val, PS_OFFLINE) == 0)
			status = P_OFFLINE;
		else if (strcmp(val, PS_ONLINE) == 0)
			status = P_ONLINE;
		else if (strcmp(val, PS_NOINTR) == 0)
			status = P_NOINTR;
		else if (strcmp(val, PS_FAULTED) == 0)
			status = P_FAULTED;
		else if (strcmp(val, PS_SPARE) == 0)
			status = P_SPARE;
		else
			return (EINVAL);
		ret = p_online_internal(cpuid, status, &old_status);
	} else {
		mutex_enter(&cpu_lock);
		if ((cpu = cpu_get(cpuid)) == NULL) {
			mutex_exit(&cpu_lock);
			return (EINVAL);
		}
		if (cpu->cpu_props == NULL) {
			(void) nvlist_alloc(&cpu->cpu_props,
			    NV_UNIQUE_NAME, KM_SLEEP);
			(void) nvlist_add_string(cpu->cpu_props,
			    "cpu.comment", "");
		}
		ret = pool_propput_common(cpu->cpu_props, pair, pool_cpu_props);
		if (ret == 0)
			pool_cpu_mod = gethrtime();
		mutex_exit(&cpu_lock);
	}
	return (ret);
}
Example #16
static int
setclass(caddr_t data)
{
	au_evclass_map_t event;
	au_kcontext_t	*kctx;

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);

	kctx = GET_KCTX_NGZ;

	if (copyin(data, &event, sizeof (au_evclass_map_t)))
		return (EFAULT);

	if (event.ec_number > MAX_KEVENTS)
		return (EINVAL);

	kctx->auk_ets[event.ec_number] = event.ec_class;

	return (0);
}
Example #17
static int
setstat(caddr_t data)
{
	au_kcontext_t *kctx = GET_KCTX_PZ;
	au_stat_t au_stat;

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);

	if (copyin(data, &au_stat, sizeof (au_stat_t)))
		return (EFAULT);

	if (au_stat.as_generated == CLEAR_VAL)
		kctx->auk_statistics.as_generated = 0;
	if (au_stat.as_nonattrib == CLEAR_VAL)
		kctx->auk_statistics.as_nonattrib = 0;
	if (au_stat.as_kernel == CLEAR_VAL)
		kctx->auk_statistics.as_kernel = 0;
	if (au_stat.as_audit == CLEAR_VAL)
		kctx->auk_statistics.as_audit = 0;
	if (au_stat.as_auditctl == CLEAR_VAL)
		kctx->auk_statistics.as_auditctl = 0;
	if (au_stat.as_enqueue == CLEAR_VAL)
		kctx->auk_statistics.as_enqueue = 0;
	if (au_stat.as_written == CLEAR_VAL)
		kctx->auk_statistics.as_written = 0;
	if (au_stat.as_wblocked == CLEAR_VAL)
		kctx->auk_statistics.as_wblocked = 0;
	if (au_stat.as_rblocked == CLEAR_VAL)
		kctx->auk_statistics.as_rblocked = 0;
	if (au_stat.as_dropped == CLEAR_VAL)
		kctx->auk_statistics.as_dropped = 0;
	if (au_stat.as_totalsize == CLEAR_VAL)
		kctx->auk_statistics.as_totalsize = 0;

	membar_producer();

	return (0);

}
Example #18
/*
 * Callback function used to apply a cpu configuration event to a zone.
 */
static int
pool_pset_setup_cb(zone_t *zone, void *arg)
{
	struct setup_arg *sa = arg;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(INGLOBALZONE(curproc));
	ASSERT(zone != NULL);

	if (zone == global_zone)
		return (0);
	if (zone_pset_get(zone) != sa->psetid)
		return (0);	/* ignore */
	switch (sa->what) {
	case CPU_CONFIG:
		cpu_visibility_configure(sa->cpu, zone);
		break;
	case CPU_UNCONFIG:
		cpu_visibility_unconfigure(sa->cpu, zone);
		break;
	case CPU_ON:
		cpu_visibility_online(sa->cpu, zone);
		break;
	case CPU_OFF:
		cpu_visibility_offline(sa->cpu, zone);
		break;
	case CPU_CPUPART_IN:
		cpu_visibility_add(sa->cpu, zone);
		break;
	case CPU_CPUPART_OUT:
		cpu_visibility_remove(sa->cpu, zone);
		break;
	default:
		cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what);
	}
	return (0);
}
Example #19
/*
 * Remove existing CPU property.
 */
int
pool_cpu_proprm(processorid_t cpuid, char *name)
{
	int ret;
	cpu_t *cpu;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&cpu_lock);
	if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) {
		ret = EINVAL;
	} else {
		if (cpu->cpu_props == NULL)
			ret = EINVAL;
		else
			ret = pool_proprm_common(cpu->cpu_props, name,
			    pool_cpu_props);
	}
	if (ret == 0)
		pool_cpu_mod = gethrtime();
	mutex_exit(&cpu_lock);
	return (ret);
}
Example #20
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char		*osname;
	pathname_t	spn;
	int		error = 0;
	uio_seg_t	fromspace = (uap->flags & MS_SYSSPACE) ?
				UIO_SYSSPACE : UIO_USERSPACE;
	int		canwrite;

	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	mutex_enter(&mvp->v_lock);
	if ((uap->flags & MS_REMOUNT) == 0 &&
	    (uap->flags & MS_OVERLAY) == 0 &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * ZFS does not support passing unparsed data in via MS_DATA.
	 * Users should use the MS_OPTIONSTR interface; this means
	 * that all option parsing is already done and the options struct
	 * can be interrogated.
	 */
	if ((uap->flags & MS_DATA) && uap->datalen > 0)
		return (EINVAL);

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (uap->flags & MS_REMOUNT) {
		return (zfs_refresh_properties(vfsp));
	}

	/*
	 * Get the objset name (the "special" mount argument).
	 */
	if ((error = pn_get(uap->spec, fromspace, &spn)) != 0)
		return (error);

	osname = spn.pn_path;

	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
		goto out;

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = EPERM;
		goto out;
	}

	error = zfs_domount(vfsp, osname, cr);

out:
	pn_free(&spn);
	return (error);
}
Example #21
/*
 * Return list of active processor sets, up to a maximum indicated by
 * numpsets.  The total number of processor sets is stored in the
 * location pointed to by numpsets.
 */
static int
pset_list(psetid_t *psetlist, uint_t *numpsets)
{
	uint_t user_npsets = 0;
	uint_t real_npsets;
	psetid_t *psets = NULL;
	int error = 0;

	if (numpsets != NULL) {
		if (copyin(numpsets, &user_npsets, sizeof (uint_t)) != 0)
			return (set_errno(EFAULT));
	}

	/*
	 * Get the list of all processor sets.  First we need to find
	 * out how many there are, so we can allocate a large enough
	 * buffer.
	 */
	mutex_enter(&cpu_lock);
	if (!INGLOBALZONE(curproc) && pool_pset_enabled()) {
		psetid_t psetid = zone_pset_get(curproc->p_zone);

		if (psetid == PS_NONE) {
			real_npsets = 0;
		} else {
			real_npsets = 1;
			psets = kmem_alloc(real_npsets * sizeof (psetid_t),
			    KM_SLEEP);
			psets[0] = psetid;
		}
	} else {
		real_npsets = cpupart_list(0, NULL, CP_ALL);
		if (real_npsets) {
			psets = kmem_alloc(real_npsets * sizeof (psetid_t),
			    KM_SLEEP);
			(void) cpupart_list(psets, real_npsets, CP_ALL);
		}
	}
	mutex_exit(&cpu_lock);

	if (user_npsets > real_npsets)
		user_npsets = real_npsets;

	if (numpsets != NULL) {
		if (copyout(&real_npsets, numpsets, sizeof (uint_t)) != 0)
			error = EFAULT;
		else if (psetlist != NULL && user_npsets != 0) {
			if (copyout(psets, psetlist,
			    user_npsets * sizeof (psetid_t)) != 0)
				error = EFAULT;
		}
	}

	if (real_npsets)
		kmem_free(psets, real_npsets * sizeof (psetid_t));

	if (error)
		return (set_errno(error));
	else
		return (0);
}
Example #22
static int
pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset)
{
	kthread_t	*tp;
	proc_t		*pp;
	task_t		*tk;
	kproject_t	*kpj;
	contract_t	*ct;
	zone_t		*zptr;
	psetid_t	oldpset;
	int		error = 0;
	void		*projbuf, *zonebuf;

	pool_lock();
	if ((pset != PS_QUERY) && (pset != PS_SOFT) &&
	    (pset != PS_HARD) && (pset != PS_QUERY_TYPE)) {
		/*
		 * Check if the set actually exists before checking
		 * permissions.  This is the historical error
		 * precedence.  Note that if pset was PS_MYID, the
		 * cpupart_get_cpus call will change it to the
		 * processor set id of the caller (or PS_NONE if the
		 * caller is not bound to a processor set).
		 */
		if (pool_state == POOL_ENABLED) {
			pool_unlock();
			return (set_errno(ENOTSUP));
		}
		if (cpupart_get_cpus(&pset, NULL, NULL) != 0) {
			pool_unlock();
			return (set_errno(EINVAL));
		} else if (pset != PS_NONE && secpolicy_pset(CRED()) != 0) {
			pool_unlock();
			return (set_errno(EPERM));
		}
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects
	 * and for all active zones on the system.  Unused buffers will
	 * be freed later by fss_freebuf().
	 */
	mutex_enter(&cpu_lock);
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	switch (idtype) {
	case P_LWPID:
		pp = curproc;
		mutex_enter(&pidlock);
		mutex_enter(&pp->p_lock);
		if (id == P_MYID) {
			tp = curthread;
		} else {
			if ((tp = idtot(pp, id)) == NULL) {
				mutex_exit(&pp->p_lock);
				mutex_exit(&pidlock);
				error = ESRCH;
				break;
			}
		}
		error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf);
		mutex_exit(&pp->p_lock);
		mutex_exit(&pidlock);
		break;

	case P_PID:
		mutex_enter(&pidlock);
		if (id == P_MYID) {
			pp = curproc;
		} else if ((pp = prfind(id)) == NULL) {
			mutex_exit(&pidlock);
			error = ESRCH;
			break;
		}
		error = pset_bind_process(pp, pset, &oldpset, projbuf, zonebuf);
		mutex_exit(&pidlock);
		break;

	case P_TASKID:
		mutex_enter(&pidlock);
		if (id == P_MYID)
			id = curproc->p_task->tk_tkid;
		if ((tk = task_hold_by_id(id)) == NULL) {
			mutex_exit(&pidlock);
			error = ESRCH;
			break;
		}
		error = pset_bind_task(tk, pset, &oldpset, projbuf, zonebuf);
		mutex_exit(&pidlock);
		task_rele(tk);
		break;

	case P_PROJID:
		pp = curproc;
		if (id == P_MYID)
			id = curprojid();
		if ((kpj = project_hold_by_id(id, pp->p_zone,
		    PROJECT_HOLD_FIND)) == NULL) {
			error = ESRCH;
			break;
		}
		mutex_enter(&pidlock);
		error = pset_bind_project(kpj, pset, &oldpset, projbuf,
		    zonebuf);
		mutex_exit(&pidlock);
		project_rele(kpj);
		break;

	case P_ZONEID:
		if (id == P_MYID)
			id = getzoneid();
		if ((zptr = zone_find_by_id(id)) == NULL) {
			error = ESRCH;
			break;
		}
		mutex_enter(&pidlock);
		error = pset_bind_zone(zptr, pset, &oldpset, projbuf, zonebuf);
		mutex_exit(&pidlock);
		zone_rele(zptr);
		break;

	case P_CTID:
		if (id == P_MYID)
			id = PRCTID(curproc);
		if ((ct = contract_type_ptr(process_type, id,
		    curproc->p_zone->zone_uniqid)) == NULL) {
			error = ESRCH;
			break;
		}
		mutex_enter(&pidlock);
		error = pset_bind_contract(ct->ct_data, pset, &oldpset, projbuf,
		    zonebuf);
		mutex_exit(&pidlock);
		contract_rele(ct);
		break;

	case P_PSETID:
		if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
			error = EINVAL;
			break;
		}
		error = pset_unbind(id, projbuf, zonebuf, idtype);
		break;

	case P_ALL:
		if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) {
			error = EINVAL;
			break;
		}
		error = pset_unbind(PS_NONE, projbuf, zonebuf, idtype);
		break;

	default:
		error = EINVAL;
		break;
	}

	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	mutex_exit(&cpu_lock);
	pool_unlock();

	if (error != 0)
		return (set_errno(error));
	if (opset != NULL) {
		if (copyout(&oldpset, opset, sizeof (psetid_t)) != 0)
			return (set_errno(EFAULT));
	}
	return (0);
}
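Note: the PS_QUERY/P_PID/P_MYID combination handled above is reachable from user space via pset_bind(3C); as the code shows, PS_QUERY skips the permission checks, so a plain query needs no special privileges. A minimal sketch:

#include <sys/pset.h>
#include <sys/procset.h>
#include <stdio.h>

int
main(void)
{
	psetid_t opset;

	/* PS_QUERY leaves the binding unchanged and just reports it. */
	if (pset_bind(PS_QUERY, P_PID, P_MYID, &opset) != 0) {
		perror("pset_bind");
		return (1);
	}
	if (opset == PS_NONE)
		printf("not bound to a processor set\n");
	else
		printf("bound to processor set %d\n", (int)opset);
	return (0);
}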
Example #23
/*
 * Check if user has requested permission.
 */
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	void *cookie;
	int	error;
	char	checkflag;
	objset_t *mos;
	avl_tree_t permsets;
	perm_set_t *setnode;

	error = dsl_dataset_hold(dsname, FTAG, &ds);
	if (error)
		return (error);

	dp = ds->ds_dir->dd_pool;
	mos = dp->dp_meta_objset;

	if (dsl_delegation_on(mos) == B_FALSE) {
		dsl_dataset_rele(ds, FTAG);
		return (ECANCELED);
	}

	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS) {
		dsl_dataset_rele(ds, FTAG);
		return (EPERM);
	}

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * Snapshots are treated as descendents only,
		 * local permissions do not apply.
		 */
		checkflag = ZFS_DELEG_DESCENDENT;
	} else {
		checkflag = ZFS_DELEG_LOCAL;
	}

	avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
	    offsetof(perm_set_t, p_node));

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
	    checkflag = ZFS_DELEG_DESCENDENT) {
		uint64_t zapobj;
		boolean_t expanded;

		/*
		 * If not in global zone then make sure
		 * the zoned property is set
		 */
		if (!INGLOBALZONE(curproc)) {
			uint64_t zoned;

			if (dsl_prop_get_dd(dd,
			    zfs_prop_to_name(ZFS_PROP_ZONED),
			    8, 1, &zoned, NULL) != 0)
				break;
			if (!zoned)
				break;
		}
		zapobj = dd->dd_phys->dd_deleg_zapobj;

		if (zapobj == 0)
			continue;

		dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr);
again:
		expanded = B_FALSE;
		for (setnode = avl_first(&permsets); setnode;
		    setnode = AVL_NEXT(&permsets, setnode)) {
			if (setnode->p_matched == B_TRUE)
				continue;

			/* See if this set directly grants this permission */
			error = dsl_check_access(mos, zapobj,
			    ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm);
			if (error == 0)
				goto success;
			if (error == EPERM)
				setnode->p_matched = B_TRUE;

			/* See if this set includes other sets */
			error = dsl_load_sets(mos, zapobj,
			    ZFS_DELEG_NAMED_SET_SETS, 0,
			    setnode->p_setname, &permsets);
			if (error == 0)
				setnode->p_matched = expanded = B_TRUE;
		}
		/*
		 * If we expanded any sets, that will define more sets,
		 * which we need to check.
		 */
		if (expanded)
			goto again;

		error = dsl_check_user_access(mos, zapobj, perm, checkflag, cr);
		if (error == 0)
			goto success;
	}
	error = EPERM;
success:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dataset_rele(ds, FTAG);

	cookie = NULL;
	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
		kmem_free(setnode, sizeof (perm_set_t));

	return (error);
}
Example #24
static int
setsmask(caddr_t data)
{
	STRUCT_DECL(auditinfo, user_info);
	struct proc *p;
	const auditinfo_addr_t	*ainfo;
	model_t	model;

	/* setsmask not applicable in non-global zones without perzone policy */
	if (!(audit_policy & AUDIT_PERZONE) && (!INGLOBALZONE(curproc)))
		return (EINVAL);

	model = get_udatamodel();
	STRUCT_INIT(user_info, model);

	if (copyin(data, STRUCT_BUF(user_info), STRUCT_SIZE(user_info)))
		return (EFAULT);

	mutex_enter(&pidlock);	/* lock the process queue against updates */
	for (p = practive; p != NULL; p = p->p_next) {
		cred_t	*cr;

		/* if in non-global zone only modify processes in same zone */
		if (!HASZONEACCESS(curproc, p->p_zone->zone_id))
			continue;

		mutex_enter(&p->p_lock);	/* so process doesn't go away */

		/* skip system processes and ones being created or going away */
		if (p->p_stat == SIDL || p->p_stat == SZOMB ||
		    (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
			mutex_exit(&p->p_lock);
			continue;
		}

		mutex_enter(&p->p_crlock);
		crhold(cr = p->p_cred);
		mutex_exit(&p->p_crlock);
		ainfo = crgetauinfo(cr);
		if (ainfo == NULL) {
			mutex_exit(&p->p_lock);
			crfree(cr);
			continue;
		}

		if (ainfo->ai_asid == STRUCT_FGET(user_info, ai_asid)) {
			au_mask_t	mask;
			int		err;

			/*
			 * Here's a process which matches the specified asid.
			 * If its mask doesn't already match the new mask,
			 * save the new mask in the pad, to be picked up
			 * next syscall.
			 */
			mask = STRUCT_FGET(user_info, ai_mask);
			err = bcmp(&mask, &ainfo->ai_mask, sizeof (au_mask_t));
			crfree(cr);
			if (err != 0) {
				struct p_audit_data *pad = P2A(p);
				ASSERT(pad != NULL);

				mutex_enter(&(pad->pad_lock));
				pad->pad_flags |= PAD_SETMASK;
				pad->pad_newmask = mask;
				mutex_exit(&(pad->pad_lock));

				/*
				 * No need to call set_proc_pre_sys(), since
				 * t_pre_sys is ALWAYS on when audit is
				 * enabled...due to syscall auditing.
				 */
			}
		} else {
			crfree(cr);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_exit(&pidlock);

	return (0);
}
Example #25
static int
setqctrl(caddr_t data)
{
	au_kcontext_t	*kctx;
	struct au_qctrl qctrl_tmp;
	STRUCT_DECL(au_qctrl, qctrl);
	STRUCT_INIT(qctrl, get_udatamodel());

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);
	kctx = GET_KCTX_NGZ;

	if (copyin(data, STRUCT_BUF(qctrl), STRUCT_SIZE(qctrl)))
		return (EFAULT);

	qctrl_tmp.aq_hiwater = (size_t)STRUCT_FGET(qctrl, aq_hiwater);
	qctrl_tmp.aq_lowater = (size_t)STRUCT_FGET(qctrl, aq_lowater);
	qctrl_tmp.aq_bufsz = (size_t)STRUCT_FGET(qctrl, aq_bufsz);
	qctrl_tmp.aq_delay = (clock_t)STRUCT_FGET(qctrl, aq_delay);

	/* enforce sane values */

	if (qctrl_tmp.aq_hiwater <= qctrl_tmp.aq_lowater)
		return (EINVAL);

	if (qctrl_tmp.aq_hiwater < AQ_LOWATER)
		return (EINVAL);

	if (qctrl_tmp.aq_hiwater > AQ_MAXHIGH)
		return (EINVAL);

	if (qctrl_tmp.aq_bufsz < AQ_BUFSZ)
		return (EINVAL);

	if (qctrl_tmp.aq_bufsz > AQ_MAXBUFSZ)
		return (EINVAL);

	if (qctrl_tmp.aq_delay == 0)
		return (EINVAL);

	if (qctrl_tmp.aq_delay > AQ_MAXDELAY)
		return (EINVAL);

	/* update everything at once so things are consistent */
	mutex_enter(&(kctx->auk_queue.lock));
	kctx->auk_queue.hiwater = qctrl_tmp.aq_hiwater;
	kctx->auk_queue.lowater = qctrl_tmp.aq_lowater;
	kctx->auk_queue.bufsz = qctrl_tmp.aq_bufsz;
	kctx->auk_queue.delay = qctrl_tmp.aq_delay;

	if (kctx->auk_queue.rd_block &&
	    kctx->auk_queue.cnt > kctx->auk_queue.lowater)
		cv_broadcast(&(kctx->auk_queue.read_cv));

	if (kctx->auk_queue.wt_block &&
	    kctx->auk_queue.cnt < kctx->auk_queue.hiwater)
		cv_broadcast(&(kctx->auk_queue.write_cv));

	mutex_exit(&(kctx->auk_queue.lock));

	return (0);
}
Example #26
/*
 * the host address for AUDIT_PERZONE == 0 is that of the global
 * zone and for local zones it is of the current zone.
 */
static int
setkaudit(caddr_t info_p, int len)
{
	STRUCT_DECL(auditinfo_addr, info);
	model_t model;
	au_kcontext_t	*kctx;

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);

	kctx = GET_KCTX_NGZ;

	model = get_udatamodel();
	STRUCT_INIT(info, model);

	if (len < STRUCT_SIZE(info))
		return (EOVERFLOW);

	if (copyin(info_p, STRUCT_BUF(info), STRUCT_SIZE(info)))
		return (EFAULT);

	if ((STRUCT_FGET(info, ai_termid.at_type) != AU_IPv4) &&
	    (STRUCT_FGET(info, ai_termid.at_type) != AU_IPv6))
		return (EINVAL);

	/* Set audit mask, termid and session id as specified */
	kctx->auk_info.ai_auid = STRUCT_FGET(info, ai_auid);
	kctx->auk_info.ai_namask = STRUCT_FGET(info, ai_mask);
#ifdef _LP64
	/* only convert to 64 bit if coming from a 32 bit binary */
	if (model == DATAMODEL_ILP32)
		kctx->auk_info.ai_termid.at_port =
		    DEVEXPL(STRUCT_FGET(info, ai_termid.at_port));
	else
		kctx->auk_info.ai_termid.at_port =
		    STRUCT_FGET(info, ai_termid.at_port);
#else
	kctx->auk_info.ai_termid.at_port = STRUCT_FGET(info, ai_termid.at_port);
#endif
	kctx->auk_info.ai_termid.at_type = STRUCT_FGET(info, ai_termid.at_type);
	bzero(&kctx->auk_info.ai_termid.at_addr[0],
	    sizeof (kctx->auk_info.ai_termid.at_addr));
	kctx->auk_info.ai_termid.at_addr[0] =
	    STRUCT_FGET(info, ai_termid.at_addr[0]);
	kctx->auk_info.ai_termid.at_addr[1] =
	    STRUCT_FGET(info, ai_termid.at_addr[1]);
	kctx->auk_info.ai_termid.at_addr[2] =
	    STRUCT_FGET(info, ai_termid.at_addr[2]);
	kctx->auk_info.ai_termid.at_addr[3] =
	    STRUCT_FGET(info, ai_termid.at_addr[3]);
	kctx->auk_info.ai_asid = STRUCT_FGET(info, ai_asid);

	if (kctx->auk_info.ai_termid.at_type == AU_IPv6 &&
	    IN6_IS_ADDR_V4MAPPED(
	    ((in6_addr_t *)kctx->auk_info.ai_termid.at_addr))) {
		kctx->auk_info.ai_termid.at_type = AU_IPv4;
		kctx->auk_info.ai_termid.at_addr[0] =
		    kctx->auk_info.ai_termid.at_addr[3];
		kctx->auk_info.ai_termid.at_addr[1] = 0;
		kctx->auk_info.ai_termid.at_addr[2] = 0;
		kctx->auk_info.ai_termid.at_addr[3] = 0;
	}
	if (kctx->auk_info.ai_termid.at_type == AU_IPv6)
		kctx->auk_hostaddr_valid = IN6_IS_ADDR_UNSPECIFIED(
		    (in6_addr_t *)kctx->auk_info.ai_termid.at_addr) ? 0 : 1;
	else
		kctx->auk_hostaddr_valid =
		    (kctx->auk_info.ai_termid.at_addr[0] ==
		    htonl(INADDR_ANY)) ? 0 : 1;

	return (0);
}
Example #27
long
sysconfig(int which)
{
	switch (which) {

	/*
	 * if it is not handled in mach_sysconfig either
	 * it must be EINVAL.
	 */
	default:
		return (mach_sysconfig(which)); /* `uname -i`/os */

	case _CONFIG_CLK_TCK:
		return ((long)hz);	/* clock frequency per second */

	case _CONFIG_PROF_TCK:
		return ((long)hz);	/* profiling clock freq per sec */

	case _CONFIG_NGROUPS:
		/*
		 * Maximum number of supplementary groups.
		 */
		return (ngroups_max);

	case _CONFIG_OPEN_FILES:
		/*
		 * Maximum number of open files (soft limit).
		 */
		{
			rlim64_t fd_ctl;
			mutex_enter(&curproc->p_lock);
			fd_ctl = rctl_enforced_value(
			    rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls,
			    curproc);
			mutex_exit(&curproc->p_lock);
			return ((ulong_t)fd_ctl);
		}

	case _CONFIG_CHILD_MAX:
		/*
		 * Maximum number of processes.
		 */
		return (v.v_maxup);

	case _CONFIG_POSIX_VER:
		return (_POSIX_VERSION); /* current POSIX version */

	case _CONFIG_PAGESIZE:
		return (PAGESIZE);

	case _CONFIG_XOPEN_VER:
		return (_XOPEN_VERSION); /* current XOPEN version */

	case _CONFIG_NPROC_CONF:
		return (zone_ncpus_get(curproc->p_zone));

	case _CONFIG_NPROC_ONLN:
		return (zone_ncpus_online_get(curproc->p_zone));

	case _CONFIG_NPROC_MAX:
		return (max_ncpus);

	case _CONFIG_STACK_PROT:
		return (curproc->p_stkprot & ~PROT_USER);

	case _CONFIG_AIO_LISTIO_MAX:
		return (_AIO_LISTIO_MAX);

	case _CONFIG_AIO_MAX:
		return (_AIO_MAX);

	case _CONFIG_AIO_PRIO_DELTA_MAX:
		return (0);

	case _CONFIG_DELAYTIMER_MAX:
		return (INT_MAX);

	case _CONFIG_MQ_OPEN_MAX:
		return (_MQ_OPEN_MAX);

	case _CONFIG_MQ_PRIO_MAX:
		return (_MQ_PRIO_MAX);

	case _CONFIG_RTSIG_MAX:
		return (_SIGRTMAX - _SIGRTMIN + 1);

	case _CONFIG_SEM_NSEMS_MAX:
		return (_SEM_NSEMS_MAX);

	case _CONFIG_SEM_VALUE_MAX:
		return (_SEM_VALUE_MAX);

	case _CONFIG_SIGQUEUE_MAX:
		/*
		 * Maximum number of outstanding queued signals.
		 */
		{
			rlim64_t sigqsz_max;
			mutex_enter(&curproc->p_lock);
			sigqsz_max = rctl_enforced_value(rc_process_sigqueue,
			    curproc->p_rctls, curproc);
			mutex_exit(&curproc->p_lock);
			return ((uint_t)sigqsz_max);
		}

	case _CONFIG_SIGRT_MIN:
		return (_SIGRTMIN);

	case _CONFIG_SIGRT_MAX:
		return (_SIGRTMAX);

	case _CONFIG_TIMER_MAX:
		return (timer_max);

	case _CONFIG_PHYS_PAGES:
		/*
		 * If the non-global zone has a phys. memory cap, use that.
		 * We always report the system-wide value for the global zone,
		 * even though rcapd can be used on the global zone too.
		 */
		if (!INGLOBALZONE(curproc) &&
		    curproc->p_zone->zone_phys_mcap != 0)
			return (MIN(btop(curproc->p_zone->zone_phys_mcap),
			    physinstalled));

		return (physinstalled);

	case _CONFIG_AVPHYS_PAGES:
		/*
		 * If the non-global zone has a phys. memory cap, use
		 * the phys. memory cap - zone's current rss.  We always
		 * report the system-wide value for the global zone, even
		 * though rcapd can be used on the global zone too.
		 */
		if (!INGLOBALZONE(curproc) &&
		    curproc->p_zone->zone_phys_mcap != 0) {
			pgcnt_t cap, rss, free;
			vmusage_t in_use;
			size_t cnt = 1;

			cap = btop(curproc->p_zone->zone_phys_mcap);
			if (cap > physinstalled)
				return (freemem);

			if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
			    FKIOCTL) != 0)
				in_use.vmu_rss_all = 0;
			rss = btop(in_use.vmu_rss_all);
			/*
			 * Because rcapd implements a soft cap, it is possible
			 * for rss to be temporarily over the cap.
			 */
			if (cap > rss)
				free = cap - rss;
			else
				free = 0;
			return (MIN(free, freemem));
		}

		return (freemem);

	case _CONFIG_MAXPID:
		return (maxpid);

	case _CONFIG_CPUID_MAX:
		return (max_cpuid);

	case _CONFIG_EPHID_MAX:
		return (MAXEPHUID);

	case _CONFIG_SYMLOOP_MAX:
		return (MAXSYMLINKS);
	}
}
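Note: the _CONFIG_PHYS_PAGES and _CONFIG_AVPHYS_PAGES cases are how a zone's memory cap becomes visible to applications; from user space they are normally reached through sysconf(3C) (the mapping to _SC_PHYS_PAGES/_SC_AVPHYS_PAGES is assumed here). A small sketch:

#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	long phys = sysconf(_SC_PHYS_PAGES);
	long avail = sysconf(_SC_AVPHYS_PAGES);
	long pagesz = sysconf(_SC_PAGESIZE);

	if (phys == -1 || avail == -1 || pagesz == -1) {
		perror("sysconf");
		return (1);
	}
	/* Inside a capped non-global zone these reflect the zone's cap, not the machine. */
	printf("physical pages: %ld (%ld MB)\n", phys, phys * pagesz / (1024 * 1024));
	printf("available pages: %ld (%ld MB)\n", avail, avail * pagesz / (1024 * 1024));
	return (0);
}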
Example #28
/*
 * Called by proc_exit() when a zone's init exits, presumably because
 * it failed.  As long as the given zone is still in the "running"
 * state, we will re-exec() init, but first we need to reset things
 * which are usually inherited across exec() but will break init's
 * assumption that it is being exec()'d from a virgin process.  Most
 * importantly this includes closing all file descriptors (exec only
 * closes those marked close-on-exec) and resetting signals (exec only
 * resets handled signals, and we need to clear any signals which
 * killed init).  Anything else that exec(2) says would be inherited,
 * but would affect the execution of init, needs to be reset.
 */
static int
restart_init(int what, int why)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	user_t *up = PTOU(p);

	vnode_t *oldcd, *oldrd;
	int i, err;
	char reason_buf[64];

	/*
	 * Let zone admin (and global zone admin if this is for a non-global
	 * zone) know that init has failed and will be restarted.
	 */
	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "init(1M) %s: restarting automatically",
	    exit_reason(reason_buf, sizeof (reason_buf), what, why));

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
		    "restarting automatically",
		    p->p_zone->zone_name, p->p_pid, reason_buf);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 * Then close all open file descriptors in the process.
	 */
	pollcleanup();
	closeall(P_FINFO(p));

	/*
	 * Grab p_lock and begin clearing miscellaneous global process
	 * state that needs to be reset before we exec the new init(1M).
	 */

	mutex_enter(&p->p_lock);
	prbarrier(p);

	p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
	up->u_cmask = CMASK;

	sigemptyset(&t->t_hold);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);

	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);

	sigdelq(p, t, 0);
	sigdelq(p, NULL, 0);

	if (p->p_killsqp) {
		siginfofree(p->p_killsqp);
		p->p_killsqp = NULL;
	}

	/*
	 * Reset any signals that are ignored back to the default disposition.
	 * Other u_signal members will be cleared when exec calls sigdefault().
	 */
	for (i = 1; i < NSIG; i++) {
		if (up->u_signal[i - 1] == SIG_IGN) {
			up->u_signal[i - 1] = SIG_DFL;
			sigemptyset(&up->u_sigmask[i - 1]);
		}
	}

	/*
	 * Clear the current signal, any signal info associated with it, and
	 * any signal information from contracts and/or contract templates.
	 */
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	if (lwp->lwp_curinfo != NULL) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}
	lwp_ctmpl_clear(lwp);

	/*
	 * Reset both the process root directory and the current working
	 * directory to the root of the zone just as we do during boot.
	 */
	VN_HOLD(p->p_zone->zone_rootvp);
	oldrd = up->u_rdir;
	up->u_rdir = p->p_zone->zone_rootvp;

	VN_HOLD(p->p_zone->zone_rootvp);
	oldcd = up->u_cdir;
	up->u_cdir = p->p_zone->zone_rootvp;

	if (up->u_cwd != NULL) {
		refstr_rele(up->u_cwd);
		up->u_cwd = NULL;
	}

	mutex_exit(&p->p_lock);

	if (oldrd != NULL)
		VN_RELE(oldrd);
	if (oldcd != NULL)
		VN_RELE(oldcd);

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

	/*
	 * Now exec() the new init(1M) on top of the current process.  If we
	 * succeed, the caller will treat this like a successful system call.
	 * If we fail, we issue messages and the caller will proceed with exit.
	 */
	err = exec_init(p->p_zone->zone_initname, NULL);

	if (err == 0)
		return (0);

	zcmn_err(p->p_zone->zone_id, CE_WARN,
	    "failed to restart init(1M) (err=%d): system reboot required", err);

	if (!INGLOBALZONE(p)) {
		cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
		    "(pid %d, err=%d): zoneadm(1M) boot required",
		    p->p_zone->zone_name, p->p_pid, err);
	}

	return (-1);
}
Example #29
/*
 * Return value:
 *   1 - exitlwps() failed, call (or continue) lwp_exit()
 *   0 - restarting init.  Return through system call path
 */
int
proc_exit(int why, int what)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	zone_t *z = p->p_zone;
	timeout_id_t tmp_id;
	int rv;
	proc_t *q;
	task_t *tk;
	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
	sigqueue_t *sqp;
	lwpdir_t *lwpdir;
	uint_t lwpdir_sz;
	tidhash_t *tidhash;
	uint_t tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	refstr_t *cwd;
	hrtime_t hrutime, hrstime;
	int evaporate;

	/*
	 * Stop and discard the process's lwps except for the current one,
	 * unless some other lwp beat us to it.  If exitlwps() fails then
	 * return and the calling lwp will call (or continue in) lwp_exit().
	 */
	proc_is_exiting(p);
	if (exitlwps(0) != 0)
		return (1);

	mutex_enter(&p->p_lock);
	if (p->p_ttime > 0) {
		/*
		 * Account any remaining ticks charged to this process
		 * on its way out.
		 */
		(void) task_cpu_time_incr(p->p_task, p->p_ttime);
		p->p_ttime = 0;
	}
	mutex_exit(&p->p_lock);

	DTRACE_PROC(lwp__exit);
	DTRACE_PROC1(exit, int, why);

	/*
	 * Will perform any brand specific proc exit processing, since this
	 * is always the last lwp, will also perform lwp_exit and free brand
	 * data
	 */
	if (PROC_IS_BRANDED(p)) {
		lwp_detach_brand_hdlrs(lwp);
		brand_clearbrand(p, B_FALSE);
	}

	/*
	 * Don't let init exit unless zone_start_init() failed its exec, or
	 * we are shutting down the zone or the machine.
	 *
	 * Since we are single threaded, we don't need to lock the
	 * following accesses to zone_proc_initpid.
	 */
	if (p->p_pid == z->zone_proc_initpid) {
		if (z->zone_boot_err == 0 &&
		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
		    z->zone_restart_init == B_TRUE &&
		    restart_init(what, why) == 0)
			return (0);
		/*
		 * Since we didn't or couldn't restart init, we clear
		 * the zone's init state and proceed with exit
		 * processing.
		 */
		z->zone_proc_initpid = -1;
	}

	lwp_pcb_exit();

	/*
	 * Allocate a sigqueue now, before we grab locks.
	 * It will be given to sigcld(), below.
	 * Special case:  If we will be making the process disappear
	 * without a trace because it is either:
	 *	* an exiting SSYS process, or
	 *	* a posix_spawn() vfork child who requests it,
	 * we don't bother to allocate a useless sigqueue.
	 */
	evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
	    why == CLD_EXITED && what == _EVAPORATE);
	if (!evaporate)
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * revoke any doors created by the process.
	 */
	if (p->p_door_list)
		door_exit();

	/*
	 * Release schedctl data structures.
	 */
	if (p->p_pagep)
		schedctl_proc_cleanup();

	/*
	 * make sure all pending kaio has completed.
	 */
	if (p->p_aio)
		aio_cleanup_exit();

	/*
	 * discard the lwpchan cache.
	 */
	if (p->p_lcp != NULL)
		lwpchan_destroy_cache(0);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		ASSERT(dtrace_helpers_cleanup != NULL);
		(*dtrace_helpers_cleanup)();
	}

	/* untimeout the realtime timers */
	if (p->p_itimer != NULL)
		timer_exit();

	if ((tmp_id = p->p_alarmid) != 0) {
		p->p_alarmid = 0;
		(void) untimeout(tmp_id);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 */
	pollcleanup();

	if (p->p_rprof_cyclic != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);
		cyclic_remove(p->p_rprof_cyclic);
		mutex_exit(&cpu_lock);
	}

	mutex_enter(&p->p_lock);

	/*
	 * Clean up any DTrace probes associated with this process.
	 */
	if (p->p_dtrace_probes) {
		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
		dtrace_fasttrap_exit_ptr(p);
	}

	while ((tmp_id = p->p_itimerid) != 0) {
		p->p_itimerid = 0;
		mutex_exit(&p->p_lock);
		(void) untimeout(tmp_id);
		mutex_enter(&p->p_lock);
	}

	lwp_cleanup();

	/*
	 * We are about to exit; prevent our resource associations from
	 * being changed.
	 */
	pool_barrier_enter();

	/*
	 * Block the process against /proc now that we have really
	 * acquired p->p_lock (to manipulate p_tlist at least).
	 */
	prbarrier(p);

	sigfillset(&p->p_ignore);
	sigemptyset(&p->p_siginfo);
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);
	sigemptyset(&p->p_sigmask);
	sigdelq(p, t, 0);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	p->p_flag &= ~(SKILLED | SEXTKILLED);
	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	t->t_proc_flag |= TP_LWPEXIT;
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	prlwpexit(t);		/* notify /proc */
	lwp_hash_out(p, t->t_tid);
	prexit(p);

	p->p_lwpcnt = 0;
	p->p_tlist = NULL;
	sigqfree(p);
	term_mstate(t);
	p->p_mterm = gethrtime();

	exec_vp = p->p_exec;
	execdir_vp = p->p_execdir;
	p->p_exec = NULLVP;
	p->p_execdir = NULLVP;
	mutex_exit(&p->p_lock);

	pr_free_watched_pages(p);

	closeall(P_FINFO(p));

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif
	if (p->p_semacct)			/* IPC semaphore exit */
		semexit(p);
	rv = wstat(why, what);

	acct(rv & 0xff);
	exacct_commit_proc(p, rv);

	/*
	 * Release any resources associated with C2 auditing
	 */
	if (AU_AUDITING()) {
		/*
		 * audit exit system call
		 */
		audit_exit(why, what);
	}

	/*
	 * Free address space.
	 */
	relvm();

	if (exec_vp) {
		/*
		 * Close this executable which has been opened when the process
		 * was created by getproc().
		 */
		(void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(exec_vp);
	}
	if (execdir_vp)
		VN_RELE(execdir_vp);

	/*
	 * Release held contracts.
	 */
	contract_exit(p);

	/*
	 * Depart our encapsulating process contract.
	 */
	if ((p->p_flag & SSYS) == 0) {
		ASSERT(p->p_ct_process);
		contract_process_exit(p->p_ct_process, p, rv);
	}

	/*
	 * Remove pool association, and block if requested by pool_do_bind.
	 */
	mutex_enter(&p->p_lock);
	ASSERT(p->p_pool->pool_ref > 0);
	atomic_add_32(&p->p_pool->pool_ref, -1);
	p->p_pool = pool_default;
	/*
	 * Now that our address space has been freed and all other threads
	 * in this process have exited, set the PEXITED pool flag.  This
	 * tells the pools subsystems to ignore this process if it was
	 * requested to rebind this process to a new pool.
	 */
	p->p_poolflag |= PEXITED;
	pool_barrier_exit();
	mutex_exit(&p->p_lock);

	mutex_enter(&pidlock);

	/*
	 * Delete this process from the newstate list of its parent. We
	 * will put it in the right place in the sigcld in the end.
	 */
	delete_ns(p->p_parent, p);

	/*
	 * Reassign the orphans to the next of kin.
	 * Don't rearrange init's orphanage.
	 */
	if ((q = p->p_orphan) != NULL && p != proc_init) {

		proc_t *nokp = p->p_nextofkin;

		for (;;) {
			q->p_nextofkin = nokp;
			if (q->p_nextorph == NULL)
				break;
			q = q->p_nextorph;
		}
		q->p_nextorph = nokp->p_orphan;
		nokp->p_orphan = p->p_orphan;
		p->p_orphan = NULL;
	}

	/*
	 * Reassign the children to init.
	 * Don't try to assign init's children to init.
	 */
	if ((q = p->p_child) != NULL && p != proc_init) {
		struct proc	*np;
		struct proc	*initp = proc_init;
		boolean_t	setzonetop = B_FALSE;

		if (!INGLOBALZONE(curproc))
			setzonetop = B_TRUE;

		pgdetach(p);

		do {
			np = q->p_sibling;
			/*
			 * Delete it from its current parent new state
			 * list and add it to init new state list
			 */
			delete_ns(q->p_parent, q);

			q->p_ppid = 1;
			q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
			if (setzonetop) {
				mutex_enter(&q->p_lock);
				q->p_flag |= SZONETOP;
				mutex_exit(&q->p_lock);
			}
			q->p_parent = initp;

			/*
			 * Since q will be the first child,
			 * it will not have a previous sibling.
			 */
			q->p_psibling = NULL;
			if (initp->p_child) {
				initp->p_child->p_psibling = q;
			}
			q->p_sibling = initp->p_child;
			initp->p_child = q;
			if (q->p_proc_flag & P_PR_PTRACE) {
				mutex_enter(&q->p_lock);
				sigtoproc(q, NULL, SIGKILL);
				mutex_exit(&q->p_lock);
			}
			/*
			 * sigcld() will add the child to parents
			 * newstate list.
			 */
			if (q->p_stat == SZOMB)
				sigcld(q, NULL);
		} while ((q = np) != NULL);

		p->p_child = NULL;
		ASSERT(p->p_child_ns == NULL);
	}

	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);

	mutex_enter(&p->p_lock);
	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */

	/*
	 * Have our task accumulate our resource usage data before they
	 * become contaminated by p_cacct etc., and before we renounce
	 * membership of the task.
	 *
	 * We do this regardless of whether or not task accounting is active.
	 * This is to avoid having nonsense data reported for this task if
	 * task accounting is subsequently enabled. The overhead is minimal;
	 * by this point, this process has accounted for the usage of all its
	 * LWPs. We nonetheless do the work here, and under the protection of
	 * pidlock, so that the movement of the process's usage to the task
	 * happens at the same time as the removal of the process from the
	 * task, from the point of view of exacct_snapshot_task_usage().
	 */
	exacct_update_task_mstate(p);

	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;

	p->p_acct[LMS_USER]	+= p->p_cacct[LMS_USER];
	p->p_acct[LMS_SYSTEM]	+= p->p_cacct[LMS_SYSTEM];
	p->p_acct[LMS_TRAP]	+= p->p_cacct[LMS_TRAP];
	p->p_acct[LMS_TFAULT]	+= p->p_cacct[LMS_TFAULT];
	p->p_acct[LMS_DFAULT]	+= p->p_cacct[LMS_DFAULT];
	p->p_acct[LMS_KFAULT]	+= p->p_cacct[LMS_KFAULT];
	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
	p->p_acct[LMS_SLEEP]	+= p->p_cacct[LMS_SLEEP];
	p->p_acct[LMS_WAIT_CPU]	+= p->p_cacct[LMS_WAIT_CPU];
	p->p_acct[LMS_STOPPED]	+= p->p_cacct[LMS_STOPPED];

	p->p_ru.minflt	+= p->p_cru.minflt;
	p->p_ru.majflt	+= p->p_cru.majflt;
	p->p_ru.nswap	+= p->p_cru.nswap;
	p->p_ru.inblock	+= p->p_cru.inblock;
	p->p_ru.oublock	+= p->p_cru.oublock;
	p->p_ru.msgsnd	+= p->p_cru.msgsnd;
	p->p_ru.msgrcv	+= p->p_cru.msgrcv;
	p->p_ru.nsignals += p->p_cru.nsignals;
	p->p_ru.nvcsw	+= p->p_cru.nvcsw;
	p->p_ru.nivcsw	+= p->p_cru.nivcsw;
	p->p_ru.sysc	+= p->p_cru.sysc;
	p->p_ru.ioch	+= p->p_cru.ioch;

	p->p_stat = SZOMB;
	p->p_proc_flag &= ~P_PR_PTRACE;
	p->p_wdata = what;
	p->p_wcode = (char)why;

	cdir = PTOU(p)->u_cdir;
	rdir = PTOU(p)->u_rdir;
	cwd = PTOU(p)->u_cwd;

	ASSERT(cdir != NULL || p->p_parent == &p0);

	/*
	 * Release resource controls, as they are no longer enforceable.
	 */
	rctl_set_free(p->p_rctls);

	/*
	 * Decrement tk_nlwps counter for our task.max-lwps resource control.
	 * An extended accounting record, if that facility is active, is
	 * scheduled to be written.  We cannot give up task and project
	 * membership at this point because that would allow zombies to escape
	 * from the max-processes resource controls.  Zombies stay in their
	 * current task and project until the process table slot is released
	 * in freeproc().
	 */
	tk = p->p_task;

	mutex_enter(&p->p_zone->zone_nlwps_lock);
	tk->tk_nlwps--;
	tk->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	/*
	 * Clear the lwp directory and the lwpid hash table
	 * now that /proc can't bother us any more.
	 * We free the memory below, after dropping p->p_lock.
	 */
	lwpdir = p->p_lwpdir;
	lwpdir_sz = p->p_lwpdir_sz;
	tidhash = p->p_tidhash;
	tidhash_sz = p->p_tidhash_sz;
	ret_tidhash = p->p_ret_tidhash;
	p->p_lwpdir = NULL;
	p->p_lwpfree = NULL;
	p->p_lwpdir_sz = 0;
	p->p_tidhash = NULL;
	p->p_tidhash_sz = 0;
	p->p_ret_tidhash = NULL;

	/*
	 * If the process has context ops installed, call the exit routine
	 * on behalf of this last remaining thread. Normally exitpctx() is
	 * called during thread_exit() or lwp_exit(), but because this is the
	 * last thread in the process, we must call it here. By the time
	 * thread_exit() is called (below), the association with the relevant
	 * process has been lost.
	 *
	 * We also free the context here.
	 */
	if (p->p_pctx) {
		kpreempt_disable();
		exitpctx(p);
		kpreempt_enable();

		freepctx(p, 0);
	}

	/*
	 * curthread's proc pointer is changed to point to the 'sched'
	 * process for the corresponding zone, except in the case when
	 * the exiting process is in fact a zsched instance, in which
	 * case the proc pointer is set to p0.  We do so, so that the
	 * process still points at the right zone when we call the VN_RELE()
	 * below.
	 *
	 * This is because curthread's original proc pointer can be freed as
	 * soon as the child sends a SIGCLD to its parent.  We use zsched so
	 * that for user processes, even in the final moments of death, the
	 * process is still associated with its zone.
	 */
	if (p != t->t_procp->p_zone->zone_zsched)
		t->t_procp = t->t_procp->p_zone->zone_zsched;
	else
		t->t_procp = &p0;

	mutex_exit(&p->p_lock);
	if (!evaporate) {
		p->p_pidflag &= ~CLDPEND;
		sigcld(p, sqp);
	} else {
		/*
		 * Do what sigcld() would do if the disposition
		 * of the SIGCHLD signal were set to be ignored.
		 */
		cv_broadcast(&p->p_srwchan_cv);
		freeproc(p);
	}
	mutex_exit(&pidlock);

	/*
	 * We don't release u_cdir and u_rdir until SZOMB is set.
	 * This protects us against dofusers().
	 */
	if (cdir)
		VN_RELE(cdir);
	if (rdir)
		VN_RELE(rdir);
	if (cwd)
		refstr_rele(cwd);

	/*
	 * task_rele() may ultimately cause the zone to go away (or
	 * may cause the last user process in a zone to go away, which
	 * signals zsched to go away).  So prior to this call, we must
	 * no longer point at zsched.
	 */
	t->t_procp = &p0;

	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
	kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	thread_exit();
	/* NOTREACHED */
}
Example #30
int
clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
    rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
{
	CLIENT *h;
	struct cku_private *p;
	struct rpc_msg call_msg;
	rdma_registry_t *rp;

	ASSERT(INGLOBALZONE(curproc));

	if (cl == NULL)
		return (EINVAL);
	*cl = NULL;

	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	/*
	 * Find underlying RDMATF plugin
	 */
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
	while (rp != NULL) {
		if (strcmp(rp->r_mod->rdma_api, proto))
			rp = rp->r_next;
		else {
			p->cku_rd_mod = rp->r_mod;
			p->cku_rd_handle = handle;
			break;
		}
	}
	rw_exit(&rdma_lock);

	if (p->cku_rd_mod == NULL) {
		/*
		 * Should not happen.
		 * No matching RDMATF plugin.
		 */
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	h = ptoh(p);
	h->cl_ops = &rdma_clnt_ops;
	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();

	/* call message, just used to pre-serialize below */
	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = pgm;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
	/* pre-serialize call message header */
	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		XDR_DESTROY(&p->cku_outxdr);
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
	p->cku_srcaddr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
	p->cku_srcaddr.maxlen = raddr->maxlen;
	p->cku_srcaddr.len = 0;
	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = raddr->maxlen;
	p->cku_addr.len = raddr->len;
	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
	p->cku_addrfmly = family;

	*cl = h;
	return (0);
}
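Note: every example above gates behavior on the in-kernel INGLOBALZONE(curproc) test. The nearest user-space analogue is getzoneid(3C)/getzonenamebyid(3C); a minimal sketch (comparing the zone name against "global" is the conventional check):

#include <zone.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char name[ZONENAME_MAX];

	if (getzonenamebyid(getzoneid(), name, sizeof (name)) == -1) {
		perror("getzonenamebyid");
		return (1);
	}
	/* The global zone is always named "global". */
	printf("running in %s zone \"%s\"\n",
	    strcmp(name, "global") == 0 ? "the global" : "non-global", name);
	return (0);
}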