/* ARGSUSED */ static int setcond(caddr_t data) { int auditstate; au_kcontext_t *kctx; if (!(audit_policy & AUDIT_PERZONE) && (!INGLOBALZONE(curproc))) return (EINVAL); kctx = GET_KCTX_NGZ; if (copyin(data, &auditstate, sizeof (int))) return (EFAULT); switch (auditstate) { case AUC_AUDITING: /* Turn auditing on */ if (audit_active == C2AUDIT_UNLOADED) audit_init_module(); kctx->auk_auditstate = AUC_AUDITING; if (!(audit_policy & AUDIT_PERZONE) && INGLOBALZONE(curproc)) set_all_zone_usr_proc_sys(ALL_ZONES); else set_all_zone_usr_proc_sys(curproc->p_zone->zone_id); break; case AUC_NOAUDIT: /* Turn auditing off */ if (kctx->auk_auditstate == AUC_NOAUDIT) break; kctx->auk_auditstate = AUC_NOAUDIT; /* clear out the audit queue */ mutex_enter(&(kctx->auk_queue.lock)); if (kctx->auk_queue.wt_block) cv_broadcast(&(kctx->auk_queue.write_cv)); /* unblock au_output_thread */ cv_broadcast(&(kctx->auk_queue.read_cv)); mutex_exit(&(kctx->auk_queue.lock)); break; default: return (EINVAL); } return (0); }
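/*
 * Hedged illustration, not part of the kernel source above: setcond()
 * is the kernel half of auditon(2) with the A_SETCOND command.  A
 * minimal userland sketch of the call that lands here, assuming the
 * usual <bsm/audit.h> definitions; enable_auditing() is a
 * hypothetical wrapper name.
 */
static int
enable_auditing(void)
{
        int cond = AUC_AUDITING;        /* copied in by setcond() above */

        return (auditon(A_SETCOND, (caddr_t)&cond, sizeof (cond)));
}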
/* * Called from start_init_common(), to set init's core file path and content. */ void init_core(void) { struct core_globals *cg; /* * The first time we hit this, in the global zone, we have to * initialize the zsd key. */ if (INGLOBALZONE(curproc)) { zone_key_create(&core_zone_key, core_init_zone, NULL, core_free_zone); } /* * zone_key_create will have called core_init_zone for the * global zone, which sets up the default path and content * variables. */ VERIFY((cg = zone_getspecific(core_zone_key, curproc->p_zone)) != NULL); corectl_path_hold(cg->core_default_path); corectl_content_hold(cg->core_default_content); curproc->p_corefile = cg->core_default_path; curproc->p_content = cg->core_default_content; }
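/*
 * Hedged sketch of the zone-specific-data (ZSD) pattern init_core()
 * depends on.  The key, callbacks, and my_data_t below are
 * hypothetical names, not part of the source above.  The point is
 * that zone_key_create() runs the create callback for every existing
 * zone (including the global zone) and for each zone created later,
 * so a subsequent zone_getspecific() never returns NULL for a live
 * zone -- which is what the VERIFY() above relies on.
 */
typedef struct my_data { int md_dummy; } my_data_t;

static zone_key_t my_zone_key;

/* ARGSUSED */
static void *
my_init_zone(zoneid_t zoneid)
{
        return (kmem_zalloc(sizeof (my_data_t), KM_SLEEP));
}

/* ARGSUSED */
static void
my_free_zone(zoneid_t zoneid, void *data)
{
        kmem_free(data, sizeof (my_data_t));
}

static void
my_init(void)
{
        zone_key_create(&my_zone_key, my_init_zone, NULL, my_free_zone);
}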
/* * Transfer specified CPUs between processor sets. */ int pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids) { struct cpu *cpu; int ret = 0; int id; ASSERT(pool_lock_held()); ASSERT(INGLOBALZONE(curproc)); if (size == 0 || size > max_ncpus) /* quick sanity check */ return (EINVAL); mutex_enter(&cpu_lock); for (id = 0; id < size; id++) { if ((cpu = cpu_get((processorid_t)ids[id])) == NULL || cpupart_query_cpu(cpu) != src) { ret = EINVAL; break; } if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0) break; } mutex_exit(&cpu_lock); if (ret == 0) pool_pset_mod = gethrtime(); return (ret); }
/* * Enable processor set plugin. */ int pool_pset_enable(void) { int error; nvlist_t *props; ASSERT(pool_lock_held()); ASSERT(INGLOBALZONE(curproc)); /* * Can't enable pools if there are existing cpu partitions. */ mutex_enter(&cpu_lock); if (cp_numparts > 1) { mutex_exit(&cpu_lock); return (EEXIST); } /* * We want to switch things such that everything that was tagged with * the special ALL_ZONES token now is explicitly visible to all zones: * first add individual zones to the visibility list then remove the * special "ALL_ZONES" token. There must only be the default pset * (PS_NONE) active if pools are being enabled, so we only need to * deal with it. * * We want to make pool_pset_enabled() start returning B_TRUE before * we call any of the visibility update functions. */ global_zone->zone_psetid = PS_NONE; /* * We need to explicitly handle the global zone since * zone_pset_set() won't modify it. */ pool_pset_visibility_add(PS_NONE, global_zone); /* * A NULL argument means the ALL_ZONES token. */ pool_pset_visibility_remove(PS_NONE, NULL); error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE); ASSERT(error == 0); /* * It is safe to drop cpu_lock here. We're still * holding pool_lock so no new cpu partitions can * be created while we're here. */ mutex_exit(&cpu_lock); (void) nvlist_alloc(&pool_pset_default->pset_props, NV_UNIQUE_NAME, KM_SLEEP); props = pool_pset_default->pset_props; (void) nvlist_add_string(props, "pset.name", "pset_default"); (void) nvlist_add_string(props, "pset.comment", ""); (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE); (void) nvlist_add_string(props, "pset.units", "population"); (void) nvlist_add_byte(props, "pset.default", 1); (void) nvlist_add_uint64(props, "pset.max", 65536); (void) nvlist_add_uint64(props, "pset.min", 1); pool_pset_mod = pool_cpu_mod = gethrtime(); return (0); }
/* ARGSUSED */ static int pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg) { processorid_t cpuid = id; struct setup_arg sarg; int error; cpu_t *c; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(INGLOBALZONE(curproc)); if (!pool_pset_enabled()) return (0); if (what != CPU_CONFIG && what != CPU_UNCONFIG && what != CPU_ON && what != CPU_OFF && what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT) return (0); c = cpu_get(cpuid); ASSERT(c != NULL); sarg.psetid = cpupart_query_cpu(c); sarg.cpu = c; sarg.what = what; error = zone_walk(pool_pset_setup_cb, &sarg); ASSERT(error == 0); return (0); }
/*
 * Sigh.  Inside a local zone, we don't have access to
 * /etc/zfs/zpool.cache, and we don't want to allow the local zone to
 * see all the pools anyway.  So we have to invent the ZFS_IOC_CONFIG
 * ioctl to grab the configuration information for all pools visible
 * within the zone.
 */
nvlist_t *
spa_all_configs(uint64_t *generation)
{
	nvlist_t *pools;
	spa_t *spa = NULL;

	if (*generation == spa_config_generation)
		return (NULL);

	pools = fnvlist_alloc();

	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		if (INGLOBALZONE(curproc) ||
		    zone_dataset_visible(spa_name(spa), NULL)) {
			mutex_enter(&spa->spa_props_lock);
			fnvlist_add_nvlist(pools, spa_name(spa),
			    spa->spa_config);
			mutex_exit(&spa->spa_props_lock);
		}
	}
	*generation = spa_config_generation;
	mutex_exit(&spa_namespace_lock);

	return (pools);
}
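/*
 * Hedged caller sketch (illustrative; refresh_pool_list() is a
 * hypothetical name): the generation number caches the namespace
 * state, so a caller that passes back the value from its previous
 * call gets NULL when nothing has changed and can skip the copyout.
 */
static void
refresh_pool_list(uint64_t *genp)
{
        nvlist_t *pools;

        if ((pools = spa_all_configs(genp)) == NULL)
                return;         /* configuration unchanged */
        /* ... hand the nvlist to the ioctl caller ... */
        nvlist_free(pools);
}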
static char * spa_history_zone(void) { #ifdef _KERNEL if (INGLOBALZONE(curproc)) return (NULL); return (curproc->p_zone->zone_name); #else return (NULL); #endif }
task_t * task_hold_by_id(taskid_t id) { zoneid_t zoneid; if (INGLOBALZONE(curproc)) zoneid = ALL_ZONES; else zoneid = getzoneid(); return (task_hold_by_id_zone(id, zoneid)); }
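/*
 * Hedged usage sketch: task_hold_by_id() returns a held task or NULL,
 * and every successful hold must be paired with task_rele() (compare
 * the P_TASKID case of pset_bind() below).  with_task() is a
 * hypothetical caller.
 */
static int
with_task(taskid_t id)
{
        task_t *tk;

        if ((tk = task_hold_by_id(id)) == NULL)
                return (ESRCH);
        /* ... operate on tk while it is held ... */
        task_rele(tk);
        return (0);
}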
/* * Find a CPU partition given a processor set ID if the processor set * should be visible from the calling zone. */ cpupart_t * cpupart_find(psetid_t psid) { cpupart_t *cp; ASSERT(MUTEX_HELD(&cpu_lock)); cp = cpupart_find_all(psid); if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() && zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id)) return (NULL); return (cp); }
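/*
 * Hedged caller sketch: cpupart_find() asserts cpu_lock, so a caller
 * brackets the lookup with mutex_enter()/mutex_exit() and treats NULL
 * as "no such set visible from this zone".  pset_visible() is a
 * hypothetical name.
 */
static boolean_t
pset_visible(psetid_t psid)
{
        boolean_t found;

        mutex_enter(&cpu_lock);
        found = (cpupart_find(psid) != NULL);
        mutex_exit(&cpu_lock);
        return (found);
}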
/*
 * Set the global and local policy flags.
 *
 * The global flags only make sense from the global zone;
 * the local flags depend on the AUDIT_PERZONE policy:
 * if the perzone policy is set, then policy is set separately
 * per zone, else held only in the global zone.
 *
 * The initial value of a local zone's policy flag is determined
 * by the value of the global zone's flags at the time the
 * local zone is created.
 *
 * While auditconfig(1M) allows setting and unsetting policies one bit
 * at a time, the mask passed in from auditconfig() is created by a
 * syscall to getpolicy and then modified based on the auditconfig()
 * cmd line, so the input policy value is used to replace the existing
 * policy.
 */
static int
setpolicy(caddr_t data)
{
	uint32_t policy;
	au_kcontext_t *kctx;

	if (copyin(data, &policy, sizeof (policy)))
		return (EFAULT);

	kctx = GET_KCTX_NGZ;

	if (INGLOBALZONE(curproc)) {
		if (policy & ~(AUDIT_GLOBAL | AUDIT_LOCAL))
			return (EINVAL);
		audit_policy = policy & AUDIT_GLOBAL;
	} else {
		if (!(audit_policy & AUDIT_PERZONE))
			return (EINVAL);
		if (policy & ~AUDIT_LOCAL)	/* global bits are a no-no */
			return (EINVAL);
	}
	kctx->auk_policy = policy & AUDIT_LOCAL;

	/*
	 * auk_current_vp is NULL before auditd starts (or during early
	 * auditd startup) or if auditd is halted; in either case,
	 * notification of a policy change is not needed, since auditd
	 * reads policy as it comes up.  The error return from au_doormsg()
	 * is ignored to avoid a race condition -- for example if auditd
	 * segv's, the audit state may be "auditing" but the door may
	 * be closed.  Returning an error if the door is open makes it
	 * impossible for Greenline to restart auditd.
	 */
	if (kctx->auk_current_vp != NULL)
		(void) au_doormsg(kctx, AU_DBUF_POLICY, &policy);

	/*
	 * Wake up anyone who might have blocked on full audit
	 * partitions.  audit daemons need to set AUDIT_FULL when no
	 * space so we can tell if we should start dropping records.
	 */
	mutex_enter(&(kctx->auk_queue.lock));

	if ((policy & (AUDIT_CNT | AUDIT_SCNT) &&
	    (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater)))
		cv_broadcast(&(kctx->auk_queue.write_cv));

	mutex_exit(&(kctx->auk_queue.lock));

	return (0);
}
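/*
 * Hedged userland sketch of the read-modify-write cycle the comment
 * above describes: fetch the current policy mask, set one bit, and
 * write the whole mask back.  add_policy_bit() is a hypothetical
 * helper, assuming the auditon(2) A_GETPOLICY and A_SETPOLICY
 * commands.
 */
static int
add_policy_bit(uint32_t bit)
{
        uint32_t policy;

        if (auditon(A_GETPOLICY, (caddr_t)&policy, sizeof (policy)) != 0)
                return (-1);
        policy |= bit;  /* the full mask replaces the kernel policy */
        return (auditon(A_SETPOLICY, (caddr_t)&policy, sizeof (policy)));
}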
/* * Disable processor set plugin. */ int pool_pset_disable(void) { processorid_t cpuid; cpu_t *cpu; int error; ASSERT(pool_lock_held()); ASSERT(INGLOBALZONE(curproc)); mutex_enter(&cpu_lock); if (cp_numparts > 1) { /* make sure only default pset is left */ mutex_exit(&cpu_lock); return (EBUSY); } /* * Remove all non-system CPU and processor set properties */ for (cpuid = 0; cpuid < NCPU; cpuid++) { if ((cpu = cpu_get(cpuid)) == NULL) continue; if (cpu->cpu_props != NULL) { (void) nvlist_free(cpu->cpu_props); cpu->cpu_props = NULL; } } /* * We want to switch things such that everything is now visible * to ALL_ZONES: first add the special "ALL_ZONES" token to the * visibility list then remove individual zones. There must * only be the default pset active if pools are being disabled, * so we only need to deal with it. */ error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL); ASSERT(error == 0); pool_pset_visibility_add(PS_NONE, NULL); pool_pset_visibility_remove(PS_NONE, global_zone); /* * pool_pset_enabled() will henceforth return B_FALSE. */ global_zone->zone_psetid = ZONE_PS_INVAL; mutex_exit(&cpu_lock); if (pool_pset_default->pset_props != NULL) { nvlist_free(pool_pset_default->pset_props); pool_pset_default->pset_props = NULL; } return (0); }
static int setkmask(caddr_t data) { au_mask_t mask; au_kcontext_t *kctx; if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc)) return (EINVAL); kctx = GET_KCTX_NGZ; if (copyin(data, &mask, sizeof (au_mask_t))) return (EFAULT); kctx->auk_info.ai_namask = mask; return (0); }
void clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr, struct cred *cred) { struct cku_private *p = htop(h); rdma_registry_t *rp; ASSERT(INGLOBALZONE(curproc)); /* * Find underlying RDMATF plugin */ p->cku_rd_mod = NULL; rw_enter(&rdma_lock, RW_READER); rp = rdma_mod_head; while (rp != NULL) { if (strcmp(rp->r_mod->rdma_api, proto)) rp = rp->r_next; else { p->cku_rd_mod = rp->r_mod; p->cku_rd_handle = handle; break; } } rw_exit(&rdma_lock); /* * Set up the rpc information */ p->cku_cred = cred; p->cku_xid = 0; if (p->cku_addr.maxlen < raddr->len) { if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); p->cku_addr.maxlen = raddr->maxlen; } p->cku_srcaddr.len = 0; p->cku_addr.len = raddr->len; bcopy(raddr->buf, p->cku_addr.buf, raddr->len); h->cl_ops = &rdma_clnt_ops; }
/*
 * Put new CPU property.
 * Handle special case of "cpu.status".
 */
int
pool_cpu_propput(processorid_t cpuid, nvpair_t *pair)
{
	int ret = 0;
	cpu_t *cpu;

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	if (nvpair_type(pair) == DATA_TYPE_STRING &&
	    strcmp(nvpair_name(pair), "cpu.status") == 0) {
		char *val;
		int status;
		int old_status;
		(void) nvpair_value_string(pair, &val);
		if (strcmp(val, PS_OFFLINE) == 0)
			status = P_OFFLINE;
		else if (strcmp(val, PS_ONLINE) == 0)
			status = P_ONLINE;
		else if (strcmp(val, PS_NOINTR) == 0)
			status = P_NOINTR;
		else if (strcmp(val, PS_FAULTED) == 0)
			status = P_FAULTED;
		else if (strcmp(val, PS_SPARE) == 0)
			status = P_SPARE;
		else
			return (EINVAL);
		ret = p_online_internal(cpuid, status, &old_status);
	} else {
		mutex_enter(&cpu_lock);
		if ((cpu = cpu_get(cpuid)) == NULL) {
			/* don't dereference a nonexistent cpu */
			mutex_exit(&cpu_lock);
			return (EINVAL);
		}
		if (cpu->cpu_props == NULL) {
			(void) nvlist_alloc(&cpu->cpu_props,
			    NV_UNIQUE_NAME, KM_SLEEP);
			(void) nvlist_add_string(cpu->cpu_props,
			    "cpu.comment", "");
		}
		ret = pool_propput_common(cpu->cpu_props, pair,
		    pool_cpu_props);
		if (ret == 0)
			pool_cpu_mod = gethrtime();
		mutex_exit(&cpu_lock);
	}

	return (ret);
}
static int setclass(caddr_t data) { au_evclass_map_t event; au_kcontext_t *kctx; if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc)) return (EINVAL); kctx = GET_KCTX_NGZ; if (copyin(data, &event, sizeof (au_evclass_map_t))) return (EFAULT); if (event.ec_number > MAX_KEVENTS) return (EINVAL); kctx->auk_ets[event.ec_number] = event.ec_class; return (0); }
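/*
 * Hedged userland sketch: setclass() is the kernel half of
 * auditon(A_SETCLASS); one call rebinds a single event number to a
 * class mask.  map_event() is a hypothetical wrapper, assuming the
 * au_evclass_map_t layout used above.
 */
static int
map_event(au_event_t ev, au_class_t cl)
{
        au_evclass_map_t map;

        map.ec_number = ev;
        map.ec_class = cl;
        return (auditon(A_SETCLASS, (caddr_t)&map, sizeof (map)));
}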
static int setstat(caddr_t data) { au_kcontext_t *kctx = GET_KCTX_PZ; au_stat_t au_stat; if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc)) return (EINVAL); if (copyin(data, &au_stat, sizeof (au_stat_t))) return (EFAULT); if (au_stat.as_generated == CLEAR_VAL) kctx->auk_statistics.as_generated = 0; if (au_stat.as_nonattrib == CLEAR_VAL) kctx->auk_statistics.as_nonattrib = 0; if (au_stat.as_kernel == CLEAR_VAL) kctx->auk_statistics.as_kernel = 0; if (au_stat.as_audit == CLEAR_VAL) kctx->auk_statistics.as_audit = 0; if (au_stat.as_auditctl == CLEAR_VAL) kctx->auk_statistics.as_auditctl = 0; if (au_stat.as_enqueue == CLEAR_VAL) kctx->auk_statistics.as_enqueue = 0; if (au_stat.as_written == CLEAR_VAL) kctx->auk_statistics.as_written = 0; if (au_stat.as_wblocked == CLEAR_VAL) kctx->auk_statistics.as_wblocked = 0; if (au_stat.as_rblocked == CLEAR_VAL) kctx->auk_statistics.as_rblocked = 0; if (au_stat.as_dropped == CLEAR_VAL) kctx->auk_statistics.as_dropped = 0; if (au_stat.as_totalsize == CLEAR_VAL) kctx->auk_statistics.as_totalsize = 0; membar_producer(); return (0); }
/* * Callback function used to apply a cpu configuration event to a zone. */ static int pool_pset_setup_cb(zone_t *zone, void *arg) { struct setup_arg *sa = arg; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(INGLOBALZONE(curproc)); ASSERT(zone != NULL); if (zone == global_zone) return (0); if (zone_pset_get(zone) != sa->psetid) return (0); /* ignore */ switch (sa->what) { case CPU_CONFIG: cpu_visibility_configure(sa->cpu, zone); break; case CPU_UNCONFIG: cpu_visibility_unconfigure(sa->cpu, zone); break; case CPU_ON: cpu_visibility_online(sa->cpu, zone); break; case CPU_OFF: cpu_visibility_offline(sa->cpu, zone); break; case CPU_CPUPART_IN: cpu_visibility_add(sa->cpu, zone); break; case CPU_CPUPART_OUT: cpu_visibility_remove(sa->cpu, zone); break; default: cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what); } return (0); }
/* * Remove existing CPU property. */ int pool_cpu_proprm(processorid_t cpuid, char *name) { int ret; cpu_t *cpu; ASSERT(pool_lock_held()); ASSERT(INGLOBALZONE(curproc)); mutex_enter(&cpu_lock); if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) { ret = EINVAL; } else { if (cpu->cpu_props == NULL) ret = EINVAL; else ret = pool_proprm_common(cpu->cpu_props, name, pool_cpu_props); } if (ret == 0) pool_cpu_mod = gethrtime(); mutex_exit(&cpu_lock); return (ret); }
/*ARGSUSED*/ static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) { char *osname; pathname_t spn; int error = 0; uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE; int canwrite; if (mvp->v_type != VDIR) return (ENOTDIR); mutex_enter(&mvp->v_lock); if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 && (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { mutex_exit(&mvp->v_lock); return (EBUSY); } mutex_exit(&mvp->v_lock); /* * ZFS does not support passing unparsed data in via MS_DATA. * Users should use the MS_OPTIONSTR interface; this means * that all option parsing is already done and the options struct * can be interrogated. */ if ((uap->flags & MS_DATA) && uap->datalen > 0) return (EINVAL); /* * When doing a remount, we simply refresh our temporary properties * according to those options set in the current VFS options. */ if (uap->flags & MS_REMOUNT) { return (zfs_refresh_properties(vfsp)); } /* * Get the objset name (the "special" mount argument). */ if (error = pn_get(uap->spec, fromspace, &spn)) return (error); osname = spn.pn_path; if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) goto out; /* * Refuse to mount a filesystem if we are in a local zone and the * dataset is not visible. */ if (!INGLOBALZONE(curproc) && (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { error = EPERM; goto out; } error = zfs_domount(vfsp, osname, cr); out: pn_free(&spn); return (error); }
/* * Return list of active processor sets, up to a maximum indicated by * numpsets. The total number of processor sets is stored in the * location pointed to by numpsets. */ static int pset_list(psetid_t *psetlist, uint_t *numpsets) { uint_t user_npsets = 0; uint_t real_npsets; psetid_t *psets = NULL; int error = 0; if (numpsets != NULL) { if (copyin(numpsets, &user_npsets, sizeof (uint_t)) != 0) return (set_errno(EFAULT)); } /* * Get the list of all processor sets. First we need to find * out how many there are, so we can allocate a large enough * buffer. */ mutex_enter(&cpu_lock); if (!INGLOBALZONE(curproc) && pool_pset_enabled()) { psetid_t psetid = zone_pset_get(curproc->p_zone); if (psetid == PS_NONE) { real_npsets = 0; } else { real_npsets = 1; psets = kmem_alloc(real_npsets * sizeof (psetid_t), KM_SLEEP); psets[0] = psetid; } } else { real_npsets = cpupart_list(0, NULL, CP_ALL); if (real_npsets) { psets = kmem_alloc(real_npsets * sizeof (psetid_t), KM_SLEEP); (void) cpupart_list(psets, real_npsets, CP_ALL); } } mutex_exit(&cpu_lock); if (user_npsets > real_npsets) user_npsets = real_npsets; if (numpsets != NULL) { if (copyout(&real_npsets, numpsets, sizeof (uint_t)) != 0) error = EFAULT; else if (psetlist != NULL && user_npsets != 0) { if (copyout(psets, psetlist, user_npsets * sizeof (psetid_t)) != 0) error = EFAULT; } } if (real_npsets) kmem_free(psets, real_npsets * sizeof (psetid_t)); if (error) return (set_errno(error)); else return (0); }
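/*
 * Hedged userland sketch of the two-call pattern the kernel code
 * above supports: call pset_list(3C) once with a NULL buffer to learn
 * the count, then again with a buffer of that size.  get_all_psets()
 * is a hypothetical helper assuming <sys/pset.h> and <stdlib.h>; note
 * the count can change between the two calls.
 */
static psetid_t *
get_all_psets(uint_t *countp)
{
        psetid_t *ids;

        *countp = 0;
        if (pset_list(NULL, countp) != 0 || *countp == 0)
                return (NULL);
        if ((ids = malloc(*countp * sizeof (psetid_t))) == NULL)
                return (NULL);
        if (pset_list(ids, countp) != 0) {
                free(ids);
                return (NULL);
        }
        return (ids);
}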
static int pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset) { kthread_t *tp; proc_t *pp; task_t *tk; kproject_t *kpj; contract_t *ct; zone_t *zptr; psetid_t oldpset; int error = 0; void *projbuf, *zonebuf; pool_lock(); if ((pset != PS_QUERY) && (pset != PS_SOFT) && (pset != PS_HARD) && (pset != PS_QUERY_TYPE)) { /* * Check if the set actually exists before checking * permissions. This is the historical error * precedence. Note that if pset was PS_MYID, the * cpupart_get_cpus call will change it to the * processor set id of the caller (or PS_NONE if the * caller is not bound to a processor set). */ if (pool_state == POOL_ENABLED) { pool_unlock(); return (set_errno(ENOTSUP)); } if (cpupart_get_cpus(&pset, NULL, NULL) != 0) { pool_unlock(); return (set_errno(EINVAL)); } else if (pset != PS_NONE && secpolicy_pset(CRED()) != 0) { pool_unlock(); return (set_errno(EPERM)); } } /* * Pre-allocate enough buffers for FSS for all active projects * and for all active zones on the system. Unused buffers will * be freed later by fss_freebuf(). */ mutex_enter(&cpu_lock); projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ); zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE); switch (idtype) { case P_LWPID: pp = curproc; mutex_enter(&pidlock); mutex_enter(&pp->p_lock); if (id == P_MYID) { tp = curthread; } else { if ((tp = idtot(pp, id)) == NULL) { mutex_exit(&pp->p_lock); mutex_exit(&pidlock); error = ESRCH; break; } } error = pset_bind_thread(tp, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pp->p_lock); mutex_exit(&pidlock); break; case P_PID: mutex_enter(&pidlock); if (id == P_MYID) { pp = curproc; } else if ((pp = prfind(id)) == NULL) { mutex_exit(&pidlock); error = ESRCH; break; } error = pset_bind_process(pp, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pidlock); break; case P_TASKID: mutex_enter(&pidlock); if (id == P_MYID) id = curproc->p_task->tk_tkid; if ((tk = task_hold_by_id(id)) == NULL) { mutex_exit(&pidlock); error = ESRCH; break; } error = pset_bind_task(tk, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pidlock); task_rele(tk); break; case P_PROJID: pp = curproc; if (id == P_MYID) id = curprojid(); if ((kpj = project_hold_by_id(id, pp->p_zone, PROJECT_HOLD_FIND)) == NULL) { error = ESRCH; break; } mutex_enter(&pidlock); error = pset_bind_project(kpj, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pidlock); project_rele(kpj); break; case P_ZONEID: if (id == P_MYID) id = getzoneid(); if ((zptr = zone_find_by_id(id)) == NULL) { error = ESRCH; break; } mutex_enter(&pidlock); error = pset_bind_zone(zptr, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pidlock); zone_rele(zptr); break; case P_CTID: if (id == P_MYID) id = PRCTID(curproc); if ((ct = contract_type_ptr(process_type, id, curproc->p_zone->zone_uniqid)) == NULL) { error = ESRCH; break; } mutex_enter(&pidlock); error = pset_bind_contract(ct->ct_data, pset, &oldpset, projbuf, zonebuf); mutex_exit(&pidlock); contract_rele(ct); break; case P_PSETID: if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) { error = EINVAL; break; } error = pset_unbind(id, projbuf, zonebuf, idtype); break; case P_ALL: if (id == P_MYID || pset != PS_NONE || !INGLOBALZONE(curproc)) { error = EINVAL; break; } error = pset_unbind(PS_NONE, projbuf, zonebuf, idtype); break; default: error = EINVAL; break; } fss_freebuf(projbuf, FSS_ALLOC_PROJ); fss_freebuf(zonebuf, FSS_ALLOC_ZONE); mutex_exit(&cpu_lock); pool_unlock(); if (error != 0) return (set_errno(error)); if (opset != NULL) { if (copyout(&oldpset, opset, sizeof (psetid_t)) != 
0) return (set_errno(EFAULT)); } return (0); }
/*
 * Check if user has requested permission.
 */
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	void *cookie;
	int error;
	char checkflag;
	objset_t *mos;
	avl_tree_t permsets;
	perm_set_t *setnode;

	error = dsl_dataset_hold(dsname, FTAG, &ds);
	if (error)
		return (error);

	dp = ds->ds_dir->dd_pool;
	mos = dp->dp_meta_objset;

	if (dsl_delegation_on(mos) == B_FALSE) {
		dsl_dataset_rele(ds, FTAG);
		return (ECANCELED);
	}

	if (spa_version(dmu_objset_spa(dp->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS) {
		dsl_dataset_rele(ds, FTAG);
		return (EPERM);
	}

	if (dsl_dataset_is_snapshot(ds)) {
		/*
		 * Snapshots are treated as descendants only;
		 * local permissions do not apply.
		 */
		checkflag = ZFS_DELEG_DESCENDENT;
	} else {
		checkflag = ZFS_DELEG_LOCAL;
	}

	avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
	    offsetof(perm_set_t, p_node));

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
	    checkflag = ZFS_DELEG_DESCENDENT) {
		uint64_t zapobj;
		boolean_t expanded;

		/*
		 * If not in global zone then make sure
		 * the zoned property is set
		 */
		if (!INGLOBALZONE(curproc)) {
			uint64_t zoned;

			if (dsl_prop_get_dd(dd,
			    zfs_prop_to_name(ZFS_PROP_ZONED),
			    8, 1, &zoned, NULL) != 0)
				break;
			if (!zoned)
				break;
		}
		zapobj = dd->dd_phys->dd_deleg_zapobj;

		if (zapobj == 0)
			continue;

		dsl_load_user_sets(mos, zapobj, &permsets, checkflag, cr);
again:
		expanded = B_FALSE;
		for (setnode = avl_first(&permsets); setnode;
		    setnode = AVL_NEXT(&permsets, setnode)) {
			if (setnode->p_matched == B_TRUE)
				continue;

			/* See if this set directly grants this permission */
			error = dsl_check_access(mos, zapobj,
			    ZFS_DELEG_NAMED_SET, 0, setnode->p_setname, perm);
			if (error == 0)
				goto success;
			if (error == EPERM)
				setnode->p_matched = B_TRUE;

			/* See if this set includes other sets */
			error = dsl_load_sets(mos, zapobj,
			    ZFS_DELEG_NAMED_SET_SETS, 0,
			    setnode->p_setname, &permsets);
			if (error == 0)
				setnode->p_matched = expanded = B_TRUE;
		}
		/*
		 * If we expanded any sets, that will define more sets,
		 * which we need to check.
		 */
		if (expanded)
			goto again;

		error = dsl_check_user_access(mos, zapobj, perm,
		    checkflag, cr);
		if (error == 0)
			goto success;
	}
	error = EPERM;
success:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dataset_rele(ds, FTAG);

	cookie = NULL;
	while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
		kmem_free(setnode, sizeof (perm_set_t));

	return (error);
}
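/*
 * Hedged caller sketch (illustrative; can_do_perm() is a hypothetical
 * name): permission helpers typically try delegated access first and
 * fall back to a privilege-based check such as secpolicy_zfs().
 */
static int
can_do_perm(const char *dsname, const char *perm, cred_t *cr)
{
        if (dsl_deleg_access(dsname, perm, cr) == 0)
                return (0);
        return (secpolicy_zfs(cr));    /* fall back to privileges */
}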
static int setsmask(caddr_t data) { STRUCT_DECL(auditinfo, user_info); struct proc *p; const auditinfo_addr_t *ainfo; model_t model; /* setsmask not applicable in non-global zones without perzone policy */ if (!(audit_policy & AUDIT_PERZONE) && (!INGLOBALZONE(curproc))) return (EINVAL); model = get_udatamodel(); STRUCT_INIT(user_info, model); if (copyin(data, STRUCT_BUF(user_info), STRUCT_SIZE(user_info))) return (EFAULT); mutex_enter(&pidlock); /* lock the process queue against updates */ for (p = practive; p != NULL; p = p->p_next) { cred_t *cr; /* if in non-global zone only modify processes in same zone */ if (!HASZONEACCESS(curproc, p->p_zone->zone_id)) continue; mutex_enter(&p->p_lock); /* so process doesn't go away */ /* skip system processes and ones being created or going away */ if (p->p_stat == SIDL || p->p_stat == SZOMB || (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) { mutex_exit(&p->p_lock); continue; } mutex_enter(&p->p_crlock); crhold(cr = p->p_cred); mutex_exit(&p->p_crlock); ainfo = crgetauinfo(cr); if (ainfo == NULL) { mutex_exit(&p->p_lock); crfree(cr); continue; } if (ainfo->ai_asid == STRUCT_FGET(user_info, ai_asid)) { au_mask_t mask; int err; /* * Here's a process which matches the specified asid. * If its mask doesn't already match the new mask, * save the new mask in the pad, to be picked up * next syscall. */ mask = STRUCT_FGET(user_info, ai_mask); err = bcmp(&mask, &ainfo->ai_mask, sizeof (au_mask_t)); crfree(cr); if (err != 0) { struct p_audit_data *pad = P2A(p); ASSERT(pad != NULL); mutex_enter(&(pad->pad_lock)); pad->pad_flags |= PAD_SETMASK; pad->pad_newmask = mask; mutex_exit(&(pad->pad_lock)); /* * No need to call set_proc_pre_sys(), since * t_pre_sys is ALWAYS on when audit is * enabled...due to syscall auditing. */ } } else { crfree(cr); } mutex_exit(&p->p_lock); } mutex_exit(&pidlock); return (0); }
static int
setqctrl(caddr_t data)
{
	au_kcontext_t *kctx;
	struct au_qctrl qctrl_tmp;
	STRUCT_DECL(au_qctrl, qctrl);
	STRUCT_INIT(qctrl, get_udatamodel());

	if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc))
		return (EINVAL);
	kctx = GET_KCTX_NGZ;

	if (copyin(data, STRUCT_BUF(qctrl), STRUCT_SIZE(qctrl)))
		return (EFAULT);
	qctrl_tmp.aq_hiwater = (size_t)STRUCT_FGET(qctrl, aq_hiwater);
	qctrl_tmp.aq_lowater = (size_t)STRUCT_FGET(qctrl, aq_lowater);
	qctrl_tmp.aq_bufsz = (size_t)STRUCT_FGET(qctrl, aq_bufsz);
	qctrl_tmp.aq_delay = (clock_t)STRUCT_FGET(qctrl, aq_delay);

	/* enforce sane values */
	if (qctrl_tmp.aq_hiwater <= qctrl_tmp.aq_lowater)
		return (EINVAL);
	if (qctrl_tmp.aq_hiwater < AQ_LOWATER)
		return (EINVAL);
	if (qctrl_tmp.aq_hiwater > AQ_MAXHIGH)
		return (EINVAL);
	if (qctrl_tmp.aq_bufsz < AQ_BUFSZ)
		return (EINVAL);
	if (qctrl_tmp.aq_bufsz > AQ_MAXBUFSZ)
		return (EINVAL);
	if (qctrl_tmp.aq_delay == 0)
		return (EINVAL);
	if (qctrl_tmp.aq_delay > AQ_MAXDELAY)
		return (EINVAL);

	/* update everything at once so things are consistent */
	mutex_enter(&(kctx->auk_queue.lock));
	kctx->auk_queue.hiwater = qctrl_tmp.aq_hiwater;
	kctx->auk_queue.lowater = qctrl_tmp.aq_lowater;
	kctx->auk_queue.bufsz = qctrl_tmp.aq_bufsz;
	kctx->auk_queue.delay = qctrl_tmp.aq_delay;

	if (kctx->auk_queue.rd_block &&
	    kctx->auk_queue.cnt > kctx->auk_queue.lowater)
		cv_broadcast(&(kctx->auk_queue.read_cv));

	if (kctx->auk_queue.wt_block &&
	    kctx->auk_queue.cnt < kctx->auk_queue.hiwater)
		cv_broadcast(&(kctx->auk_queue.write_cv));

	mutex_exit(&(kctx->auk_queue.lock));

	return (0);
}
/* * the host address for AUDIT_PERZONE == 0 is that of the global * zone and for local zones it is of the current zone. */ static int setkaudit(caddr_t info_p, int len) { STRUCT_DECL(auditinfo_addr, info); model_t model; au_kcontext_t *kctx; if (!(audit_policy & AUDIT_PERZONE) && !INGLOBALZONE(curproc)) return (EINVAL); kctx = GET_KCTX_NGZ; model = get_udatamodel(); STRUCT_INIT(info, model); if (len < STRUCT_SIZE(info)) return (EOVERFLOW); if (copyin(info_p, STRUCT_BUF(info), STRUCT_SIZE(info))) return (EFAULT); if ((STRUCT_FGET(info, ai_termid.at_type) != AU_IPv4) && (STRUCT_FGET(info, ai_termid.at_type) != AU_IPv6)) return (EINVAL); /* Set audit mask, termid and session id as specified */ kctx->auk_info.ai_auid = STRUCT_FGET(info, ai_auid); kctx->auk_info.ai_namask = STRUCT_FGET(info, ai_mask); #ifdef _LP64 /* only convert to 64 bit if coming from a 32 bit binary */ if (model == DATAMODEL_ILP32) kctx->auk_info.ai_termid.at_port = DEVEXPL(STRUCT_FGET(info, ai_termid.at_port)); else kctx->auk_info.ai_termid.at_port = STRUCT_FGET(info, ai_termid.at_port); #else kctx->auk_info.ai_termid.at_port = STRUCT_FGET(info, ai_termid.at_port); #endif kctx->auk_info.ai_termid.at_type = STRUCT_FGET(info, ai_termid.at_type); bzero(&kctx->auk_info.ai_termid.at_addr[0], sizeof (kctx->auk_info.ai_termid.at_addr)); kctx->auk_info.ai_termid.at_addr[0] = STRUCT_FGET(info, ai_termid.at_addr[0]); kctx->auk_info.ai_termid.at_addr[1] = STRUCT_FGET(info, ai_termid.at_addr[1]); kctx->auk_info.ai_termid.at_addr[2] = STRUCT_FGET(info, ai_termid.at_addr[2]); kctx->auk_info.ai_termid.at_addr[3] = STRUCT_FGET(info, ai_termid.at_addr[3]); kctx->auk_info.ai_asid = STRUCT_FGET(info, ai_asid); if (kctx->auk_info.ai_termid.at_type == AU_IPv6 && IN6_IS_ADDR_V4MAPPED( ((in6_addr_t *)kctx->auk_info.ai_termid.at_addr))) { kctx->auk_info.ai_termid.at_type = AU_IPv4; kctx->auk_info.ai_termid.at_addr[0] = kctx->auk_info.ai_termid.at_addr[3]; kctx->auk_info.ai_termid.at_addr[1] = 0; kctx->auk_info.ai_termid.at_addr[2] = 0; kctx->auk_info.ai_termid.at_addr[3] = 0; } if (kctx->auk_info.ai_termid.at_type == AU_IPv6) kctx->auk_hostaddr_valid = IN6_IS_ADDR_UNSPECIFIED( (in6_addr_t *)kctx->auk_info.ai_termid.at_addr) ? 0 : 1; else kctx->auk_hostaddr_valid = (kctx->auk_info.ai_termid.at_addr[0] == htonl(INADDR_ANY)) ? 0 : 1; return (0); }
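/*
 * Hedged userland sketch: setkaudit() is reached via
 * auditon(A_SETKAUDIT).  set_host_termid_v4() is a hypothetical
 * wrapper; note the kernel above collapses a V4-mapped IPv6 address
 * (::ffff:a.b.c.d) into this same AU_IPv4 form, so records carry the
 * native IPv4 representation either way.
 */
static int
set_host_termid_v4(in_addr_t addr)      /* network byte order */
{
        auditinfo_addr_t ai;

        bzero(&ai, sizeof (ai));
        ai.ai_termid.at_type = AU_IPv4;
        ai.ai_termid.at_addr[0] = addr;
        return (auditon(A_SETKAUDIT, (caddr_t)&ai, sizeof (ai)));
}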
long sysconfig(int which) { switch (which) { /* * if it is not handled in mach_sysconfig either * it must be EINVAL. */ default: return (mach_sysconfig(which)); /* `uname -i`/os */ case _CONFIG_CLK_TCK: return ((long)hz); /* clock frequency per second */ case _CONFIG_PROF_TCK: return ((long)hz); /* profiling clock freq per sec */ case _CONFIG_NGROUPS: /* * Maximum number of supplementary groups. */ return (ngroups_max); case _CONFIG_OPEN_FILES: /* * Maximum number of open files (soft limit). */ { rlim64_t fd_ctl; mutex_enter(&curproc->p_lock); fd_ctl = rctl_enforced_value( rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc); mutex_exit(&curproc->p_lock); return ((ulong_t)fd_ctl); } case _CONFIG_CHILD_MAX: /* * Maximum number of processes. */ return (v.v_maxup); case _CONFIG_POSIX_VER: return (_POSIX_VERSION); /* current POSIX version */ case _CONFIG_PAGESIZE: return (PAGESIZE); case _CONFIG_XOPEN_VER: return (_XOPEN_VERSION); /* current XOPEN version */ case _CONFIG_NPROC_CONF: return (zone_ncpus_get(curproc->p_zone)); case _CONFIG_NPROC_ONLN: return (zone_ncpus_online_get(curproc->p_zone)); case _CONFIG_NPROC_MAX: return (max_ncpus); case _CONFIG_STACK_PROT: return (curproc->p_stkprot & ~PROT_USER); case _CONFIG_AIO_LISTIO_MAX: return (_AIO_LISTIO_MAX); case _CONFIG_AIO_MAX: return (_AIO_MAX); case _CONFIG_AIO_PRIO_DELTA_MAX: return (0); case _CONFIG_DELAYTIMER_MAX: return (INT_MAX); case _CONFIG_MQ_OPEN_MAX: return (_MQ_OPEN_MAX); case _CONFIG_MQ_PRIO_MAX: return (_MQ_PRIO_MAX); case _CONFIG_RTSIG_MAX: return (_SIGRTMAX - _SIGRTMIN + 1); case _CONFIG_SEM_NSEMS_MAX: return (_SEM_NSEMS_MAX); case _CONFIG_SEM_VALUE_MAX: return (_SEM_VALUE_MAX); case _CONFIG_SIGQUEUE_MAX: /* * Maximum number of outstanding queued signals. */ { rlim64_t sigqsz_max; mutex_enter(&curproc->p_lock); sigqsz_max = rctl_enforced_value(rc_process_sigqueue, curproc->p_rctls, curproc); mutex_exit(&curproc->p_lock); return ((uint_t)sigqsz_max); } case _CONFIG_SIGRT_MIN: return (_SIGRTMIN); case _CONFIG_SIGRT_MAX: return (_SIGRTMAX); case _CONFIG_TIMER_MAX: return (timer_max); case _CONFIG_PHYS_PAGES: /* * If the non-global zone has a phys. memory cap, use that. * We always report the system-wide value for the global zone, * even though rcapd can be used on the global zone too. */ if (!INGLOBALZONE(curproc) && curproc->p_zone->zone_phys_mcap != 0) return (MIN(btop(curproc->p_zone->zone_phys_mcap), physinstalled)); return (physinstalled); case _CONFIG_AVPHYS_PAGES: /* * If the non-global zone has a phys. memory cap, use * the phys. memory cap - zone's current rss. We always * report the system-wide value for the global zone, even * though rcapd can be used on the global zone too. */ if (!INGLOBALZONE(curproc) && curproc->p_zone->zone_phys_mcap != 0) { pgcnt_t cap, rss, free; vmusage_t in_use; size_t cnt = 1; cap = btop(curproc->p_zone->zone_phys_mcap); if (cap > physinstalled) return (freemem); if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt, FKIOCTL) != 0) in_use.vmu_rss_all = 0; rss = btop(in_use.vmu_rss_all); /* * Because rcapd implements a soft cap, it is possible * for rss to be temporarily over the cap. */ if (cap > rss) free = cap - rss; else free = 0; return (MIN(free, freemem)); } return (freemem); case _CONFIG_MAXPID: return (maxpid); case _CONFIG_CPUID_MAX: return (max_cpuid); case _CONFIG_EPHID_MAX: return (MAXEPHUID); case _CONFIG_SYMLOOP_MAX: return (MAXSYMLINKS); } }
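/*
 * Hedged userland note: libc's sysconf(3C) maps the _SC_* names onto
 * the _CONFIG_* codes handled above, so the answers are zone-aware;
 * in a non-global zone the two calls below report the zone's visible
 * CPUs, not the machine's.  report_cpus() is illustrative, assuming
 * <unistd.h> and <stdio.h>.
 */
static void
report_cpus(void)
{
        long conf = sysconf(_SC_NPROCESSORS_CONF);  /* _CONFIG_NPROC_CONF */
        long onln = sysconf(_SC_NPROCESSORS_ONLN);  /* _CONFIG_NPROC_ONLN */

        (void) printf("%ld configured, %ld online\n", conf, onln);
}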
/* * Called by proc_exit() when a zone's init exits, presumably because * it failed. As long as the given zone is still in the "running" * state, we will re-exec() init, but first we need to reset things * which are usually inherited across exec() but will break init's * assumption that it is being exec()'d from a virgin process. Most * importantly this includes closing all file descriptors (exec only * closes those marked close-on-exec) and resetting signals (exec only * resets handled signals, and we need to clear any signals which * killed init). Anything else that exec(2) says would be inherited, * but would affect the execution of init, needs to be reset. */ static int restart_init(int what, int why) { kthread_t *t = curthread; klwp_t *lwp = ttolwp(t); proc_t *p = ttoproc(t); user_t *up = PTOU(p); vnode_t *oldcd, *oldrd; int i, err; char reason_buf[64]; /* * Let zone admin (and global zone admin if this is for a non-global * zone) know that init has failed and will be restarted. */ zcmn_err(p->p_zone->zone_id, CE_WARN, "init(1M) %s: restarting automatically", exit_reason(reason_buf, sizeof (reason_buf), what, why)); if (!INGLOBALZONE(p)) { cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: " "restarting automatically", p->p_zone->zone_name, p->p_pid, reason_buf); } /* * Remove any fpollinfo_t's for this (last) thread from our file * descriptors so closeall() can ASSERT() that they're all gone. * Then close all open file descriptors in the process. */ pollcleanup(); closeall(P_FINFO(p)); /* * Grab p_lock and begin clearing miscellaneous global process * state that needs to be reset before we exec the new init(1M). */ mutex_enter(&p->p_lock); prbarrier(p); p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE); up->u_cmask = CMASK; sigemptyset(&t->t_hold); sigemptyset(&t->t_sig); sigemptyset(&t->t_extsig); sigemptyset(&p->p_sig); sigemptyset(&p->p_extsig); sigdelq(p, t, 0); sigdelq(p, NULL, 0); if (p->p_killsqp) { siginfofree(p->p_killsqp); p->p_killsqp = NULL; } /* * Reset any signals that are ignored back to the default disposition. * Other u_signal members will be cleared when exec calls sigdefault(). */ for (i = 1; i < NSIG; i++) { if (up->u_signal[i - 1] == SIG_IGN) { up->u_signal[i - 1] = SIG_DFL; sigemptyset(&up->u_sigmask[i - 1]); } } /* * Clear the current signal, any signal info associated with it, and * any signal information from contracts and/or contract templates. */ lwp->lwp_cursig = 0; lwp->lwp_extsig = 0; if (lwp->lwp_curinfo != NULL) { siginfofree(lwp->lwp_curinfo); lwp->lwp_curinfo = NULL; } lwp_ctmpl_clear(lwp); /* * Reset both the process root directory and the current working * directory to the root of the zone just as we do during boot. */ VN_HOLD(p->p_zone->zone_rootvp); oldrd = up->u_rdir; up->u_rdir = p->p_zone->zone_rootvp; VN_HOLD(p->p_zone->zone_rootvp); oldcd = up->u_cdir; up->u_cdir = p->p_zone->zone_rootvp; if (up->u_cwd != NULL) { refstr_rele(up->u_cwd); up->u_cwd = NULL; } mutex_exit(&p->p_lock); if (oldrd != NULL) VN_RELE(oldrd); if (oldcd != NULL) VN_RELE(oldcd); /* Free the controlling tty. (freectty() always assumes curproc.) */ ASSERT(p == curproc); (void) freectty(B_TRUE); /* * Now exec() the new init(1M) on top of the current process. If we * succeed, the caller will treat this like a successful system call. * If we fail, we issue messages and the caller will proceed with exit. 
*/ err = exec_init(p->p_zone->zone_initname, NULL); if (err == 0) return (0); zcmn_err(p->p_zone->zone_id, CE_WARN, "failed to restart init(1M) (err=%d): system reboot required", err); if (!INGLOBALZONE(p)) { cmn_err(CE_WARN, "failed to restart init(1M) for zone %s " "(pid %d, err=%d): zoneadm(1M) boot required", p->p_zone->zone_name, p->p_pid, err); } return (-1); }
/* * Return value: * 1 - exitlwps() failed, call (or continue) lwp_exit() * 0 - restarting init. Return through system call path */ int proc_exit(int why, int what) { kthread_t *t = curthread; klwp_t *lwp = ttolwp(t); proc_t *p = ttoproc(t); zone_t *z = p->p_zone; timeout_id_t tmp_id; int rv; proc_t *q; task_t *tk; vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; sigqueue_t *sqp; lwpdir_t *lwpdir; uint_t lwpdir_sz; tidhash_t *tidhash; uint_t tidhash_sz; ret_tidhash_t *ret_tidhash; refstr_t *cwd; hrtime_t hrutime, hrstime; int evaporate; /* * Stop and discard the process's lwps except for the current one, * unless some other lwp beat us to it. If exitlwps() fails then * return and the calling lwp will call (or continue in) lwp_exit(). */ proc_is_exiting(p); if (exitlwps(0) != 0) return (1); mutex_enter(&p->p_lock); if (p->p_ttime > 0) { /* * Account any remaining ticks charged to this process * on its way out. */ (void) task_cpu_time_incr(p->p_task, p->p_ttime); p->p_ttime = 0; } mutex_exit(&p->p_lock); DTRACE_PROC(lwp__exit); DTRACE_PROC1(exit, int, why); /* * Will perform any brand specific proc exit processing, since this * is always the last lwp, will also perform lwp_exit and free brand * data */ if (PROC_IS_BRANDED(p)) { lwp_detach_brand_hdlrs(lwp); brand_clearbrand(p, B_FALSE); } /* * Don't let init exit unless zone_start_init() failed its exec, or * we are shutting down the zone or the machine. * * Since we are single threaded, we don't need to lock the * following accesses to zone_proc_initpid. */ if (p->p_pid == z->zone_proc_initpid) { if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && z->zone_restart_init == B_TRUE && restart_init(what, why) == 0) return (0); /* * Since we didn't or couldn't restart init, we clear * the zone's init state and proceed with exit * processing. */ z->zone_proc_initpid = -1; } lwp_pcb_exit(); /* * Allocate a sigqueue now, before we grab locks. * It will be given to sigcld(), below. * Special case: If we will be making the process disappear * without a trace because it is either: * * an exiting SSYS process, or * * a posix_spawn() vfork child who requests it, * we don't bother to allocate a useless sigqueue. */ evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) && why == CLD_EXITED && what == _EVAPORATE); if (!evaporate) sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); /* * revoke any doors created by the process. */ if (p->p_door_list) door_exit(); /* * Release schedctl data structures. */ if (p->p_pagep) schedctl_proc_cleanup(); /* * make sure all pending kaio has completed. */ if (p->p_aio) aio_cleanup_exit(); /* * discard the lwpchan cache. */ if (p->p_lcp != NULL) lwpchan_destroy_cache(0); /* * Clean up any DTrace helper actions or probes for the process. */ if (p->p_dtrace_helpers != NULL) { ASSERT(dtrace_helpers_cleanup != NULL); (*dtrace_helpers_cleanup)(); } /* untimeout the realtime timers */ if (p->p_itimer != NULL) timer_exit(); if ((tmp_id = p->p_alarmid) != 0) { p->p_alarmid = 0; (void) untimeout(tmp_id); } /* * Remove any fpollinfo_t's for this (last) thread from our file * descriptors so closeall() can ASSERT() that they're all gone. */ pollcleanup(); if (p->p_rprof_cyclic != CYCLIC_NONE) { mutex_enter(&cpu_lock); cyclic_remove(p->p_rprof_cyclic); mutex_exit(&cpu_lock); } mutex_enter(&p->p_lock); /* * Clean up any DTrace probes associated with this process. 
*/ if (p->p_dtrace_probes) { ASSERT(dtrace_fasttrap_exit_ptr != NULL); dtrace_fasttrap_exit_ptr(p); } while ((tmp_id = p->p_itimerid) != 0) { p->p_itimerid = 0; mutex_exit(&p->p_lock); (void) untimeout(tmp_id); mutex_enter(&p->p_lock); } lwp_cleanup(); /* * We are about to exit; prevent our resource associations from * being changed. */ pool_barrier_enter(); /* * Block the process against /proc now that we have really * acquired p->p_lock (to manipulate p_tlist at least). */ prbarrier(p); sigfillset(&p->p_ignore); sigemptyset(&p->p_siginfo); sigemptyset(&p->p_sig); sigemptyset(&p->p_extsig); sigemptyset(&t->t_sig); sigemptyset(&t->t_extsig); sigemptyset(&p->p_sigmask); sigdelq(p, t, 0); lwp->lwp_cursig = 0; lwp->lwp_extsig = 0; p->p_flag &= ~(SKILLED | SEXTKILLED); if (lwp->lwp_curinfo) { siginfofree(lwp->lwp_curinfo); lwp->lwp_curinfo = NULL; } t->t_proc_flag |= TP_LWPEXIT; ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); prlwpexit(t); /* notify /proc */ lwp_hash_out(p, t->t_tid); prexit(p); p->p_lwpcnt = 0; p->p_tlist = NULL; sigqfree(p); term_mstate(t); p->p_mterm = gethrtime(); exec_vp = p->p_exec; execdir_vp = p->p_execdir; p->p_exec = NULLVP; p->p_execdir = NULLVP; mutex_exit(&p->p_lock); pr_free_watched_pages(p); closeall(P_FINFO(p)); /* Free the controlling tty. (freectty() always assumes curproc.) */ ASSERT(p == curproc); (void) freectty(B_TRUE); #if defined(__sparc) if (p->p_utraps != NULL) utrap_free(p); #endif if (p->p_semacct) /* IPC semaphore exit */ semexit(p); rv = wstat(why, what); acct(rv & 0xff); exacct_commit_proc(p, rv); /* * Release any resources associated with C2 auditing */ if (AU_AUDITING()) { /* * audit exit system call */ audit_exit(why, what); } /* * Free address space. */ relvm(); if (exec_vp) { /* * Close this executable which has been opened when the process * was created by getproc(). */ (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL); VN_RELE(exec_vp); } if (execdir_vp) VN_RELE(execdir_vp); /* * Release held contracts. */ contract_exit(p); /* * Depart our encapsulating process contract. */ if ((p->p_flag & SSYS) == 0) { ASSERT(p->p_ct_process); contract_process_exit(p->p_ct_process, p, rv); } /* * Remove pool association, and block if requested by pool_do_bind. */ mutex_enter(&p->p_lock); ASSERT(p->p_pool->pool_ref > 0); atomic_add_32(&p->p_pool->pool_ref, -1); p->p_pool = pool_default; /* * Now that our address space has been freed and all other threads * in this process have exited, set the PEXITED pool flag. This * tells the pools subsystems to ignore this process if it was * requested to rebind this process to a new pool. */ p->p_poolflag |= PEXITED; pool_barrier_exit(); mutex_exit(&p->p_lock); mutex_enter(&pidlock); /* * Delete this process from the newstate list of its parent. We * will put it in the right place in the sigcld in the end. */ delete_ns(p->p_parent, p); /* * Reassign the orphans to the next of kin. * Don't rearrange init's orphanage. */ if ((q = p->p_orphan) != NULL && p != proc_init) { proc_t *nokp = p->p_nextofkin; for (;;) { q->p_nextofkin = nokp; if (q->p_nextorph == NULL) break; q = q->p_nextorph; } q->p_nextorph = nokp->p_orphan; nokp->p_orphan = p->p_orphan; p->p_orphan = NULL; } /* * Reassign the children to init. * Don't try to assign init's children to init. 
*/ if ((q = p->p_child) != NULL && p != proc_init) { struct proc *np; struct proc *initp = proc_init; boolean_t setzonetop = B_FALSE; if (!INGLOBALZONE(curproc)) setzonetop = B_TRUE; pgdetach(p); do { np = q->p_sibling; /* * Delete it from its current parent new state * list and add it to init new state list */ delete_ns(q->p_parent, q); q->p_ppid = 1; q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID); if (setzonetop) { mutex_enter(&q->p_lock); q->p_flag |= SZONETOP; mutex_exit(&q->p_lock); } q->p_parent = initp; /* * Since q will be the first child, * it will not have a previous sibling. */ q->p_psibling = NULL; if (initp->p_child) { initp->p_child->p_psibling = q; } q->p_sibling = initp->p_child; initp->p_child = q; if (q->p_proc_flag & P_PR_PTRACE) { mutex_enter(&q->p_lock); sigtoproc(q, NULL, SIGKILL); mutex_exit(&q->p_lock); } /* * sigcld() will add the child to the parent's * newstate list. */ if (q->p_stat == SZOMB) sigcld(q, NULL); } while ((q = np) != NULL); p->p_child = NULL; ASSERT(p->p_child_ns == NULL); } TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p); mutex_enter(&p->p_lock); CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */ /* * Have our task accumulate our resource usage data before they * become contaminated by p_cacct etc., and before we renounce * membership of the task. * * We do this regardless of whether or not task accounting is active. * This is to avoid having nonsense data reported for this task if * task accounting is subsequently enabled. The overhead is minimal; * by this point, this process has accounted for the usage of all its * LWPs. We nonetheless do the work here, and under the protection of * pidlock, so that the movement of the process's usage to the task * happens at the same time as the removal of the process from the * task, from the point of view of exacct_snapshot_task_usage(). */ exacct_update_task_mstate(p); hrutime = mstate_aggr_state(p, LMS_USER); hrstime = mstate_aggr_state(p, LMS_SYSTEM); p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime; p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime; p->p_acct[LMS_USER] += p->p_cacct[LMS_USER]; p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM]; p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP]; p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT]; p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT]; p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT]; p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK]; p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP]; p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU]; p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED]; p->p_ru.minflt += p->p_cru.minflt; p->p_ru.majflt += p->p_cru.majflt; p->p_ru.nswap += p->p_cru.nswap; p->p_ru.inblock += p->p_cru.inblock; p->p_ru.oublock += p->p_cru.oublock; p->p_ru.msgsnd += p->p_cru.msgsnd; p->p_ru.msgrcv += p->p_cru.msgrcv; p->p_ru.nsignals += p->p_cru.nsignals; p->p_ru.nvcsw += p->p_cru.nvcsw; p->p_ru.nivcsw += p->p_cru.nivcsw; p->p_ru.sysc += p->p_cru.sysc; p->p_ru.ioch += p->p_cru.ioch; p->p_stat = SZOMB; p->p_proc_flag &= ~P_PR_PTRACE; p->p_wdata = what; p->p_wcode = (char)why; cdir = PTOU(p)->u_cdir; rdir = PTOU(p)->u_rdir; cwd = PTOU(p)->u_cwd; ASSERT(cdir != NULL || p->p_parent == &p0); /* * Release resource controls, as they are no longer enforceable. */ rctl_set_free(p->p_rctls); /* * Decrement tk_nlwps counter for our task.max-lwps resource control. * An extended accounting record, if that facility is active, is * scheduled to be written.
We cannot give up task and project * membership at this point because that would allow zombies to escape * from the max-processes resource controls. Zombies stay in their * current task and project until the process table slot is released * in freeproc(). */ tk = p->p_task; mutex_enter(&p->p_zone->zone_nlwps_lock); tk->tk_nlwps--; tk->tk_proj->kpj_nlwps--; p->p_zone->zone_nlwps--; mutex_exit(&p->p_zone->zone_nlwps_lock); /* * Clear the lwp directory and the lwpid hash table * now that /proc can't bother us any more. * We free the memory below, after dropping p->p_lock. */ lwpdir = p->p_lwpdir; lwpdir_sz = p->p_lwpdir_sz; tidhash = p->p_tidhash; tidhash_sz = p->p_tidhash_sz; ret_tidhash = p->p_ret_tidhash; p->p_lwpdir = NULL; p->p_lwpfree = NULL; p->p_lwpdir_sz = 0; p->p_tidhash = NULL; p->p_tidhash_sz = 0; p->p_ret_tidhash = NULL; /* * If the process has context ops installed, call the exit routine * on behalf of this last remaining thread. Normally exitpctx() is * called during thread_exit() or lwp_exit(), but because this is the * last thread in the process, we must call it here. By the time * thread_exit() is called (below), the association with the relevant * process has been lost. * * We also free the context here. */ if (p->p_pctx) { kpreempt_disable(); exitpctx(p); kpreempt_enable(); freepctx(p, 0); } /* * curthread's proc pointer is changed to point to the 'sched' * process for the corresponding zone, except in the case when * the exiting process is in fact a zsched instance, in which * case the proc pointer is set to p0. We do so, so that the * process still points at the right zone when we call the VN_RELE() * below. * * This is because curthread's original proc pointer can be freed as * soon as the child sends a SIGCLD to its parent. We use zsched so * that for user processes, even in the final moments of death, the * process is still associated with its zone. */ if (p != t->t_procp->p_zone->zone_zsched) t->t_procp = t->t_procp->p_zone->zone_zsched; else t->t_procp = &p0; mutex_exit(&p->p_lock); if (!evaporate) { p->p_pidflag &= ~CLDPEND; sigcld(p, sqp); } else { /* * Do what sigcld() would do if the disposition * of the SIGCHLD signal were set to be ignored. */ cv_broadcast(&p->p_srwchan_cv); freeproc(p); } mutex_exit(&pidlock); /* * We don't release u_cdir and u_rdir until SZOMB is set. * This protects us against dofusers(). */ if (cdir) VN_RELE(cdir); if (rdir) VN_RELE(rdir); if (cwd) refstr_rele(cwd); /* * task_rele() may ultimately cause the zone to go away (or * may cause the last user process in a zone to go away, which * signals zsched to go away). So prior to this call, we must * no longer point at zsched. */ t->t_procp = &p0; kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t)); kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t)); while (ret_tidhash != NULL) { ret_tidhash_t *next = ret_tidhash->rth_next; kmem_free(ret_tidhash->rth_tidhash, ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t)); kmem_free(ret_tidhash, sizeof (*ret_tidhash)); ret_tidhash = next; } thread_exit(); /* NOTREACHED */ }
int clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family, rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl) { CLIENT *h; struct cku_private *p; struct rpc_msg call_msg; rdma_registry_t *rp; ASSERT(INGLOBALZONE(curproc)); if (cl == NULL) return (EINVAL); *cl = NULL; p = kmem_zalloc(sizeof (*p), KM_SLEEP); /* * Find underlying RDMATF plugin */ rw_enter(&rdma_lock, RW_READER); rp = rdma_mod_head; while (rp != NULL) { if (strcmp(rp->r_mod->rdma_api, proto)) rp = rp->r_next; else { p->cku_rd_mod = rp->r_mod; p->cku_rd_handle = handle; break; } } rw_exit(&rdma_lock); if (p->cku_rd_mod == NULL) { /* * Should not happen. * No matching RDMATF plugin. */ kmem_free(p, sizeof (struct cku_private)); return (EINVAL); } h = ptoh(p); h->cl_ops = &rdma_clnt_ops; h->cl_private = (caddr_t)p; h->cl_auth = authkern_create(); /* call message, just used to pre-serialize below */ call_msg.rm_xid = 0; call_msg.rm_direction = CALL; call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; call_msg.rm_call.cb_prog = pgm; call_msg.rm_call.cb_vers = vers; xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE); /* pre-serialize call message header */ if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) { XDR_DESTROY(&p->cku_outxdr); auth_destroy(h->cl_auth); kmem_free(p, sizeof (struct cku_private)); return (EINVAL); } /* * Set up the rpc information */ p->cku_cred = cred; p->cku_srcaddr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); p->cku_srcaddr.maxlen = raddr->maxlen; p->cku_srcaddr.len = 0; p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); p->cku_addr.maxlen = raddr->maxlen; p->cku_addr.len = raddr->len; bcopy(raddr->buf, p->cku_addr.buf, raddr->len); p->cku_addrfmly = family; *cl = h; return (0); }
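/*
 * Hedged usage sketch (illustrative only): a created RDMA client is
 * driven through the generic CLIENT ops and torn down with
 * CLNT_DESTROY(), which dispatches into rdma_clnt_ops.
 * rdma_null_ping() and the 30-second timeout are hypothetical.
 */
static enum clnt_stat
rdma_null_ping(char *proto, void *handle, struct netbuf *raddr,
    rpcprog_t prog, rpcvers_t vers, cred_t *cr)
{
        CLIENT *cl;
        enum clnt_stat stat;
        struct timeval wait = { 30, 0 };

        if (clnt_rdma_kcreate(proto, handle, raddr, AF_INET, prog, vers,
            cr, &cl) != 0)
                return (RPC_FAILED);
        stat = CLNT_CALL(cl, NULLPROC, xdr_void, NULL, xdr_void, NULL, wait);
        CLNT_DESTROY(cl);
        return (stat);
}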