/*
 * msgget system call.
 */
static int
msgget(key_t key, int msgflg)
{
	kmsqid_t	*qp;
	kmutex_t	*lock;
	int		id, error;
	int		ii;
	proc_t		*pp = curproc;

top:
	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
		return (set_errno(error));

	if (IPC_FREE(&qp->msg_perm)) {
		mutex_exit(lock);
		mutex_exit(&pp->p_lock);

		list_create(&qp->msg_list, sizeof (struct msg),
		    offsetof(struct msg, msg_node));
		qp->msg_qnum = 0;
		qp->msg_lspid = qp->msg_lrpid = 0;
		qp->msg_stime = qp->msg_rtime = 0;
		qp->msg_ctime = gethrestime_sec();
		qp->msg_ngt_cnt = 0;
		qp->msg_neg_copy = 0;
		for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
			list_create(&qp->msg_wait_snd[ii],
			    sizeof (msgq_wakeup_t),
			    offsetof(msgq_wakeup_t, msgw_list));
			list_create(&qp->msg_wait_snd_ngt[ii],
			    sizeof (msgq_wakeup_t),
			    offsetof(msgq_wakeup_t, msgw_list));
		}
		/*
		 * The proper initialization of msg_lowest_type is to the
		 * highest possible value.  By doing this we guarantee that
		 * when the first send happens, the lowest type will be set
		 * properly.
		 */
		qp->msg_lowest_type = -1;
		list_create(&qp->msg_cpy_block, sizeof (msgq_wakeup_t),
		    offsetof(msgq_wakeup_t, msgw_list));
		qp->msg_fnd_sndr = &msg_fnd_sndr[0];
		qp->msg_fnd_rdr = &msg_fnd_rdr[0];
		qp->msg_rcv_cnt = 0;
		qp->msg_snd_cnt = 0;

		if (error = ipc_commit_begin(msq_svc, key, msgflg,
		    (kipc_perm_t *)qp)) {
			if (error == EAGAIN)
				goto top;
			return (set_errno(error));
		}
		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
		    pp->p_rctls, pp);
		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
		    pp->p_rctls, pp);
		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
	}

	if (audit_active)
		audit_ipcget(AT_IPC_MSG, (void *)qp);

	id = qp->msg_perm.ipc_id;
	mutex_exit(lock);

	return (id);
}
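/*
 * For context, a minimal user-level sketch of the path above: msgget(2)
 * with IPC_PRIVATE takes the IPC_FREE branch and initializes a fresh
 * queue.  This is ordinary SysV message-queue usage, not kernel code;
 * it is only illustrative.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <stdio.h>
#include <string.h>

struct mymsg {
	long mtype;
	char mtext[64];
};

int
main(void)
{
	/* IPC_CREAT on a new key drives the initialization path above */
	int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
	struct mymsg m = { 1, "hello" };

	if (id == -1) {
		perror("msgget");
		return (1);
	}
	if (msgsnd(id, &m, strlen(m.mtext) + 1, 0) == -1)
		perror("msgsnd");
	if (msgrcv(id, &m, sizeof (m.mtext), 1, 0) == -1)
		perror("msgrcv");
	(void) msgctl(id, IPC_RMID, NULL);	/* remove the queue */
	return (0);
}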
/*
 * No locking required because I held the root vnode before calling this
 * function so the vfs won't disappear on me.  To be more explicit:
 * fdvrootp->v_count will be greater than 1 so fdunmount will just return.
 */
static int
fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp)
{
	dev32_t d32;
	rctl_qty_t fdno_ctl;

	mutex_enter(&curproc->p_lock);
	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
	    curproc->p_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	bzero(sp, sizeof (*sp));
	sp->f_bsize = 1024;
	sp->f_frsize = 1024;
	sp->f_blocks = (fsblkcnt64_t)0;
	sp->f_bfree = (fsblkcnt64_t)0;
	sp->f_bavail = (fsblkcnt64_t)0;
	/* two extra entries account for "." and ".." */
	sp->f_files = (fsfilcnt64_t)
	    (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl) + 2);
	sp->f_ffree = (fsfilcnt64_t)0;
	sp->f_favail = (fsfilcnt64_t)0;
	(void) cmpldev(&d32, vfsp->vfs_dev);
	sp->f_fsid = d32;
	(void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name);
	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
	sp->f_namemax = FDNSIZE;
	(void) strcpy(sp->f_fstr, "/dev/fd");
	(void) strcpy(&sp->f_fstr[8], "/dev/fd");
	return (0);
}
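/*
 * A user-level sketch of what fdstatvfs() feeds: statvfs(2) against an
 * fdfs mount (conventionally /dev/fd).  The mount point is an assumption
 * about the local configuration; everything else is standard libc.
 */
#include <sys/statvfs.h>
#include <stdio.h>

int
main(void)
{
	struct statvfs sv;

	if (statvfs("/dev/fd", &sv) != 0) {
		perror("statvfs");
		return (1);
	}
	/* f_files reflects MIN(open files, RLIMIT_NOFILE) plus "." and ".." */
	printf("f_files = %lu, f_namemax = %lu\n",
	    (unsigned long)sv.f_files, (unsigned long)sv.f_namemax);
	return (0);
}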
/* ARGSUSED */
static int
fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
{
	static struct fddirect dotbuf[] = {
		{ FDROOTINO, "."  },
		{ FDROOTINO, ".." }
	};
	struct fddirect dirbuf;
	int i, n;
	int minfd, maxfd, modoff, error = 0;
	int nentries;
	rctl_qty_t fdno_ctl;
	int endoff;

	if (vp->v_type != VDIR)
		return (ENOSYS);

	mutex_enter(&curproc->p_lock);
	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
	    curproc->p_rctls, curproc);
	nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
	mutex_exit(&curproc->p_lock);

	endoff = (nentries + 2) * FDSDSIZE;

	/*
	 * Fake up ".", "..", and the /dev/fd directory entries.
	 */
	if (uiop->uio_loffset < (offset_t)0 ||
	    uiop->uio_loffset >= (offset_t)endoff ||
	    uiop->uio_resid <= 0)
		return (0);
	ASSERT(uiop->uio_loffset <= MAXOFF_T);
	if (uiop->uio_offset < 2*FDSDSIZE) {
		error = uiomove((caddr_t)dotbuf + uiop->uio_offset,
		    MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset),
		    UIO_READ, uiop);
		if (uiop->uio_resid <= 0 || error)
			return (error);
	}
	minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE;
	maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE;
	modoff = uiop->uio_offset % FDSDSIZE;

	for (i = 0; i < FDDIRSIZE; i++)
		dirbuf.d_name[i] = '\0';
	for (i = minfd; i < MIN(maxfd, nentries); i++) {
		n = i;
		dirbuf.d_ino = fdtoi(n);
		numtos((ulong_t)n, dirbuf.d_name);
		error = uiomove((caddr_t)&dirbuf + modoff,
		    MIN(uiop->uio_resid, FDSDSIZE - modoff),
		    UIO_READ, uiop);
		if (uiop->uio_resid <= 0 || error)
			return (error);
		modoff = 0;
	}

	return (error);
}
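/*
 * The offset arithmetic in fdread() maps a byte range onto fixed-size
 * directory records.  A standalone sketch of that mapping, with FDSDSIZE
 * stubbed to an arbitrary value since the kernel header isn't available
 * here.
 */
#include <stdio.h>

#define	FDSDSIZE	16	/* assumed record size, for illustration */

int
main(void)
{
	long off = 3 * FDSDSIZE + 4;	/* read starting mid-record */
	long resid = 40;		/* bytes requested */
	long minfd = (off - 2*FDSDSIZE) / FDSDSIZE;	/* first fd entry */
	long maxfd = (off + resid - 1) / FDSDSIZE;	/* last record hit */
	long modoff = off % FDSDSIZE;	/* partial first record */

	printf("minfd=%ld maxfd=%ld modoff=%ld\n", minfd, maxfd, modoff);
	return (0);
}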
/* ARGSUSED */
static int
fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
	caller_context_t *ct, int flags)
{
	/* bp holds one dirent structure */
	u_offset_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (u_offset_t)];
	struct dirent64 *dirent = (struct dirent64 *)bp;
	int reclen, nentries;
	rctl_qty_t fdno_ctl;
	int n;
	int oresid;
	off_t off;

	if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 ||
	    (uiop->uio_offset % FDSDSIZE) != 0)
		return (ENOENT);

	ASSERT(uiop->uio_loffset <= MAXOFF_T);
	oresid = uiop->uio_resid;
	bzero(bp, sizeof (bp));

	mutex_enter(&curproc->p_lock);
	fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
	    curproc->p_rctls, curproc);
	nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl);
	mutex_exit(&curproc->p_lock);

	while (uiop->uio_resid > 0) {
		if ((off = uiop->uio_offset) == 0) {	/* "." */
			dirent->d_ino = (ino64_t)FDROOTINO;
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '\0';
			reclen = DIRENT64_RECLEN(1);
		} else if (off == FDSDSIZE) {		/* ".." */
			dirent->d_ino = (ino64_t)FDROOTINO;
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '.';
			dirent->d_name[2] = '\0';
			reclen = DIRENT64_RECLEN(2);
		} else {
			/*
			 * Return entries corresponding to the allowable
			 * number of file descriptors for this process.
			 */
			if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries)
				break;
			dirent->d_ino = (ino64_t)fdtoi(n);
			numtos((ulong_t)n, dirent->d_name);
			reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
		}
		dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uiop->uio_resid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uiop->uio_resid == oresid)
				return (EINVAL);
			break;
		}
		/*
		 * uiomove() updates both resid and offset by the same
		 * amount.  But we want offset to change in increments
		 * of FDSDSIZE, which is different from the number of bytes
		 * being returned to the user.  So we set uio_offset
		 * separately, ignoring what uiomove() does.
		 */
		if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
			return (EFAULT);

		uiop->uio_offset = off + FDSDSIZE;
	}
	if (eofp)
		*eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries);
	return (0);
}
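/*
 * fdreaddir() is what ultimately backs readdir(3C) on an fdfs directory.
 * A minimal user-level walk; the /dev/fd path is again an assumption
 * about where fdfs is mounted.
 */
#include <dirent.h>
#include <stdio.h>

int
main(void)
{
	DIR *dp = opendir("/dev/fd");
	struct dirent *de;

	if (dp == NULL) {
		perror("opendir");
		return (1);
	}
	while ((de = readdir(dp)) != NULL)
		printf("%s\n", de->d_name);	/* ".", "..", "0", "1", ... */
	(void) closedir(dp);
	return (0);
}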
/*
 * semget - Semget system call.
 */
static int
semget(key_t key, int nsems, int semflg)
{
	ksemid_t	*sp;
	kmutex_t	*lock;
	int		id, error;
	proc_t		*pp = curproc;

top:
	if (error = ipc_get(sem_svc, key, semflg, (kipc_perm_t **)&sp, &lock))
		return (set_errno(error));

	if (!IPC_FREE(&sp->sem_perm)) {
		/*
		 * A semaphore with the requested key exists.
		 */
		if (!((nsems >= 0) && (nsems <= sp->sem_nsems))) {
			mutex_exit(lock);
			return (set_errno(EINVAL));
		}
	} else {
		/*
		 * This is a new semaphore set.  Finish initialization.
		 */
		if (nsems <= 0 || (rctl_test(rc_process_semmsl, pp->p_rctls,
		    pp, nsems, RCA_SAFE) & RCT_DENY)) {
			mutex_exit(lock);
			mutex_exit(&pp->p_lock);
			ipc_cleanup(sem_svc, (kipc_perm_t *)sp);
			return (set_errno(EINVAL));
		}
		mutex_exit(lock);
		mutex_exit(&pp->p_lock);

		/*
		 * We round the allocation up to coherency granularity
		 * so that multiple semaphore allocations won't result
		 * in the false sharing of their sem structures.
		 */
		sp->sem_base =
		    kmem_zalloc(P2ROUNDUP(nsems * sizeof (struct sem), 64),
		    KM_SLEEP);
		sp->sem_binary = (nsems == 1);
		sp->sem_nsems = (ushort_t)nsems;
		sp->sem_ctime = gethrestime_sec();
		sp->sem_otime = 0;
		list_create(&sp->sem_undos, sizeof (struct sem_undo),
		    offsetof(struct sem_undo, un_list));

		if (error = ipc_commit_begin(sem_svc, key, semflg,
		    (kipc_perm_t *)sp)) {
			if (error == EAGAIN)
				goto top;
			return (set_errno(error));
		}
		sp->sem_maxops =
		    rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
		lock = ipc_commit_end(sem_svc, &sp->sem_perm);
	}

	if (audit_active)
		audit_ipcget(AT_IPC_SEM, (void *)sp);

	id = sp->sem_perm.ipc_id;
	mutex_exit(lock);
	return (id);
}
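/*
 * A user-level sketch of the semget(2) path above: creating a one-element
 * set (which the kernel marks sem_binary) and doing a V/P pair on it.
 * Purely illustrative SysV usage, not kernel code.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <stdio.h>

int
main(void)
{
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	struct sembuf up = { 0, 1, 0 };		/* V on semaphore 0 */
	struct sembuf down = { 0, -1, 0 };	/* P on semaphore 0 */

	if (id == -1) {
		perror("semget");
		return (1);
	}
	if (semop(id, &up, 1) == -1 || semop(id, &down, 1) == -1)
		perror("semop");
	(void) semctl(id, 0, IPC_RMID);		/* remove the set */
	return (0);
}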
long
sysconfig(int which)
{
	switch (which) {

	/*
	 * if it is not handled in mach_sysconfig either
	 * it must be EINVAL.
	 */
	default:
		return (mach_sysconfig(which)); /* `uname -i`/os */

	case _CONFIG_CLK_TCK:
		return ((long)hz);	/* clock frequency per second */

	case _CONFIG_PROF_TCK:
		return ((long)hz);	/* profiling clock freq per sec */

	case _CONFIG_NGROUPS:
		/*
		 * Maximum number of supplementary groups.
		 */
		return (ngroups_max);

	case _CONFIG_OPEN_FILES:
		/*
		 * Maximum number of open files (soft limit).
		 */
		{
			rlim64_t fd_ctl;
			mutex_enter(&curproc->p_lock);
			fd_ctl = rctl_enforced_value(
			    rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls,
			    curproc);
			mutex_exit(&curproc->p_lock);
			return ((ulong_t)fd_ctl);
		}

	case _CONFIG_CHILD_MAX:
		/*
		 * Maximum number of processes.
		 */
		return (v.v_maxup);

	case _CONFIG_POSIX_VER:
		return (_POSIX_VERSION); /* current POSIX version */

	case _CONFIG_PAGESIZE:
		return (PAGESIZE);

	case _CONFIG_XOPEN_VER:
		return (_XOPEN_VERSION); /* current XOPEN version */

	case _CONFIG_NPROC_CONF:
		return (zone_ncpus_get(curproc->p_zone));

	case _CONFIG_NPROC_ONLN:
		return (zone_ncpus_online_get(curproc->p_zone));

	case _CONFIG_NPROC_MAX:
		return (max_ncpus);

	case _CONFIG_STACK_PROT:
		return (curproc->p_stkprot & ~PROT_USER);

	case _CONFIG_AIO_LISTIO_MAX:
		return (_AIO_LISTIO_MAX);

	case _CONFIG_AIO_MAX:
		return (_AIO_MAX);

	case _CONFIG_AIO_PRIO_DELTA_MAX:
		return (0);

	case _CONFIG_DELAYTIMER_MAX:
		return (INT_MAX);

	case _CONFIG_MQ_OPEN_MAX:
		return (_MQ_OPEN_MAX);

	case _CONFIG_MQ_PRIO_MAX:
		return (_MQ_PRIO_MAX);

	case _CONFIG_RTSIG_MAX:
		return (_SIGRTMAX - _SIGRTMIN + 1);

	case _CONFIG_SEM_NSEMS_MAX:
		return (_SEM_NSEMS_MAX);

	case _CONFIG_SEM_VALUE_MAX:
		return (_SEM_VALUE_MAX);

	case _CONFIG_SIGQUEUE_MAX:
		/*
		 * Maximum number of outstanding queued signals.
		 */
		{
			rlim64_t sigqsz_max;
			mutex_enter(&curproc->p_lock);
			sigqsz_max = rctl_enforced_value(rc_process_sigqueue,
			    curproc->p_rctls, curproc);
			mutex_exit(&curproc->p_lock);
			return ((uint_t)sigqsz_max);
		}

	case _CONFIG_SIGRT_MIN:
		return (_SIGRTMIN);

	case _CONFIG_SIGRT_MAX:
		return (_SIGRTMAX);

	case _CONFIG_TIMER_MAX:
		return (timer_max);

	case _CONFIG_PHYS_PAGES:
		/*
		 * If the non-global zone has a phys. memory cap, use that.
		 * We always report the system-wide value for the global zone,
		 * even though rcapd can be used on the global zone too.
		 */
		if (!INGLOBALZONE(curproc) &&
		    curproc->p_zone->zone_phys_mcap != 0)
			return (MIN(btop(curproc->p_zone->zone_phys_mcap),
			    physinstalled));
		return (physinstalled);

	case _CONFIG_AVPHYS_PAGES:
		/*
		 * If the non-global zone has a phys. memory cap, use
		 * the phys. memory cap - zone's current rss.  We always
		 * report the system-wide value for the global zone, even
		 * though rcapd can be used on the global zone too.
		 */
		if (!INGLOBALZONE(curproc) &&
		    curproc->p_zone->zone_phys_mcap != 0) {
			pgcnt_t cap, rss, free;
			vmusage_t in_use;
			size_t cnt = 1;

			cap = btop(curproc->p_zone->zone_phys_mcap);
			if (cap > physinstalled)
				return (freemem);

			if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
			    FKIOCTL) != 0)
				in_use.vmu_rss_all = 0;
			rss = btop(in_use.vmu_rss_all);
			/*
			 * Because rcapd implements a soft cap, it is possible
			 * for rss to be temporarily over the cap.
			 */
			if (cap > rss)
				free = cap - rss;
			else
				free = 0;
			return (MIN(free, freemem));
		}
		return (freemem);

	case _CONFIG_MAXPID:
		return (maxpid);

	case _CONFIG_CPUID_MAX:
		return (max_cpuid);

	case _CONFIG_EPHID_MAX:
		return (MAXEPHUID);

	case _CONFIG_SYMLOOP_MAX:
		return (MAXSYMLINKS);
	}
}
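/*
 * The _CONFIG_* values above are reached from user level through
 * sysconf(3C); libc translates the _SC_* names to the corresponding
 * _CONFIG_* codes before trapping into the kernel.  A small sketch
 * querying a few of them.
 */
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	printf("CLK_TCK          = %ld\n", sysconf(_SC_CLK_TCK));
	printf("OPEN_MAX         = %ld\n", sysconf(_SC_OPEN_MAX));
	printf("PAGESIZE         = %ld\n", sysconf(_SC_PAGESIZE));
	printf("NPROCESSORS_ONLN = %ld\n", sysconf(_SC_NPROCESSORS_ONLN));
	printf("PHYS_PAGES       = %ld\n", sysconf(_SC_PHYS_PAGES));
	return (0);
}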
/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int	error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user requested heap end is stored in libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}
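/*
 * brk_internal() is the kernel side of brk(2)/sbrk(2).  A user-level
 * sketch that grows the break by one page and shrinks it back; it
 * assumes nothing else (e.g. malloc) is moving the break concurrently.
 */
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	void *start = sbrk(0);			/* current break */

	if (brk((char *)start + 4096) != 0) {	/* extend heap one page */
		perror("brk");
		return (1);
	}
	printf("break moved from %p to %p\n", start, sbrk(0));
	(void) brk(start);			/* release the extension */
	return (0);
}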