/*
 * Emulate setrlimit() for a Linux resource.
 *
 * Resources with no native rctl mapping are either tracked in the
 * fake_limits table or rejected with ENOTSUP.  For rctl-backed resources
 * the hard limit is stored as the privileged rctl value and the soft limit
 * as the basic value.
 *
 * Returns 0 on success or a negated Linux errno.
 */
static int
setrlimit_common(int resource, uint64_t rlim_cur, uint64_t rlim_max)
{
	int rv;
	char *rctl;

	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
		return (-EINVAL);

	rctl = l_to_rctl[resource];
	if (rctl == NULL) {
		switch (resource) {
		case LX_RLIMIT_LOCKS:
		case LX_RLIMIT_NICE:
		case LX_RLIMIT_RTPRIO:
		case LX_RLIMIT_RTTIME:
			/* Purely emulated; just remember what was set. */
			fake_limits[resource].rlim_max = rlim_max;
			fake_limits[resource].rlim_cur = rlim_cur;
			return (0);
		default:
			/* default: case added to match getrlimit_common. */
			lx_unsupported("Unsupported resource type %d\n",
			    resource);
			return (-ENOTSUP);
		}
	}

	/*
	 * If we're emulating the value via a zone rctl, we can't set that
	 * from within the zone. Lie and say we set the value.
	 */
	if (strncmp(rctl, "zone.", 5) == 0)
		return (0);

	/*
	 * On Ubuntu at least, the login and sshd processes expect to set this
	 * limit to 16k and login will fail if this fails. On Illumos we have a
	 * system limit of 8k and normally the privileged limit is 512. We
	 * simply pretend this works to allow login to work.
	 */
	if (strcmp(rctl, "process.max-sigqueue-size") == 0 &&
	    rlim_max > 8192)
		return (0);

	/*
	 * Linux limits the max number of open files to 1m and there is a test
	 * for this.
	 */
	if (resource == LX_RLIMIT_NOFILE && rlim_max > (1024 * 1024))
		return (-EPERM);

	/* Set the privileged (hard) value first, then the basic (soft) one. */
	if ((rv = set_rctl(rctl, rlim_max, RCPRIV_PRIVILEGED)) != 0)
		return (rv);

	return (set_rctl(rctl, rlim_cur, RCPRIV_BASIC));
}
static int convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller) { struct cmsghdr *cmsg, *last; int err = 0; int level = 0; int type = 0; cmsg = CMSG_FIRSTHDR(msg); while (cmsg != NULL && err == 0) { level = cmsg->cmsg_level; type = cmsg->cmsg_type; if (direction == LX_TO_SOL) { if (cmsg->cmsg_level == LX_SOL_SOCKET) { cmsg->cmsg_level = SOL_SOCKET; if (cmsg->cmsg_type == LX_SCM_RIGHTS) cmsg->cmsg_type = SCM_RIGHTS; else if (cmsg->cmsg_type == LX_SCM_CRED) cmsg->cmsg_type = SCM_UCRED; else if (cmsg->cmsg_type == LX_SCM_TIMESTAMP) cmsg->cmsg_type = SCM_TIMESTAMP; else err = ENOTSUP; } else { err = ENOTSUP; } } else { if (cmsg->cmsg_level == SOL_SOCKET) { cmsg->cmsg_level = LX_SOL_SOCKET; if (cmsg->cmsg_type == SCM_RIGHTS) cmsg->cmsg_type = LX_SCM_RIGHTS; else if (cmsg->cmsg_type == SCM_UCRED) cmsg->cmsg_type = LX_SCM_CRED; else if (cmsg->cmsg_type == SCM_TIMESTAMP) cmsg->cmsg_type = LX_SCM_TIMESTAMP; else err = ENOTSUP; } else { err = ENOTSUP; } } last = cmsg; cmsg = CMSG_NXTHDR(msg, last); } if (err) lx_unsupported("Unsupported socket control message %d " "(%d) in %s\n.", type, level, caller); return (err); }
static lx_proto_opts_t * get_proto_opt_tbl(int level) { switch (level) { case LX_IPPROTO_IP: return (&ip_sockopts_tbl); case LX_SOL_SOCKET: return (&socket_sockopts_tbl); case LX_IPPROTO_IGMP: return (&igmp_sockopts_tbl); case LX_IPPROTO_TCP: return (&tcp_sockopts_tbl); case LX_IPPROTO_RAW: return (&raw_sockopts_tbl); default: lx_unsupported("Unsupported sockopt level %d", level); return (NULL); } }
/*
 * fcntl64(2) emulation.
 *
 * Lock commands translate the Linux flock structure (32- or 64-bit flavor)
 * to its native form around the common fcntl handler, then copy any updated
 * lock state back out to userland.  Lease/signal commands are unsupported;
 * everything else passes the argument straight through.
 */
long
lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	int fd = (int)p1;
	int cmd = (int)p2;
	struct lx_flock lxflk;
	struct lx_flock64 lxflk64;
	struct flock fl;
	struct flock64 fl64;
	int rv;

	switch (cmd) {
	case LX_F_SETSIG:
	case LX_F_GETSIG:
	case LX_F_SETLEASE:
	case LX_F_GETLEASE:
		lx_unsupported("unsupported fcntl64 command: %d", cmd);
		return (-ENOTSUP);

	case LX_F_GETLK:
	case LX_F_SETLK:
	case LX_F_SETLKW:
		/* 32-bit lock: convert in, do the operation, convert out. */
		if (uucopy((void *)p3, (void *)&lxflk,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
		ltos_flock(&lxflk, &fl);
		rv = lx_fcntl_com(fd, cmd, (ulong_t)&fl);
		if (rv >= 0) {
			stol_flock(&fl, &lxflk);
			if (uucopy((void *)&lxflk, (void *)p3,
			    sizeof (struct lx_flock)) != 0)
				return (-errno);
		}
		return (rv);

	case LX_F_GETLK64:
	case LX_F_SETLKW64:
	case LX_F_SETLK64:
		/* 64-bit lock: same dance with the large-file structures. */
		if (uucopy((void *)p3, (void *)&lxflk64,
		    sizeof (struct lx_flock64)) != 0)
			return (-errno);
		ltos_flock64(&lxflk64, &fl64);
		rv = lx_fcntl_com(fd, cmd, (ulong_t)&fl64);
		if (rv >= 0) {
			stol_flock64(&fl64, &lxflk64);
			if (uucopy((void *)&lxflk64, (void *)p3,
			    sizeof (struct lx_flock64)) != 0)
				return (-errno);
		}
		return (rv);

	default:
		/* No translation required; hand the argument through. */
		return (lx_fcntl_com(fd, cmd, (ulong_t)p3));
	}
}
/*
 * prlimit64(2) emulation.
 *
 * From the man page: prlimit() combines and extends setrlimit() and
 * getrlimit(), getting and/or setting the resource limits of an arbitrary
 * process.  A pid of 0 means the calling process; we only emulate that case.
 */
int
lx_prlimit64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
{
	pid_t pid = (pid_t)p1;
	int resource = (int)p2;
	lx_rlimit64_t *nrlp = (lx_rlimit64_t *)p3;
	lx_rlimit64_t *orlp = (lx_rlimit64_t *)p4;
	lx_rlimit64_t newlim, oldlim;
	uint64_t ocur, omax;
	int err = 0;

	if (pid != 0) {
		/* XXX TBD if needed */
		lx_unsupported("setting prlimit %d for another process\n",
		    resource);
		return (-ENOTSUP);
	}

	/* Capture the current limits up front so we can report them later. */
	if (orlp != NULL) {
		err = getrlimit_common(resource, &ocur, &omax);
		if (err != 0)
			return (err);
	}

	if (nrlp != NULL) {
		if (uucopy((void *)p3, &newlim, sizeof (newlim)) != 0)
			return (-errno);

		/* An infinite soft limit demands an infinite hard limit. */
		if ((newlim.rlim_max != LX_RLIM64_INFINITY &&
		    newlim.rlim_cur == LX_RLIM64_INFINITY) ||
		    newlim.rlim_cur > newlim.rlim_max)
			return (-EINVAL);

		err = setrlimit_common(resource, newlim.rlim_cur,
		    newlim.rlim_max);
	}

	/* On success, hand back the limits as they were before any update. */
	if (err == 0 && orlp != NULL) {
		oldlim.rlim_cur = ocur;
		oldlim.rlim_max = omax;
		if ((uucopy(&oldlim, orlp, sizeof (oldlim))) != 0)
			err = -errno;
	}

	return (err);
}
/*
 * fcntl(2) emulation.
 *
 * 64-bit lock commands are rejected here — Linux routes those through
 * fcntl64(2).  Regular lock commands convert the Linux flock structure to
 * native form around the common handler and copy the result back out.
 */
long
lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	int fd = (int)p1;
	int cmd = (int)p2;
	ulong_t arg = (ulong_t)p3;
	struct lx_flock lxflk;
	struct flock fl;
	int is_lock = 0;
	int rv;

	switch (cmd) {
	case LX_F_GETLK64:
	case LX_F_SETLK64:
	case LX_F_SETLKW64:
		/* The 64-bit fcntl commands must go through fcntl64(). */
		return (-EINVAL);

	case LX_F_SETSIG:
	case LX_F_GETSIG:
	case LX_F_SETLEASE:
	case LX_F_GETLEASE:
		lx_unsupported("unsupported fcntl command: %d", cmd);
		return (-ENOTSUP);

	case LX_F_GETLK:
	case LX_F_SETLK:
	case LX_F_SETLKW:
		/* Convert the Linux flock into its native counterpart. */
		if (uucopy((void *)p3, (void *)&lxflk,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
		is_lock = 1;
		ltos_flock(&lxflk, &fl);
		arg = (ulong_t)&fl;
		break;
	}

	rv = lx_fcntl_com(fd, cmd, arg);

	/* For lock commands, propagate any updated lock state to userland. */
	if (is_lock && rv >= 0) {
		stol_flock(&fl, &lxflk);
		if (uucopy((void *)&lxflk, (void *)p3,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
	}

	return (rv);
}
/*
 * Convert Linux MSG_* socket flags into their native equivalents.
 *
 * Flags with a direct native counterpart are translated and cleared from
 * lx_flags; flags we cannot emulate are reported via lx_unsupported() and
 * then dropped so the operation can proceed.  Any bits still set at the end
 * are unknown and logged as such.  Returns the accumulated native flag word.
 *
 * NOTE(review): the MSG_CONFIRM diagnostic below omits the word "flag",
 * unlike its siblings — presumably just an inconsistency; confirm before
 * normalizing the message.
 */
static int
convert_sockflags(int lx_flags)
{
	int solaris_flags = 0;

	if (lx_flags & LX_MSG_OOB) {
		solaris_flags |= MSG_OOB;
		lx_flags &= ~LX_MSG_OOB;
	}

	if (lx_flags & LX_MSG_PEEK) {
		solaris_flags |= MSG_PEEK;
		lx_flags &= ~LX_MSG_PEEK;
	}

	if (lx_flags & LX_MSG_DONTROUTE) {
		solaris_flags |= MSG_DONTROUTE;
		lx_flags &= ~LX_MSG_DONTROUTE;
	}

	if (lx_flags & LX_MSG_CTRUNC) {
		solaris_flags |= MSG_CTRUNC;
		lx_flags &= ~LX_MSG_CTRUNC;
	}

	/* No native equivalent; report and drop. */
	if (lx_flags & LX_MSG_PROXY) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_PROXY flag set");
		lx_flags &= ~LX_MSG_PROXY;
	}

	if (lx_flags & LX_MSG_TRUNC) {
		solaris_flags |= MSG_TRUNC;
		lx_flags &= ~LX_MSG_TRUNC;
	}

	if (lx_flags & LX_MSG_DONTWAIT) {
		solaris_flags |= MSG_DONTWAIT;
		lx_flags &= ~LX_MSG_DONTWAIT;
	}

	if (lx_flags & LX_MSG_EOR) {
		solaris_flags |= MSG_EOR;
		lx_flags &= ~LX_MSG_EOR;
	}

	if (lx_flags & LX_MSG_WAITALL) {
		solaris_flags |= MSG_WAITALL;
		lx_flags &= ~LX_MSG_WAITALL;
	}

	if (lx_flags & LX_MSG_FIN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_FIN flag set");
		lx_flags &= ~LX_MSG_FIN;
	}

	if (lx_flags & LX_MSG_SYN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_SYN flag set");
		lx_flags &= ~LX_MSG_SYN;
	}

	if (lx_flags & LX_MSG_CONFIRM) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_CONFIRM set");
		lx_flags &= ~LX_MSG_CONFIRM;
	}

	if (lx_flags & LX_MSG_RST) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_RST flag set");
		lx_flags &= ~LX_MSG_RST;
	}

	if (lx_flags & LX_MSG_ERRQUEUE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_ERRQUEUE flag set");
		lx_flags &= ~LX_MSG_ERRQUEUE;
	}

	if (lx_flags & LX_MSG_NOSIGNAL) {
		/* MSG_NOSIGNAL handled within each caller */
		lx_flags &= ~LX_MSG_NOSIGNAL;
	}

	if (lx_flags & LX_MSG_MORE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_MORE flag set");
		lx_flags &= ~LX_MSG_MORE;
	}

	if (lx_flags & LX_MSG_WAITFORONE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_WAITFORONE flag set");
		lx_flags &= ~LX_MSG_WAITFORONE;
	}

	if (lx_flags & LX_MSG_FASTOPEN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_FASTOPEN flag set");
		lx_flags &= ~LX_MSG_FASTOPEN;
	}

	if (lx_flags & LX_MSG_CMSG_CLOEXEC) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_CMSG_CLOEXEC flag set");
		lx_flags &= ~LX_MSG_CMSG_CLOEXEC;
	}

	/* Anything left over is a flag we have never heard of. */
	if (lx_flags != 0)
		lx_unsupported("unknown socket flag(s) set 0x%x", lx_flags);

	return (solaris_flags);
}
long lx_futex(uintptr_t addr, int op, int val, uintptr_t lx_timeout, uintptr_t addr2, int val3) { struct as *as = curproc->p_as; memid_t memid, memid2; timestruc_t timeout; timestruc_t *tptr = NULL; int val2 = NULL; int rval = 0; int cmd = op & FUTEX_CMD_MASK; int private = op & FUTEX_PRIVATE_FLAG; char dmsg[32]; /* must be aligned on int boundary */ if (addr & 0x3) return (set_errno(EINVAL)); /* Sanity check the futex command */ if (cmd < 0 || cmd > FUTEX_MAX_CMD) return (set_errno(EINVAL)); if (cmd == FUTEX_FD) { /* * FUTEX_FD was sentenced to death for grievous crimes of * semantics against humanity; it has been ripped out of Linux * and will never be supported by us. */ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd); lx_unsupported(dmsg); return (set_errno(ENOSYS)); } switch (cmd) { case FUTEX_LOCK_PI: case FUTEX_UNLOCK_PI: case FUTEX_TRYLOCK_PI: case FUTEX_WAIT_BITSET: case FUTEX_WAKE_BITSET: case FUTEX_WAIT_REQUEUE_PI: case FUTEX_CMP_REQUEUE_PI: /* * These are operations that we don't currently support, but * may well need to in the future. For now, callers need to * deal with these being missing -- but if and as that changes, * they may well need to be implemented. */ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd); lx_unsupported(dmsg); return (set_errno(ENOSYS)); } /* Copy in the timeout structure from userspace. */ if (cmd == FUTEX_WAIT && lx_timeout != NULL) { rval = get_timeout((timespec_t *)lx_timeout, &timeout); if (rval != 0) return (set_errno(rval)); tptr = &timeout; } switch (cmd) { case FUTEX_REQUEUE: case FUTEX_CMP_REQUEUE: case FUTEX_WAKE_OP: /* * lx_timeout is nominally a pointer to a userspace address. * For several commands, however, it actually contains * an additional interage parameter. This is horrible, and * the people who did this to us should be sorry. */ val2 = (int)lx_timeout; } /* * Translate the process-specific, user-space futex virtual * address(es) to a universal memid. 
If the private bit is set, we * can just use our as plus the virtual address, saving quite a bit * of effort. */ if (private) { memid.val[0] = (uintptr_t)as; memid.val[1] = (uintptr_t)addr; } else {
/*
 * See glibc sysdeps/unix/sysv/linux/x86_64/clone.S code for x64 argument order
 * and the Linux kernel/fork.c code for the various ways arguments can be passed
 * to the clone syscall (CONFIG_CLONE_BACKWARDS, et al).
 *
 * Emulates clone(2): a fork/vfork-style clone maps onto forkx()/vforkx(),
 * while a fully-shared clone (all of CLONE_VM/FS/FILES/THREAD/SIGHAND) maps
 * onto thr_create() with a caller-supplied stack.  Partial sharing is not
 * supported.  Returns the new pid/tid to the parent, 0 in the child of a
 * fork-style clone, or a negated Linux errno.
 */
long
lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
    uintptr_t p5)
{
	struct clone_state *cs;
	int flags = (int)p1;
	void *cldstk = (void *)p2;
	void *ptidp = (void *)p3;
#if defined(_LP64)
	void *ctidp = (void *)p4;
	struct lx_desc *ldtinfo = (void *)p5;
#else /* is 32bit */
	struct lx_desc *ldtinfo = (void *)p4;
	void *ctidp = (void *)p5;
#endif
	thread_t tid;
	/* Written by the new thread; polled below, hence volatile. */
	volatile int clone_res;
	int sig;
	int rval;
	int pid;
	lx_regs_t *rp;
	sigset_t sigmask;
	int fork_flags = 0;

	if (flags & LX_CLONE_SETTLS) {
		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
		    "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
	} else {
		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)", flags,
		    cldstk, ptidp);
	}

	/*
	 * Only supported for pid 0 on Linux
	 */
	if (flags & LX_CLONE_PID)
		return (-EINVAL);

	/*
	 * CLONE_THREAD requires CLONE_SIGHAND.
	 *
	 * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
	 * in kernel 2.4 and prior.
	 * In kernel 2.6 (and later) CLONE_DETACHED was dropped completely, so
	 * we no longer have this requirement.
	 */
	if (flags & CLONE_TD) {
		if (!(flags & LX_CLONE_SIGHAND))
			return (-EINVAL);
		if (strncmp(lx_release, "2.4", 3) == 0 &&
		    (flags & CLONE_TD) != CLONE_TD)
			return (-EINVAL);
	}

	rp = lx_syscall_regs();

	/*
	 * Test that the pointers passed by the user are writable, by doing a
	 * read-then-write-back round trip, before committing to the clone.
	 */
	if (flags & LX_CLONE_PARENT_SETTID) {
		if (uucopy(ptidp, &pid, sizeof (int)) != 0)
			return (-EFAULT);
		if (uucopy(&pid, ptidp, sizeof (int)) != 0)
			return (-EFAULT);
	}
	if (flags & LX_CLONE_CHILD_SETTID) {
		if (uucopy(ctidp, &pid, sizeof (int)) != 0)
			return (-EFAULT);
		if (uucopy(&pid, ctidp, sizeof (int)) != 0)
			return (-EFAULT);
	}

	/* See if this is a fork() operation or a thr_create(). */
	if (IS_FORK(flags) || IS_VFORK(flags)) {
		if (flags & LX_CLONE_PARENT) {
			lx_unsupported("clone(2) only supports CLONE_PARENT "
			    "for threads.\n");
			return (-ENOTSUP);
		}

		if (flags & LX_CLONE_PTRACE)
			lx_ptrace_fork();

		/* No exit signal requested: suppress SIGCHLD delivery. */
		if ((flags & LX_CSIGNAL) == 0)
			fork_flags |= FORK_NOSIGCHLD;

		if (flags & LX_CLONE_VFORK) {
			is_vforked++;
			rval = vforkx(fork_flags);
			/* On vfork failure, undo the nesting count. */
			if (rval != 0)
				is_vforked--;
		} else {
			rval = forkx(fork_flags);
			if (rval == 0 && lx_is_rpm)
				(void) sleep(lx_rpm_delay);
		}

		/*
		 * Since we've already forked, we can't do much if uucopy
		 * fails, so we just ignore failure. Failure is unlikely since
		 * we've tested the memory before we did the fork.
		 */
		if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
			(void) uucopy(&rval, ptidp, sizeof (int));
		}

		if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
			/*
			 * lx_getpid should not fail, and if it does, there's
			 * not much we can do about it since we've already
			 * forked, so on failure, we just don't copy the
			 * memory.
			 */
			pid = lx_getpid();
			if (pid >= 0)
				(void) uucopy(&pid, ctidp, sizeof (int));
		}

		/* Parent just returns */
		if (rval != 0)
			return ((rval < 0) ? -errno : rval);

		/*
		 * Set up additional data in the lx_proc_data structure as
		 * necessary.
		 */
		rval = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_clone,
		    flags, cldstk, ptidp, ldtinfo, ctidp, NULL);
		if (rval < 0) {
			return (rval);
		}

		/*
		 * lx_setup_clone() doesn't return below, so stop now, if
		 * necessary.
		 */
		lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);

		/*
		 * If provided, the child needs its new stack set up.
		 */
		if (cldstk) {
#if defined(_LP64)
			(void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
			lx_setup_clone((uintptr_t)rp, (void *)rp->lxr_rip,
			    cldstk);
#else
			lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip,
			    cldstk);
#endif
			/* lx_setup_clone() should never return. */
			assert(0);
		}

		return (0);
	}

	/*
	 * We have very restricted support.... only exactly these flags are
	 * supported
	 */
	if (((flags & SHARED_AS) != SHARED_AS)) {
		lx_unsupported("clone(2) requires that all or none of "
		    "CLONE_VM/FS/FILES/THREAD/SIGHAND be set. (flags:0x%08X)\n",
		    flags);
		return (-ENOTSUP);
	}

	if (cldstk == NULL) {
		lx_unsupported("clone(2) requires the caller to allocate the "
		    "child's stack.\n");
		return (-ENOTSUP);
	}

	/*
	 * If we want a signal-on-exit, ensure that the signal is valid.
	 *
	 * NOTE(review): on this failure path sig has already been assigned
	 * -1, so the diagnostic prints -1 rather than the Linux signal
	 * number (flags & LX_CSIGNAL) — confirm whether that was intended.
	 */
	if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
		lx_unsupported("clone(2) passed unsupported signal: %d", sig);
		return (-ENOTSUP);
	}

	/*
	 * To avoid malloc() here, we steal a part of the new thread's
	 * stack to store all the info that thread might need for
	 * initialization. We also make it 64-bit aligned for good
	 * measure.
	 */
	cs = (struct clone_state *)
	    ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8));
	cs->c_flags = flags;
	cs->c_sig = sig;
	cs->c_stk = cldstk;
	cs->c_ptidp = ptidp;
	cs->c_ldtinfo = ldtinfo;
	cs->c_ctidp = ctidp;
	cs->c_clone_res = &clone_res;

#if defined(_LP64)
	/*
	 * The AMD64 ABI says that the kernel clobbers %rcx and %r11. We
	 * return a value in %rax. The new %rsp and %rip will be setup in
	 * lx_setup_clone. Thus, we don't worry about passing/restoring those
	 * registers.
	 *
	 * NOTE(review): lxr_rdi is assigned twice below and %rbp is never
	 * saved — the duplicate looks like it may have been intended to be
	 * lxr_rbp; confirm against lx_setup_clone() before changing.
	 */
	cs->c_regs.lxr_rdi = rp->lxr_rdi;
	cs->c_regs.lxr_rsi = rp->lxr_rsi;
	cs->c_regs.lxr_rbx = rp->lxr_rbx;
	cs->c_regs.lxr_rdx = rp->lxr_rdx;
	cs->c_regs.lxr_rdi = rp->lxr_rdi;
	cs->c_regs.lxr_r8 = rp->lxr_r8;
	cs->c_regs.lxr_r9 = rp->lxr_r9;
	cs->c_regs.lxr_r10 = rp->lxr_r10;
	cs->c_regs.lxr_r12 = rp->lxr_r12;
	cs->c_regs.lxr_r13 = rp->lxr_r13;
	cs->c_regs.lxr_r14 = rp->lxr_r14;
	cs->c_regs.lxr_r15 = rp->lxr_r15;
#else
	cs->c_gs = rp->lxr_gs;
#endif

	if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
	    (uintptr_t)&cs->c_affmask) == -1)
		lx_err_fatal("Unable to get affinity mask for parent "
		    "thread: %s", strerror(errno));

	/*
	 * We want the new thread to return directly to the return site for
	 * the system call.
	 */
#if defined(_LP64)
	cs->c_retaddr = (void *)rp->lxr_rip;
#else
	cs->c_retaddr = (void *)rp->lxr_eip;
#endif
	clone_res = 0;

	(void) sigfillset(&sigmask);

	/*
	 * Block all signals because the thread we create won't be able to
	 * properly handle them until it's fully set up.
	 */
	if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) {
		lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
		return (-errno);
	}

	rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);

	/*
	 * Release any pending signals
	 */
	(void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL);

	/*
	 * Wait for the child to be created and have its tid assigned.
	 */
	if (rval == 0) {
		/* Busy-wait until clone_start publishes a result. */
		while (clone_res == 0)
			;

		rval = clone_res;
	}

	if (rval == 0)
		lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);

	return (rval);
}
/*
 * Convert Linux MSG_* socket flags into their native equivalents.
 *
 * Flags with a direct native counterpart are translated and cleared from
 * lx_flags; flags we cannot emulate are reported via lx_unsupported()
 * (prefixed with the caller's name in `call`) and then dropped so the
 * operation can proceed.  Any bits still set at the end are unknown and
 * logged as such.  Returns the accumulated native flag word.
 */
static int
convert_sockflags(int lx_flags, char *call)
{
	int solaris_flags = 0;

	if (lx_flags & LX_MSG_OOB) {
		solaris_flags |= MSG_OOB;
		lx_flags &= ~LX_MSG_OOB;
	}

	if (lx_flags & LX_MSG_PEEK) {
		solaris_flags |= MSG_PEEK;
		lx_flags &= ~LX_MSG_PEEK;
	}

	if (lx_flags & LX_MSG_DONTROUTE) {
		solaris_flags |= MSG_DONTROUTE;
		lx_flags &= ~LX_MSG_DONTROUTE;
	}

	if (lx_flags & LX_MSG_CTRUNC) {
		solaris_flags |= MSG_CTRUNC;
		lx_flags &= ~LX_MSG_CTRUNC;
	}

	/* No native equivalent; report and drop. */
	if (lx_flags & LX_MSG_PROXY) {
		lx_unsupported("%s: unsupported socket flag MSG_PROXY", call);
		lx_flags &= ~LX_MSG_PROXY;
	}

	if (lx_flags & LX_MSG_TRUNC) {
		solaris_flags |= MSG_TRUNC;
		lx_flags &= ~LX_MSG_TRUNC;
	}

	if (lx_flags & LX_MSG_DONTWAIT) {
		solaris_flags |= MSG_DONTWAIT;
		lx_flags &= ~LX_MSG_DONTWAIT;
	}

	if (lx_flags & LX_MSG_EOR) {
		solaris_flags |= MSG_EOR;
		lx_flags &= ~LX_MSG_EOR;
	}

	if (lx_flags & LX_MSG_WAITALL) {
		solaris_flags |= MSG_WAITALL;
		lx_flags &= ~LX_MSG_WAITALL;
	}

	if (lx_flags & LX_MSG_FIN) {
		lx_unsupported("%s: unsupported socket flag MSG_FIN", call);
		lx_flags &= ~LX_MSG_FIN;
	}

	if (lx_flags & LX_MSG_SYN) {
		lx_unsupported("%s: unsupported socket flag MSG_SYN", call);
		lx_flags &= ~LX_MSG_SYN;
	}

	if (lx_flags & LX_MSG_CONFIRM) {
		/*
		 * See the Linux arp.7 and sendmsg.2 man pages. We can ignore
		 * this option.
		 */
		lx_flags &= ~LX_MSG_CONFIRM;
	}

	if (lx_flags & LX_MSG_RST) {
		lx_unsupported("%s: unsupported socket flag MSG_RST", call);
		lx_flags &= ~LX_MSG_RST;
	}

	if (lx_flags & LX_MSG_ERRQUEUE) {
		lx_unsupported("%s: unsupported socket flag MSG_ERRQUEUE",
		    call);
		lx_flags &= ~LX_MSG_ERRQUEUE;
	}

	if (lx_flags & LX_MSG_NOSIGNAL) {
		/* MSG_NOSIGNAL handled within each caller */
		lx_flags &= ~LX_MSG_NOSIGNAL;
	}

	if (lx_flags & LX_MSG_MORE) {
		lx_unsupported("%s: unsupported socket flag MSG_MORE", call);
		lx_flags &= ~LX_MSG_MORE;
	}

	if (lx_flags & LX_MSG_WAITFORONE) {
		lx_unsupported("%s: unsupported socket flag MSG_WAITFORONE",
		    call);
		lx_flags &= ~LX_MSG_WAITFORONE;
	}

	if (lx_flags & LX_MSG_FASTOPEN) {
		lx_unsupported("%s: unsupported socket flag MSG_FASTOPEN",
		    call);
		lx_flags &= ~LX_MSG_FASTOPEN;
	}

	if (lx_flags & LX_MSG_CMSG_CLOEXEC) {
		lx_unsupported("%s: unsupported socket flag MSG_CMSG_CLOEXEC",
		    call);
		lx_flags &= ~LX_MSG_CMSG_CLOEXEC;
	}

	/* Anything left over is a flag we have never heard of. */
	if (lx_flags != 0)
		lx_unsupported("%s: unknown socket flag(s) 0x%x",
		    call, lx_flags);

	return (solaris_flags);
}
/*
 * sendmmsg(2) is not emulated; log the fact and fail with EINVAL as the
 * socketcall dispatcher expects for an unhandled call.
 */
static int
lx_sendmmsg(ulong_t *args)
{
	/* Fixed misplaced period: message previously ended "\n." */
	lx_unsupported("Unsupported socketcall: sendmmsg.\n");
	return (-EINVAL);
}
/*
 * getsockopt(2) emulation via the socketcall vector.
 *
 * args: [0]=sockfd, [1]=Linux level, [2]=Linux optname, [3]=optval,
 * [4]=optlenp.  Linux levels/option names are mapped to native values via
 * the per-level translation tables; a few options (TCP_CORK, SO_PEERCRED)
 * get special-case emulation.  Returns 0 or a negated Linux errno.
 */
static int
lx_getsockopt(ulong_t *args)
{
	int sockfd = (int)args[0];
	int level = (int)args[1];
	int optname = (int)args[2];
	void *optval = (void *)args[3];
	int *optlenp = (int *)args[4];
	int r;
	int orig_optname;
	lx_proto_opts_t *proto_opts;

	lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level,
	    optname, optval, optlenp);

	/*
	 * According to the Linux man page, a NULL optval should indicate
	 * (as in Solaris) that no return value is expected. Instead, it
	 * actually triggers an EFAULT error.
	 */
	if (optval == NULL)
		return (-EFAULT);

	/* Levels beyond RAW, and UDP, have no translation table. */
	if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
		return (-EOPNOTSUPP);

	if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
		return (-ENOPROTOOPT);

	/* Bounds-check the option name against the translation table. */
	if (optname <= 0 || optname >= (proto_opts->maxentries)) {
		lx_unsupported("Unsupported sockopt %d, proto %d", optname,
		    level);
		return (-ENOPROTOOPT);
	}

	if ((level == LX_IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
		/*
		 * We don't support TCP_CORK but some apps rely on it. So,
		 * rather than return an error we just return 0. This
		 * isn't exactly a lie, since this option really isn't set,
		 * but it's not the whole truth either. Fortunately, we
		 * aren't under oath.
		 */
		r = 0;
		if (uucopy(&r, optval, sizeof (int)) != 0)
			return (-errno);
		r = sizeof (int);
		if (uucopy(&r, optlenp, sizeof (int)) != 0)
			return (-errno);
		return (0);
	}

	if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) {
		struct lx_ucred lx_ucred;
		ucred_t *ucp;

		/*
		 * We don't support SO_PEERCRED, but we do have equivalent
		 * functionality in getpeerucred() so invoke that here.
		 */

		/*
		 * Verify there's going to be enough room for the results.
		 *
		 * NOTE(review): r is a signed int compared against an
		 * unsigned sizeof; a negative user-supplied length would
		 * pass this check — confirm whether that can happen.
		 */
		if (uucopy(optlenp, &r, sizeof (int)) != 0)
			return (-errno);
		if (r < sizeof (struct lx_ucred))
			return (-EOVERFLOW);

		/*
		 * We allocate a ucred_t ourselves rather than allow
		 * getpeerucred() to do it for us because getpeerucred()
		 * uses malloc(3C) and we'd rather use SAFE_ALLOCA().
		 */
		if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL)
			return (-ENOMEM);

		/* Get the credential for the remote end of this socket. */
		if (getpeerucred(sockfd, &ucp) != 0)
			return (-errno);
		if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) ||
		    ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) ||
		    ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) {
			return (-errno);
		}

		/* Copy out the results. */
		if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0)
			return (-errno);
		r = sizeof (lx_ucred);
		if ((uucopy(&r, optlenp, sizeof (int))) != 0)
			return (-errno);
		return (0);
	}

	orig_optname = optname;

	/* Translate the Linux option name to its native value. */
	optname = proto_opts->proto[optname];
	if (optname == OPTNOTSUP) {
		lx_unsupported("unsupported sockopt %d, proto %d",
		    orig_optname, level);
		return (-ENOPROTOOPT);
	}

	if (level == LX_SOL_SOCKET)
		level = SOL_SOCKET;

	r = getsockopt(sockfd, level, optname, optval, optlenp);

	if (r == 0 && level == SOL_SOCKET && optname == SO_TYPE) {
		/* translate our type back to Linux */
		*(int *)optval = stol_socktype[(*(int *)optval)];
	}

	return ((r < 0) ? -errno : r);
}
/*
 * setsockopt(2) emulation via the socketcall vector.
 *
 * args: [0]=sockfd, [1]=Linux level, [2]=Linux optname, [3]=optval,
 * [4]=optlen.  Linux option names are mapped to native values via the
 * per-level translation tables.  Several options are special-cased:
 * either silently accepted (IP_RECVERR for ping/traceroute, SO_BSDCOMPAT,
 * ICMP_FILTER for ping, disabling TCP_CORK) or converted to a different
 * native option (IP_MTU_DISCOVER -> IP_DONTFRAG, TCP_CORK -> TCP_NODELAY).
 * Returns 0 or a negated Linux errno.
 */
static int
lx_setsockopt(ulong_t *args)
{
	int sockfd = (int)args[0];
	int level = (int)args[1];
	int optname = (int)args[2];
	void *optval = (void *)args[3];
	int optlen = (int)args[4];
	int internal_opt;
	int r;
	lx_proto_opts_t *proto_opts;
	/* Set when a special case already produced a native optname. */
	boolean_t converted = B_FALSE;

	lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname,
	    optval, optlen);

	/*
	 * The kernel returns EFAULT for all invalid addresses except NULL,
	 * for which it returns EINVAL. Linux wants EFAULT for NULL too.
	 */
	if (optval == NULL)
		return (-EFAULT);

	if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
		return (-ENOPROTOOPT);

	if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
		return (-ENOPROTOOPT);

	/* Bounds-check the option name against the translation table. */
	if (optname <= 0 || optname >= proto_opts->maxentries) {
		lx_unsupported("Unsupported sockopt %d, proto %d", optname,
		    level);
		return (-ENOPROTOOPT);
	}

	if (level == LX_IPPROTO_IP) {
		/*
		 * Ping sets this option to receive errors on raw sockets.
		 * Currently we just ignore it to make ping happy. From the
		 * Linux ip.7 man page:
		 *    For raw sockets, IP_RECVERR enables passing of all
		 *    received ICMP errors to the application.
		 */
		if (optname == LX_IP_RECVERR &&
		    strcmp(lx_cmd_name, "ping") == 0)
			return (0);

		if (optname == LX_IP_RECVERR &&
		    strcmp(lx_cmd_name, "traceroute") == 0)
			return (0);

		if (optname == LX_IP_MTU_DISCOVER &&
		    strcmp(lx_cmd_name, "traceroute") == 0) {
			/*
			 * The native traceroute uses IP_DONTFRAG. Set this
			 * and ignore LX_IP_MTU_DISCOVER for traceroute.
			 */
			optname = IP_DONTFRAG;
			converted = B_TRUE;
		}
	} else if (level == LX_SOL_SOCKET) {
		/* Linux ignores this option. */
		if (optname == LX_SO_BSDCOMPAT)
			return (0);

		level = SOL_SOCKET;
	} else if (level == LX_IPPROTO_TCP) {
		if (optname == LX_TCP_CORK) {
			/*
			 * TCP_CORK is a Linux-only option that instructs the
			 * TCP stack not to send out partial frames. Illumos
			 * doesn't include this option but some apps require
			 * it. So, we do our best to emulate the option by
			 * disabling TCP_NODELAY. If the app requests that we
			 * disable TCP_CORK, we just ignore it since enabling
			 * TCP_NODELAY may be overcompensating.
			 */
			optname = TCP_NODELAY;
			if (optlen != sizeof (int))
				return (-EINVAL);
			if (uucopy(optval, &internal_opt,
			    sizeof (int)) != 0)
				return (-errno);
			if (internal_opt == 0)
				return (0);
			/* Enabling TCP_CORK maps to disabling TCP_NODELAY. */
			internal_opt = 1;
			optval = &internal_opt;
			converted = B_TRUE;
		}
	} else if (level == LX_IPPROTO_RAW) {
		/*
		 * Ping sets this option. Currently we just ignore it to make
		 * ping happy.
		 */
		if (optname == LX_ICMP_FILTER &&
		    strcmp(lx_cmd_name, "ping") == 0)
			return (0);
	}

	if (!converted) {
		int orig_optname = optname;

		/*
		 * Do a table lookup of the Illumos equivalent of the given
		 * option.
		 */
		optname = proto_opts->proto[optname];
		if (optname == OPTNOTSUP) {
			lx_unsupported("unsupported sockopt %d, proto %d",
			    orig_optname, level);
			return (-ENOPROTOOPT);
		}
	}

	r = setsockopt(sockfd, level, optname, optval, optlen);

	return ((r < 0) ? -errno : r);
}
/*
 * Report the Linux-visible soft and hard limits for the given resource,
 * either from the fake_limits table (for resources with no native rctl)
 * or by walking the corresponding rctl's value blocks: the basic value
 * maps to the soft limit, the privileged value to the hard limit.
 *
 * Returns 0 on success or a negated Linux errno.
 */
static int
getrlimit_common(int resource, uint64_t *rlim_curp, uint64_t *rlim_maxp)
{
	char *rctl;
	rctlblk_t *rblk;
	int64_t soft = -1, hard = -1;
	boolean_t soft_inf = B_FALSE, hard_inf = B_FALSE;

	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
		return (-EINVAL);

	rctl = l_to_rctl[resource];
	if (rctl == NULL) {
		switch (resource) {
		case LX_RLIMIT_LOCKS:
		case LX_RLIMIT_NICE:
		case LX_RLIMIT_RTPRIO:
		case LX_RLIMIT_RTTIME:
			/* Purely emulated; report the remembered values. */
			*rlim_maxp = fake_limits[resource].rlim_max;
			*rlim_curp = fake_limits[resource].rlim_cur;
			return (0);
		default:
			lx_unsupported("Unsupported resource type %d\n",
			    resource);
			return (-ENOTSUP);
		}
	}

	/*
	 * The brand library cannot use malloc(3C) so we allocate the space
	 * with SAFE_ALLOCA(). Thus there's no need to free it when we're done.
	 */
	rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());
	if (getrctl(rctl, NULL, rblk, RCTL_FIRST) == -1)
		return (-errno);

	/* Walk every value block, noting the basic and privileged values. */
	do {
		int priv = rctlblk_get_privilege(rblk);
		boolean_t inf =
		    (rctlblk_get_local_flags(rblk) & RCTL_LOCAL_MAXIMAL) &&
		    (rctlblk_get_global_flags(rblk) & RCTL_GLOBAL_INFINITE);

		if (priv == RCPRIV_BASIC) {
			soft = rctlblk_get_value(rblk);
			if (inf)
				soft_inf = B_TRUE;
		} else if (priv == RCPRIV_PRIVILEGED) {
			hard = rctlblk_get_value(rblk);
			if (inf)
				hard_inf = B_TRUE;
		}
	} while (getrctl(rctl, rblk, rblk, RCTL_NEXT) != -1);

	/* Confirm we got values. For many rctls "basic" is not set. */
	if (hard == -1)
		hard = LX_RLIM64_INFINITY;
	if (soft == -1)
		soft = hard;

	*rlim_curp = soft_inf ? LX_RLIM64_INFINITY : (uint64_t)soft;
	*rlim_maxp = hard_inf ? LX_RLIM64_INFINITY : (uint64_t)hard;

	return (0);
}