Example #1
static int
setrlimit_common(int resource, uint64_t rlim_cur, uint64_t rlim_max)
{
	int rv;
	char *rctl;

	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
		return (-EINVAL);

	rctl = l_to_rctl[resource];
	if (rctl == NULL) {
		switch (resource) {
		case LX_RLIMIT_LOCKS:
		case LX_RLIMIT_NICE:
		case LX_RLIMIT_RTPRIO:
		case LX_RLIMIT_RTTIME:
			fake_limits[resource].rlim_max = rlim_max;
			fake_limits[resource].rlim_cur = rlim_cur;
			return (0);
		}

		lx_unsupported("Unsupported resource type %d\n", resource);
		return (-ENOTSUP);
	}

	/*
	 * If we're emulating the value via a zone rctl, we can't set that
	 * from within the zone. Lie and say we set the value.
	 */
	if (strncmp(rctl, "zone.", 5) == 0)
		return (0);

	/*
	 * On Ubuntu at least, the login and sshd processes expect to set this
	 * limit to 16k and login will fail if this fails. On Illumos we have a
	 * system limit of 8k and normally the privileged limit is 512. We
	 * simply pretend this works to allow login to work.
	 */
	if (strcmp(rctl, "process.max-sigqueue-size") == 0 && rlim_max > 8192)
		return (0);

	/*
	 * Linux limits the max number of open files to 1m and there is a test
	 * for this.
	 */
	if (resource == LX_RLIMIT_NOFILE && rlim_max > (1024 * 1024))
		return (-EPERM);

	if ((rv = set_rctl(rctl, rlim_max, RCPRIV_PRIVILEGED)) != 0)
		return (rv);

	return (set_rctl(rctl, rlim_cur, RCPRIV_BASIC));
}
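setrlimit_common() depends on two lookup tables that are not reproduced in these examples: l_to_rctl, which maps each Linux resource number to an Illumos rctl name (or NULL), and fake_limits, which simply stores values for resources with no rctl equivalent. The sketch below shows one plausible shape for those tables, inferred from how they are indexed here and in getrlimit_common() (Example #14); the specific rctl names, ordering, and struct layout are assumptions, not the brand library's actual definitions.

/*
 * Illustrative sketch only -- not the real brand-library tables.
 */
typedef struct {
	uint64_t rlim_cur;	/* emulated soft limit */
	uint64_t rlim_max;	/* emulated hard limit */
} lx_fake_limit_t;

/* Values stored here are simply handed back by getrlimit_common(). */
static lx_fake_limit_t fake_limits[LX_RLIMIT_NLIMITS];

/*
 * Linux resource number -> rctl name; NULL entries are either emulated via
 * fake_limits or unsupported.  The names are real Illumos rctls, but their
 * assignment to slots here is assumed for illustration.
 */
static char *l_to_rctl[LX_RLIMIT_NLIMITS] = {
	"process.max-cpu-time",		/* e.g. LX_RLIMIT_CPU */
	"process.max-file-size",	/* e.g. LX_RLIMIT_FSIZE */
	"process.max-file-descriptor",	/* e.g. LX_RLIMIT_NOFILE */
	NULL,				/* e.g. LX_RLIMIT_NICE (fake_limits) */
	/* ... remaining entries elided ... */
};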
Example #2
static int
convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller)
{
	struct cmsghdr *cmsg, *last;
	int err = 0;
	int level = 0;
	int type = 0;

	cmsg = CMSG_FIRSTHDR(msg);
	while (cmsg != NULL && err == 0) {
		level = cmsg->cmsg_level;
		type = cmsg->cmsg_type;

		if (direction == LX_TO_SOL) {
			if (cmsg->cmsg_level == LX_SOL_SOCKET) {
				cmsg->cmsg_level = SOL_SOCKET;
				if (cmsg->cmsg_type == LX_SCM_RIGHTS)
					cmsg->cmsg_type = SCM_RIGHTS;
				else if (cmsg->cmsg_type == LX_SCM_CRED)
					cmsg->cmsg_type = SCM_UCRED;
				else if (cmsg->cmsg_type == LX_SCM_TIMESTAMP)
					cmsg->cmsg_type = SCM_TIMESTAMP;
				else
					err = ENOTSUP;
			} else {
				err = ENOTSUP;
			}
		} else {
			if (cmsg->cmsg_level == SOL_SOCKET) {
				cmsg->cmsg_level = LX_SOL_SOCKET;
				if (cmsg->cmsg_type == SCM_RIGHTS)
					cmsg->cmsg_type = LX_SCM_RIGHTS;
				else if (cmsg->cmsg_type == SCM_UCRED)
					cmsg->cmsg_type = LX_SCM_CRED;
				else if (cmsg->cmsg_type == SCM_TIMESTAMP)
					cmsg->cmsg_type = LX_SCM_TIMESTAMP;
				else
					err = ENOTSUP;
			} else {
				err = ENOTSUP;
			}
		}

		last = cmsg;
		cmsg = CMSG_NXTHDR(msg, last);
	}
	if (err)
		lx_unsupported("Unsupported socket control message %d "
		    "(%d) in %s.\n", type, level, caller);

	return (err);
}
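For context, the control messages that convert_cmsgs() rewrites are the standard SCM_* ancillary data used with sendmsg(2)/recvmsg(2). The sketch below (not taken from the source; the helper name is illustrative) shows a guest program passing a file descriptor with SCM_RIGHTS, which is the LX_SCM_RIGHTS case translated above when the emulation converts the message in the LX_TO_SOL direction.

#include <string.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>

/* Guest-side sketch of an SCM_RIGHTS (fd-passing) control message. */
static int
send_fd(int sock, int fd_to_pass)
{
	struct msghdr msg;
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cbuf[CMSG_SPACE(sizeof (int))];
	char dummy = '\0';

	(void) memset(&msg, 0, sizeof (msg));
	(void) memset(cbuf, 0, sizeof (cbuf));
	iov.iov_base = &dummy;
	iov.iov_len = 1;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof (cbuf);

	/*
	 * The guest is compiled against Linux headers, so these constants
	 * carry the Linux values the emulation calls LX_SOL_SOCKET and
	 * LX_SCM_RIGHTS; convert_cmsgs() rewrites them to the native ones.
	 */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof (int));
	(void) memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof (int));

	return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
}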
Example #3
static lx_proto_opts_t *
get_proto_opt_tbl(int level)
{
	switch (level) {
	case LX_IPPROTO_IP:	return (&ip_sockopts_tbl);
	case LX_SOL_SOCKET:	return (&socket_sockopts_tbl);
	case LX_IPPROTO_IGMP:	return (&igmp_sockopts_tbl);
	case LX_IPPROTO_TCP:	return (&tcp_sockopts_tbl);
	case LX_IPPROTO_RAW:	return (&raw_sockopts_tbl);
	default:
		lx_unsupported("Unsupported sockopt level %d", level);
		return (NULL);
	}
}
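The lx_proto_opts_t type returned here is not defined in these examples, but lx_getsockopt() and lx_setsockopt() below index it as proto_opts->proto[optname], bound-check against maxentries, and test for an OPTNOTSUP sentinel. A minimal sketch consistent with that usage follows; everything beyond the two referenced field names, including the sentinel value and the Linux option numbering in the table, is an assumption.

/*
 * Sketch inferred from usage in lx_getsockopt()/lx_setsockopt(); not the
 * actual definition.
 */
#define	OPTNOTSUP	(-1)		/* assumed sentinel */

typedef struct lx_proto_opts {
	int	*proto;		/* Linux optname -> native optname */
	int	maxentries;	/* number of slots in proto[] */
} lx_proto_opts_t;

/* Illustrative TCP table: indexed by the Linux option number. */
static int ltos_tcp_sockopts[] = {
	OPTNOTSUP,		/* 0 is never a valid optname */
	TCP_NODELAY,		/* assumed LX_TCP_NODELAY == 1 */
	TCP_MAXSEG,		/* assumed LX_TCP_MAXSEG == 2 */
	OPTNOTSUP,		/* an option with no native equivalent */
};

static lx_proto_opts_t tcp_sockopts_tbl = {
	ltos_tcp_sockopts,
	sizeof (ltos_tcp_sockopts) / sizeof (int)
};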
Example #4
long
lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	int		fd = (int)p1;
	int		cmd = (int)p2;
	struct lx_flock lxflk;
	struct lx_flock64 lxflk64;
	struct flock	fl;
	struct flock64	fl64;
	int		rc;

	if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
	    cmd == LX_F_GETLEASE) {
		lx_unsupported("unsupported fcntl64 command: %d", cmd);
		return (-ENOTSUP);
	}

	if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || cmd == LX_F_SETLKW) {
		if (uucopy((void *)p3, (void *)&lxflk,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
		ltos_flock(&lxflk, &fl);
		rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl);
		if (rc >= 0) {
			stol_flock(&fl, &lxflk);
			if (uucopy((void *)&lxflk, (void *)p3,
			    sizeof (struct lx_flock)) != 0)
				return (-errno);
		}
	} else if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLKW64 ||
	    cmd == LX_F_SETLK64) {
		if (uucopy((void *)p3, (void *)&lxflk64,
		    sizeof (struct lx_flock64)) != 0)
			return (-errno);
		ltos_flock64(&lxflk64, &fl64);
		rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl64);
		if (rc >= 0) {
			stol_flock64(&fl64, &lxflk64);
			if (uucopy((void *)&lxflk64, (void *)p3,
			    sizeof (struct lx_flock64)) != 0)
				return (-errno);
		}
	} else {
		rc = lx_fcntl_com(fd, cmd, (ulong_t)p3);
	}

	return (rc);
}
Example #5
/*
 * From the man page:
 * The Linux-specific prlimit() system call combines and extends the
 * functionality of setrlimit() and getrlimit(). It can be used to both set
 * and get the resource limits of an arbitrary process.
 *
 * If pid is 0, then the call applies to the calling process.
 */
int
lx_prlimit64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
{
	pid_t pid = (pid_t)p1;
	int resource = (int)p2;
	lx_rlimit64_t *nrlp = (lx_rlimit64_t *)p3;
	lx_rlimit64_t *orlp = (lx_rlimit64_t *)p4;
	int rv = 0;
	uint64_t rlim_cur, rlim_max;
	lx_rlimit64_t nrl, orl;

	if (pid != 0) {
		/* XXX TBD if needed */
		lx_unsupported("setting prlimit %d for another process\n",
		    resource);
		return (-ENOTSUP);
	}

	if (orlp != NULL) {
		/* we first get the current limits */
		rv = getrlimit_common(resource, &rlim_cur, &rlim_max);
		if (rv != 0)
			return (rv);
	}

	if (nrlp != NULL) {
		if (uucopy((void *)p3, &nrl, sizeof (nrl)) != 0)
			return (-errno);

		if ((nrl.rlim_max != LX_RLIM64_INFINITY &&
		    nrl.rlim_cur == LX_RLIM64_INFINITY) ||
		    nrl.rlim_cur > nrl.rlim_max)
			return (-EINVAL);

		rv = setrlimit_common(resource, nrl.rlim_cur, nrl.rlim_max);
	}

	if (rv == 0 && orlp != NULL) {
		/* now return the original limits, if necessary */
		orl.rlim_cur = rlim_cur;
		orl.rlim_max = rlim_max;

		if ((uucopy(&orl, orlp, sizeof (orl))) != 0)
			rv = -errno;
	}

	return (rv);
}
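For reference, the guest-side call that this emulation services looks roughly like the following sketch (not from the source); pid 0 selects the calling process, matching the only case handled above, and RLIMIT_NOFILE with a limit of 4096 are arbitrary choices for illustration.

#define	_GNU_SOURCE		/* prlimit() is a Linux/glibc extension */
#include <sys/resource.h>
#include <stdio.h>

int
main(void)
{
	struct rlimit nrl = { 4096, 4096 };	/* soft, hard */
	struct rlimit orl;

	/* Either the new or the old limit pointer may be NULL to skip it. */
	if (prlimit(0, RLIMIT_NOFILE, &nrl, &orl) != 0) {
		perror("prlimit");
		return (1);
	}
	(void) printf("previous soft=%llu hard=%llu\n",
	    (unsigned long long)orl.rlim_cur,
	    (unsigned long long)orl.rlim_max);
	return (0);
}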
Example #6
long
lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	int		fd = (int)p1;
	int		cmd = (int)p2;
	ulong_t		arg = (ulong_t)p3;
	struct lx_flock lxflk;
	struct flock	fl;
	int		lk = 0;
	int		rc;

	/*
	 * The 64-bit fcntl commands must go through fcntl64().
	 */
	if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLK64 ||
	    cmd == LX_F_SETLKW64)
		return (-EINVAL);

	if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
	    cmd == LX_F_GETLEASE) {
		lx_unsupported("unsupported fcntl command: %d", cmd);
		return (-ENOTSUP);
	}

	if (cmd == LX_F_GETLK || cmd == LX_F_SETLK ||
	    cmd == LX_F_SETLKW) {
		if (uucopy((void *)p3, (void *)&lxflk,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
		lk = 1;
		ltos_flock(&lxflk, &fl);
		arg = (ulong_t)&fl;
	}

	rc = lx_fcntl_com(fd, cmd, arg);

	if (lk && rc >= 0) {
		stol_flock(&fl, &lxflk);
		if (uucopy((void *)&lxflk, (void *)p3,
		    sizeof (struct lx_flock)) != 0)
			return (-errno);
	}

	return (rc);
}
Example #7
static int
convert_sockflags(int lx_flags)
{
	int solaris_flags = 0;

	if (lx_flags & LX_MSG_OOB) {
		solaris_flags |= MSG_OOB;
		lx_flags &= ~LX_MSG_OOB;
	}

	if (lx_flags & LX_MSG_PEEK) {
		solaris_flags |= MSG_PEEK;
		lx_flags &= ~LX_MSG_PEEK;
	}

	if (lx_flags & LX_MSG_DONTROUTE) {
		solaris_flags |= MSG_DONTROUTE;
		lx_flags &= ~LX_MSG_DONTROUTE;
	}

	if (lx_flags & LX_MSG_CTRUNC) {
		solaris_flags |= MSG_CTRUNC;
		lx_flags &= ~LX_MSG_CTRUNC;
	}

	if (lx_flags & LX_MSG_PROXY) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_PROXY flag set");
		lx_flags &= ~LX_MSG_PROXY;
	}

	if (lx_flags & LX_MSG_TRUNC) {
		solaris_flags |= MSG_TRUNC;
		lx_flags &= ~LX_MSG_TRUNC;
	}

	if (lx_flags & LX_MSG_DONTWAIT) {
		solaris_flags |= MSG_DONTWAIT;
		lx_flags &= ~LX_MSG_DONTWAIT;
	}

	if (lx_flags & LX_MSG_EOR) {
		solaris_flags |= MSG_EOR;
		lx_flags &= ~LX_MSG_EOR;
	}

	if (lx_flags & LX_MSG_WAITALL) {
		solaris_flags |= MSG_WAITALL;
		lx_flags &= ~LX_MSG_WAITALL;
	}

	if (lx_flags & LX_MSG_FIN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_FIN flag set");
		lx_flags &= ~LX_MSG_FIN;
	}

	if (lx_flags & LX_MSG_SYN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_SYN flag set");
		lx_flags &= ~LX_MSG_SYN;
	}

	if (lx_flags & LX_MSG_CONFIRM) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_CONFIRM set");
		lx_flags &= ~LX_MSG_CONFIRM;
	}

	if (lx_flags & LX_MSG_RST) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_RST flag set");
		lx_flags &= ~LX_MSG_RST;
	}

	if (lx_flags & LX_MSG_ERRQUEUE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_ERRQUEUE flag set");
		lx_flags &= ~LX_MSG_ERRQUEUE;
	}

	if (lx_flags & LX_MSG_NOSIGNAL) {
		/* MSG_NOSIGNAL handled within each caller */
		lx_flags &= ~LX_MSG_NOSIGNAL;
	}

	if (lx_flags & LX_MSG_MORE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_MORE flag set");
		lx_flags &= ~LX_MSG_MORE;
	}

	if (lx_flags & LX_MSG_WAITFORONE) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_WAITFORONE flag set");
		lx_flags &= ~LX_MSG_WAITFORONE;
	}

	if (lx_flags & LX_MSG_FASTOPEN) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_FASTOPEN flag set");
		lx_flags &= ~LX_MSG_FASTOPEN;
	}

	if (lx_flags & LX_MSG_CMSG_CLOEXEC) {
		lx_unsupported("Unsupported "
		    "socket operation with MSG_CMSG_CLOEXEC flag set");
		lx_flags &= ~LX_MSG_CMSG_CLOEXEC;
	}

	if (lx_flags != 0)
		lx_unsupported("unknown socket flag(s) set 0x%x", lx_flags);

	return (solaris_flags);
}
Example #8
long
lx_futex(uintptr_t addr, int op, int val, uintptr_t lx_timeout,
	uintptr_t addr2, int val3)
{
	struct as *as = curproc->p_as;
	memid_t memid, memid2;
	timestruc_t timeout;
	timestruc_t *tptr = NULL;
	int val2 = 0;
	int rval = 0;
	int cmd = op & FUTEX_CMD_MASK;
	int private = op & FUTEX_PRIVATE_FLAG;
	char dmsg[32];

	/* must be aligned on int boundary */
	if (addr & 0x3)
		return (set_errno(EINVAL));

	/* Sanity check the futex command */
	if (cmd < 0 || cmd > FUTEX_MAX_CMD)
		return (set_errno(EINVAL));

	if (cmd == FUTEX_FD) {
		/*
		 * FUTEX_FD was sentenced to death for grievous crimes of
		 * semantics against humanity; it has been ripped out of Linux
		 * and will never be supported by us.
		 */
		(void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
		lx_unsupported(dmsg);
		return (set_errno(ENOSYS));
	}

	switch (cmd) {
	case FUTEX_LOCK_PI:
	case FUTEX_UNLOCK_PI:
	case FUTEX_TRYLOCK_PI:
	case FUTEX_WAIT_BITSET:
	case FUTEX_WAKE_BITSET:
	case FUTEX_WAIT_REQUEUE_PI:
	case FUTEX_CMP_REQUEUE_PI:
		/*
		 * These are operations that we don't currently support, but
		 * may well need to in the future.  For now, callers need to
		 * deal with these being missing -- but if and as that changes,
		 * they may well need to be implemented.
		 */
		(void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
		lx_unsupported(dmsg);
		return (set_errno(ENOSYS));
	}

	/* Copy in the timeout structure from userspace. */
	if (cmd == FUTEX_WAIT && lx_timeout != NULL) {
		rval = get_timeout((timespec_t *)lx_timeout, &timeout);
		if (rval != 0)
			return (set_errno(rval));
		tptr = &timeout;
	}

	switch (cmd) {
	case FUTEX_REQUEUE:
	case FUTEX_CMP_REQUEUE:
	case FUTEX_WAKE_OP:
		/*
		 * lx_timeout is nominally a pointer to a userspace address.
		 * For several commands, however, it actually contains
		 * an additional integer parameter.  This is horrible, and
		 * the people who did this to us should be sorry.
		 */
		val2 = (int)lx_timeout;
	}

	/*
	 * Translate the process-specific, user-space futex virtual
	 * address(es) to a universal memid.  If the private bit is set, we
	 * can just use our as plus the virtual address, saving quite a bit
	 * of effort.
	 */
	if (private) {
		memid.val[0] = (uintptr_t)as;
		memid.val[1] = (uintptr_t)addr;
	} else {
Example #9
/*
 * See glibc sysdeps/unix/sysv/linux/x86_64/clone.S code for x64 argument order
 * and the Linux kernel/fork.c code for the various ways arguments can be passed
 * to the clone syscall (CONFIG_CLONE_BACKWARDS, et al).
 */
long
lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
	uintptr_t p5)
{
	struct clone_state *cs;
	int flags = (int)p1;
	void *cldstk = (void *)p2;
	void *ptidp = (void *)p3;
#if defined(_LP64)
	void *ctidp = (void *)p4;
	struct lx_desc *ldtinfo = (void *)p5;
#else /* is 32bit */
	struct lx_desc *ldtinfo = (void *)p4;
	void *ctidp = (void *)p5;
#endif
	thread_t tid;
	volatile int clone_res;
	int sig;
	int rval;
	int pid;
	lx_regs_t *rp;
	sigset_t sigmask;
	int fork_flags = 0;

	if (flags & LX_CLONE_SETTLS) {
		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
		    "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
	} else {
		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
		    flags, cldstk, ptidp);
	}

	/*
	 * Only supported for pid 0 on Linux
	 */
	if (flags & LX_CLONE_PID)
		return (-EINVAL);

	/*
	 * CLONE_THREAD requires CLONE_SIGHAND.
	 *
	 * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
	 * in kernel 2.4 and prior.
	 * In kernel 2.6 (and later) CLONE_DETACHED was dropped completely, so
	 * we no longer have this requirement.
	 */

	if (flags & CLONE_TD) {
		if (!(flags & LX_CLONE_SIGHAND))
			return (-EINVAL);
		if (strncmp(lx_release, "2.4", 3) == 0 &&
		    (flags & CLONE_TD) != CLONE_TD)
			return (-EINVAL);
	}

	rp = lx_syscall_regs();

	/* test if the pointers passed by the user are writable */
	if (flags & LX_CLONE_PARENT_SETTID) {
		if (uucopy(ptidp, &pid, sizeof (int)) != 0)
			return (-EFAULT);
		if (uucopy(&pid, ptidp, sizeof (int)) != 0)
			return (-EFAULT);
	}
	if (flags & LX_CLONE_CHILD_SETTID) {
		if (uucopy(ctidp, &pid, sizeof (int)) != 0)
			return (-EFAULT);
		if (uucopy(&pid, ctidp, sizeof (int)) != 0)
			return (-EFAULT);
	}

	/* See if this is a fork() operation or a thr_create().  */
	if (IS_FORK(flags) || IS_VFORK(flags)) {
		if (flags & LX_CLONE_PARENT) {
			lx_unsupported("clone(2) only supports CLONE_PARENT "
			    "for threads.\n");
			return (-ENOTSUP);
		}

		if (flags & LX_CLONE_PTRACE)
			lx_ptrace_fork();

		if ((flags & LX_CSIGNAL) == 0)
			fork_flags |= FORK_NOSIGCHLD;

		if (flags & LX_CLONE_VFORK) {
			is_vforked++;
			rval = vforkx(fork_flags);
			if (rval != 0)
				is_vforked--;
		} else {
			rval = forkx(fork_flags);
			if (rval == 0 && lx_is_rpm)
				(void) sleep(lx_rpm_delay);
		}

		/*
		 * Since we've already forked, we can't do much if uucopy
		 * fails, so we just ignore failure. Failure is unlikely since
		 * we've tested the memory before we did the fork.
		 */
		if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
			(void) uucopy(&rval, ptidp, sizeof (int));
		}

		if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
			/*
			 * lx_getpid should not fail, and if it does, there's
			 * not much we can do about it since we've already
			 * forked, so on failure, we just don't copy the
			 * memory.
			 */
			pid = lx_getpid();
			if (pid >= 0)
				(void) uucopy(&pid, ctidp, sizeof (int));
		}

		/* Parent just returns */
		if (rval != 0)
			return ((rval < 0) ? -errno : rval);

		/*
		 * Set up additional data in the lx_proc_data structure as
		 * necessary.
		 */
		rval = syscall(SYS_brand, B_IKE_SYSCALL + LX_EMUL_clone,
		    flags, cldstk, ptidp, ldtinfo, ctidp, NULL);
		if (rval < 0) {
			return (rval);
		}

		/*
		 * lx_setup_clone() doesn't return below, so stop now, if
		 * necessary.
		 */
		lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);

		/*
		 * If provided, the child needs its new stack set up.
		 */
		if (cldstk) {
#if defined(_LP64)
			(void) syscall(SYS_brand, B_CLR_NTV_SYSC_FLAG);
			lx_setup_clone((uintptr_t)rp, (void *)rp->lxr_rip,
			    cldstk);
#else
			lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk);
#endif
			/* lx_setup_clone() should never return. */
			assert(0);
		}

		return (0);
	}

	/*
	 * We have very restricted support: for thread creation, all of
	 * CLONE_VM/FS/FILES/THREAD/SIGHAND must be set.
	 */
	if ((flags & SHARED_AS) != SHARED_AS) {
		lx_unsupported("clone(2) requires that all or none of "
		    "CLONE_VM/FS/FILES/THREAD/SIGHAND be set. (flags:0x%08X)\n",
		    flags);
		return (-ENOTSUP);
	}

	if (cldstk == NULL) {
		lx_unsupported("clone(2) requires the caller to allocate the "
		    "child's stack.\n");
		return (-ENOTSUP);
	}

	/*
	 * If we want a signal-on-exit, ensure that the signal is valid.
	 */
	if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
		lx_unsupported("clone(2) passed unsupported signal: %d",
		    flags & LX_CSIGNAL);
		return (-ENOTSUP);
	}

	/*
	 * To avoid malloc() here, we steal a part of the new thread's
	 * stack to store all the info that thread might need for
	 * initialization.  We also make it 64-bit aligned for good
	 * measure.
	 */
	cs = (struct clone_state *)
	    ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8));
	cs->c_flags = flags;
	cs->c_sig = sig;
	cs->c_stk = cldstk;
	cs->c_ptidp = ptidp;
	cs->c_ldtinfo = ldtinfo;
	cs->c_ctidp = ctidp;
	cs->c_clone_res = &clone_res;
#if defined(_LP64)
	/*
	 * The AMD64 ABI says that the kernel clobbers %rcx and %r11. We
	 * return a value in %rax. The new %rsp and %rip will be setup in
	 * lx_setup_clone. Thus, we don't worry about passing/restoring those
	 * registers.
	 */
	cs->c_regs.lxr_rdi = rp->lxr_rdi;
	cs->c_regs.lxr_rsi = rp->lxr_rsi;
	cs->c_regs.lxr_rbx = rp->lxr_rbx;
	cs->c_regs.lxr_rdx = rp->lxr_rdx;
	cs->c_regs.lxr_r8 = rp->lxr_r8;
	cs->c_regs.lxr_r9 = rp->lxr_r9;
	cs->c_regs.lxr_r10 = rp->lxr_r10;
	cs->c_regs.lxr_r12 = rp->lxr_r12;
	cs->c_regs.lxr_r13 = rp->lxr_r13;
	cs->c_regs.lxr_r14 = rp->lxr_r14;
	cs->c_regs.lxr_r15 = rp->lxr_r15;
#else
	cs->c_gs = rp->lxr_gs;
#endif

	if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
	    (uintptr_t)&cs->c_affmask) == -1)
		lx_err_fatal("Unable to get affinity mask for parent "
		    "thread: %s", strerror(errno));

	/*
	 * We want the new thread to return directly to the return site for
	 * the system call.
	 */
#if defined(_LP64)
	cs->c_retaddr = (void *)rp->lxr_rip;
#else
	cs->c_retaddr = (void *)rp->lxr_eip;
#endif
	clone_res = 0;

	(void) sigfillset(&sigmask);

	/*
	 * Block all signals because the thread we create won't be able to
	 * properly handle them until it's fully set up.
	 */
	if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) {
		lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
		return (-errno);
	}

	rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);

	/*
	 * Release any pending signals
	 */
	(void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL);

	/*
	 * Wait for the child to be created and have its tid assigned.
	 */
	if (rval == 0) {
		while (clone_res == 0)
			;

		rval = clone_res;
	}

	if (rval == 0)
		lx_ptrace_stop_if_option(LX_PTRACE_O_TRACECLONE);

	return (rval);
}
Example #10
static int
convert_sockflags(int lx_flags, char *call)
{
	int solaris_flags = 0;

	if (lx_flags & LX_MSG_OOB) {
		solaris_flags |= MSG_OOB;
		lx_flags &= ~LX_MSG_OOB;
	}

	if (lx_flags & LX_MSG_PEEK) {
		solaris_flags |= MSG_PEEK;
		lx_flags &= ~LX_MSG_PEEK;
	}

	if (lx_flags & LX_MSG_DONTROUTE) {
		solaris_flags |= MSG_DONTROUTE;
		lx_flags &= ~LX_MSG_DONTROUTE;
	}

	if (lx_flags & LX_MSG_CTRUNC) {
		solaris_flags |= MSG_CTRUNC;
		lx_flags &= ~LX_MSG_CTRUNC;
	}

	if (lx_flags & LX_MSG_PROXY) {
		lx_unsupported("%s: unsupported socket flag MSG_PROXY", call);
		lx_flags &= ~LX_MSG_PROXY;
	}

	if (lx_flags & LX_MSG_TRUNC) {
		solaris_flags |= MSG_TRUNC;
		lx_flags &= ~LX_MSG_TRUNC;
	}

	if (lx_flags & LX_MSG_DONTWAIT) {
		solaris_flags |= MSG_DONTWAIT;
		lx_flags &= ~LX_MSG_DONTWAIT;
	}

	if (lx_flags & LX_MSG_EOR) {
		solaris_flags |= MSG_EOR;
		lx_flags &= ~LX_MSG_EOR;
	}

	if (lx_flags & LX_MSG_WAITALL) {
		solaris_flags |= MSG_WAITALL;
		lx_flags &= ~LX_MSG_WAITALL;
	}

	if (lx_flags & LX_MSG_FIN) {
		lx_unsupported("%s: unsupported socket flag MSG_FIN", call);
		lx_flags &= ~LX_MSG_FIN;
	}

	if (lx_flags & LX_MSG_SYN) {
		lx_unsupported("%s: unsupported socket flag MSG_SYN", call);
		lx_flags &= ~LX_MSG_SYN;
	}

	if (lx_flags & LX_MSG_CONFIRM) {
		/*
		 * See the Linux arp.7 and sendmsg.2 man pages. We can ignore
		 * this option.
		 */
		lx_flags &= ~LX_MSG_CONFIRM;
	}

	if (lx_flags & LX_MSG_RST) {
		lx_unsupported("%s: unsupported socket flag MSG_RST", call);
		lx_flags &= ~LX_MSG_RST;
	}

	if (lx_flags & LX_MSG_ERRQUEUE) {
		lx_unsupported("%s: unsupported socket flag MSG_ERRQUEUE",
		    call);
		lx_flags &= ~LX_MSG_ERRQUEUE;
	}

	if (lx_flags & LX_MSG_NOSIGNAL) {
		/* MSG_NOSIGNAL handled within each caller */
		lx_flags &= ~LX_MSG_NOSIGNAL;
	}

	if (lx_flags & LX_MSG_MORE) {
		lx_unsupported("%s: unsupported socket flag MSG_MORE", call);
		lx_flags &= ~LX_MSG_MORE;
	}

	if (lx_flags & LX_MSG_WAITFORONE) {
		lx_unsupported("%s: unsupported socket flag MSG_WAITFORONE",
		    call);
		lx_flags &= ~LX_MSG_WAITFORONE;
	}

	if (lx_flags & LX_MSG_FASTOPEN) {
		lx_unsupported("%s: unsupported socket flag MSG_FASTOPEN",
		    call);
		lx_flags &= ~LX_MSG_FASTOPEN;
	}

	if (lx_flags & LX_MSG_CMSG_CLOEXEC) {
		lx_unsupported("%s: unsupported socket flag MSG_CMSG_CLOEXEC",
		    call);
		lx_flags &= ~LX_MSG_CMSG_CLOEXEC;
	}

	if (lx_flags != 0)
		lx_unsupported("%s: unknown socket flag(s) 0x%x", call,
		    lx_flags);

	return (solaris_flags);
}
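A hypothetical caller sketch follows, showing where convert_sockflags() sits in a receive path. The wrapper name and its argument handling are illustrative only; as the comment above notes, MSG_NOSIGNAL is stripped by the conversion and left for the caller to emulate.

#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>

/* Illustrative only -- not the brand library's actual recv emulation. */
static ssize_t
lx_recv_sketch(int sockfd, void *buf, size_t len, int lx_flags)
{
	int flags = convert_sockflags(lx_flags, "recv");
	ssize_t r;

	/*
	 * convert_sockflags() strips LX_MSG_NOSIGNAL; a real caller would
	 * emulate it here, e.g. by blocking SIGPIPE around the call.
	 */
	r = recv(sockfd, buf, len, flags);
	return ((r < 0) ? -errno : r);
}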
Example #11
static int
lx_sendmmsg(ulong_t *args)
{
	lx_unsupported("Unsupported socketcall: sendmmsg.\n");
	return (-EINVAL);
}
Example #12
static int
lx_getsockopt(ulong_t *args)
{
	int sockfd = (int)args[0];
	int level = (int)args[1];
	int optname = (int)args[2];
	void *optval = (void *)args[3];
	int *optlenp = (int *)args[4];
	int r;
	int orig_optname;
	lx_proto_opts_t *proto_opts;

	lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname,
	    optval, optlenp);

	/*
	 * According to the Linux man page, a NULL optval should indicate
	 * (as in Solaris) that no return value is expected.  Instead, it
	 * actually triggers an EFAULT error.
	 */
	if (optval == NULL)
		return (-EFAULT);

	if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
		return (-EOPNOTSUPP);

	if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
		return (-ENOPROTOOPT);

	if (optname <= 0 || optname >= (proto_opts->maxentries)) {
		lx_unsupported("Unsupported sockopt %d, proto %d", optname,
		    level);
		return (-ENOPROTOOPT);
	}

	if ((level == LX_IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
		/*
		 * We don't support TCP_CORK but some apps rely on it.  So,
		 * rather than return an error we just return 0.  This
		 * isn't exactly a lie, since this option really isn't set,
		 * but it's not the whole truth either.  Fortunately, we
		 * aren't under oath.
		 */
		r = 0;
		if (uucopy(&r, optval, sizeof (int)) != 0)
			return (-errno);
		r = sizeof (int);
		if (uucopy(&r, optlenp, sizeof (int)) != 0)
			return (-errno);
		return (0);
	}
	if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) {
		struct lx_ucred	lx_ucred;
		ucred_t		*ucp;

		/*
		 * We don't support SO_PEERCRED, but we do have equivalent
		 * functionality in getpeerucred() so invoke that here.
		 */

		/* Verify there's going to be enough room for the results. */
		if (uucopy(optlenp, &r, sizeof (int)) != 0)
			return (-errno);
		if (r < sizeof (struct lx_ucred))
			return (-EOVERFLOW);

		/*
		 * We allocate a ucred_t ourselves rather than allow
		 * getpeerucred() to do it for us because getpeerucred()
		 * uses malloc(3C) and we'd rather use SAFE_ALLOCA().
		 */
		if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL)
			return (-ENOMEM);

		/* Get the credential for the remote end of this socket. */
		if (getpeerucred(sockfd, &ucp) != 0)
			return (-errno);
		if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) ||
		    ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) ||
		    ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) {
			return (-errno);
		}

		/* Copy out the results. */
		if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0)
			return (-errno);
		r = sizeof (lx_ucred);
		if ((uucopy(&r, optlenp, sizeof (int))) != 0)
			return (-errno);
		return (0);
	}

	orig_optname = optname;

	optname = proto_opts->proto[optname];
	if (optname == OPTNOTSUP) {
		lx_unsupported("unsupported sockopt %d, proto %d",
		    orig_optname, level);
		return (-ENOPROTOOPT);
	}

	if (level == LX_SOL_SOCKET)
		level = SOL_SOCKET;

	r = getsockopt(sockfd, level, optname, optval, optlenp);

	if (r == 0 && level == SOL_SOCKET && optname == SO_TYPE) {
		/* translate our type back to Linux */
		*(int *)optval = stol_socktype[(*(int *)optval)];
	}

	return ((r < 0) ? -errno : r);
}
Example #13
static int
lx_setsockopt(ulong_t *args)
{
	int sockfd = (int)args[0];
	int level = (int)args[1];
	int optname = (int)args[2];
	void *optval = (void *)args[3];
	int optlen = (int)args[4];
	int internal_opt;
	int r;
	lx_proto_opts_t *proto_opts;
	boolean_t converted = B_FALSE;

	lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname,
	    optval, optlen);

	/*
	 * The kernel returns EFAULT for all invalid addresses except NULL,
	 * for which it returns EINVAL.  Linux wants EFAULT for NULL too.
	 */
	if (optval == NULL)
		return (-EFAULT);

	if (level > LX_IPPROTO_RAW || level == LX_IPPROTO_UDP)
		return (-ENOPROTOOPT);

	if ((proto_opts = get_proto_opt_tbl(level)) == NULL)
		return (-ENOPROTOOPT);

	if (optname <= 0 || optname >= proto_opts->maxentries) {
		lx_unsupported("Unsupported sockopt %d, proto %d", optname,
		    level);
		return (-ENOPROTOOPT);
	}

	if (level == LX_IPPROTO_IP) {
		/*
		 * Ping sets this option to receive errors on raw sockets.
		 * Currently we just ignore it to make ping happy. From the
		 * Linux ip.7 man page:
		 *    For raw sockets, IP_RECVERR enables passing of all
		 *    received ICMP errors to the application.
		 */
		if (optname == LX_IP_RECVERR &&
		    strcmp(lx_cmd_name, "ping") == 0)
			return (0);

		if (optname == LX_IP_RECVERR &&
		    strcmp(lx_cmd_name, "traceroute") == 0)
			return (0);

		if (optname == LX_IP_MTU_DISCOVER &&
		    strcmp(lx_cmd_name, "traceroute") == 0) {
			/*
			 * The native traceroute uses IP_DONTFRAG. Set this
			 * and ignore LX_IP_MTU_DISCOVER for traceroute.
			 */
			optname = IP_DONTFRAG;
			converted = B_TRUE;
		}

	} else if (level == LX_SOL_SOCKET) {
		/* Linux ignores this option. */
		if (optname == LX_SO_BSDCOMPAT)
			return (0);

		level = SOL_SOCKET;

	} else if (level == LX_IPPROTO_TCP) {
		if (optname == LX_TCP_CORK) {
			/*
			 * TCP_CORK is a Linux-only option that instructs the
			 * TCP stack not to send out partial frames. Illumos
			 * doesn't include this option but some apps require
			 * it. So, we do our best to emulate the option by
			 * disabling TCP_NODELAY. If the app requests that we
			 * disable TCP_CORK, we just ignore it since enabling
			 * TCP_NODELAY may be overcompensating.
			 */
			optname = TCP_NODELAY;
			if (optlen != sizeof (int))
				return (-EINVAL);
			if (uucopy(optval, &internal_opt, sizeof (int)) != 0)
				return (-errno);
			if (internal_opt == 0)
				return (0);
			internal_opt = 1;
			optval = &internal_opt;

			converted = B_TRUE;
		}

	} else if (level == LX_IPPROTO_RAW) {
		/*
		 * Ping sets this option. Currently we just ignore it to make
		 * ping happy.
		 */
		if (optname == LX_ICMP_FILTER &&
		    strcmp(lx_cmd_name, "ping") == 0)
			return (0);
	}

	if (!converted) {
		int orig_optname = optname;

		/*
		 * Do a table lookup of the Illumos equivalent of the given
		 * option.
		 */
		optname = proto_opts->proto[optname];
		if (optname == OPTNOTSUP) {
			lx_unsupported("unsupported sockopt %d, proto %d",
			    orig_optname, level);
			return (-ENOPROTOOPT);
		}
	}

	r = setsockopt(sockfd, level, optname, optval, optlen);

	return ((r < 0) ? -errno : r);
}
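From the guest's point of view, the TCP_CORK handling above is exercised by code like the sketch below (illustrative, not from the source): on Linux, setting TCP_CORK to 1 delays transmission of partial frames, and the emulation approximates "cork on" by clearing TCP_NODELAY while silently ignoring the corresponding un-cork.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Guest-side sketch: toggle Linux TCP_CORK on a connected TCP socket. */
static int
set_cork(int fd, int on)
{
	return (setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof (on)));
}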
Example #14
static int
getrlimit_common(int resource, uint64_t *rlim_curp, uint64_t *rlim_maxp)
{
	char *rctl;
	rctlblk_t *rblk;
	int64_t cur = -1;
	boolean_t cur_inf = B_FALSE;
	int64_t max = -1;
	boolean_t max_inf = B_FALSE;

	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
		return (-EINVAL);

	rctl = l_to_rctl[resource];
	if (rctl == NULL) {
		switch (resource) {
		case LX_RLIMIT_LOCKS:
		case LX_RLIMIT_NICE:
		case LX_RLIMIT_RTPRIO:
		case LX_RLIMIT_RTTIME:
			*rlim_maxp = fake_limits[resource].rlim_max;
			*rlim_curp = fake_limits[resource].rlim_cur;
			return (0);
		default:
			lx_unsupported("Unsupported resource type %d\n",
			    resource);
			return (-ENOTSUP);
		}
	}

	/*
	 * The brand library cannot use malloc(3C) so we allocate the space
	 * with SAFE_ALLOCA(). Thus there's no need to free it when we're done.
	 */
	rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size());

	if (getrctl(rctl, NULL, rblk, RCTL_FIRST) == -1)
		return (-errno);

	do {
		switch (rctlblk_get_privilege(rblk)) {
		case RCPRIV_BASIC:
			cur = rctlblk_get_value(rblk);
			if (rctlblk_get_local_flags(rblk) &
			    RCTL_LOCAL_MAXIMAL &&
			    rctlblk_get_global_flags(rblk) &
			    RCTL_GLOBAL_INFINITE)
				cur_inf = B_TRUE;
			break;
		case RCPRIV_PRIVILEGED:
			max = rctlblk_get_value(rblk);
			if (rctlblk_get_local_flags(rblk) &
			    RCTL_LOCAL_MAXIMAL &&
			    rctlblk_get_global_flags(rblk) &
			    RCTL_GLOBAL_INFINITE)
				max_inf = B_TRUE;
			break;
		}
	} while (getrctl(rctl, rblk, rblk, RCTL_NEXT) != -1);

	/* Confirm we got values. For many rctls "basic" is not set. */
	if (max == -1)
		max = LX_RLIM64_INFINITY;
	if (cur == -1)
		cur = max;

	if (cur_inf)
		*rlim_curp = LX_RLIM64_INFINITY;
	else
		*rlim_curp = cur;

	if (max_inf)
		*rlim_maxp = LX_RLIM64_INFINITY;
	else
		*rlim_maxp = max;

	return (0);
}