Example #1
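/*
 * Lock the page immediately below the current task's heap start
 * (mm->start_brk) and make it readable, writable and executable.
 */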
asmlinkage void __kmc_mlock(void)
{
	if (current->mm) {
		if (current->mm->start_brk) {
			sys_mlock(current->mm->start_brk - PAGE_SIZE, PAGE_SIZE);
			sys_mprotect(current->mm->start_brk - PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
		}
	}
}
Example #2
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
    struct mprotect_args bsd_args;

    LINUX_CTR(mprotect);

    bsd_args.addr = uap->addr;
    bsd_args.len = uap->len;
    bsd_args.prot = uap->prot;
    if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
        bsd_args.prot |= PROT_READ | PROT_EXEC;
    return (sys_mprotect(td, &bsd_args));
}
Example #3
int
linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot)
{
	struct mprotect_args bsd_args;

	bsd_args.addr = (void *)addr;
	bsd_args.len = len;
	bsd_args.prot = prot;

#if defined(__amd64__)
	linux_fixup_prot(td, &bsd_args.prot);
#endif
	return (sys_mprotect(td, &bsd_args));
}
Example #4
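/*
 * Copy the support-code blob (__kmc_sup_start, __kmc_sup_size bytes) into
 * user space at the page just below the page-aligned program break, flush
 * the instruction cache over that range, then lock the page and mark it RWX.
 */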
void __kmc_copy_supcode(unsigned long elf_brk)
{
	char __user *usrcode;
	int	ret;

	if (__kmc_sup_size) {
		usrcode = (char *)(PAGE_ALIGN(elf_brk) - PAGE_SIZE);
		ret = copy_to_user(usrcode, __kmc_sup_start, __kmc_sup_size);
		flush_icache_range((unsigned long)usrcode, (unsigned long)(usrcode + __kmc_sup_size));
		sys_mlock((unsigned long)usrcode, PAGE_SIZE);
		sys_mprotect((unsigned long)usrcode, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
	}
	return;

}
Example #5
static int mprotect_vmas(struct parasite_dump_pages_args *args)
{
	struct parasite_vma_entry *vmas, *vma;
	int ret = 0, i;

	vmas = pargs_vmas(args);
	for (i = 0; i < args->nr_vmas; i++) {
		vma = vmas + i;
		ret = sys_mprotect((void *)vma->start, vma->len, vma->prot | args->add_prot);
		if (ret) {
			pr_err("mprotect(%08lx, %lu) failed with code %d\n",
						vma->start, vma->len, ret);
			break;
		}
	}

	return ret;
}
Example #6
int
as_complete_load(struct addrspace *as)
{
	int i;
	struct page *p;
	int num = array_getnum(as->pages);

	/* update permissions on all pages */
	for(i=0;i<num;i++)
	{
		p = (struct page *) array_getguy(as->pages, i);
		sys_mprotect(p->vaddr, PAGE_SIZE,
				(p->perms & P_R_B ? PROT_READ : 0)
			      | (p->perms & P_W_B ? PROT_WRITE : 0)
			      | (p->perms & P_X_B ? PROT_EXEC : 0));
	}

	(void)as;
	return 0;
}
Example #7
int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
		 unsigned long vdso_rt_parked_at, size_t index,
		 VmaEntry *vmas, size_t nr_vmas)
{
	VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
	bool remap_rt = false;

	/*
	 * Figure out which kind of vdso tuple we get.
	 */
	if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
		vma_vdso = &vmas[index];
	else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
		vma_vvar = &vmas[index];

	if (index < (nr_vmas - 1)) {
		if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
			vma_vdso = &vmas[index + 1];
		else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
			vma_vvar = &vmas[index + 1];
	}

	if (!vma_vdso) {
		pr_err("Can't find vDSO area in image\n");
		return -1;
	}

	/*
	 * The vDSO mark overwrites the ELF program header of the proxy vDSO,
	 * thus it must never be greater in size.
	 */
	BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));

	/*
	 * Find symbols in vDSO zone read from image.
	 */
	if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
		return -1;

	/*
	 * Proxification strategy
	 *
	 *  - There might be two vDSO zones: vdso code and optionally vvar data
	 *  - To be able to use in-place remapping we need
	 *
	 *    a) Size and order of vDSO zones are to match
	 *    b) Symbols offsets must match
	 *    c) Have same number of vDSO zones
	 */
	if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
		size_t i;

		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
			if (s.symbols[i].offset != sym_rt->symbols[i].offset)
				break;
		}

		if (i == ARRAY_SIZE(s.symbols)) {
			if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
				remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
				if (remap_rt) {
					long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
					long delta_this = vma_vvar->start - vma_vdso->start;

					remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
				}
			} else
				remap_rt = true;
		}
	}

	pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
		 vma_vdso->start, vma_vdso->end,
		 vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
		 vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);

	/*
	 * Easy case -- the vdso from the image has the same offsets, order
	 * and size as the runtime one, so we simply remap the runtime vdso to
	 * the dumpee position without generating any proxy.
	 *
	 * Note we may remap the VVAR vdso as well, which might not yet be
	 * mapped by the caller code. So drop VMA_AREA_REGULAR from it and the
	 * caller will not touch it anymore.
	 */
	if (remap_rt) {
		int ret = 0;

		pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");

		if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
			pr_err("Failed to unmap %s\n", who);
			return -1;
		}

		if (vma_vvar) {
			if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
				pr_err("Failed to unmap %s\n", who);
				return -1;
			}

			if (vma_vdso->start < vma_vvar->start) {
				ret  = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
				vdso_rt_parked_at += vdso_vma_size(sym_rt);
				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
			} else {
				ret  = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
				vdso_rt_parked_at += vvar_vma_size(sym_rt);
				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
			}
		} else
			ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));

		return ret;
	}

	/*
	 * Now the complex case -- we need to proxify calls. We redirect
	 * calls from the dumpee vdso to the runtime vdso, making the dumpee
	 * one operate as a proxy vdso.
	 */
	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");

	/*
	 * Don't forget to shift if vvar is before vdso.
	 */
	if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
	    sym_rt->vvar_start < sym_rt->vma_start)
		vdso_rt_parked_at += vvar_vma_size(sym_rt);

	if (vdso_redirect_calls(vdso_rt_parked_at,
				vma_vdso->start,
				sym_rt, &s)) {
		pr_err("Failed to proxify dumpee contents\n");
		return -1;
	}

	/*
	 * Put a special mark into the runtime vdso so that the next
	 * checkpoint routine can detect this vdso and skip dumping it, since
	 * it is auto-generated anew each session when a proxy is required.
	 */
	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), PROT_WRITE);
	vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), VDSO_PROT);
	return 0;
}
Example #8
int ckpt_restore_vma(char *node, pid_t gpid, ckpt_desc_t desc)
{
    int ret;
    int nr_pages = -1;
    char *path = NULL;
    ckpt_vma_t area;

    if (ckpt_read(desc, &area, sizeof(ckpt_vma_t)) != sizeof(ckpt_vma_t)) {
        log_err("failed to get area");
        return -EIO;
    }

    if (area.arch) {
        void *vdso;
        void *sysenter_return = current_thread_info()->sysenter_return;

        current->mm->context.vdso = (void *)(~0UL);
        ret = arch_setup_additional_pages(NULL, 0);
        if (ret < 0) {
            log_err("failed to setup additional pages");
            return ret;
        }

        vdso = current->mm->context.vdso;
        if ((vdso != (void *)(0UL)) && (vdso != (void *)area.start)) {
            ret = ckpt_vma_remap((unsigned long)vdso, &area);
            if (ret < 0) {
                log_err("failed to remap vma");
                return ret;
            }
        }

        current->mm->context.vdso = (void *)area.start;
        current_thread_info()->sysenter_return = sysenter_return;
    } else {
        int sz = area.sz;

        if (sz) {
            path = (char *)kmalloc(CKPT_PATH_MAX, GFP_KERNEL);
            if (!path) {
                log_err("no memory");
                return -ENOMEM;
            }

            if (ckpt_read(desc, path, sz) != sz) {
                log_err("failed to get path");
                kfree(path);
                return -EIO;
            }
        }

        if (ckpt_is_mapped_area(&area)) {
            unsigned long prot = ckpt_get_vma_prot(&area);
            unsigned long flags = ckpt_get_vma_flags(&area);

            ret = ckpt_map_attach(node, gpid, area.start, area.end - area.start, prot, flags);
            if (ret) {
                log_err("failed to attach");
                return ret;
            }
        } else {
            ret = ckpt_vma_map(path, &area);
            if (ret) {
                log_err("failed to map vma");
                return ret;
            }

            nr_pages = ckpt_restore_pages(&area, desc);
            if (nr_pages < 0) {
                log_err("failed to restore pages");
                return nr_pages;
            }
        }
        sys_mprotect(area.start, area.end - area.start, ckpt_get_vma_prot(&area));
    }
    log_restore_vma(area.start, area.end, path, nr_pages, ckpt_debug_checksum(current->mm, NULL, area.start));
    return 0;
}
Example #9
static void my_mprotect(const char *file, unsigned int segnum,
                        uintptr_t address, size_t size, int prot) {
  if (sys_mprotect((void *) address, size, prot) < 0)
    fail(file, "Failed to mprotect segment hole!  ",
         "segment", segnum, "errno", my_errno);
}
Example #10
int mprotect(unsigned long start, size_t len, unsigned long prot)
{
  return sys_mprotect(start, len, prot);
}
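
The wrapper above passes start and len straight through to sys_mprotect(). For comparison, here is a minimal userland sketch of the same call pattern (assuming an ordinary glibc environment; it is not taken from any of the projects above). mprotect() only accepts page-aligned addresses, so the buffer is aligned first.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf;

	/* posix_memalign() returns a page-aligned buffer, which is what
	 * mprotect() requires. */
	if (posix_memalign((void **)&buf, page, page))
		return 1;
	strcpy(buf, "hello");

	/* Drop write permission; reads keep working. */
	if (mprotect(buf, page, PROT_READ)) {
		perror("mprotect");
		return 1;
	}
	printf("%s\n", buf);

	/* Restore read/write before the buffer is released. */
	mprotect(buf, page, PROT_READ | PROT_WRITE);
	free(buf);
	return 0;
}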
Example #11
/*
 * The main routine to restore a task via sigreturn.
 * This one is very special: we never return from it,
 * but use the sigreturn facility to restore core registers
 * and jump execution to a predefined ip read from the
 * core file.
 */
long __export_restore_task(struct task_restore_args *args)
{
	long ret = -1;
	int i;
	VmaEntry *vma_entry;
	unsigned long va;

	struct rt_sigframe *rt_sigframe;
	unsigned long new_sp;
	k_rtsigset_t to_block;
	pid_t my_pid = sys_getpid();
	rt_sigaction_t act;

	bootstrap_start = args->bootstrap_start;
	bootstrap_len	= args->bootstrap_len;

#ifdef CONFIG_VDSO
	vdso_rt_size	= args->vdso_rt_size;
#endif

	task_entries = args->task_entries;
	helpers = args->helpers;
	n_helpers = args->n_helpers;
	*args->breakpoint = rst_sigreturn;

	ksigfillset(&act.rt_sa_mask);
	act.rt_sa_handler = sigchld_handler;
	act.rt_sa_flags = SA_SIGINFO | SA_RESTORER | SA_RESTART;
	act.rt_sa_restorer = cr_restore_rt;
	sys_sigaction(SIGCHLD, &act, NULL, sizeof(k_rtsigset_t));

	log_set_fd(args->logfd);
	log_set_loglevel(args->loglevel);

	cap_last_cap = args->cap_last_cap;

	pr_info("Switched to the restorer %d\n", my_pid);

#ifdef CONFIG_VDSO
	if (vdso_do_park(&args->vdso_sym_rt, args->vdso_rt_parked_at, vdso_rt_size))
		goto core_restore_end;
#endif

	if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len,
				bootstrap_start, bootstrap_len))
		goto core_restore_end;

	/* Shift private vma-s to the left */
	for (i = 0; i < args->nr_vmas; i++) {
		vma_entry = args->tgt_vmas + i;

		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (!vma_priv(vma_entry))
			continue;

		if (vma_entry->end >= TASK_SIZE)
			continue;

		if (vma_entry->start > vma_entry->shmid)
			break;

		if (vma_remap(vma_premmaped_start(vma_entry),
				vma_entry->start, vma_entry_len(vma_entry)))
			goto core_restore_end;
	}

	/* Shift private vma-s to the right */
	for (i = args->nr_vmas - 1; i >= 0; i--) {
		vma_entry = args->tgt_vmas + i;

		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (!vma_priv(vma_entry))
			continue;

		if (vma_entry->start > TASK_SIZE)
			continue;

		if (vma_entry->start < vma_entry->shmid)
			break;

		if (vma_remap(vma_premmaped_start(vma_entry),
				vma_entry->start, vma_entry_len(vma_entry)))
			goto core_restore_end;
	}

	/*
	 * OK, let's try to map the new ones.
	 */
	for (i = 0; i < args->nr_vmas; i++) {
		vma_entry = args->tgt_vmas + i;

		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (vma_priv(vma_entry))
			continue;

		va = restore_mapping(vma_entry);

		if (va != vma_entry->start) {
			pr_err("Can't restore %"PRIx64" mapping with %lx\n", vma_entry->start, va);
			goto core_restore_end;
		}
	}

#ifdef CONFIG_VDSO
	/*
	 * Proxify vDSO.
	 */
	for (i = 0; i < args->nr_vmas; i++) {
		if (vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VDSO) ||
		    vma_entry_is(&args->tgt_vmas[i], VMA_AREA_VVAR)) {
			if (vdso_proxify("dumpee", &args->vdso_sym_rt,
					 args->vdso_rt_parked_at,
					 i, args->tgt_vmas, args->nr_vmas))
				goto core_restore_end;
			break;
		}
	}
#endif

	/*
	 * Walk through all VMAs again to drop PROT_WRITE
	 * if it was not there.
	 */
	for (i = 0; i < args->nr_vmas; i++) {
		vma_entry = args->tgt_vmas + i;

		if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
			continue;

		if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
			struct shmem_info *entry;

			entry = find_shmem(args->shmems, args->nr_shmems,
						  vma_entry->shmid);
			if (entry && entry->pid == my_pid &&
			    entry->start == vma_entry->start)
				futex_set_and_wake(&entry->lock, 1);
		}

		if (vma_entry->prot & PROT_WRITE)
			continue;

		sys_mprotect(decode_pointer(vma_entry->start),
			     vma_entry_len(vma_entry),
			     vma_entry->prot);
	}

	/*
	 * Finally restore madvise() bits
	 */
	for (i = 0; i < args->nr_vmas; i++) {
		unsigned long m;

		vma_entry = args->tgt_vmas + i;
		if (!vma_entry->has_madv || !vma_entry->madv)
			continue;

		for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
			if (vma_entry->madv & (1ul << m)) {
				ret = sys_madvise(vma_entry->start,
						  vma_entry_len(vma_entry),
						  m);
				if (ret) {
					pr_err("madvise(%"PRIx64", %"PRIu64", %ld) "
					       "failed with %ld\n",
						vma_entry->start,
						vma_entry_len(vma_entry),
						m, ret);
					goto core_restore_end;
				}
			}
		}
	}

	ret = 0;

	/*
	 * Tune up the task fields.
	 */
	ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0);

	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE,	(long)args->mm.mm_start_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE,	(long)args->mm.mm_end_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA,	(long)args->mm.mm_start_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA,	(long)args->mm.mm_end_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK,	(long)args->mm.mm_start_stack, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK,	(long)args->mm.mm_start_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK,		(long)args->mm.mm_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START,	(long)args->mm.mm_arg_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END,	(long)args->mm.mm_arg_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START,	(long)args->mm.mm_env_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END,	(long)args->mm.mm_env_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV,	(long)args->mm_saved_auxv, args->mm_saved_auxv_size);
	if (ret)
		goto core_restore_end;

	/*
	 * Because of requirements applied from kernel side
	 * we need to restore /proc/pid/exe symlink late,
	 * after old existing VMAs are superseded with
	 * new ones from image file.
	 */
	ret = restore_self_exe_late(args);
	if (ret)
		goto core_restore_end;

	/*
	 * We need to prepare a valid sigframe here, so
	 * after sigreturn the kernel will pick up the
	 * registers from the frame, set them up and
	 * finally pass execution to the new IP.
	 */
	rt_sigframe = (void *)args->t->mem_zone.rt_sigframe;

	if (restore_thread_common(rt_sigframe, args->t))
		goto core_restore_end;

	/*
	 * Threads restoration. This requires some more comments. This
	 * restorer routine and the thread restorer routine have the following
	 * memory map, prepared by the caller code.
	 *
	 * | <-- low addresses                                          high addresses --> |
	 * +-------------------------------------------------------+-----------------------+
	 * | this proc body | own stack | rt_sigframe space | thread restore zone   |
	 * +-------------------------------------------------------+-----------------------+
	 *
	 * where each thread restore zone is the following
	 *
	 * | <-- low addresses                                     high addresses --> |
	 * +--------------------------------------------------------------------------+
	 * | thread restore proc | thread1 stack | thread1 rt_sigframe |
	 * +--------------------------------------------------------------------------+
	 */

	if (args->nr_threads > 1) {
		struct thread_restore_args *thread_args = args->thread_args;
		long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND	|
				   CLONE_THREAD | CLONE_SYSVSEM;
		long last_pid_len;
		long parent_tid;
		int i, fd;

		fd = args->fd_last_pid;
		ret = sys_flock(fd, LOCK_EX);
		if (ret) {
			pr_err("Can't lock last_pid %d\n", fd);
			goto core_restore_end;
		}

		for (i = 0; i < args->nr_threads; i++) {
			char last_pid_buf[16], *s;

			/* skip self */
			if (thread_args[i].pid == args->t->pid)
				continue;

			new_sp = restorer_stack(thread_args + i);
			last_pid_len = vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s);
			sys_lseek(fd, 0, SEEK_SET);
			ret = sys_write(fd, s, last_pid_len);
			if (ret < 0) {
				pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf);
				goto core_restore_end;
			}

			/*
			 * To achieve functionality like libc's clone()
			 * we need a pure assembly here, because clone()'ed
			 * thread will run with own stack and we must not
			 * have any additional instructions... oh, dear...
			 */

			RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
		}

		ret = sys_flock(fd, LOCK_UN);
		if (ret) {
			pr_err("Can't unlock last_pid %ld\n", ret);
			goto core_restore_end;
		}

	}

	sys_close(args->fd_last_pid);

	restore_rlims(args);

	ret = create_posix_timers(args);
	if (ret < 0) {
		pr_err("Can't restore posix timers %ld\n", ret);
		goto core_restore_end;
	}

	ret = timerfd_arm(args);
	if (ret < 0) {
		pr_err("Can't restore timerfd %ld\n", ret);
		goto core_restore_end;
	}

	pr_info("%ld: Restored\n", sys_getpid());

	futex_set(&zombies_inprogress, args->nr_zombies);

	restore_finish_stage(CR_STATE_RESTORE);

	futex_wait_while_gt(&zombies_inprogress, 0);

	if (wait_helpers(args) < 0)
		goto core_restore_end;

	ksigfillset(&to_block);
	ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
	if (ret) {
		pr_err("Unable to block signals %ld", ret);
		goto core_restore_end;
	}

	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));

	ret = restore_signals(args->siginfo, args->siginfo_nr, true);
	if (ret)
		goto core_restore_end;

	ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, false);
	if (ret)
		goto core_restore_end;

	restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);

	rst_tcp_socks_all(args);

	/*
	 * Writing to last-pid is CAP_SYS_ADMIN protected,
	 * turning off TCP repair is CAP_NET_ADMIN protected,
	 * thus restore* creds _after_ all of the above.
	 */

	ret = restore_creds(&args->creds);
	ret = ret || restore_dumpable_flag(&args->mm);
	ret = ret || restore_pdeath_sig(args->t);

	futex_set_and_wake(&thread_inprogress, args->nr_threads);

	restore_finish_stage(CR_STATE_RESTORE_CREDS);

	if (ret)
		BUG();

	/* Wait until children stop to use args->task_entries */
	futex_wait_while_gt(&thread_inprogress, 1);

	log_set_fd(-1);

	/*
	 * The code that prepared the itimers makes sure the
	 * code below doesn't fail due to bad timing values.
	 */

#define itimer_armed(args, i)				\
		(args->itimers[i].it_interval.tv_sec ||	\
		 args->itimers[i].it_interval.tv_usec)

	if (itimer_armed(args, 0))
		sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL);
	if (itimer_armed(args, 1))
		sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL);
	if (itimer_armed(args, 2))
		sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL);

	restore_posix_timers(args);

	sys_munmap(args->rst_mem, args->rst_mem_size);

	/*
	 * Sigframe stack.
	 */
	new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;

	/*
	 * Prepare the stack and call for sigreturn,
	 * pure assembly since we don't need any additional
	 * code insns from gcc.
	 */
	rst_sigreturn(new_sp);

core_restore_end:
	futex_abort_and_wake(&task_entries->nr_in_progress);
	pr_err("Restorer fail %ld\n", sys_getpid());
	sys_exit_group(1);
	return -1;
}
Example #12
/*
 * This is the main page dumping routine, it's executed
 * inside a victim process space.
 */
static int dump_pages(struct parasite_dump_pages_args *args)
{
	unsigned long nrpages, pfn, length;
	unsigned long prot_old, prot_new;
	u64 *map, off;
	int ret = -1, fd;

	args->nrpages_dumped = 0;
	args->nrpages_skipped = 0;
	prot_old = prot_new = 0;

	pfn = args->vma_entry.start / PAGE_SIZE;
	nrpages	= (args->vma_entry.end - args->vma_entry.start) / PAGE_SIZE;
	args->nrpages_total = nrpages;
	length = nrpages * sizeof(*map);

	/*
	 * Up to 10M of pagemap will handle 5G mapping.
	 */
	map = brk_alloc(length);
	if (!map) {
		ret = -ENOMEM;
		goto err;
	}

	fd = sys_open("/proc/self/pagemap", O_RDONLY, 0);
	if (fd < 0) {
		sys_write_msg("Can't open self pagemap");
		ret = fd;
		goto err_free;
	}

	off = pfn * sizeof(*map);
	off = sys_lseek(fd, off, SEEK_SET);
	if (off != pfn * sizeof(*map)) {
		sys_write_msg("Can't seek pagemap");
		ret = off;
		goto err_close;
	}

	ret = sys_read(fd, map, length);
	if (ret != length) {
		sys_write_msg("Can't read self pagemap");
		goto err_close;
	}

	sys_close(fd);
	fd = fd_pages;

	/*
	 * Try to change page protection if needed so we would
	 * be able to dump contents.
	 */
	if (!(args->vma_entry.prot & PROT_READ)) {
		prot_old = (unsigned long)args->vma_entry.prot;
		prot_new = prot_old | PROT_READ;
		ret = sys_mprotect((void *)args->vma_entry.start,
				   (unsigned long)vma_entry_len(&args->vma_entry),
				   prot_new);
		if (ret) {
			sys_write_msg("sys_mprotect failed\n");
			goto err_free;
		}
	}

	ret = 0;
	for (pfn = 0; pfn < nrpages; pfn++) {
		size_t vaddr;

		if (should_dump_page(&args->vma_entry, map[pfn])) {
			/*
			 * That's the optimized write of
			 * page_entry structure, see image.h
			 */
			vaddr = (size_t)args->vma_entry.start + pfn * PAGE_SIZE;

			ret = sys_write_safe(fd, &vaddr, sizeof(vaddr));
			if (ret)
				return ret;
			ret = sys_write_safe(fd, (void *)vaddr, PAGE_SIZE);
			if (ret)
				return ret;

			args->nrpages_dumped++;
		} else if (map[pfn] & PME_PRESENT)
			args->nrpages_skipped++;
	}

	/*
	 * Don't leave pages readable if they were not.
	 */
	if (prot_old != prot_new) {
		ret = sys_mprotect((void *)args->vma_entry.start,
				   (unsigned long)vma_entry_len(&args->vma_entry),
				   prot_old);
		if (ret) {
			sys_write_msg("PANIC: Ouch! sys_mprotect failed on restore\n");
			goto err_free;
		}
	}

	ret = 0;
err_free:
	brk_free(length);
err:
	return ret;

err_close:
	sys_close(fd);
	goto err_free;
}
Example #13
/*
 * The main routine to restore a task via sigreturn.
 * This one is very special: we never return from it,
 * but use the sigreturn facility to restore core registers
 * and jump execution to a predefined ip read from the
 * core file.
 */
long __export_restore_task(struct task_restore_core_args *args)
{
	long ret = -1;
	VmaEntry *vma_entry;
	u64 va;
	unsigned long premmapped_end = args->premmapped_addr + args->premmapped_len;

	struct rt_sigframe *rt_sigframe;
	unsigned long new_sp;
	pid_t my_pid = sys_getpid();
	rt_sigaction_t act;

	task_entries = args->task_entries;

	ksigfillset(&act.rt_sa_mask);
	act.rt_sa_handler = sigchld_handler;
	act.rt_sa_flags = SA_SIGINFO | SA_RESTORER | SA_RESTART;
	act.rt_sa_restorer = cr_restore_rt;
	sys_sigaction(SIGCHLD, &act, NULL, sizeof(k_rtsigset_t));

	log_set_fd(args->logfd);
	log_set_loglevel(args->loglevel);

	cap_last_cap = args->cap_last_cap;

	pr_info("Switched to the restorer %d\n", my_pid);

	for (vma_entry = args->self_vmas; vma_entry->start != 0; vma_entry++) {
		unsigned long addr = vma_entry->start;
		unsigned long len;

		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		pr_debug("Examine %"PRIx64"-%"PRIx64"\n", vma_entry->start, vma_entry->end);

		if (addr < args->premmapped_addr) {
			if (vma_entry->end >= args->premmapped_addr)
				len = args->premmapped_addr - addr;
			else
				len = vma_entry->end - vma_entry->start;
			if (sys_munmap((void *) addr, len)) {
				pr_err("munmap fail for %lx - %lx\n", addr, addr + len);
				goto core_restore_end;
			}
		}

		if (vma_entry->end >= TASK_SIZE)
			continue;

		if (vma_entry->end > premmapped_end) {
			if (vma_entry->start < premmapped_end)
				addr = premmapped_end;
			len = vma_entry->end - addr;
			if (sys_munmap((void *) addr, len)) {
				pr_err("munmap fail for %lx - %lx\n", addr, addr + len);
				goto core_restore_end;
			}
		}
	}

	sys_munmap(args->self_vmas,
			((void *)(vma_entry + 1) - ((void *)args->self_vmas)));

	/* Shift private vma-s to the left */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (!vma_priv(vma_entry))
			continue;

		if (vma_entry->end >= TASK_SIZE)
			continue;

		if (vma_entry->start > vma_entry->shmid)
			break;

		if (vma_remap(vma_premmaped_start(vma_entry),
				vma_entry->start, vma_entry_len(vma_entry)))
			goto core_restore_end;
	}

	/* Shift private vma-s to the right */
	for (vma_entry = args->tgt_vmas + args->nr_vmas -1;
				vma_entry >= args->tgt_vmas; vma_entry--) {
		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (!vma_priv(vma_entry))
			continue;

		if (vma_entry->start > TASK_SIZE)
			continue;

		if (vma_entry->start < vma_entry->shmid)
			break;

		if (vma_remap(vma_premmaped_start(vma_entry),
				vma_entry->start, vma_entry_len(vma_entry)))
			goto core_restore_end;
	}

	/*
	 * OK, let's try to map the new ones.
	 */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
			continue;

		if (vma_priv(vma_entry))
			continue;

		va = restore_mapping(vma_entry);

		if (va != vma_entry->start) {
			pr_err("Can't restore %"PRIx64" mapping with %"PRIx64"\n", vma_entry->start, va);
			goto core_restore_end;
		}
	}

	/*
	 * Walk through all VMAs again to drop PROT_WRITE
	 * if it was not there.
	 */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
			continue;

		if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
			struct shmem_info *entry;

			entry = find_shmem(args->shmems,
						  vma_entry->shmid);
			if (entry && entry->pid == my_pid &&
			    entry->start == vma_entry->start)
				futex_set_and_wake(&entry->lock, 1);
		}

		if (vma_entry->prot & PROT_WRITE)
			continue;

		sys_mprotect(decode_pointer(vma_entry->start),
			     vma_entry_len(vma_entry),
			     vma_entry->prot);
	}

	/*
	 * Finally restore madvise() bits
	 */
	for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
		unsigned long i;

		if (!vma_entry->has_madv || !vma_entry->madv)
			continue;
		for (i = 0; i < sizeof(vma_entry->madv) * 8; i++) {
			if (vma_entry->madv & (1ul << i)) {
				ret = sys_madvise(vma_entry->start,
						  vma_entry_len(vma_entry),
						  i);
				if (ret) {
					pr_err("madvise(%"PRIx64", %"PRIu64", %ld) "
					       "failed with %ld\n",
						vma_entry->start,
						vma_entry_len(vma_entry),
						i, ret);
					goto core_restore_end;
				}
			}
		}
	}

	sys_munmap(args->tgt_vmas,
			((void *)(vma_entry + 1) - ((void *)args->tgt_vmas)));

	ret = sys_munmap(args->shmems, SHMEMS_SIZE);
	if (ret < 0) {
		pr_err("Can't unmap shmem %ld\n", ret);
		goto core_restore_end;
	}

	/*
	 * Tune up the task fields.
	 */
	ret |= sys_prctl_safe(PR_SET_NAME, (long)args->comm, 0, 0);

	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE,	(long)args->mm.mm_start_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE,	(long)args->mm.mm_end_code, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_DATA,	(long)args->mm.mm_start_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_DATA,	(long)args->mm.mm_end_data, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_STACK,	(long)args->mm.mm_start_stack, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_BRK,	(long)args->mm.mm_start_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_BRK,		(long)args->mm.mm_brk, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_START,	(long)args->mm.mm_arg_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ARG_END,	(long)args->mm.mm_arg_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_START,	(long)args->mm.mm_env_start, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_ENV_END,	(long)args->mm.mm_env_end, 0);
	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_AUXV,	(long)args->mm_saved_auxv, args->mm_saved_auxv_size);
	if (ret)
		goto core_restore_end;

	/*
	 * Because of requirements applied from kernel side
	 * we need to restore /proc/pid/exe symlink late,
	 * after old existing VMAs are superseded with
	 * new ones from image file.
	 */
	ret = restore_self_exe_late(args);
	if (ret)
		goto core_restore_end;

	/*
	 * We need to prepare a valid sigframe here, so
	 * after sigreturn the kernel will pick up the
	 * registers from the frame, set them up and
	 * finally pass execution to the new IP.
	 */
	rt_sigframe = (void *)args->t->mem_zone.rt_sigframe + 8;

	if (restore_thread_common(rt_sigframe, args->t))
		goto core_restore_end;

	/*
	 * Threads restoration. This requires some more comments. This
	 * restorer routine and the thread restorer routine have the following
	 * memory map, prepared by the caller code.
	 *
	 * | <-- low addresses                                          high addresses --> |
	 * +-------------------------------------------------------+-----------------------+
	 * | this proc body | own stack | heap | rt_sigframe space | thread restore zone   |
	 * +-------------------------------------------------------+-----------------------+
	 *
	 * where each thread restore zone is the following
	 *
	 * | <-- low addresses                                     high addresses --> |
	 * +--------------------------------------------------------------------------+
	 * | thread restore proc | thread1 stack | thread1 heap | thread1 rt_sigframe |
	 * +--------------------------------------------------------------------------+
	 */

	if (args->nr_threads > 1) {
		struct thread_restore_args *thread_args = args->thread_args;
		long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND	|
				   CLONE_THREAD | CLONE_SYSVSEM;
		long last_pid_len;
		long parent_tid;
		int i, fd;

		fd = sys_open(LAST_PID_PATH, O_RDWR, LAST_PID_PERM);
		if (fd < 0) {
			pr_err("Can't open last_pid %d\n", fd);
			goto core_restore_end;
		}

		ret = sys_flock(fd, LOCK_EX);
		if (ret) {
			pr_err("Can't lock last_pid %d\n", fd);
			goto core_restore_end;
		}

		for (i = 0; i < args->nr_threads; i++) {
			char last_pid_buf[16], *s;

			/* skip self */
			if (thread_args[i].pid == args->t->pid)
				continue;

			mutex_lock(&args->rst_lock);

			new_sp =
				RESTORE_ALIGN_STACK((long)thread_args[i].mem_zone.stack,
						    sizeof(thread_args[i].mem_zone.stack));

			last_pid_len = vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s);
			ret = sys_write(fd, s, last_pid_len);
			if (ret < 0) {
				pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf);
				goto core_restore_end;
			}

			/*
			 * To achieve functionality like libc's clone()
			 * we need a pure assembly here, because clone()'ed
			 * thread will run with own stack and we must not
			 * have any additional instructions... oh, dear...
			 */

			RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
		}

		ret = sys_flock(fd, LOCK_UN);
		if (ret) {
			pr_err("Can't unlock last_pid %ld\n", ret);
			goto core_restore_end;
		}

		sys_close(fd);
	}

	restore_rlims(args);

	pr_info("%ld: Restored\n", sys_getpid());

	futex_set(&zombies_inprogress, args->nr_zombies);

	restore_finish_stage(CR_STATE_RESTORE);

	futex_wait_while_gt(&zombies_inprogress, 0);

	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));

	ret = restore_signals(args->siginfo, args->siginfo_nr, true);
	if (ret)
		goto core_restore_end;

	ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, false);
	if (ret)
		goto core_restore_end;

	restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);

	if (args->siginfo_size) {
		ret = sys_munmap(args->siginfo, args->siginfo_size);
		if (ret < 0) {
			pr_err("Can't unmap signals %ld\n", ret);
			goto core_restore_failed;
		}
	}

	rst_tcp_socks_all(args->rst_tcp_socks, args->rst_tcp_socks_size);

	/* 
	 * Writing to last-pid is CAP_SYS_ADMIN protected,
	 * turning off TCP repair is CAP_NET_ADMIN protected,
	 * thus restore* creds _after_ all of the above.
	 */

	ret = restore_creds(&args->creds);

	futex_set_and_wake(&thread_inprogress, args->nr_threads);

	restore_finish_stage(CR_STATE_RESTORE_CREDS);

	if (ret)
		BUG();

	/* Wait until children stop to use args->task_entries */
	futex_wait_while_gt(&thread_inprogress, 1);

	log_set_fd(-1);

	/*
	 * The code that prepared the itimers makes sure the
	 * code below doesn't fail due to bad timing values.
	 */

#define itimer_armed(args, i)				\
		(args->itimers[i].it_interval.tv_sec ||	\
		 args->itimers[i].it_interval.tv_usec)

	if (itimer_armed(args, 0))
		sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL);
	if (itimer_armed(args, 1))
		sys_setitimer(ITIMER_VIRTUAL, &args->itimers[1], NULL);
	if (itimer_armed(args, 2))
		sys_setitimer(ITIMER_PROF, &args->itimers[2], NULL);

	ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
	if (ret < 0) {
		ret = ((long)__LINE__ << 16) | ((-ret) & 0xffff);
		goto core_restore_failed;
	}

	/*
	 * Sigframe stack.
	 */
	new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;

	/*
	 * Prepare the stack and call for sigreturn,
	 * pure assembly since we don't need any additional
	 * code insns from gcc.
	 */
	ARCH_RT_SIGRETURN(new_sp);

core_restore_end:
	futex_abort_and_wake(&task_entries->nr_in_progress);
	pr_err("Restorer fail %ld\n", sys_getpid());
	sys_exit_group(1);
	return -1;

core_restore_failed:
	ARCH_FAIL_CORE_RESTORE;

	return ret;
}
Example #14
uint32_t elf_load(struct sys_state *sys, void *base, int flags, struct elf_load_info *info)
{
	unsigned aligned_sz, align_pad;
	void *aligned_base;
	char *interp_filename = NULL;
	Elf32_Ehdr *ehdr = base;
	Elf32_Phdr *phdr;
	int i, prot, err;
	int have_loadaddr = 0;
	uint32_t entry;

	if (ehdr->e_version != 1) {
		debug("ELF version invalid\n");
		sys_exit(1);
	}

	if (ehdr->e_machine != EM_MIPS) {
		debug("ELF machine invalid\n");
		sys_exit(1);
	}

	debug("ELF header looks valid\n");

	if (info && !(flags & ELF_INTERP)) {
		info->phent = ehdr->e_phentsize;
		info->phnum = ehdr->e_phnum;
		info->interp_filename[0] = 0;
	}

	entry = ehdr->e_entry;

	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr = base + ehdr->e_phoff + (i * ehdr->e_phentsize);

		debug("Phdr[%d] type 0x%x off 0x%x vaddr 0x%x paddr 0x%x filesz 0x%x "
		      "memsz 0x%x flags 0x%x align 0x%x\n", i,
		      phdr->p_type, phdr->p_offset, phdr->p_vaddr, phdr->p_paddr,
		      phdr->p_filesz, phdr->p_memsz, phdr->p_flags, phdr->p_align);

		if (phdr->p_type == PT_LOAD) {
			prot = 0;

			if (phdr->p_flags & PF_X)
				prot |= PROT_EXEC;
			if (phdr->p_flags & PF_W)
				prot |= PROT_WRITE;
			if (phdr->p_flags & PF_R)
				prot |= PROT_READ;

			align_pad = phdr->p_vaddr & (phdr->p_align - 1);
			aligned_base = sys->mem_base + phdr->p_vaddr - align_pad;
			aligned_sz = phdr->p_memsz + align_pad;

			aligned_sz += 4095;
			aligned_sz &= ~4095;

			err = sys_mprotect(aligned_base, aligned_sz, prot | PROT_WRITE);
			if (err) {
				debug("Failed to mprotect memory region\n");
				sys_exit(1);
			}

			memcpy(sys->mem_base + phdr->p_vaddr, base + phdr->p_offset, phdr->p_filesz);

			if (phdr->p_memsz > phdr->p_filesz)
				memset(sys->mem_base + phdr->p_vaddr + phdr->p_filesz,
				       0, phdr->p_memsz - phdr->p_filesz);

			if (!(prot & PROT_WRITE)) {
				err = sys_mprotect(aligned_base, aligned_sz, prot);
				if (err) {
					debug("Failed to mprotect memory region\n");
					sys_exit(1);
				}
			}

			sys->brk = max(sys->brk, aligned_base + aligned_sz - sys->mem_base);

			if (info && !(flags & ELF_INTERP) && !have_loadaddr) {
				uint32_t load_addr = phdr->p_vaddr - phdr->p_offset;
				have_loadaddr = 1;
				info->phdr_base = load_addr + ehdr->e_phoff;
			}
		}

		if (phdr->p_type == PT_INTERP) {
			interp_filename = base + phdr->p_offset;
			if (info)
				strcpy(info->interp_filename, interp_filename);
		}
	}

	if (!(flags & ELF_INTERP) && interp_filename) {
		debug("Load interpreter \"%s\"\n", interp_filename);
		entry = elf_load_filename(sys, interp_filename, ELF_INTERP, info);
		if (info)
			info->interp_base = entry;
	}

	if (info && !(flags & ELF_INTERP))
		info->entry = entry;

	return entry;
}
Example #15
int
cloudabi_sys_mem_advise(struct thread *td,
    struct cloudabi_sys_mem_advise_args *uap)
{
	struct madvise_args madvise_args = {
		.addr	= uap->addr,
		.len	= uap->len
	};

	switch (uap->advice) {
	case CLOUDABI_ADVICE_DONTNEED:
		madvise_args.behav = MADV_DONTNEED;
		break;
	case CLOUDABI_ADVICE_NORMAL:
		madvise_args.behav = MADV_NORMAL;
		break;
	case CLOUDABI_ADVICE_RANDOM:
		madvise_args.behav = MADV_RANDOM;
		break;
	case CLOUDABI_ADVICE_SEQUENTIAL:
		madvise_args.behav = MADV_SEQUENTIAL;
		break;
	case CLOUDABI_ADVICE_WILLNEED:
		madvise_args.behav = MADV_WILLNEED;
		break;
	default:
		return (EINVAL);
	}

	return (sys_madvise(td, &madvise_args));
}

int
cloudabi_sys_mem_lock(struct thread *td, struct cloudabi_sys_mem_lock_args *uap)
{
	struct mlock_args mlock_args = {
		.addr	= uap->addr,
		.len	= uap->len
	};

	return (sys_mlock(td, &mlock_args));
}

int
cloudabi_sys_mem_map(struct thread *td, struct cloudabi_sys_mem_map_args *uap)
{
	struct mmap_args mmap_args = {
		.addr	= uap->addr,
		.len	= uap->len,
		.fd	= uap->fd,
		.pos	= uap->off
	};
	int error;

	/* Translate flags. */
	if (uap->flags & CLOUDABI_MAP_ANON)
		mmap_args.flags |= MAP_ANON;
	if (uap->flags & CLOUDABI_MAP_FIXED)
		mmap_args.flags |= MAP_FIXED;
	if (uap->flags & CLOUDABI_MAP_PRIVATE)
		mmap_args.flags |= MAP_PRIVATE;
	if (uap->flags & CLOUDABI_MAP_SHARED)
		mmap_args.flags |= MAP_SHARED;

	/* Translate protection. */
	error = convert_mprot(uap->prot, &mmap_args.prot);
	if (error != 0)
		return (error);

	return (sys_mmap(td, &mmap_args));
}

int
cloudabi_sys_mem_protect(struct thread *td,
    struct cloudabi_sys_mem_protect_args *uap)
{
	struct mprotect_args mprotect_args = {
		.addr	= uap->addr,
		.len	= uap->len,
	};
	int error;

	/* Translate protection. */
	error = convert_mprot(uap->prot, &mprotect_args.prot);
	if (error != 0)
		return (error);

	return (sys_mprotect(td, &mprotect_args));
}

int
cloudabi_sys_mem_sync(struct thread *td, struct cloudabi_sys_mem_sync_args *uap)
{
	struct msync_args msync_args = {
		.addr	= uap->addr,
		.len	= uap->len,
	};

	/* Convert flags. */
	switch (uap->flags & (CLOUDABI_MS_ASYNC | CLOUDABI_MS_SYNC)) {
	case CLOUDABI_MS_ASYNC:
		msync_args.flags |= MS_ASYNC;
		break;
	case CLOUDABI_MS_SYNC:
		msync_args.flags |= MS_SYNC;
		break;
	default:
		return (EINVAL);
	}
	if ((uap->flags & CLOUDABI_MS_INVALIDATE) != 0)
		msync_args.flags |= MS_INVALIDATE;

	return (sys_msync(td, &msync_args));
}

int
cloudabi_sys_mem_unlock(struct thread *td,
    struct cloudabi_sys_mem_unlock_args *uap)
{
	struct munlock_args munlock_args = {
		.addr	= uap->addr,
		.len	= uap->len
	};

	return (sys_munlock(td, &munlock_args));
}

int
cloudabi_sys_mem_unmap(struct thread *td,
    struct cloudabi_sys_mem_unmap_args *uap)
{
	struct munmap_args munmap_args = {
		.addr	= uap->addr,
		.len	= uap->len
	};

	return (sys_munmap(td, &munmap_args));
}
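
convert_mprot() itself is not shown above. Purely as a hypothetical illustration (not the actual FreeBSD implementation), a translator in that style might look like the sketch below, assuming CLOUDABI_PROT_READ/WRITE/EXEC are single-bit flags:

/*
 * Hypothetical sketch of a convert_mprot()-style translator; the real
 * function may differ. Unknown bits are rejected rather than silently
 * dropped.
 */
static int
convert_mprot_sketch(uint64_t in, int *protp)
{
	int prot = 0;

	if ((in & ~(uint64_t)(CLOUDABI_PROT_READ | CLOUDABI_PROT_WRITE |
	    CLOUDABI_PROT_EXEC)) != 0)
		return (ENOTSUP);
	if ((in & CLOUDABI_PROT_READ) != 0)
		prot |= PROT_READ;
	if ((in & CLOUDABI_PROT_WRITE) != 0)
		prot |= PROT_WRITE;
	if ((in & CLOUDABI_PROT_EXEC) != 0)
		prot |= PROT_EXEC;
	*protp = prot;
	return (0);
}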