示例#1
0
/*
 *  stress_vm_rw
 *	stress vm_read_v/vm_write_v
 */
int stress_vm_rw(
	uint64_t *const counter,
	const uint32_t instance,
	const uint64_t max_ops,
	const char *name)
{
	context_t ctxt;
	uint8_t stack[64*1024];
	const ssize_t stack_offset =
		stress_get_stack_direction(&ctxt) * (STACK_SIZE - 64);
	uint8_t *stack_top = stack + stack_offset;

	(void)instance;

	if (!set_vm_rw_bytes) {
		if (opt_flags & OPT_FLAGS_MAXIMIZE)
			opt_vm_rw_bytes = MAX_VM_RW_BYTES;
		if (opt_flags & OPT_FLAGS_MINIMIZE)
			opt_vm_rw_bytes = MIN_VM_RW_BYTES;
	}
	ctxt.name = name;
	ctxt.page_size = stress_get_pagesize();
	ctxt.sz = opt_vm_rw_bytes & ~(ctxt.page_size - 1);
	ctxt.counter = counter;
	ctxt.max_ops = max_ops;

	if (pipe(ctxt.pipe_wr) < 0) {
		pr_fail_dbg(name, "pipe");
		return EXIT_FAILURE;
	}
	if (pipe(ctxt.pipe_rd) < 0) {
		(void)close(ctxt.pipe_wr[0]);
		(void)close(ctxt.pipe_wr[1]);
		pr_fail_dbg(name, "pipe");
		return EXIT_FAILURE;
	}

	ctxt.pid = clone(stress_vm_child, align_stack(stack_top),
		SIGCHLD | CLONE_VM, &ctxt);
	if (ctxt.pid < 0) {
		(void)close(ctxt.pipe_wr[0]);
		(void)close(ctxt.pipe_wr[1]);
		(void)close(ctxt.pipe_rd[0]);
		(void)close(ctxt.pipe_rd[1]);
		pr_fail_dbg(name, "clone");
		return EXIT_FAILURE;
	}
	return stress_vm_parent(&ctxt);
}
/*
 *  stress_bind_mount()
 *      stress bind mounting
 */
int stress_bind_mount(
        uint64_t *const counter,
        const uint32_t instance,
        const uint64_t max_ops,
        const char *name)
{
	int pid = 0, status;
	context_t context;
	const ssize_t stack_offset =
		stress_get_stack_direction(&pid) *
		(CLONE_STACK_SIZE - 64);
	char stack[CLONE_STACK_SIZE];
	char *stack_top = stack + stack_offset;

	(void)instance;

	context.name = name;
	context.max_ops = max_ops;
	context.counter = counter;

	pid = clone(stress_bind_mount_child,
		stack_top, CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWPID | CLONE_VM, &context, 0);
	if (pid < 0) {
		int rc = exit_status(errno);

		pr_fail(stderr, "%s: clone failed: errno=%d (%s)\n",
			name, errno, strerror(errno));
		return rc;
	}

	do {
		/* Twiddle thumbs */
		sleep(1);
	} while (opt_do_run && (!max_ops || *counter < max_ops));

	(void)kill(pid, SIGKILL);
	(void)waitpid(pid, &status, 0);

	return EXIT_SUCCESS;
}
/*
 *  stress_userfaultfd_oomable()
 *	stress userfaultfd system call, this
 *	is an OOM-able child process that the
 *	parent can restart
 */
static int stress_userfaultfd_oomable(
	const args_t *args,
	const size_t userfaultfd_bytes)
{
	const size_t page_size = args->page_size;
	size_t sz;
	uint8_t *data;
	void *zero_page = NULL;
	int fd = -1, fdinfo = -1, status, rc = EXIT_SUCCESS, count = 0;
	const unsigned int uffdio_copy = 1 << _UFFDIO_COPY;
	const unsigned int uffdio_zeropage = 1 << _UFFDIO_ZEROPAGE;
	pid_t pid;
	struct uffdio_api api;
	struct uffdio_register reg;
	context_t c;
	bool do_poll = true;
	char filename[PATH_MAX];

	/* Child clone stack */
	static uint8_t stack[STACK_SIZE];
	const ssize_t stack_offset =
		stress_get_stack_direction() * (STACK_SIZE - 64);
	uint8_t *stack_top = stack + stack_offset;

	sz = userfaultfd_bytes & ~(page_size - 1);

	if (posix_memalign(&zero_page, page_size, page_size)) {
		pr_err("%s: zero page allocation failed\n", args->name);
		return EXIT_NO_RESOURCE;
	}

	data = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (data == MAP_FAILED) {
		rc = EXIT_NO_RESOURCE;
		pr_err("%s: mmap failed\n", args->name);
		goto free_zeropage;
	}

	/* Get userfault fd */
	if ((fd = shim_userfaultfd(0)) < 0) {
		if (errno == ENOSYS) {
			pr_inf("%s: stressor will be skipped, "
				"userfaultfd not supported\n",
				args->name);
			rc = EXIT_NOT_IMPLEMENTED;
			goto unmap_data;
		}
		rc = exit_status(errno);
		pr_err("%s: userfaultfd failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
		goto unmap_data;
	}

	(void)snprintf(filename, sizeof(filename), "/proc/%d/fdinfo/%d",
		getpid(), fd);
	fdinfo = open(filename, O_RDONLY);

	if (stress_set_nonblock(fd) < 0)
		do_poll = false;

	/* API sanity check */
	(void)memset(&api, 0, sizeof(api));
	api.api = UFFD_API;
	api.features = 0;
	if (ioctl(fd, UFFDIO_API, &api) < 0) {
		pr_err("%s: ioctl UFFDIO_API failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
		rc = EXIT_FAILURE;
		goto unmap_data;
	}
	if (api.api != UFFD_API) {
		pr_err("%s: ioctl UFFDIO_API API check failed\n",
			args->name);
		rc = EXIT_FAILURE;
		goto unmap_data;
	}

	/* Register fault handling mode */
	(void)memset(&reg, 0, sizeof(reg));
	reg.range.start = (unsigned long)data;
	reg.range.len = sz;
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(fd, UFFDIO_REGISTER, &reg) < 0) {
		pr_err("%s: ioctl UFFDIO_REGISTER failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
		rc = EXIT_FAILURE;
		goto unmap_data;
	}

	/* OK, so do we have copy supported? */
	if ((reg.ioctls & uffdio_copy) != uffdio_copy) {
		pr_err("%s: ioctl UFFDIO_REGISTER did not support _UFFDIO_COPY\n",
			args->name);
		rc = EXIT_FAILURE;
		goto unmap_data;
	}
	/* OK, so do we have zeropage supported? */
	if ((reg.ioctls & uffdio_zeropage) != uffdio_zeropage) {
		pr_err("%s: ioctl UFFDIO_REGISTER did not support _UFFDIO_ZEROPAGE\n",
			args->name);
		rc = EXIT_FAILURE;
		goto unmap_data;
	}

	/* Set up context for child */
	c.args = args;
	c.data = data;
	c.sz = sz;
	c.page_size = page_size;
	c.parent = getpid();

	/*
	 *  We need to clone the child and share the same VM address space
	 *  as parent so we can perform the page fault handling
	 */
	pid = clone(stress_userfaultfd_child, align_stack(stack_top),
		SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_SIGHAND | CLONE_VM, &c);
	if (pid < 0) {
		pr_err("%s: fork failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
		goto unreg;
	}

	/* Parent */
	do {
		struct uffd_msg msg;
		ssize_t ret;

		/* check we should break out before we block on the read */
		if (!g_keep_stressing_flag)
			break;

		/*
		 * polled wait exercises userfaultfd_poll
		 * in the kernel, but only works if fd is NONBLOCKing
		 */
		if (do_poll) {
			struct pollfd fds[1];

			(void)memset(fds, 0, sizeof fds);
			fds[0].fd = fd;
			fds[0].events = POLLIN;
			/* wait for 1 second max */

			ret = poll(fds, 1, 1000);
			if (ret == 0)
				continue;	/* timed out, redo the poll */
			if (ret < 0) {
				if (errno == EINTR)
					continue;
				if (errno != ENOMEM) {
					pr_fail_err("poll userfaultfd");
					if (!g_keep_stressing_flag)
						break;
				}
				/*
				 *  poll ran out of free space for internal
				 *  fd tables, so give up and block on the
				 *  read anyway
				 */
				goto do_read;
			}
			/* No data, re-poll */
			if (!(fds[0].revents & POLLIN))
				continue;

			if (LIKELY(fdinfo > -1) &&
			    UNLIKELY(count++ >= COUNT_MAX)) {
				ret = lseek(fdinfo, 0, SEEK_SET);
				if (ret == 0) {
					char buffer[4096];

					ret = read(fdinfo, buffer, sizeof(buffer));
					(void)ret;
				}
				count = 0;
			}
		}

do_read:
		if ((ret = read(fd, &msg, sizeof(msg))) < 0) {
			if (errno == EINTR)
				continue;
			pr_fail_err("read userfaultfd");
			if (!g_keep_stressing_flag)
				break;
			continue;
		}
		/* We only expect a page fault event */
		if (msg.event != UFFD_EVENT_PAGEFAULT) {
			pr_fail_err("userfaultfd msg not pagefault event");
			continue;
		}
		/* We only expect a write fault */
		if (!(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)) {
			pr_fail_err("userfaultfd msg not write page fault event");
			continue;
		}
		/* Go handle the page fault */
		if (handle_page_fault(args, fd, (uint8_t *)(ptrdiff_t)msg.arg.pagefault.address,
				zero_page, data, data + sz, page_size) < 0)
			break;
		inc_counter(args);
	} while (keep_stressing());

	/* Run it over, zap child */
	(void)kill(pid, SIGKILL);
	if (shim_waitpid(pid, &status, 0) < 0) {
		pr_dbg("%s: waitpid failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
	}
unreg:
	if (ioctl(fd, UFFDIO_UNREGISTER, &reg) < 0) {
		pr_err("%s: ioctl UFFDIO_UNREGISTER failed, errno = %d (%s)\n",
			args->name, errno, strerror(errno));
		rc = EXIT_FAILURE;
		goto unmap_data;
	}
unmap_data:
	(void)munmap(data, sz);
free_zeropage:
	free(zero_page);
	if (fdinfo > -1)
		(void)close(fdinfo);
	if (fd > -1)
		(void)close(fd);

	return rc;
}