Пример #1
0
static int scrub_start(int argc, char **argv, int resume)
{
	int fdmnt;
	int prg_fd = -1;
	int fdres = -1;
	int ret;
	pid_t pid;
	int c;
	int i;
	int err = 0;
	int e_uncorrectable = 0;
	int e_correctable = 0;
	int print_raw = 0;
	char *path;
	int do_background = 1;
	int do_wait = 0;
	int do_print = 0;
	int do_quiet = 0;
	int do_record = 1;
	int readonly = 0;
	int do_stats_per_dev = 0;
	int ioprio_class = IOPRIO_CLASS_IDLE;
	int ioprio_classdata = 0;
	int n_start = 0;
	int n_skip = 0;
	int n_resume = 0;
	struct btrfs_ioctl_fs_info_args fi_args;
	struct btrfs_ioctl_dev_info_args *di_args = NULL;
	struct scrub_progress *sp = NULL;
	struct scrub_fs_stat fs_stat;
	struct timeval tv;
	struct sockaddr_un addr = {
		.sun_family = AF_UNIX,
	};
	pthread_t *t_devs = NULL;
	pthread_t t_prog;
	pthread_attr_t t_attr;
	struct scrub_file_record **past_scrubs = NULL;
	struct scrub_file_record *last_scrub = NULL;
	char *datafile = strdup(SCRUB_DATA_FILE);
	char fsid[37];
	char sock_path[BTRFS_PATH_NAME_MAX + 1] = "";
	struct scrub_progress_cycle spc;
	pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER;
	void *terr;
	u64 devid;

	optind = 1;
	while ((c = getopt(argc, argv, "BdqrRc:n:")) != -1) {
		switch (c) {
		case 'B':
			do_background = 0;
			do_wait = 1;
			do_print = 1;
			break;
		case 'd':
			do_stats_per_dev = 1;
			break;
		case 'q':
			do_quiet = 1;
			break;
		case 'r':
			readonly = 1;
			break;
		case 'R':
			print_raw = 1;
			break;
		case 'c':
			ioprio_class = (int)strtol(optarg, NULL, 10);
			break;
		case 'n':
			ioprio_classdata = (int)strtol(optarg, NULL, 10);
			break;
		case '?':
		default:
			usage(resume ? cmd_scrub_resume_usage :
						cmd_scrub_start_usage);
		}
	}

	/* try to catch most error cases before forking */

	if (check_argc_exact(argc - optind, 1)) {
		usage(resume ? cmd_scrub_resume_usage :
					cmd_scrub_start_usage);
	}

	spc.progress = NULL;
	if (do_quiet && do_print)
		do_print = 0;

	if (mkdir_p(datafile)) {
		ERR(!do_quiet, "WARNING: cannot create scrub data "
			       "file, mkdir %s failed: %s. Status recording "
			       "disabled\n", datafile, strerror(errno));
		do_record = 0;
	}
	free(datafile);

	path = argv[optind];

	fdmnt = open_path_or_dev_mnt(path);

	if (fdmnt < 0) {
		ERR(!do_quiet, "ERROR: can't access '%s'\n", path);
		return 12;
	}

	ret = get_fs_info(path, &fi_args, &di_args);
	if (ret) {
		ERR(!do_quiet, "ERROR: getting dev info for scrub failed: "
		    "%s\n", strerror(-ret));
		err = 1;
		goto out;
	}
	if (!fi_args.num_devices) {
		ERR(!do_quiet, "ERROR: no devices found\n");
		err = 1;
		goto out;
	}

	uuid_unparse(fi_args.fsid, fsid);
	fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
	if (fdres < 0 && fdres != -ENOENT) {
		ERR(!do_quiet, "WARNING: failed to open status file: "
		    "%s\n", strerror(-fdres));
	} else if (fdres >= 0) {
		past_scrubs = scrub_read_file(fdres, !do_quiet);
		if (IS_ERR(past_scrubs))
			ERR(!do_quiet, "WARNING: failed to read status file: "
			    "%s\n", strerror(-PTR_ERR(past_scrubs)));
		close(fdres);
	}

	t_devs = malloc(fi_args.num_devices * sizeof(*t_devs));
	sp = calloc(fi_args.num_devices, sizeof(*sp));
	spc.progress = calloc(fi_args.num_devices * 2, sizeof(*spc.progress));

	if (!t_devs || !sp || !spc.progress) {
		ERR(!do_quiet, "ERROR: scrub failed: %s", strerror(errno));
		err = 1;
		goto out;
	}

	ret = pthread_attr_init(&t_attr);
	if (ret) {
		ERR(!do_quiet, "ERROR: pthread_attr_init failed: %s\n",
		    strerror(ret));
		err = 1;
		goto out;
	}

	for (i = 0; i < fi_args.num_devices; ++i) {
		devid = di_args[i].devid;
		ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
		if (ret) {
			ERR(!do_quiet, "ERROR: pthread_mutex_init failed: "
			    "%s\n", strerror(ret));
			err = 1;
			goto out;
		}
		last_scrub = last_dev_scrub(past_scrubs, devid);
		sp[i].scrub_args.devid = devid;
		sp[i].fd = fdmnt;
		if (resume && last_scrub && (last_scrub->stats.canceled ||
					     !last_scrub->stats.finished)) {
			++n_resume;
			sp[i].scrub_args.start = last_scrub->p.last_physical;
			sp[i].resumed = last_scrub;
		} else if (resume) {
			++n_skip;
			sp[i].skip = 1;
			sp[i].resumed = last_scrub;
			continue;
		} else {
			++n_start;
			sp[i].scrub_args.start = 0ll;
			sp[i].resumed = NULL;
		}
		sp[i].skip = 0;
		sp[i].scrub_args.end = (u64)-1ll;
		sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0;
		sp[i].ioprio_class = ioprio_class;
		sp[i].ioprio_classdata = ioprio_classdata;
	}

	if (!n_start && !n_resume) {
		if (!do_quiet)
			printf("scrub: nothing to resume for %s, fsid %s\n",
			       path, fsid);
		err = 0;
		goto out;
	}

	ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0);
	while (ret != -1) {
		ret = scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, NULL,
					sock_path, sizeof(sock_path));
		/* ignore EOVERFLOW, try using a shorter path for the socket */
		addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
		strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
		ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
		if (ret != -1 || errno != EADDRINUSE)
			break;
		/*
		 * bind failed with EADDRINUSE. so let's see if anyone answers
		 * when we make a call to the socket ...
		 */
		ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
		if (!ret || errno != ECONNREFUSED) {
			/* ... yes, so scrub must be running. error out */
			fprintf(stderr, "ERROR: scrub already running\n");
			close(prg_fd);
			prg_fd = -1;
			goto out;
		}
		/*
		 * ... no, this means someone left us alone with an unused
		 * socket in the file system. remove it and try again.
		 */
		ret = unlink(sock_path);
	}
	if (ret != -1)
		ret = listen(prg_fd, 100);
	if (ret == -1) {
		ERR(!do_quiet, "WARNING: failed to open the progress status "
		    "socket at %s: %s. Progress cannot be queried\n",
		    sock_path[0] ? sock_path : SCRUB_PROGRESS_SOCKET_PATH,
		    strerror(errno));
		if (prg_fd != -1) {
			close(prg_fd);
			prg_fd = -1;
			if (sock_path[0])
				unlink(sock_path);
		}
	}

	if (do_record) {
		/* write all-zero progress file for a start */
		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
					   fi_args.num_devices);
		if (ret) {
			ERR(!do_quiet, "WARNING: failed to write the progress "
			    "status file: %s. Status recording disabled\n",
			    strerror(-ret));
			do_record = 0;
		}
	}

	if (do_background) {
		pid = fork();
		if (pid == -1) {
			ERR(!do_quiet, "ERROR: cannot scrub, fork failed: "
					"%s\n", strerror(errno));
			err = 1;
			goto out;
		}

		if (pid) {
			int stat;
			scrub_handle_sigint_parent();
			if (!do_quiet)
				printf("scrub %s on %s, fsid %s (pid=%d)\n",
				       n_start ? "started" : "resumed",
				       path, fsid, pid);
			if (!do_wait) {
				err = 0;
				goto out;
			}
			ret = wait(&stat);
			if (ret != pid) {
				ERR(!do_quiet, "ERROR: wait failed: (ret=%d) "
				    "%s\n", ret, strerror(errno));
				err = 1;
				goto out;
			}
			if (!WIFEXITED(stat) || WEXITSTATUS(stat)) {
				ERR(!do_quiet, "ERROR: scrub process failed\n");
				err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1;
				goto out;
			}
			err = 0;
			goto out;
		}
	}

	scrub_handle_sigint_child(fdmnt);

	for (i = 0; i < fi_args.num_devices; ++i) {
		if (sp[i].skip) {
			sp[i].scrub_args.progress = sp[i].resumed->p;
			sp[i].stats = sp[i].resumed->stats;
			sp[i].ret = 0;
			sp[i].stats.finished = 1;
			continue;
		}
		devid = di_args[i].devid;
		gettimeofday(&tv, NULL);
		sp[i].stats.t_start = tv.tv_sec;
		ret = pthread_create(&t_devs[i], &t_attr,
					scrub_one_dev, &sp[i]);
		if (ret) {
			if (do_print)
				fprintf(stderr, "ERROR: creating "
					"scrub_one_dev[%llu] thread failed: "
					"%s\n", devid, strerror(ret));
			err = 1;
			goto out;
		}
	}

	spc.fdmnt = fdmnt;
	spc.prg_fd = prg_fd;
	spc.do_record = do_record;
	spc.write_mutex = &spc_write_mutex;
	spc.shared_progress = sp;
	spc.fi = &fi_args;
	ret = pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc);
	if (ret) {
		if (do_print)
			fprintf(stderr, "ERROR: creating progress thread "
				"failed: %s\n", strerror(ret));
		err = 1;
		goto out;
	}

	err = 0;
	for (i = 0; i < fi_args.num_devices; ++i) {
		if (sp[i].skip)
			continue;
		devid = di_args[i].devid;
		ret = pthread_join(t_devs[i], NULL);
		if (ret) {
			if (do_print)
				fprintf(stderr, "ERROR: pthread_join failed "
					"for scrub_one_dev[%llu]: %s\n", devid,
					strerror(ret));
			++err;
			continue;
		}
		if (sp[i].ret && sp[i].ioctl_errno == ENODEV) {
			if (do_print)
				fprintf(stderr, "WARNING: device %lld not "
					"present\n", devid);
			continue;
		}
		if (sp[i].ret && sp[i].ioctl_errno == ECANCELED) {
			++err;
		} else if (sp[i].ret) {
			if (do_print)
				fprintf(stderr, "ERROR: scrubbing %s failed "
					"for device id %lld (%s)\n", path,
					devid, strerror(sp[i].ioctl_errno));
			++err;
			continue;
		}
		if (sp[i].scrub_args.progress.uncorrectable_errors > 0)
			e_uncorrectable++;
		if (sp[i].scrub_args.progress.corrected_errors > 0
		    || sp[i].scrub_args.progress.unverified_errors > 0)
			e_correctable++;
	}

	if (do_print) {
		const char *append = "done";
		if (!do_stats_per_dev)
			init_fs_stat(&fs_stat);
		for (i = 0; i < fi_args.num_devices; ++i) {
			if (do_stats_per_dev) {
				print_scrub_dev(&di_args[i],
						&sp[i].scrub_args.progress,
						print_raw,
						sp[i].ret ? "canceled" : "done",
						&sp[i].stats);
			} else {
				if (sp[i].ret)
					append = "canceled";
				add_to_fs_stat(&sp[i].scrub_args.progress,
						&sp[i].stats, &fs_stat);
			}
		}
		if (!do_stats_per_dev) {
			printf("scrub %s for %s\n", append, fsid);
			print_fs_stat(&fs_stat, print_raw);
		}
	}

	ret = pthread_cancel(t_prog);
	if (!ret)
		ret = pthread_join(t_prog, &terr);

	/* check for errors from the handling of the progress thread */
	if (do_print && ret) {
		fprintf(stderr, "ERROR: progress thread handling failed: %s\n",
			strerror(ret));
	}

	/* check for errors returned from the progress thread itself */
	if (do_print && terr && terr != PTHREAD_CANCELED) {
		fprintf(stderr, "ERROR: recording progress "
			"failed: %s\n", strerror(-PTR_ERR(terr)));
	}

	if (do_record) {
		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
					   fi_args.num_devices);
		if (ret && do_print) {
			fprintf(stderr, "ERROR: failed to record the result: "
				"%s\n", strerror(-ret));
		}
	}

	scrub_handle_sigint_child(-1);

out:
	free_history(past_scrubs);
	free(di_args);
	free(t_devs);
	free(sp);
	free(spc.progress);
	if (prg_fd > -1) {
		close(prg_fd);
		if (sock_path[0])
			unlink(sock_path);
	}
	close(fdmnt);

	if (err)
		return 1;
	if (e_correctable)
		return 7;
	if (e_uncorrectable)
		return 8;
	return 0;
}
Пример #2
0
static int cmd_scrub_status(int argc, char **argv)
{
	char *path;
	struct btrfs_ioctl_fs_info_args fi_args;
	struct btrfs_ioctl_dev_info_args *di_args = NULL;
	struct scrub_file_record **past_scrubs = NULL;
	struct scrub_file_record *last_scrub;
	struct scrub_fs_stat fs_stat;
	struct sockaddr_un addr = {
		.sun_family = AF_UNIX,
	};
	int ret;
	int i;
	int fdmnt;
	int print_raw = 0;
	int do_stats_per_dev = 0;
	int c;
	char fsid[37];
	int fdres = -1;
	int err = 0;

	optind = 1;
	while ((c = getopt(argc, argv, "dR")) != -1) {
		switch (c) {
		case 'd':
			do_stats_per_dev = 1;
			break;
		case 'R':
			print_raw = 1;
			break;
		case '?':
		default:
			usage(cmd_scrub_status_usage);
		}
	}

	if (check_argc_exact(argc - optind, 1))
		usage(cmd_scrub_status_usage);

	path = argv[optind];

	fdmnt = open_path_or_dev_mnt(path);

	if (fdmnt < 0) {
		fprintf(stderr, "ERROR: can't access to '%s'\n", path);
		return 12;
	}

	ret = get_fs_info(path, &fi_args, &di_args);
	if (ret) {
		fprintf(stderr, "ERROR: getting dev info for scrub failed: "
				"%s\n", strerror(-ret));
		err = 1;
		goto out;
	}
	if (!fi_args.num_devices) {
		fprintf(stderr, "ERROR: no devices found\n");
		err = 1;
		goto out;
	}

	uuid_unparse(fi_args.fsid, fsid);

	fdres = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fdres == -1) {
		fprintf(stderr, "ERROR: failed to create socket to "
			"receive progress information: %s\n",
			strerror(errno));
		err = 1;
		goto out;
	}
	scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid,
			NULL, addr.sun_path, sizeof(addr.sun_path));
	/* ignore EOVERFLOW, just use shorter name and hope for the best */
	addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
	ret = connect(fdres, (struct sockaddr *)&addr, sizeof(addr));
	if (ret == -1) {
		close(fdres);
		fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
		if (fdres < 0 && fdres != -ENOENT) {
			fprintf(stderr, "WARNING: failed to open status file: "
				"%s\n", strerror(-fdres));
			err = 1;
			goto out;
		}
	}

	if (fdres >= 0) {
		past_scrubs = scrub_read_file(fdres, 1);
		if (IS_ERR(past_scrubs))
			fprintf(stderr, "WARNING: failed to read status: %s\n",
				strerror(-PTR_ERR(past_scrubs)));
	}

	printf("scrub status for %s\n", fsid);

	if (do_stats_per_dev) {
		for (i = 0; i < fi_args.num_devices; ++i) {
			last_scrub = last_dev_scrub(past_scrubs,
							di_args[i].devid);
			if (!last_scrub) {
				print_scrub_dev(&di_args[i], NULL, print_raw,
						NULL, NULL);
				continue;
			}
			print_scrub_dev(&di_args[i], &last_scrub->p, print_raw,
					last_scrub->stats.finished ?
							"history" : "status",
					&last_scrub->stats);
		}
	} else {
		init_fs_stat(&fs_stat);
		for (i = 0; i < fi_args.num_devices; ++i) {
			last_scrub = last_dev_scrub(past_scrubs,
							di_args[i].devid);
			if (!last_scrub)
				continue;
			add_to_fs_stat(&last_scrub->p, &last_scrub->stats,
					&fs_stat);
		}
		print_fs_stat(&fs_stat, print_raw);
	}

out:
	free_history(past_scrubs);
	free(di_args);
	if (fdres > -1)
		close(fdres);

	return err;
}
Пример #3
0
static int scrub_start(int argc, char **argv, int resume)
{
	int fdmnt;
	int prg_fd = -1;
	int fdres = -1;
	int ret;
	pid_t pid;
	int c;
	int i;
	int err = 0;
	int e_uncorrectable = 0;
	int e_correctable = 0;
	int print_raw = 0;
	char *path;
	int do_background = 1;
	int do_wait = 0;
	int do_print = 0;
	int do_quiet = 0;
	int do_record = 1;
	int readonly = 0;
	int do_stats_per_dev = 0;
	int ioprio_class = IOPRIO_CLASS_IDLE;
	int ioprio_classdata = 0;
	int n_start = 0;
	int n_skip = 0;
	int n_resume = 0;
	struct btrfs_ioctl_fs_info_args fi_args;
	struct btrfs_ioctl_dev_info_args *di_args = NULL;
	struct scrub_progress *sp = NULL;
	struct scrub_fs_stat fs_stat;
	struct timeval tv;
	struct sockaddr_un addr = {
		.sun_family = AF_UNIX,
	};
	pthread_t *t_devs = NULL;
	pthread_t t_prog;
	struct scrub_file_record **past_scrubs = NULL;
	struct scrub_file_record *last_scrub = NULL;
	char *datafile = strdup(SCRUB_DATA_FILE);
	char fsid[BTRFS_UUID_UNPARSED_SIZE];
	char sock_path[PATH_MAX] = "";
	struct scrub_progress_cycle spc;
	pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER;
	void *terr;
	u64 devid;
	DIR *dirstream = NULL;
	int force = 0;
	int nothing_to_resume = 0;

	while ((c = getopt(argc, argv, "BdqrRc:n:f")) != -1) {
		switch (c) {
		case 'B':
			do_background = 0;
			do_wait = 1;
			do_print = 1;
			break;
		case 'd':
			do_stats_per_dev = 1;
			break;
		case 'q':
			do_quiet = 1;
			break;
		case 'r':
			readonly = 1;
			break;
		case 'R':
			print_raw = 1;
			break;
		case 'c':
			ioprio_class = (int)strtol(optarg, NULL, 10);
			break;
		case 'n':
			ioprio_classdata = (int)strtol(optarg, NULL, 10);
			break;
		case 'f':
			force = 1;
			break;
		case '?':
		default:
			usage(resume ? cmd_scrub_resume_usage :
						cmd_scrub_start_usage);
		}
	}

	/* try to catch most error cases before forking */

	if (check_argc_exact(argc - optind, 1)) {
		usage(resume ? cmd_scrub_resume_usage :
					cmd_scrub_start_usage);
	}

	spc.progress = NULL;
	if (do_quiet && do_print)
		do_print = 0;

	if (mkdir_p(datafile)) {
		warning_on(!do_quiet,
    "cannot create scrub data file, mkdir %s failed: %s. Status recording disabled",
			datafile, strerror(errno));
		do_record = 0;
	}
	free(datafile);

	path = argv[optind];

	fdmnt = open_path_or_dev_mnt(path, &dirstream, !do_quiet);
	if (fdmnt < 0)
		return 1;

	ret = get_fs_info(path, &fi_args, &di_args);
	if (ret) {
		error_on(!do_quiet,
			"getting dev info for scrub failed: %s",
			 strerror(-ret));
		err = 1;
		goto out;
	}
	if (!fi_args.num_devices) {
		error_on(!do_quiet, "no devices found");
		err = 1;
		goto out;
	}

	uuid_unparse(fi_args.fsid, fsid);
	fdres = scrub_open_file_r(SCRUB_DATA_FILE, fsid);
	if (fdres < 0 && fdres != -ENOENT) {
		warning_on(!do_quiet, "failed to open status file: %s",
			strerror(-fdres));
	} else if (fdres >= 0) {
		past_scrubs = scrub_read_file(fdres, !do_quiet);
		if (IS_ERR(past_scrubs))
			warning_on(!do_quiet, "failed to read status file: %s",
				strerror(-PTR_ERR(past_scrubs)));
		close(fdres);
	}

	/*
	 * Check for stale information in the status file, ie. if it's
	 * canceled=0, finished=0 but no scrub is running.
	 */
	if (!is_scrub_running_in_kernel(fdmnt, di_args, fi_args.num_devices))
		force = 1;

	/*
	 * check whether any involved device is already busy running a
	 * scrub. This would cause damaged status messages and the state
	 * "aborted" without the explanation that a scrub was already
	 * running. Therefore check it first, prevent it and give some
	 * feedback to the user if scrub is already running.
	 * Note that if scrub is started with a block device as the
	 * parameter, only that particular block device is checked. It
	 * is a normal mode of operation to start scrub on multiple
	 * single devices, there is no reason to prevent this.
	 */
	if (!force && is_scrub_running_on_fs(&fi_args, di_args, past_scrubs)) {
		error_on(!do_quiet,
			"Scrub is already running.\n"
			"To cancel use 'btrfs scrub cancel %s'.\n"
			"To see the status use 'btrfs scrub status [-d] %s'",
			path, path);
		err = 1;
		goto out;
	}

	t_devs = malloc(fi_args.num_devices * sizeof(*t_devs));
	sp = calloc(fi_args.num_devices, sizeof(*sp));
	spc.progress = calloc(fi_args.num_devices * 2, sizeof(*spc.progress));

	if (!t_devs || !sp || !spc.progress) {
		error_on(!do_quiet, "scrub failed: %s", strerror(errno));
		err = 1;
		goto out;
	}

	for (i = 0; i < fi_args.num_devices; ++i) {
		devid = di_args[i].devid;
		ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
		if (ret) {
			error_on(!do_quiet, "pthread_mutex_init failed: %s",
				strerror(ret));
			err = 1;
			goto out;
		}
		last_scrub = last_dev_scrub(past_scrubs, devid);
		sp[i].scrub_args.devid = devid;
		sp[i].fd = fdmnt;
		if (resume && last_scrub && (last_scrub->stats.canceled ||
					     !last_scrub->stats.finished)) {
			++n_resume;
			sp[i].scrub_args.start = last_scrub->p.last_physical;
			sp[i].resumed = last_scrub;
		} else if (resume) {
			++n_skip;
			sp[i].skip = 1;
			sp[i].resumed = last_scrub;
			continue;
		} else {
			++n_start;
			sp[i].scrub_args.start = 0ll;
			sp[i].resumed = NULL;
		}
		sp[i].skip = 0;
		sp[i].scrub_args.end = (u64)-1ll;
		sp[i].scrub_args.flags = readonly ? BTRFS_SCRUB_READONLY : 0;
		sp[i].ioprio_class = ioprio_class;
		sp[i].ioprio_classdata = ioprio_classdata;
	}

	if (!n_start && !n_resume) {
		if (!do_quiet)
			printf("scrub: nothing to resume for %s, fsid %s\n",
			       path, fsid);
		nothing_to_resume = 1;
		goto out;
	}

	ret = prg_fd = socket(AF_UNIX, SOCK_STREAM, 0);
	while (ret != -1) {
		ret = scrub_datafile(SCRUB_PROGRESS_SOCKET_PATH, fsid, NULL,
					sock_path, sizeof(sock_path));
		/* ignore EOVERFLOW, try using a shorter path for the socket */
		addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
		strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
		ret = bind(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
		if (ret != -1 || errno != EADDRINUSE)
			break;
		/*
		 * bind failed with EADDRINUSE. so let's see if anyone answers
		 * when we make a call to the socket ...
		 */
		ret = connect(prg_fd, (struct sockaddr *)&addr, sizeof(addr));
		if (!ret || errno != ECONNREFUSED) {
			/* ... yes, so scrub must be running. error out */
			error("scrub already running");
			close(prg_fd);
			prg_fd = -1;
			goto out;
		}
		/*
		 * ... no, this means someone left us alone with an unused
		 * socket in the file system. remove it and try again.
		 */
		ret = unlink(sock_path);
	}
	if (ret != -1)
		ret = listen(prg_fd, 100);
	if (ret == -1) {
		warning_on(!do_quiet,
   "failed to open the progress status socket at %s: %s. Progress cannot be queried",
			sock_path[0] ? sock_path :
			SCRUB_PROGRESS_SOCKET_PATH, strerror(errno));
		if (prg_fd != -1) {
			close(prg_fd);
			prg_fd = -1;
			if (sock_path[0])
				unlink(sock_path);
		}
	}

	if (do_record) {
		/* write all-zero progress file for a start */
		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
					   fi_args.num_devices);
		if (ret) {
			warning_on(!do_quiet,
   "failed to write the progress status file: %s. Status recording disabled",
				strerror(-ret));
			do_record = 0;
		}
	}

	if (do_background) {
		pid = fork();
		if (pid == -1) {
			error_on(!do_quiet, "cannot scrub, fork failed: %s",
				strerror(errno));
			err = 1;
			goto out;
		}

		if (pid) {
			int stat;
			scrub_handle_sigint_parent();
			if (!do_quiet)
				printf("scrub %s on %s, fsid %s (pid=%d)\n",
				       n_start ? "started" : "resumed",
				       path, fsid, pid);
			if (!do_wait) {
				err = 0;
				goto out;
			}
			ret = wait(&stat);
			if (ret != pid) {
				error_on(!do_quiet, "wait failed (ret=%d): %s",
					ret, strerror(errno));
				err = 1;
				goto out;
			}
			if (!WIFEXITED(stat) || WEXITSTATUS(stat)) {
				error_on(!do_quiet, "scrub process failed");
				err = WIFEXITED(stat) ? WEXITSTATUS(stat) : -1;
				goto out;
			}
			err = 0;
			goto out;
		}
	}

	scrub_handle_sigint_child(fdmnt);

	for (i = 0; i < fi_args.num_devices; ++i) {
		if (sp[i].skip) {
			sp[i].scrub_args.progress = sp[i].resumed->p;
			sp[i].stats = sp[i].resumed->stats;
			sp[i].ret = 0;
			sp[i].stats.finished = 1;
			continue;
		}
		devid = di_args[i].devid;
		gettimeofday(&tv, NULL);
		sp[i].stats.t_start = tv.tv_sec;
		ret = pthread_create(&t_devs[i], NULL,
					scrub_one_dev, &sp[i]);
		if (ret) {
			if (do_print)
			error("creating scrub_one_dev[%llu] thread failed: %s",
				devid, strerror(ret));
			err = 1;
			goto out;
		}
	}

	spc.fdmnt = fdmnt;
	spc.prg_fd = prg_fd;
	spc.do_record = do_record;
	spc.write_mutex = &spc_write_mutex;
	spc.shared_progress = sp;
	spc.fi = &fi_args;
	ret = pthread_create(&t_prog, NULL, scrub_progress_cycle, &spc);
	if (ret) {
		if (do_print)
			error("creating progress thread failed: %s",
				strerror(ret));
		err = 1;
		goto out;
	}

	err = 0;
	for (i = 0; i < fi_args.num_devices; ++i) {
		if (sp[i].skip)
			continue;
		devid = di_args[i].devid;
		ret = pthread_join(t_devs[i], NULL);
		if (ret) {
			if (do_print)
			  error("pthread_join failed for scrub_one_dev[%llu]: %s",
				devid, strerror(ret));
			++err;
			continue;
		}
		if (sp[i].ret) {
			switch (sp[i].ioctl_errno) {
			case ENODEV:
				if (do_print)
					warning("device %lld not present",
						devid);
				continue;
			case ECANCELED:
				++err;
				break;
			default:
				if (do_print)
		error("scrubbing %s failed for device id %lld: ret=%d, errno=%d (%s)",
					path, devid,
					sp[i].ret, sp[i].ioctl_errno,
					strerror(sp[i].ioctl_errno));
				++err;
				continue;
			}
		}
		if (sp[i].scrub_args.progress.uncorrectable_errors > 0)
			e_uncorrectable++;
		if (sp[i].scrub_args.progress.corrected_errors > 0
		    || sp[i].scrub_args.progress.unverified_errors > 0)
			e_correctable++;
	}

	if (do_print) {
		const char *append = "done";
		if (!do_stats_per_dev)
			init_fs_stat(&fs_stat);
		for (i = 0; i < fi_args.num_devices; ++i) {
			if (do_stats_per_dev) {
				print_scrub_dev(&di_args[i],
						&sp[i].scrub_args.progress,
						print_raw,
						sp[i].ret ? "canceled" : "done",
						&sp[i].stats);
			} else {
				if (sp[i].ret)
					append = "canceled";
				add_to_fs_stat(&sp[i].scrub_args.progress,
						&sp[i].stats, &fs_stat);
			}
		}
		if (!do_stats_per_dev) {
			printf("scrub %s for %s\n", append, fsid);
			print_fs_stat(&fs_stat, print_raw);
		}
	}

	ret = pthread_cancel(t_prog);
	if (!ret)
		ret = pthread_join(t_prog, &terr);

	/* check for errors from the handling of the progress thread */
	if (do_print && ret) {
		error("progress thread handling failed: %s",
			strerror(ret));
	}

	/* check for errors returned from the progress thread itself */
	if (do_print && terr && terr != PTHREAD_CANCELED)
		error("recording progress failed: %s",
			strerror(-PTR_ERR(terr)));

	if (do_record) {
		ret = scrub_write_progress(&spc_write_mutex, fsid, sp,
					   fi_args.num_devices);
		if (ret && do_print)
			error("failed to record the result: %s",
				strerror(-ret));
	}

	scrub_handle_sigint_child(-1);

out:
	free_history(past_scrubs);
	free(di_args);
	free(t_devs);
	free(sp);
	free(spc.progress);
	if (prg_fd > -1) {
		close(prg_fd);
		if (sock_path[0])
			unlink(sock_path);
	}
	close_file_or_dir(fdmnt, dirstream);

	if (err)
		return 1;
	if (nothing_to_resume)
		return 2;
	if (e_uncorrectable) {
		error_on(!do_quiet, "there are uncorrectable errors");
		return 3;
	}
	if (e_correctable)
		warning_on(!do_quiet,
			"errors detected during scrubbing, corrected");

	return 0;
}