示例#1
0
文件: mdmon.c 项目: timsoooooon/mdadm
/*
 * mdmon entry point.
 *
 * Parses the command line, resolves the container argument to an md
 * device name and passes it to mdmon().
 *
 * Options: --all/-a, --takeover/-t, --foreground/-F, --offroot, --help/-h.
 * With --all, or when the container argument is the literal string
 * "/proc/mdstat", mdmon() is called once for every /proc/mdstat entry whose
 * metadata version starts with "external:" and is not a subarray; the
 * bitwise OR of those results is returned.
 *
 * Otherwise the value returned by mdmon() for the single container is
 * returned; an unresolvable container name terminates with exit(1).
 */
int main(int argc, char *argv[])
{
	char *container_name = NULL;
	char *devnm = NULL;
	int status = 0;
	int opt;
	int all = 0;
	int takeover = 0;
	int dofork = 1;
	static struct option options[] = {
		{"all", 0, NULL, 'a'},
		{"takeover", 0, NULL, 't'},
		{"help", 0, NULL, 'h'},
		{"offroot", 0, NULL, OffRootOpt},
		{"foreground", 0, NULL, 'F'},
		{NULL, 0, NULL, 0}
	};

	if (in_initrd()) {
		/*
		 * set first char of argv[0] to @. This is used by
		 * systemd to signal that the task was launched from
		 * initrd/initramfs and should be preserved during shutdown
		 */
		argv[0][0] = '@';
	}

	while ((opt = getopt_long(argc, argv, "thaF", options, NULL)) != -1) {
		switch (opt) {
		case 'a':
			/* Remember the argv slot holding the option itself: it
			 * is reused below as scratch space for the per-container
			 * name shown in the process command line.
			 */
			container_name = argv[optind-1];
			all = 1;
			break;
		case 't':
			takeover = 1;
			break;
		case 'F':
			dofork = 0;
			break;
		case OffRootOpt:
			argv[0][0] = '@';
			break;
		case 'h':
		default:
			usage();
			break;
		}
	}

	/* Without --all the container is the first non-option argument */
	if (all == 0 && container_name == NULL) {
		if (argv[optind])
			container_name = argv[optind];
	}

	/* NOTE(review): the code below dereferences container_name, so
	 * usage() is presumably expected not to return - confirm.
	 */
	if (container_name == NULL)
		usage();

	if (argc - optind > 1)
		usage();

	if (strcmp(container_name, "/proc/mdstat") == 0)
		all = 1;

	if (all) {
		struct mdstat_ent *mdstat, *e;
		int container_len = strlen(container_name);

		/* launch an mdmon instance for each container found */
		mdstat = mdstat_read(0, 0);
		for (e = mdstat; e; e = e->next) {
			if (e->metadata_version &&
			    strncmp(e->metadata_version, "external:", 9) == 0 &&
			    !is_subarray(&e->metadata_version[9])) {
				/* update cmdline so this mdmon instance can be
				 * distinguished from others in a call to ps(1);
				 * only done when the name fits in the existing
				 * argv slot.
				 */
				if (strlen(e->devnm) <= (unsigned)container_len) {
					memset(container_name, 0, container_len);
					sprintf(container_name, "%s", e->devnm);
				}
				status |= mdmon(e->devnm, 1, takeover);
			}
		}
		free_mdstat(mdstat);

		return status;
	} else if (strncmp(container_name, "md", 2) == 0) {
		int id = devnm2devid(container_name);
		if (id)
			devnm = container_name;
	} else {
		struct stat st;

		if (stat(container_name, &st) == 0) {
			/* A NULL name must never reach xstrdup(): leave devnm
			 * unset instead so the "not a valid md device name"
			 * diagnostic below is reported.
			 */
			const char *nm = stat2devnm(&st);

			if (nm)
				devnm = xstrdup(nm);
		}
	}

	if (!devnm) {
		pr_err("%s is not a valid md device name\n",
			container_name);
		exit(1);
	}
	return mdmon(devnm, dofork && do_fork(), takeover);
}
示例#2
0
/**
 * block_monitor - prevent mdmon spare assignment
 * @container - container to block
 * @freeze - flag to additionally freeze sync_action
 *
 * Used by the reshape code to freeze a container and by the auto-rebuild
 * implementation to move spares atomically.  In both situations mdmon must
 * not hand a spare to a failed device, because the caller has other plans
 * for that spare.  For reshape, sync_action is frozen too so that no
 * recovery can start before the reshape has been fully prepared.
 *
 * mdmon is told that an array is frozen by marking its 'metadata' name
 * with a leading '-'.  That marker used to mean "Don't make this array
 * read/write, leave it readonly"; it now carries the more general meaning
 * "Don't reconfigure this array at all".  Older versions of mdmon (which
 * might run from initrd) do not understand this, so a running mdmon is
 * first checked to be new enough.
 *
 * Returns 0 when every member of the container was blocked, -1 otherwise.
 * On failure the members that had already been blocked are unblocked.
 *
 * NOTE(review): the check performed after block_subarray() can only pass
 * when @freeze is non-zero, so with @freeze == 0 the first member found is
 * unblocked again and the call fails - confirm that this is intended.
 */
int block_monitor(char *container, const int freeze)
{
	int devnum = devname2devnum(container);
	struct mdstat_ent *mdstat_list, *member, *undo;
	struct mdinfo *sra = NULL;
	char *version = NULL;
	char buf[64];
	int frozen_ok;
	int rv = 0;

	/* When no mdmon is active it is assumed that any instance started
	 * later will match the current mdadm version.  If that assumption
	 * is violated an array meant for reshape may inadvertently be
	 * rebuilt, or a rebuild may start on a spare that was to be moved
	 * to another container.  Hence only a running mdmon is version
	 * checked.
	 */
	if (mdmon_running(devnum)) {
		int ver;

		version = ping_monitor_version(container);
		if (version)
			ver = mdadm_version(version);
		else
			ver = -1;
		free(version);
		if (ver < 3002000) {
			fprintf(stderr, Name
				": mdmon instance for %s cannot be disabled\n",
				container);
			return -1;
		}
	}

	mdstat_list = mdstat_read(0, 0);
	if (!mdstat_list) {
		fprintf(stderr, Name
			": failed to read /proc/mdstat while disabling mdmon\n");
		return -1;
	}

	/* Freeze the container contents.  Leaving this loop early (with
	 * 'member' still non-NULL) marks the member that could not be
	 * handled.
	 */
	for (member = mdstat_list; member; member = member->next) {
		if (!is_container_member(member, container))
			continue;

		sysfs_free(sra);
		sra = sysfs_read(-1, member->devnum, GET_VERSION);
		if (!sra) {
			fprintf(stderr, Name
				": failed to read sysfs for subarray%s\n",
				to_subarray(member, container));
			break;
		}

		/* an array that cannot be monitored cannot be reshaped */
		if (sra->text_version[0] == '-')
			break;

		if (freeze && sysfs_freeze_array(sra) < 1)
			break;

		/* Flag the array as off-limits for mdmon; this closes the
		 * race with takeover (reshape) and with spare reassignment
		 * (auto-rebuild).
		 */
		if (block_subarray(sra))
			break;
		ping_monitor(container);

		/* Make sure recovery did not sneak in: either there is no
		 * sync_action attribute at all, or it reads back "frozen".
		 */
		frozen_ok = 0;
		if (freeze) {
			if (!sysfs_attribute_available(sra, NULL, "sync_action"))
				frozen_ok = 1;
			else if (sysfs_attribute_available(sra, NULL, "sync_action") &&
				 sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
				 strcmp(buf, "frozen\n") == 0)
				frozen_ok = 1;
		}
		if (!frozen_ok) {
			unblock_subarray(sra, 0);
			break;
		}

		/* Second check against races: no spares or part-spares may
		 * be present once the array is blocked.
		 */
		sysfs_free(sra);
		sra = sysfs_read(-1, member->devnum, GET_DEVS | GET_STATE);
		if (sra && sra->array.spare_disks > 0) {
			unblock_subarray(sra, freeze);
			break;
		}
	}

	if (member) {
		fprintf(stderr, Name ": failed to freeze subarray%s\n",
			to_subarray(member, container));

		/* Undo the work for every member handled before the failure */
		for (undo = mdstat_list; undo && undo != member; undo = undo->next) {
			if (!is_container_member(undo, container))
				continue;
			sysfs_free(sra);
			sra = sysfs_read(-1, undo->devnum, GET_VERSION);
			if (unblock_subarray(sra, freeze))
				fprintf(stderr, Name ": Failed to unfreeze %s\n", undo->dev);
		}

		ping_monitor(container); /* cleared frozen */
		rv = -1;
	}

	sysfs_free(sra);
	free_mdstat(mdstat_list);

	return rv;
}
示例#3
0
/*
 * Monitor - watch md arrays and raise alerts when their state changes.
 *
 * devlist    : arrays to watch; when NULL the list is taken from the
 *              configuration via conf_get_ident(NULL)
 * mailaddr   : address handed to alert(); when NULL the value from
 *              conf_get_mailaddr() is used
 * alert_cmd  : program handed to alert(); when NULL the value from
 *              conf_get_program() is used
 * period     : passed to mdstat_wait() between passes
 * daemonise  : when set, fork; the parent reports the child pid (stdout or
 *              pidfile) and returns 0
 * scan       : when set, arrays that show up in /proc/mdstat are added to
 *              the watch list, and a mail address or alert command is
 *              mandatory
 * oneshot    : when set, stop after a pass in which no new array was found
 * dosyslog   : passed through to alert()
 * test       : when set, a "TestMessage" alert is raised for each array
 *              during the first pass
 * pidfile    : file that receives the daemon pid; unlinked on return
 * increments : step, in percent, between "RebuildNN" alerts; used as a
 *              divisor, so it is assumed to be > 0
 *
 * Returns 0 normally, 1 when there is nothing to alert with in scan mode
 * or when fork() fails.
 */
int Monitor(mddev_dev_t devlist,
	    char *mailaddr, char *alert_cmd,
	    int period, int daemonise, int scan, int oneshot,
	    int dosyslog, int test, char* pidfile, int increments)
{
	/*
	 * Every few seconds, scan every md device looking for changes
	 * When a change is found, log it, possibly run the alert command,
	 * and possibly send Email
	 *
	 * For each array, we record:
	 *   Update time
	 *   active/working/failed/spare drives
	 *   State of each device.
	 *   %rebuilt if rebuilding
	 *
	 * If the update time changes, check out all the data again
	 * It is possible that we cannot get the state of each device
	 * due to bugs in the md kernel module.
	 * We also read /proc/mdstat to get rebuild percent,
	 * and to get state on all active devices in case of kernel bug.
	 *
	 * Events are:
	 *    Fail
	 *	An active device had Faulty set or Active/Sync removed
	 *    FailSpare
	 *      A spare device had Faulty set
	 *    SpareActive
	 *      An active device had a reverse transition
	 *    RebuildStarted
	 *      percent went from -1 to +ve
	 *    RebuildNN
	 *      percent went from below to not-below NN%
	 *    DeviceDisappeared
	 *      Couldn't access a device which was previously visible
	 *
	 * if we detect an array with active<raid and spare==0
	 * we look at other arrays that have same spare-group
	 * If we find one with active==raid and spare>0,
	 *  and if we can get_disk_info and find a name
	 *  Then we hot-remove and hot-add to the other array
	 *
	 * If devlist is NULL, then we can monitor everything because --scan
	 * was given.  We get an initial list from config file and add anything
	 * that appears in /proc/mdstat
	 */

	struct state {
		char *devname;
		int devnum;	/* to sync with mdstat info */
		long utime;
		int err;
		char *spare_group;
		int active, working, failed, spare, raid;
		int expected_spares;
		int devstate[MaxDisks];
		unsigned devid[MaxDisks];
		int percent;
		struct state *next;
	} *statelist = NULL;
	int finished = 0;
	struct mdstat_ent *mdstat = NULL;
	char *mailfrom = NULL;

	if (!mailaddr) {
		mailaddr = conf_get_mailaddr();
		if (mailaddr && ! scan)
			fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
			       mailaddr);
	}
	mailfrom = conf_get_mailfrom();

	if (!alert_cmd) {
		alert_cmd = conf_get_program();
		if (alert_cmd && ! scan)
			fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
			       alert_cmd);
	}
	if (scan && !mailaddr && !alert_cmd) {
		fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
		return 1;
	}

	if (daemonise) {
		int pid = fork();
		if (pid > 0) {
			/* parent: publish the child's pid and leave */
			if (!pidfile)
				printf("%d\n", pid);
			else {
				FILE *pid_file;
				pid_file=fopen(pidfile, "w");
				if (!pid_file)
					perror("cannot create pid file");
				else {
					fprintf(pid_file,"%d\n", pid);
					fclose(pid_file);
				}
			}
			return 0;
		}
		if (pid < 0) {
			perror("daemonise");
			return 1;
		}
		/* child: point stdin/stdout/stderr at /dev/null and start a
		 * new session
		 */
		close(0);
		open("/dev/null", O_RDWR);
		dup2(0,1);
		dup2(0,2);
		setsid();
	}

	/* Build the initial watch list.  Every state record is allocated
	 * zero-filled: devid[], devstate[] and the disk counters are read
	 * further down before the first successful probe has stored real
	 * values in them.
	 */
	if (devlist == NULL) {
		mddev_ident_t mdlist = conf_get_ident(NULL);
		for (; mdlist; mdlist=mdlist->next) {
			struct state *st;
			if (mdlist->devname == NULL)
				continue;
			if (strcasecmp(mdlist->devname, "<ignore>") == 0)
				continue;
			st = calloc(1, sizeof *st);
			if (st == NULL)
				continue;
			if (mdlist->devname[0] == '/')
				st->devname = strdup(mdlist->devname);
			else {
				/* Relative names live under /dev/md/.  The name
				 * is formatted in one step: chaining strcpy()
				 * calls would restart at the beginning of the
				 * buffer (strcpy() returns its destination) and
				 * wipe out the prefix.
				 */
				st->devname = malloc(8+strlen(mdlist->devname)+1);
				if (st->devname)
					sprintf(st->devname, "/dev/md/%s",
						mdlist->devname);
			}
			if (st->devname == NULL) {
				free(st);
				continue;
			}
			st->utime = 0;
			st->next = statelist;
			st->err = 0;
			st->devnum = INT_MAX;
			st->percent = -2;
			st->expected_spares = mdlist->spare_disks;
			if (mdlist->spare_group)
				st->spare_group = strdup(mdlist->spare_group);
			else
				st->spare_group = NULL;
			statelist = st;
		}
	} else {
		mddev_dev_t dv;
		for (dv=devlist ; dv; dv=dv->next) {
			mddev_ident_t mdlist = conf_get_ident(dv->devname);
			struct state *st = calloc(1, sizeof *st);
			if (st == NULL)
				continue;
			st->devname = strdup(dv->devname);
			if (st->devname == NULL) {
				free(st);
				continue;
			}
			st->utime = 0;
			st->next = statelist;
			st->err = 0;
			st->devnum = INT_MAX;
			st->percent = -2;
			st->expected_spares = -1;
			st->spare_group = NULL;
			if (mdlist) {
				st->expected_spares = mdlist->spare_disks;
				if (mdlist->spare_group)
					st->spare_group = strdup(mdlist->spare_group);
			}
			statelist = st;
		}
	}


	while (! finished) {
		int new_found = 0;
		struct state *st;

		if (mdstat)
			free_mdstat(mdstat);
		mdstat = mdstat_read(oneshot?0:1, 0);

		for (st=statelist; st; st=st->next) {
			struct { int state, major, minor; } info[MaxDisks];
			mdu_array_info_t array;
			struct mdstat_ent *mse = NULL, *mse2;
			char *dev = st->devname;
			int fd;
			int i;

			if (test)
				alert("TestMessage", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			fd = open(dev, O_RDONLY);
			if (fd < 0) {
				/* report a disappearance only once */
				if (!st->err)
					alert("DeviceDisappeared", dev, NULL,
					      mailaddr, mailfrom, alert_cmd, dosyslog);
				st->err=1;
				continue;
			}
			fcntl(fd, F_SETFD, FD_CLOEXEC);
			if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
				if (!st->err)
					alert("DeviceDisappeared", dev, NULL,
					      mailaddr, mailfrom, alert_cmd, dosyslog);
				st->err=1;
				close(fd);
				continue;
			}
			/* It's much easier to list what array levels can't
			 * have a device disappear than all of them that can
			 */
			if (array.level == 0 || array.level == -1) {
				if (!st->err)
					alert("DeviceDisappeared", dev, "Wrong-Level",
					      mailaddr, mailfrom, alert_cmd, dosyslog);
				st->err = 1;
				close(fd);
				continue;
			}
			/* first successful open: work out the device number
			 * used to match this array against /proc/mdstat
			 */
			if (st->devnum == INT_MAX) {
				struct stat stb;
				if (fstat(fd, &stb) == 0 &&
				    (S_IFMT&stb.st_mode)==S_IFBLK) {
					if (major(stb.st_rdev) == MD_MAJOR)
						st->devnum = minor(stb.st_rdev);
					else
						st->devnum = -1- (minor(stb.st_rdev)>>6);
				}
			}

			for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
				if (mse2->devnum == st->devnum) {
					mse2->devnum = INT_MAX; /* flag it as "used" */
					mse = mse2;
				}

			if (array.utime == 0)
				/* external arrays don't update utime */
				array.utime = time(0);

			/* nothing changed since the previous pass */
			if (st->utime == array.utime &&
			    st->failed == array.failed_disks &&
			    st->working == array.working_disks &&
			    st->spare == array.spare_disks &&
			    (mse == NULL  || (
				    mse->percent == st->percent
				    ))) {
				close(fd);
				st->err = 0;
				continue;
			}
			if (st->utime == 0 && /* new array */
			    mse &&	/* is in /proc/mdstat */
			    mse->pattern && strchr(mse->pattern, '_') /* degraded */
				)
				alert("DegradedArray", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);

			if (st->utime == 0 && /* new array */
			    st->expected_spares > 0 &&
			    array.spare_disks < st->expected_spares)
				alert("SparesMissing", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			if (mse &&
			    st->percent == -1 &&
			    mse->percent >= 0)
				alert("RebuildStarted", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			if (mse &&
			    st->percent >= 0 &&
			    mse->percent >= 0 &&
			    (mse->percent / increments) > (st->percent / increments)) {
				/* holds "RebuildNN" or "RebuildStarted" plus NUL */
				char percentalert[15];

				if((mse->percent / increments) == 0)
					snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
				else
					snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);

				alert(percentalert,
				      dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			}

			if (mse &&
			    mse->percent == -1 &&
			    st->percent >= 0) {
				/* Rebuild/sync/whatever just finished.
				 * If there is a number in /mismatch_cnt,
				 * we should report that.
				 */
				struct mdinfo *sra =
				       sysfs_read(-1, st->devnum, GET_MISMATCH);
				if (sra && sra->mismatch_cnt > 0) {
					char cnt[40];
					sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
					alert("RebuildFinished", dev, cnt, mailaddr, mailfrom, alert_cmd, dosyslog);
				} else
					alert("RebuildFinished", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
				/* NOTE(review): confirm plain free() fully
				 * releases a sysfs_read() result here.
				 */
				free(sra);
			}

			if (mse)
				st->percent = mse->percent;


			/* collect the per-disk state while the fd is open */
			for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
			     i++) {
				mdu_disk_info_t disc;
				disc.number = i;
				if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
					info[i].state = disc.state;
					info[i].major = disc.major;
					info[i].minor = disc.minor;
				} else
					info[i].major = info[i].minor = 0;
			}
			close(fd);

			/* compare each slot with what was recorded last time */
			for (i=0; i<MaxDisks; i++) {
				mdu_disk_info_t disc = {0,0,0,0,0};
				int newstate=0;
				int change;
				char *dv = NULL;
				disc.number = i;
				if (i > array.raid_disks + array.nr_disks) {
					newstate = 0;
					disc.major = disc.minor = 0;
				} else if (info[i].major || info[i].minor) {
					newstate = info[i].state;
					dv = map_dev(info[i].major, info[i].minor, 1);
					disc.state = newstate;
					disc.major = info[i].major;
					disc.minor = info[i].minor;
				} else if (mse &&  mse->pattern && i < (int)strlen(mse->pattern)) {
					/* no disk info: fall back on the
					 * pattern from /proc/mdstat
					 */
					switch(mse->pattern[i]) {
					case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
					case '_': newstate = 0; break;
					}
					disc.major = disc.minor = 0;
				}
				if (dv == NULL && st->devid[i])
					dv = map_dev(major(st->devid[i]),
						     minor(st->devid[i]), 1);
				change = newstate ^ st->devstate[i];
				if (st->utime && change && !st->err) {
					if (i < array.raid_disks &&
					    (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
					     ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
					     ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
						)
						alert("Fail", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
					else if (i >= array.raid_disks &&
						 (disc.major || disc.minor) &&
						 st->devid[i] == makedev(disc.major, disc.minor) &&
						 ((newstate&change)&(1<<MD_DISK_FAULTY))
						)
						alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
					else if (i < array.raid_disks &&
						 ! (newstate & (1<<MD_DISK_REMOVED)) &&
						 (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
						  ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
						  ((newstate&change)&(1<<MD_DISK_SYNC)))
						)
						alert("SpareActive", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
				}
				st->devstate[i] = newstate;
				st->devid[i] = makedev(disc.major, disc.minor);
			}
			st->active = array.active_disks;
			st->working = array.working_disks;
			st->spare = array.spare_disks;
			st->failed = array.failed_disks;
			st->utime = array.utime;
			st->raid = array.raid_disks;
			st->err = 0;
		}
		/* now check if there are any new devices found in mdstat */
		if (scan) {
			struct mdstat_ent *mse;
			for (mse=mdstat; mse; mse=mse->next)
				if (mse->devnum != INT_MAX &&
				    mse->level &&
				    (strcmp(mse->level, "raid0")!=0 &&
				     strcmp(mse->level, "linear")!=0)
					) {
					struct state *st = calloc(1, sizeof *st);
					mdu_array_info_t array;
					char *name;
					int fd;
					if (st == NULL)
						continue;
					/* never hand a NULL name to strdup() */
					name = get_md_name(mse->devnum);
					if (name == NULL) {
						free(st);
						continue;
					}
					st->devname = strdup(name);
					if (st->devname == NULL) {
						put_md_name(name);
						free(st);
						continue;
					}
					if ((fd = open(st->devname, O_RDONLY)) < 0 ||
					    ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
						/* no such array */
						if (fd >=0) close(fd);
						put_md_name(st->devname);
						free(st->devname);
						free(st);
						continue;
					}
					close(fd);
					st->utime = 0;
					st->next = statelist;
					st->err = 1;
					st->devnum = mse->devnum;
					st->percent = -2;
					st->spare_group = NULL;
					st->expected_spares = -1;
					statelist = st;
					if (test)
						alert("TestMessage", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
					alert("NewArray", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
					new_found = 1;
				}
		}
		/* If an array has active < raid && spare == 0 && spare_group != NULL
		 * Look for another array with spare > 0 and active == raid and same spare_group
		 *  if found, choose a device and hotremove/hotadd
		 */
		for (st = statelist; st; st=st->next)
			if (st->active < st->raid &&
			    st->spare == 0 &&
			    st->spare_group != NULL) {
				struct state *st2;
				for (st2=statelist ; st2 ; st2=st2->next)
					if (st2 != st &&
					    st2->spare > 0 &&
					    st2->active == st2->raid &&
					    st2->spare_group != NULL &&
					    strcmp(st->spare_group, st2->spare_group) == 0) {
						/* try to remove and add */
						int fd1 = open(st->devname, O_RDONLY);
						int fd2 = open(st2->devname, O_RDONLY);
						int dev = -1;
						int d;
						if (fd1 < 0 || fd2 < 0) {
							if (fd1>=0) close(fd1);
							if (fd2>=0) close(fd2);
							continue;
						}
						/* pick the first idle device
						 * beyond the raid slots
						 */
						for (d=st2->raid; d < MaxDisks; d++) {
							if (st2->devid[d] > 0 &&
							    st2->devstate[d] == 0) {
								dev = st2->devid[d];
								break;
							}
						}
						if (dev > 0) {
							struct mddev_dev_s devlist;
							char devname[20];
							devlist.next = NULL;
							devlist.used = 0;
							devlist.re_add = 0;
							devlist.writemostly = 0;
							devlist.devname = devname;
							sprintf(devname, "%d:%d", major(dev), minor(dev));

							devlist.disposition = 'r';
							if (Manage_subdevs(st2->devname, fd2, &devlist, -1, 0) == 0) {
								devlist.disposition = 'a';
								if (Manage_subdevs(st->devname, fd1, &devlist, -1, 0) == 0) {
									alert("MoveSpare", st->devname, st2->devname, mailaddr, mailfrom, alert_cmd, dosyslog);
									close(fd1);
									close(fd2);
									break;
								}
								/* adding failed: give the
								 * spare back to its donor
								 */
								else Manage_subdevs(st2->devname, fd2, &devlist, -1, 0);
							}
						}
						close(fd1);
						close(fd2);
					}
			}
		if (!new_found) {
			if (oneshot)
				break;
			else
				mdstat_wait(period);
		}
		/* test messages are only sent during the first pass */
		test = 0;
	}
	if (pidfile)
		unlink(pidfile);
	return 0;
}
示例#4
0
/*
 * Monitor - watch md arrays and raise alerts when their state changes.
 *
 * devlist    : arrays to watch; when NULL the list is taken from the
 *              configuration via conf_get_ident(NULL)
 * mailaddr   : alert mail address; when NULL conf_get_mailaddr() is used
 * alert_cmd  : alert program; when NULL conf_get_program() is used
 * c          : run-time context; the scan, test, delay and prefer members
 *              are read here, and test is cleared after the first pass
 * daemonise  : when set, make_daemon(pidfile) is called and a result >= 0
 *              is returned to the caller immediately
 * oneshot    : when set, stop after a pass in which no new array was found
 * dosyslog   : stored in the alert information; also counts as an alert
 *              destination in scan mode
 * pidfile    : handed to make_daemon(); unlinked on return
 * increments : passed through to check_array()
 * share      : when set, check_one_sharer() must succeed and spare
 *              migration is attempted whenever an array is degraded
 *
 * Returns 0 after the monitoring loop ends, 1 when there is no alert
 * destination in scan mode or check_one_sharer() reports a problem.
 */
int Monitor(struct mddev_dev *devlist,
	    char *mailaddr, char *alert_cmd,
	    struct context *c,
	    int daemonise, int oneshot,
	    int dosyslog, char *pidfile, int increments,
	    int share)
{
	/*
	 * Every few seconds, scan every md device looking for changes
	 * When a change is found, log it, possibly run the alert command,
	 * and possibly send Email
	 *
	 * For each array, we record:
	 *   Update time
	 *   active/working/failed/spare drives
	 *   State of each device.
	 *   %rebuilt if rebuilding
	 *
	 * If the update time changes, check out all the data again
	 * It is possible that we cannot get the state of each device
	 * due to bugs in the md kernel module.
	 * We also read /proc/mdstat to get rebuild percent,
	 * and to get state on all active devices in case of kernel bug.
	 *
	 * Events are:
	 *    Fail
	 *	An active device had Faulty set or Active/Sync removed
	 *    FailSpare
	 *      A spare device had Faulty set
	 *    SpareActive
	 *      An active device had a reverse transition
	 *    RebuildStarted
	 *      percent went from -1 to +ve
	 *    RebuildNN
	 *      percent went from below to not-below NN%
	 *    DeviceDisappeared
	 *      Couldn't access a device which was previously visible
	 *
	 * if we detect an array with active<raid and spare==0
	 * we look at other arrays that have same spare-group
	 * If we find one with active==raid and spare>0,
	 *  and if we can get_disk_info and find a name
	 *  Then we hot-remove and hot-add to the other array
	 *
	 * If devlist is NULL, then we can monitor everything because --scan
	 * was given.  We get an initial list from config file and add anything
	 * that appears in /proc/mdstat
	 */

	struct state *statelist = NULL;
	struct state *st2;
	int finished = 0;
	struct mdstat_ent *mdstat = NULL;
	char *mailfrom = NULL;
	struct alert_info info;

	if (!mailaddr) {
		mailaddr = conf_get_mailaddr();
		if (mailaddr && ! c->scan)
			pr_err("Monitor using email address \"%s\" from config file\n",
			       mailaddr);
	}
	mailfrom = conf_get_mailfrom();

	if (!alert_cmd) {
		alert_cmd = conf_get_program();
		if (alert_cmd && ! c->scan)
			pr_err("Monitor using program \"%s\" from config file\n",
			       alert_cmd);
	}
	if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
		pr_err("No mail address or alert command - not monitoring.\n");
		return 1;
	}
	info.alert_cmd = alert_cmd;
	info.mailaddr = mailaddr;
	info.mailfrom = mailfrom;
	info.dosyslog = dosyslog;

	if (daemonise) {
		/* NOTE(review): a negative result presumably identifies the
		 * process that keeps monitoring - confirm in make_daemon().
		 */
		int rv = make_daemon(pidfile);
		if (rv >= 0)
			return rv;
	}

	if (share)
		if (check_one_sharer(c->scan))
			return 1;

	if (devlist == NULL) {
		struct mddev_ident *mdlist = conf_get_ident(NULL);
		for (; mdlist; mdlist=mdlist->next) {
			struct state *st;
			if (mdlist->devname == NULL)
				continue;
			if (strcasecmp(mdlist->devname, "<ignore>") == 0)
				continue;
			st = xcalloc(1, sizeof *st);
			if (mdlist->devname[0] == '/')
				st->devname = xstrdup(mdlist->devname);
			else {
				/* Relative names live under /dev/md/.  The name
				 * is formatted in one step: chaining strcpy()
				 * calls would restart at the beginning of the
				 * buffer (strcpy() returns its destination) and
				 * wipe out the prefix.
				 */
				st->devname = xmalloc(8+strlen(mdlist->devname)+1);
				sprintf(st->devname, "/dev/md/%s",
					mdlist->devname);
			}
			st->next = statelist;
			st->devnm[0] = 0;
			st->percent = RESYNC_UNKNOWN;
			st->from_config = 1;
			st->expected_spares = mdlist->spare_disks;
			if (mdlist->spare_group)
				st->spare_group = xstrdup(mdlist->spare_group);
			statelist = st;
		}
	} else {
		struct mddev_dev *dv;
		for (dv=devlist ; dv; dv=dv->next) {
			struct mddev_ident *mdlist = conf_get_ident(dv->devname);
			struct state *st = xcalloc(1, sizeof *st);
			st->devname = xstrdup(dv->devname);
			st->next = statelist;
			st->devnm[0] = 0;
			st->percent = RESYNC_UNKNOWN;
			st->expected_spares = -1;
			if (mdlist) {
				st->expected_spares = mdlist->spare_disks;
				if (mdlist->spare_group)
					st->spare_group = xstrdup(mdlist->spare_group);
			}
			statelist = st;
		}
	}

	while (! finished) {
		int new_found = 0;
		struct state *st, **stp;
		int anydegraded = 0;

		if (mdstat)
			free_mdstat(mdstat);
		mdstat = mdstat_read(oneshot?0:1, 0);

		for (st=statelist; st; st=st->next)
			if (check_array(st, mdstat, c->test, &info,
					increments, c->prefer))
				anydegraded = 1;

		/* now check if there are any new devices found in mdstat */
		if (c->scan)
			new_found = add_new_arrays(mdstat, &statelist, c->test,
						   &info);

		/* If an array has active < raid && spare == 0 && spare_group != NULL
		 * Look for another array with spare > 0 and active == raid and same spare_group
		 *  if found, choose a device and hotremove/hotadd
		 */
		if (share && anydegraded)
			try_spare_migration(statelist, &info);
		if (!new_found) {
			if (oneshot)
				break;
			else
				mdstat_wait(c->delay);
		}
		/* test alerts are only wanted during the first pass */
		c->test = 0;

		/* drop automatically added arrays that keep failing */
		for (stp = &statelist; (st = *stp) != NULL; ) {
			if (st->from_auto && st->err > 5) {
				*stp = st->next;
				free(st->devname);
				free(st->spare_group);
				free(st);
			} else
				stp = &st->next;
		}
	}
	/* Release the whole watch list, including the strings each entry
	 * owns, exactly as the in-loop removal above does.
	 */
	for (st2 = statelist; st2; st2 = statelist) {
		statelist = st2->next;
		free(st2->devname);
		free(st2->spare_group);
		free(st2);
	}

	if (pidfile)
		unlink(pidfile);
	return 0;
}
示例#5
0
/*
 * Manage_runstop - start or stop an already configured md array.
 *
 * devname : name of the array, used in messages and to re-open the device
 * fd      : open descriptor for the array; when stopping it is closed and
 *           replaced by an exclusive open of devname
 * runstop : > 0 runs the array, < 0 stops it, 0 does nothing
 * quiet   : 0 prints failure messages, > 0 means really be quiet,
 *           < 0 means the caller will try again if the call fails
 *
 * Requires md driver 0.90.0 or later.
 * Returns 0 on success and 1 on failure.
 */
int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
	mdu_param_t param; /* unused */

	if (runstop == -1 && md_get_version(fd) < 9000) {
		if (ioctl(fd, STOP_MD, 0)) {
			if (quiet == 0) fprintf(stderr,
						Name ": stopping device %s "
						"failed: %s\n",
						devname, strerror(errno));
			return 1;
		}
	}

	if (md_get_version(fd) < 9000) {
		fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
		return 1;
	}

	if (runstop>0) {
		if (ioctl(fd, RUN_ARRAY, &param)) {
			fprintf(stderr, Name ": failed to run array %s: %s\n",
				devname, strerror(errno));
			return 1;
		}
		if (quiet <= 0)
			fprintf(stderr, Name ": started %s\n", devname);
	} else if (runstop < 0){
		struct map_ent *map = NULL;
		struct stat stb;
		struct mdinfo *mdi;
		int devnum;
		int err;
		int count;
		/* If this is an mdmon managed array, just write 'inactive'
		 * to the array state and let mdmon clear up.
		 */
		devnum = fd2devnum(fd);
		/* Get EXCL access first.  If this fails, then attempting
		 * to stop is probably a bad idea.
		 */
		close(fd);
		fd = open(devname, O_RDONLY|O_EXCL);
		if (fd < 0 || fd2devnum(fd) != devnum) {
			if (fd >= 0)
				close(fd);
			fprintf(stderr,
				Name ": Cannot get exclusive access to %s:"
				"Perhaps a running "
				"process, mounted filesystem "
				"or active volume group?\n",
				devname);
			return 1;
		}
		mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
		if (mdi &&
		    mdi->array.level > 0 &&
		    is_subarray(mdi->text_version)) {
			/* This is mdmon managed. */
			close(fd);

			/* retry for a while as long as the array is busy */
			count = 25;
			while (count &&
			       (err = sysfs_set_str(mdi, NULL,
						    "array_state",
						    "inactive")) < 0
			       && errno == EBUSY) {
				usleep(200000);
				count--;
			}
			if (err) {
				/* 'quiet' only silences the message; the
				 * failure itself must always be reported to
				 * the caller.
				 */
				if (!quiet)
					fprintf(stderr, Name
						": failed to stop array %s: %s\n",
						devname, strerror(errno));
				sysfs_free(mdi);
				return 1;
			}

			/* Give monitor a chance to act */
			ping_monitor(mdi->text_version);

			fd = open_dev_excl(devnum);
			if (fd < 0) {
				fprintf(stderr, Name
					": failed to completely stop %s"
					": Device is busy\n",
					devname);
				sysfs_free(mdi);
				return 1;
			}
		} else if (mdi &&
			   mdi->array.major_version == -1 &&
			   mdi->array.minor_version == -2 &&
			   !is_subarray(mdi->text_version)) {
			struct mdstat_ent *mds, *m;
			/* container, possibly mdmon-managed.
			 * Make sure mdmon isn't opening it, which
			 * would interfere with the 'stop'
			 */
			ping_monitor(mdi->sys_name);

			/* now check that there are no existing arrays
			 * which are members of this array
			 */
			mds = mdstat_read(0, 0);
			for (m=mds; m; m=m->next)
				if (m->metadata_version &&
				    strncmp(m->metadata_version, "external:", 9)==0 &&
				    is_subarray(m->metadata_version+9) &&
				    devname2devnum(m->metadata_version+10) == devnum) {
					if (!quiet)
						fprintf(stderr, Name
							": Cannot stop container %s: "
							"member %s still active\n",
							devname, m->dev);
					free_mdstat(mds);
					if (mdi)
						sysfs_free(mdi);
					return 1;
				}
			/* no active member: the list is no longer needed */
			if (mds)
				free_mdstat(mds);
		}

		/* As we have an O_EXCL open, any use of the device
		 * which blocks STOP_ARRAY is probably a transient use,
		 * so it is reasonable to retry for a while - 5 seconds.
		 */
		count = 25; err = 0;
		while (count && fd >= 0
		       && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
		       && errno == EBUSY) {
			usleep(200000);
			count --;
		}
		if (fd >= 0 && err) {
			if (quiet == 0) {
				fprintf(stderr, Name
					": failed to stop array %s: %s\n",
					devname, strerror(errno));
				if (errno == EBUSY)
					fprintf(stderr, "Perhaps a running "
						"process, mounted filesystem "
						"or active volume group?\n");
			}
			if (mdi)
				sysfs_free(mdi);
			return 1;
		}
		/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
		 * was stopped, so We'll do it here just to be sure.  Drop any
		 * partitions as well...
		 */
		if (fd >= 0)
			ioctl(fd, BLKRRPART, 0);
		if (mdi)
			sysfs_uevent(mdi, "change");

		/* remove the device nodes ourselves when /dev/.udev does not
		 * exist or MDADM_NO_UDEV is set
		 */
		if (devnum != NoMdDev &&
		    (stat("/dev/.udev", &stb) != 0 ||
		     check_env("MDADM_NO_UDEV"))) {
			struct map_ent *mp = map_by_devnum(&map, devnum);
			remove_devices(devnum, mp ? mp->path : NULL);
		}


		if (quiet <= 0)
			fprintf(stderr, Name ": stopped %s\n", devname);
		map_lock(&map);
		map_remove(&map, devnum);
		map_unlock(&map);

		/* the sysfs description is not needed any more */
		if (mdi)
			sysfs_free(mdi);
	}
	return 0;
}