Пример #1
0
/*
 * Bring up every array listed in the configuration file.
 *
 * For each ARRAY entry returned by conf_get_ident(NULL) (entries named
 * "<ignore>" are skipped): if the device can be opened and answers
 * GET_ARRAY_INFO it is already running, so it is only switched to
 * read-write via Manage_ro(); otherwise Assemble() is called for it.
 *
 * argc/argv are not used.  The result is accumulated in 'rv' and the
 * descriptor is kept in 'mdfd'; both are declared outside this function.
 *
 * Returns rv: 1 if the configuration lists no arrays, otherwise the OR of
 * the Manage_ro()/Assemble() results.
 */
int main(int argc, char *argv[])
{
	struct mddev_ident *array_list =  conf_get_ident(NULL);
	struct context c = { .freeze_reshape = 1 };
	if (!array_list) {
		pr_err("No arrays found in config file\n");
		rv = 1;
	} else
		for (; array_list; array_list = array_list->next) {
			mdu_array_info_t array;
			if (strcasecmp(array_list->devname, "<ignore>") == 0)
				continue;
			mdfd = open_mddev(array_list->devname, 0);
			if (mdfd >= 0 && ioctl(mdfd, GET_ARRAY_INFO, &array) == 0) {
				/* already running: just make it readwrite */
				rv |= Manage_ro(array_list->devname, mdfd, -1);
				/* release the descriptor on this path too, so
				 * one is not leaked per running array */
				close(mdfd);
				continue;
			}
			if (mdfd >= 0)
				close(mdfd);
			rv |= Assemble(array_list->st, array_list->devname,
				       array_list, NULL, &c);
		}
	return rv;
}
Пример #2
0
/*
 * Monitor - watch md arrays and raise alerts when their state changes.
 *
 * devlist:   arrays to watch; when NULL the entries returned by
 *            conf_get_ident(config, NULL) are watched instead.
 * mailaddr:  address handed to alert(); when NULL it is looked up with
 *            conf_get_mailaddr(config).
 * alert_cmd: program handed to alert(); when NULL it is looked up with
 *            conf_get_program(config).
 * period:    handed to mdstat_wait() between polling passes.
 * daemonise: when non-zero, fork(); the parent reports the child's pid (on
 *            stdout, or in 'pidfile' when given) and returns 0.
 * scan:      when non-zero, arrays that show up in the mdstat list and are
 *            not yet tracked are added to the watch list.
 * oneshot:   when non-zero, stop after the first pass that adds no array.
 * dosyslog:  forwarded to alert().
 * config:    forwarded to the conf_get_*() helpers.
 * test:      when non-zero, a "TestMessage" alert is sent for every
 *            tracked array during the first pass.
 * pidfile:   optional pid file; written by the parent when daemonising and
 *            unlinked when the monitoring loop ends.
 *
 * Returns 0 normally, 1 when scanning with neither a mail address nor an
 * alert command, or when fork() fails.
 */
int Monitor(mddev_dev_t devlist,
            char *mailaddr, char *alert_cmd,
            int period, int daemonise, int scan, int oneshot,
            int dosyslog, char *config, int test, char* pidfile)
{
    /*
     * Every few seconds, scan every md device looking for changes
     * When a change is found, log it, possibly run the alert command,
     * and possibly send Email
     *
     * For each array, we record:
     *   Update time
     *   active/working/failed/spare drives
     *   State of each device.
     *   %rebuilt if rebuilding
     *
     * If the update time changes, check out all the data again
     * It is possible that we cannot get the state of each device
     * due to bugs in the md kernel module.
     * We also read /proc/mdstat to get rebuild percent,
     * and to get state on all active devices in case of kernel bug.
     *
     * Events are:
     *    Fail
     *      An active device had Faulty set or Active/Sync removed
     *    FailSpare
     *      A spare device had Faulty set
     *    SpareActive
     *      An active device had a reverse transition
     *    RebuildStarted
     *      percent went from -1 to +ve
     *    Rebuild20 Rebuild40 Rebuild60 Rebuild80
     *      percent went from below to not-below that number
     *    DeviceDisappeared
     *      Couldn't access a device which was previously visible
     *
     * if we detect an array with active<raid and spare==0
     * we look at other arrays that have same spare-group
     * If we find one with active==raid and spare>0,
     *  and if we can get_disk_info and find a name
     *  Then we hot-remove and hot-add to the other array
     *
     * If devlist is NULL, then we can monitor everything because --scan
     * was given.  We get an initial list from config file and add anything
     * that appears in /proc/mdstat
     */

    struct state {
        char *devname;
        int devnum;	/* to sync with mdstat info */
        long utime;
        int err;
        char *spare_group;
        int active, working, failed, spare, raid;
        int expected_spares;
        int devstate[MD_SB_DISKS];
        int devid[MD_SB_DISKS];
        int percent;
        struct state *next;
    } *statelist = NULL;
    int finished = 0;	/* never set: the loop only ends via 'break' */
    struct mdstat_ent *mdstat = NULL;

    if (!mailaddr) {
        mailaddr = conf_get_mailaddr(config);
        if (mailaddr && ! scan)
            fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
                    mailaddr);
    }
    if (!alert_cmd) {
        alert_cmd = conf_get_program(config);
        if (alert_cmd && ! scan)
            fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
                    alert_cmd);
    }
    if (scan && !mailaddr && !alert_cmd) {
        fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
        return 1;
    }

    if (daemonise) {
        int pid = fork();
        if (pid > 0) {
            /* parent: report the child's pid and leave */
            if (!pidfile)
                printf("%d\n", pid);
            else {
                FILE *pid_file;
                pid_file=fopen(pidfile, "w");
                if (!pid_file)
                    perror("cannot create pid file");
                else {
                    fprintf(pid_file,"%d\n", pid);
                    fclose(pid_file);
                }
            }
            return 0;
        }
        if (pid < 0) {
            perror("daemonise");
            return 1;
        }
        close(0);
        /* O_RDWR (not the invalid access mode 3) so that the copies
         * placed on stdout and stderr below can be written to.
         */
        open("/dev/null", O_RDWR);
        dup2(0,1);
        dup2(0,2);
        setsid();
    }

    if (devlist == NULL) {
        mddev_ident_t mdlist = conf_get_ident(config, NULL);
        for (; mdlist; mdlist=mdlist->next) {
            /* calloc: the counters, devstate[] and devid[] are read
             * by the polling loop before they are first stored.
             */
            struct state *st = calloc(1, sizeof *st);
            if (st == NULL)
                continue;
            st->devname = strdup(mdlist->devname);
            if (st->devname == NULL) {
                free(st);
                continue;
            }
            st->utime = 0;
            st->next = statelist;
            st->err = 0;
            st->devnum = MAXINT;
            st->percent = -2;
            st->expected_spares = mdlist->spare_disks;
            if (mdlist->spare_group)
                st->spare_group = strdup(mdlist->spare_group);
            else
                st->spare_group = NULL;
            statelist = st;
        }
    } else {
        mddev_dev_t dv;
        for (dv=devlist ; dv; dv=dv->next) {
            mddev_ident_t mdlist = conf_get_ident(config, dv->devname);
            struct state *st = calloc(1, sizeof *st);
            if (st == NULL)
                continue;
            st->devname = strdup(dv->devname);
            if (st->devname == NULL) {
                free(st);
                continue;
            }
            st->utime = 0;
            st->next = statelist;
            st->err = 0;
            st->devnum = MAXINT;
            st->percent = -2;
            st->expected_spares = -1;
            st->spare_group = NULL;
            if (mdlist) {
                st->expected_spares = mdlist->spare_disks;
                if (mdlist->spare_group)
                    st->spare_group = strdup(mdlist->spare_group);
            }
            statelist = st;
        }
    }


    while (! finished) {
        int new_found = 0;
        struct state *st;

        if (mdstat)
            free_mdstat(mdstat);
        mdstat = mdstat_read(oneshot?0:1, 0);

        for (st=statelist; st; st=st->next) {
            mdu_array_info_t array;
            struct mdstat_ent *mse = NULL, *mse2;
            char *dev = st->devname;
            int fd;
            unsigned int i;

            if (test)
                alert("TestMessage", dev, NULL, mailaddr, alert_cmd, dosyslog);
            fd = open(dev, O_RDONLY);
            if (fd < 0) {
                /* only report the first failure in a row */
                if (!st->err)
                    alert("DeviceDisappeared", dev, NULL,
                          mailaddr, alert_cmd, dosyslog);
                st->err=1;
                continue;
            }
            if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
                if (!st->err)
                    alert("DeviceDisappeared", dev, NULL,
                          mailaddr, alert_cmd, dosyslog);
                st->err=1;
                close(fd);
                continue;
            }
            if (array.level != 1 && array.level != 5 && array.level != -4 &&
                    array.level != 6 && array.level != 10) {
                if (!st->err)
                    alert("DeviceDisappeared", dev, "Wrong-Level",
                          mailaddr, alert_cmd, dosyslog);
                st->err = 1;
                close(fd);
                continue;
            }
            if (st->devnum == MAXINT) {
                /* first successful open: derive the number used to
                 * find this array in the mdstat list
                 */
                struct stat stb;
                if (fstat(fd, &stb) == 0 &&
                        (S_IFMT&stb.st_mode)==S_IFBLK) {
                    if (major(stb.st_rdev) == MD_MAJOR)
                        st->devnum = minor(stb.st_rdev);
                    else
                        st->devnum = -1- (minor(stb.st_rdev)>>6);
                }
            }

            for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
                if (mse2->devnum == st->devnum) {
                    mse2->devnum = MAXINT; /* flag it as "used" */
                    mse = mse2;
                }

            /* nothing changed since the last pass */
            if (st->utime == array.utime &&
                    st->failed == array.failed_disks &&
                    st->working == array.working_disks &&
                    st->spare == array.spare_disks &&
                    (mse == NULL  || (
                         mse->percent == st->percent
                     ))) {
                close(fd);
                st->err = 0;
                continue;
            }
            if (st->utime == 0 && /* new array */
                    mse &&	/* is in /proc/mdstat */
                    mse->pattern && strchr(mse->pattern, '_') /* degraded */
               )
                alert("DegradedArray", dev, NULL, mailaddr, alert_cmd, dosyslog);

            if (st->utime == 0 && /* new array */
                    st->expected_spares > 0 &&
                    array.spare_disks < st->expected_spares)
                alert("SparesMissing", dev, NULL, mailaddr, alert_cmd, dosyslog);
            if (mse &&
                    st->percent == -1 &&
                    mse->percent >= 0)
                alert("RebuildStarted", dev, NULL, mailaddr, alert_cmd, dosyslog);
            if (mse &&
                    st->percent >= 0 &&
                    mse->percent >= 0 &&
                    (mse->percent / 20) > (st->percent / 20))
                alert(percentalerts[mse->percent/20],
                      dev, NULL, mailaddr, alert_cmd, dosyslog);

            if (mse &&
                    mse->percent == -1 &&
                    st->percent >= 0)
                alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd, dosyslog);

            if (mse)
                st->percent = mse->percent;

            for (i=0; i<MD_SB_DISKS; i++) {
                /* zeroed so that major/minor/state are well defined
                 * when GET_DISK_INFO fails
                 */
                mdu_disk_info_t disc = {0};
                int newstate=0;
                int change;
                char *dv = NULL;
                disc.number = i;
                if (ioctl(fd, GET_DISK_INFO, &disc)>= 0) {
                    newstate = disc.state;
                    dv = map_dev(disc.major, disc.minor);
                } else if (mse &&  mse->pattern && i < strlen(mse->pattern))
                    /* fall back on the mdstat pattern */
                    switch(mse->pattern[i]) {
                    case 'U':
                        newstate = 6 /* ACTIVE/SYNC */;
                        break;
                    case '_':
                        newstate = 0;
                        break;
                    default:
                        break;
                    }
                if (dv == NULL && st->devid[i])
                    dv = map_dev(major(st->devid[i]),
                                 minor(st->devid[i]));
                change = newstate ^ st->devstate[i];
                if (st->utime && change && !st->err) {
                    if (i < (unsigned)array.raid_disks &&
                            (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
                             ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
                             ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
                       )
                        alert("Fail", dev, dv, mailaddr, alert_cmd, dosyslog);
                    else if (i >= (unsigned)array.raid_disks &&
                             (disc.major || disc.minor) &&
                             st->devid[i] == makedev(disc.major, disc.minor) &&
                             ((newstate&change)&(1<<MD_DISK_FAULTY))
                            )
                        alert("FailSpare", dev, dv, mailaddr, alert_cmd, dosyslog);
                    else if (i < (unsigned)array.raid_disks &&
                             (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
                              ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
                              ((newstate&change)&(1<<MD_DISK_SYNC)))
                            )
                        alert("SpareActive", dev, dv, mailaddr, alert_cmd, dosyslog);
                }
                /* remember the state the comparison above was made
                 * against, including one taken from the mdstat pattern
                 */
                st->devstate[i] = newstate;
                st->devid[i] = makedev(disc.major, disc.minor);
            }
            close(fd);
            st->active = array.active_disks;
            st->working = array.working_disks;
            st->spare = array.spare_disks;
            st->failed = array.failed_disks;
            st->utime = array.utime;
            st->raid = array.raid_disks;
            st->err = 0;
        }
        /* now check if there are any new devices found in mdstat */
        if (scan) {
            struct mdstat_ent *mse;
            for (mse=mdstat; mse; mse=mse->next)
                if (mse->devnum != MAXINT &&
                        mse->level != NULL &&
                        (strcmp(mse->level, "raid1")==0 ||
                         strcmp(mse->level, "raid5")==0 ||
                         strcmp(mse->level, "multipath")==0)
                   ) {
                    struct state *st = calloc(1, sizeof *st);
                    mdu_array_info_t array;
                    char *name;
                    int fd;
                    if (st == NULL)
                        continue;
                    name = get_md_name(mse->devnum);
                    st->devname = name ? strdup(name) : NULL;
                    if (st->devname == NULL) {
                        /* no usable name for this array */
                        free(st);
                        continue;
                    }
                    if ((fd = open(st->devname, O_RDONLY)) < 0 ||
                            ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
                        /* no such array */
                        if (fd >=0) close(fd);
                        free(st->devname);
                        free(st);
                        continue;
                    }
                    close(fd);
                    st->utime = 0;
                    st->next = statelist;
                    st->err = 1;
                    st->devnum = mse->devnum;
                    st->percent = -2;
                    st->spare_group = NULL;
                    st->expected_spares = -1;
                    statelist = st;
                    alert("NewArray", st->devname, NULL, mailaddr, alert_cmd, dosyslog);
                    new_found = 1;
                }
        }
        /* If an array has active < raid && spare == 0 && spare_group != NULL
         * Look for another array with spare > 0 and active == raid and same spare_group
         *  if found, choose a device and hotremove/hotadd
         */
        for (st = statelist; st; st=st->next)
            if (st->active < st->raid &&
                    st->spare == 0 &&
                    st->spare_group != NULL) {
                struct state *st2;
                for (st2=statelist ; st2 ; st2=st2->next)
                    if (st2 != st &&
                            st2->spare > 0 &&
                            st2->active == st2->raid &&
                            st2->spare_group != NULL &&
                            strcmp(st->spare_group, st2->spare_group) == 0) {
                        /* try to remove and add */
                        int fd1 = open(st->devname, O_RDONLY);
                        int fd2 = open(st2->devname, O_RDONLY);
                        int dev = -1;
                        int d;
                        if (fd1 < 0 || fd2 < 0) {
                            if (fd1>=0) close(fd1);
                            if (fd2>=0) close(fd2);
                            continue;
                        }
                        for (d=st2->raid; d<MD_SB_DISKS; d++) {
                            if (st2->devid[d] > 0 &&
                                    st2->devstate[d] == 0) {
                                dev = st2->devid[d];
                                break;
                            }
                        }
                        if (dev > 0) {
                            if (ioctl(fd2, HOT_REMOVE_DISK,
                                      (unsigned long)dev) == 0) {
                                if (ioctl(fd1, HOT_ADD_DISK,
                                          (unsigned long)dev) == 0) {
                                    alert("MoveSpare", st->devname, st2->devname, mailaddr, alert_cmd, dosyslog);
                                    close(fd1);
                                    close(fd2);
                                    break;
                                }
                                /* could not add it to the degraded
                                 * array: give it back to the donor
                                 */
                                else ioctl(fd2, HOT_ADD_DISK, (unsigned long) dev);
                            }
                        }
                        close(fd1);
                        close(fd2);
                    }
            }
        if (!new_found) {
            if (oneshot)
                break;
            else
                mdstat_wait(period);
        }
        test = 0;
    }
    if (pidfile)
        unlink(pidfile);
    return 0;
}
Пример #3
0
/*
 * Monitor - watch md arrays and raise alerts when their state changes.
 *
 * devlist:    arrays to watch; when NULL the entries returned by
 *             conf_get_ident(NULL) are watched instead.
 * mailaddr:   alert address; when NULL it is taken from conf_get_mailaddr().
 * alert_cmd:  alert program; when NULL it is taken from conf_get_program().
 * c:          run-time context; this function reads c->scan, c->test,
 *             c->delay and c->prefer, and clears c->test after each pass.
 * daemonise:  when non-zero, make_daemon(pidfile) is called and a
 *             non-negative result from it is returned immediately.
 * oneshot:    when non-zero, stop after the first pass that adds no array.
 * dosyslog:   stored in the alert_info handed to the helpers; also counts
 *             as an alert destination for the "not monitoring" check.
 * pidfile:    handed to make_daemon(); unlinked when the loop ends.
 * increments: forwarded to check_array().
 * share:      when non-zero, check_one_sharer() is consulted first and
 *             try_spare_migration() runs whenever check_array() reports a
 *             degraded array.
 *
 * Returns 0 when the loop ends, 1 when scanning with no alert destination
 * or when check_one_sharer() returns non-zero, or the non-negative result
 * of make_daemon().
 */
int Monitor(struct mddev_dev *devlist,
	    char *mailaddr, char *alert_cmd,
	    struct context *c,
	    int daemonise, int oneshot,
	    int dosyslog, char *pidfile, int increments,
	    int share)
{
	/*
	 * Every few seconds, scan every md device looking for changes
	 * When a change is found, log it, possibly run the alert command,
	 * and possibly send Email
	 *
	 * For each array, we record:
	 *   Update time
	 *   active/working/failed/spare drives
	 *   State of each device.
	 *   %rebuilt if rebuilding
	 *
	 * If the update time changes, check out all the data again
	 * It is possible that we cannot get the state of each device
	 * due to bugs in the md kernel module.
	 * We also read /proc/mdstat to get rebuild percent,
	 * and to get state on all active devices in case of kernel bug.
	 *
	 * Events are:
	 *    Fail
	 *	An active device had Faulty set or Active/Sync removed
	 *    FailSpare
	 *      A spare device had Faulty set
	 *    SpareActive
	 *      An active device had a reverse transition
	 *    RebuildStarted
	 *      percent went from -1 to +ve
	 *    RebuildNN
	 *      percent went from below to not-below NN%
	 *    DeviceDisappeared
	 *      Couldn't access a device which was previously visible
	 *
	 * if we detect an array with active<raid and spare==0
	 * we look at other arrays that have same spare-group
	 * If we find one with active==raid and spare>0,
	 *  and if we can get_disk_info and find a name
	 *  Then we hot-remove and hot-add to the other array
	 *
	 * If devlist is NULL, then we can monitor everything because --scan
	 * was given.  We get an initial list from config file and add anything
	 * that appears in /proc/mdstat
	 */

	struct state *statelist = NULL;
	struct state *st2;
	int finished = 0;	/* never set: the loop only ends via 'break' */
	struct mdstat_ent *mdstat = NULL;
	char *mailfrom = NULL;
	struct alert_info info;

	if (!mailaddr) {
		mailaddr = conf_get_mailaddr();
		if (mailaddr && ! c->scan)
			pr_err("Monitor using email address \"%s\" from config file\n",
			       mailaddr);
	}
	mailfrom = conf_get_mailfrom();

	if (!alert_cmd) {
		alert_cmd = conf_get_program();
		if (alert_cmd && ! c->scan)
			pr_err("Monitor using program \"%s\" from config file\n",
			       alert_cmd);
	}
	if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
		pr_err("No mail address or alert command - not monitoring.\n");
		return 1;
	}
	info.alert_cmd = alert_cmd;
	info.mailaddr = mailaddr;
	info.mailfrom = mailfrom;
	info.dosyslog = dosyslog;

	if (daemonise) {
		/* a non-negative result ends this call here; presumably that
		 * is the parent (or a failure) - confirm against make_daemon()
		 */
		int rv = make_daemon(pidfile);
		if (rv >= 0)
			return rv;
	}

	if (share)
		if (check_one_sharer(c->scan))
			return 1;

	if (devlist == NULL) {
		struct mddev_ident *mdlist = conf_get_ident(NULL);
		for (; mdlist; mdlist=mdlist->next) {
			struct state *st;
			if (mdlist->devname == NULL)
				continue;
			if (strcasecmp(mdlist->devname, "<ignore>") == 0)
				continue;
			st = xcalloc(1, sizeof *st);
			if (mdlist->devname[0] == '/')
				st->devname = xstrdup(mdlist->devname);
			else {
				/* Relative names are placed under /dev/md/.
				 * strcpy() returns its destination, so the
				 * name has to be copied in *after* the prefix
				 * rather than through a nested strcpy(), which
				 * would overwrite the prefix.
				 */
				static const char md_dir[] = "/dev/md/";
				size_t dirlen = sizeof(md_dir) - 1;

				st->devname = xmalloc(dirlen +
						      strlen(mdlist->devname) + 1);
				strcpy(st->devname, md_dir);
				strcpy(st->devname + dirlen, mdlist->devname);
			}
			st->next = statelist;
			st->devnm[0] = 0;
			st->percent = RESYNC_UNKNOWN;
			st->from_config = 1;
			st->expected_spares = mdlist->spare_disks;
			if (mdlist->spare_group)
				st->spare_group = xstrdup(mdlist->spare_group);
			statelist = st;
		}
	} else {
		struct mddev_dev *dv;
		for (dv=devlist ; dv; dv=dv->next) {
			struct mddev_ident *mdlist = conf_get_ident(dv->devname);
			struct state *st = xcalloc(1, sizeof *st);
			st->devname = xstrdup(dv->devname);
			st->next = statelist;
			st->devnm[0] = 0;
			st->percent = RESYNC_UNKNOWN;
			st->expected_spares = -1;
			if (mdlist) {
				st->expected_spares = mdlist->spare_disks;
				if (mdlist->spare_group)
					st->spare_group = xstrdup(mdlist->spare_group);
			}
			statelist = st;
		}
	}

	while (! finished) {
		int new_found = 0;
		struct state *st, **stp;
		int anydegraded = 0;

		if (mdstat)
			free_mdstat(mdstat);
		mdstat = mdstat_read(oneshot?0:1, 0);

		for (st=statelist; st; st=st->next)
			if (check_array(st, mdstat, c->test, &info,
					increments, c->prefer))
				anydegraded = 1;

		/* now check if there are any new devices found in mdstat */
		if (c->scan)
			new_found = add_new_arrays(mdstat, &statelist, c->test,
						   &info);

		/* If an array has active < raid && spare == 0 && spare_group != NULL
		 * Look for another array with spare > 0 and active == raid and same spare_group
		 *  if found, choose a device and hotremove/hotadd
		 */
		if (share && anydegraded)
			try_spare_migration(statelist, &info);
		if (!new_found) {
			if (oneshot)
				break;
			else
				mdstat_wait(c->delay);
		}
		c->test = 0;

		/* drop entries flagged from_auto once err has exceeded 5 */
		for (stp = &statelist; (st = *stp) != NULL; ) {
			if (st->from_auto && st->err > 5) {
				*stp = st->next;
				free(st->devname);
				free(st->spare_group);
				free(st);
			} else
				stp = &st->next;
		}
	}
	/* release the watch list, including the strings each entry owns
	 * (same as the pruning loop above; free(NULL) is harmless)
	 */
	for (st2 = statelist; st2; st2 = statelist) {
		statelist = st2->next;
		free(st2->devname);
		free(st2->spare_group);
		free(st2);
	}

	if (pidfile)
		unlink(pidfile);
	return 0;
}
Пример #4
0
/*
 * conf_match - find the single configuration entry describing an array.
 *
 * Walks the list returned by conf_get_ident(NULL) and compares every
 * identifying field an entry carries (uuid, name, device list,
 * super-minor) with 'info' / 'devname'.  Entries carrying none of these
 * fields never match.
 *
 * st:      supplies st->ss->swapuuid for the uuid comparison.
 * info:    uuid, name and array.md_minor of the array being looked up.
 * devname: device checked against an entry's device list; NULL skips
 *          that check.
 * verbose: >= 2 reports why an entry was rejected, >= 0 reports an
 *          ambiguous match.
 * rvp:     when non-NULL, set to 2 if more than one entry matches.
 *
 * Returns the only matching entry, or NULL when there is none or when
 * more than one entry matches.
 */
struct mddev_ident *conf_match(struct supertype *st,
			       struct mdinfo *info,
			       char *devname,
			       int verbose, int *rvp)
{
	struct mddev_ident *found = NULL;
	struct mddev_ident *cand;

	for (cand = conf_get_ident(NULL); cand != NULL; cand = cand->next) {
		/* rejections are only explained for entries that have a
		 * device name to mention
		 */
		int explain = (verbose >= 2 && cand->devname != NULL);

		if (cand->uuid_set &&
		    !same_uuid(cand->uuid, info->uuid, st->ss->swapuuid)) {
			if (explain)
				pr_err("UUID differs from %s.\n",
				       cand->devname);
			continue;
		}

		if (cand->name[0] != '\0' &&
		    strcasecmp(cand->name, info->name) != 0) {
			if (explain)
				pr_err("Name differs from %s.\n",
				       cand->devname);
			continue;
		}

		if (cand->devices && devname &&
		    !match_oneof(cand->devices, devname)) {
			if (explain)
				pr_err("Not a listed device for %s.\n",
				       cand->devname);
			continue;
		}

		if (cand->super_minor != UnSet &&
		    cand->super_minor != info->array.md_minor) {
			if (explain)
				pr_err("Different super-minor to %s.\n",
				       cand->devname);
			continue;
		}

		/* an entry with nothing to compare cannot identify anything */
		if (!(cand->uuid_set ||
		      cand->name[0] ||
		      cand->devices ||
		      cand->super_minor != UnSet)) {
			if (explain)
				pr_err("%s doesn't have any identifying"
				       " information.\n",
				       cand->devname);
			continue;
		}
		/* FIXME, should I check raid_disks and level too?? */

		if (found == NULL) {
			/* first hit: keep looking in case it is not unique */
			found = cand;
			continue;
		}

		/* second hit: the lookup is ambiguous, so give up */
		if (verbose >= 0) {
			if (found->devname && cand->devname)
				pr_err("we match both %s and %s - "
				       "cannot decide which to use.\n",
				       found->devname,
				       cand->devname);
			else
				pr_err("multiple lines in mdadm.conf"
				       " match\n");
		}
		if (rvp)
			*rvp = 2;
		return NULL;
	}
	return found;
}
Пример #5
0
int Monitor(mddev_dev_t devlist,
	    char *mailaddr, char *alert_cmd,
	    int period, int daemonise, int scan, int oneshot,
	    int dosyslog, int test, char* pidfile, int increments)
{
	/*
	 * Every few seconds, scan every md device looking for changes
	 * When a change is found, log it, possibly run the alert command,
	 * and possibly send Email
	 *
	 * For each array, we record:
	 *   Update time
	 *   active/working/failed/spare drives
	 *   State of each device.
	 *   %rebuilt if rebuilding
	 *
	 * If the update time changes, check out all the data again
	 * It is possible that we cannot get the state of each device
	 * due to bugs in the md kernel module.
	 * We also read /proc/mdstat to get rebuild percent,
	 * and to get state on all active devices incase of kernel bug.
	 *
	 * Events are:
	 *    Fail
	 *	An active device had Faulty set or Active/Sync removed
	 *    FailSpare
	 *      A spare device had Faulty set
	 *    SpareActive
	 *      An active device had a reverse transition
	 *    RebuildStarted
	 *      percent went from -1 to +ve
	 *    RebuildNN
	 *      percent went from below to not-below NN%
	 *    DeviceDisappeared
	 *      Couldn't access a device which was previously visible
	 *
	 * if we detect an array with active<raid and spare==0
	 * we look at other arrays that have same spare-group
	 * If we find one with active==raid and spare>0,
	 *  and if we can get_disk_info and find a name
	 *  Then we hot-remove and hot-add to the other array
	 *
	 * If devlist is NULL, then we can monitor everything because --scan
	 * was given.  We get an initial list from config file and add anything
	 * that appears in /proc/mdstat
	 */

	struct state {
		char *devname;
		int devnum;	/* to sync with mdstat info */
		long utime;
		int err;
		char *spare_group;
		int active, working, failed, spare, raid;
		int expected_spares;
		int devstate[MaxDisks];
		unsigned devid[MaxDisks];
		int percent;
		struct state *next;
	} *statelist = NULL;
	int finished = 0;
	struct mdstat_ent *mdstat = NULL;
	char *mailfrom = NULL;

	if (!mailaddr) {
		mailaddr = conf_get_mailaddr();
		if (mailaddr && ! scan)
			fprintf(stderr, Name ": Monitor using email address \"%s\" from config file\n",
			       mailaddr);
	}
	mailfrom = conf_get_mailfrom();

	if (!alert_cmd) {
		alert_cmd = conf_get_program();
		if (alert_cmd && ! scan)
			fprintf(stderr, Name ": Monitor using program \"%s\" from config file\n",
			       alert_cmd);
	}
	if (scan && !mailaddr && !alert_cmd) {
		fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
		return 1;
	}

	if (daemonise) {
		int pid = fork();
		if (pid > 0) {
			if (!pidfile)
				printf("%d\n", pid);
			else {
				FILE *pid_file;
				pid_file=fopen(pidfile, "w");
				if (!pid_file)
					perror("cannot create pid file");
				else {
					fprintf(pid_file,"%d\n", pid);
					fclose(pid_file);
				}
			}
			return 0;
		}
		if (pid < 0) {
			perror("daemonise");
			return 1;
		}
		close(0);
		open("/dev/null", O_RDWR);
		dup2(0,1);
		dup2(0,2);
		setsid();
	}

	if (devlist == NULL) {
		mddev_ident_t mdlist = conf_get_ident(NULL);
		for (; mdlist; mdlist=mdlist->next) {
			struct state *st;
			if (mdlist->devname == NULL)
				continue;
			if (strcasecmp(mdlist->devname, "<ignore>") == 0)
				continue;
			st = malloc(sizeof *st);
			if (st == NULL)
				continue;
			if (mdlist->devname[0] == '/')
				st->devname = strdup(mdlist->devname);
			else {
				st->devname = malloc(8+strlen(mdlist->devname)+1);
				strcpy(strcpy(st->devname, "/dev/md/"),
				       mdlist->devname);
			}
			st->utime = 0;
			st->next = statelist;
			st->err = 0;
			st->devnum = INT_MAX;
			st->percent = -2;
			st->expected_spares = mdlist->spare_disks;
			if (mdlist->spare_group)
				st->spare_group = strdup(mdlist->spare_group);
			else
				st->spare_group = NULL;
			statelist = st;
		}
	} else {
		mddev_dev_t dv;
		for (dv=devlist ; dv; dv=dv->next) {
			mddev_ident_t mdlist = conf_get_ident(dv->devname);
			struct state *st = malloc(sizeof *st);
			if (st == NULL)
				continue;
			st->devname = strdup(dv->devname);
			st->utime = 0;
			st->next = statelist;
			st->err = 0;
			st->devnum = INT_MAX;
			st->percent = -2;
			st->expected_spares = -1;
			st->spare_group = NULL;
			if (mdlist) {
				st->expected_spares = mdlist->spare_disks;
				if (mdlist->spare_group)
					st->spare_group = strdup(mdlist->spare_group);
			}
			statelist = st;
		}
	}


	while (! finished) {
		int new_found = 0;
		struct state *st;

		if (mdstat)
			free_mdstat(mdstat);
		mdstat = mdstat_read(oneshot?0:1, 0);

		for (st=statelist; st; st=st->next) {
			struct { int state, major, minor; } info[MaxDisks];
			mdu_array_info_t array;
			struct mdstat_ent *mse = NULL, *mse2;
			char *dev = st->devname;
			int fd;
			int i;

			if (test)
				alert("TestMessage", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			fd = open(dev, O_RDONLY);
			if (fd < 0) {
				if (!st->err)
					alert("DeviceDisappeared", dev, NULL,
					      mailaddr, mailfrom, alert_cmd, dosyslog);
/*					fprintf(stderr, Name ": cannot open %s: %s\n",
						dev, strerror(errno));
*/				st->err=1;
				continue;
			}
			fcntl(fd, F_SETFD, FD_CLOEXEC);
			if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
				if (!st->err)
					alert("DeviceDisappeared", dev, NULL,
					      mailaddr, mailfrom, alert_cmd, dosyslog);
/*					fprintf(stderr, Name ": cannot get array info for %s: %s\n",
						dev, strerror(errno));
*/				st->err=1;
				close(fd);
				continue;
			}
			/* It's much easier to list what array levels can't
			 * have a device disappear than all of them that can
			 */
			if (array.level == 0 || array.level == -1) {
				if (!st->err)
					alert("DeviceDisappeared", dev, "Wrong-Level",
					      mailaddr, mailfrom, alert_cmd, dosyslog);
				st->err = 1;
				close(fd);
				continue;
			}
			if (st->devnum == INT_MAX) {
				struct stat stb;
				if (fstat(fd, &stb) == 0 &&
				    (S_IFMT&stb.st_mode)==S_IFBLK) {
					if (major(stb.st_rdev) == MD_MAJOR)
						st->devnum = minor(stb.st_rdev);
					else
						st->devnum = -1- (minor(stb.st_rdev)>>6);
				}
			}

			for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
				if (mse2->devnum == st->devnum) {
					mse2->devnum = INT_MAX; /* flag it as "used" */
					mse = mse2;
				}

			if (array.utime == 0)
				/* external arrays don't update utime */
				array.utime = time(0);

			if (st->utime == array.utime &&
			    st->failed == array.failed_disks &&
			    st->working == array.working_disks &&
			    st->spare == array.spare_disks &&
			    (mse == NULL  || (
				    mse->percent == st->percent
				    ))) {
				close(fd);
				st->err = 0;
				continue;
			}
			if (st->utime == 0 && /* new array */
			    mse &&	/* is in /proc/mdstat */
			    mse->pattern && strchr(mse->pattern, '_') /* degraded */
				)
				alert("DegradedArray", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);

			if (st->utime == 0 && /* new array */
			    st->expected_spares > 0 &&
			    array.spare_disks < st->expected_spares)
				alert("SparesMissing", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			if (mse &&
			    st->percent == -1 &&
			    mse->percent >= 0)
				alert("RebuildStarted", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			if (mse &&
			    st->percent >= 0 &&
			    mse->percent >= 0 &&
			    (mse->percent / increments) > (st->percent / increments)) {
				char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)

				if((mse->percent / increments) == 0)
					snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
				else
					snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);

				alert(percentalert,
				      dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
			}

			if (mse &&
			    mse->percent == -1 &&
			    st->percent >= 0) {
				/* Rebuild/sync/whatever just finished.
				 * If there is a number in /mismatch_cnt,
				 * we should report that.
				 */
				struct mdinfo *sra =
				       sysfs_read(-1, st->devnum, GET_MISMATCH);
				if (sra && sra->mismatch_cnt > 0) {
					char cnt[40];
					sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
					alert("RebuildFinished", dev, cnt, mailaddr, mailfrom, alert_cmd, dosyslog);
				} else
					alert("RebuildFinished", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
				if (sra)
					free(sra);
			}

			if (mse)
				st->percent = mse->percent;


			for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
			     i++) {
				mdu_disk_info_t disc;
				disc.number = i;
				if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
					info[i].state = disc.state;
					info[i].major = disc.major;
					info[i].minor = disc.minor;
				} else
					info[i].major = info[i].minor = 0;
			}
			close(fd);

			for (i=0; i<MaxDisks; i++) {
				mdu_disk_info_t disc = {0,0,0,0,0};
				int newstate=0;
				int change;
				char *dv = NULL;
				disc.number = i;
				if (i > array.raid_disks + array.nr_disks) {
					newstate = 0;
					disc.major = disc.minor = 0;
				} else if (info[i].major || info[i].minor) {
					newstate = info[i].state;
					dv = map_dev(info[i].major, info[i].minor, 1);
					disc.state = newstate;
					disc.major = info[i].major;
					disc.minor = info[i].minor;
				} else if (mse &&  mse->pattern && i < (int)strlen(mse->pattern)) {
					switch(mse->pattern[i]) {
					case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
					case '_': newstate = 0; break;
					}
					disc.major = disc.minor = 0;
				}
				if (dv == NULL && st->devid[i])
					dv = map_dev(major(st->devid[i]),
						     minor(st->devid[i]), 1);
				change = newstate ^ st->devstate[i];
				if (st->utime && change && !st->err) {
					if (i < array.raid_disks &&
					    (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
					     ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
					     ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
						)
						alert("Fail", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
					else if (i >= array.raid_disks &&
						 (disc.major || disc.minor) &&
						 st->devid[i] == makedev(disc.major, disc.minor) &&
						 ((newstate&change)&(1<<MD_DISK_FAULTY))
						)
						alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
					else if (i < array.raid_disks &&
						 ! (newstate & (1<<MD_DISK_REMOVED)) &&
						 (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
						  ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
						  ((newstate&change)&(1<<MD_DISK_SYNC)))
						)
						alert("SpareActive", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
				}
				st->devstate[i] = newstate;
				st->devid[i] = makedev(disc.major, disc.minor);
			}
			st->active = array.active_disks;
			st->working = array.working_disks;
			st->spare = array.spare_disks;
			st->failed = array.failed_disks;
			st->utime = array.utime;
			st->raid = array.raid_disks;
			st->err = 0;
		}
		/* now check if there are any new devices found in mdstat */
		if (scan) {
			struct mdstat_ent *mse;
			for (mse=mdstat; mse; mse=mse->next)
				if (mse->devnum != INT_MAX &&
				    mse->level &&
				    (strcmp(mse->level, "raid0")!=0 &&
				     strcmp(mse->level, "linear")!=0)
					) {
					struct state *st = malloc(sizeof *st);
					mdu_array_info_t array;
					int fd;
					if (st == NULL)
						continue;
					st->devname = strdup(get_md_name(mse->devnum));
					if ((fd = open(st->devname, O_RDONLY)) < 0 ||
					    ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
						/* no such array */
						if (fd >=0) close(fd);
						put_md_name(st->devname);
						free(st->devname);
						free(st);
						continue;
					}
					close(fd);
					st->utime = 0;
					st->next = statelist;
					st->err = 1;
					st->devnum = mse->devnum;
					st->percent = -2;
					st->spare_group = NULL;
					st->expected_spares = -1;
					statelist = st;
					if (test)
						alert("TestMessage", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
					alert("NewArray", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
					new_found = 1;
				}
		}
		/* If an array has active < raid && spare == 0 && spare_group != NULL
		 * Look for another array with spare > 0 and active == raid and same spare_group
		 *  if found, choose a device and hotremove/hotadd
		 */
		for (st = statelist; st; st=st->next)
			if (st->active < st->raid &&
			    st->spare == 0 &&
			    st->spare_group != NULL) {
				struct state *st2;
				for (st2=statelist ; st2 ; st2=st2->next)
					if (st2 != st &&
					    st2->spare > 0 &&
					    st2->active == st2->raid &&
					    st2->spare_group != NULL &&
					    strcmp(st->spare_group, st2->spare_group) == 0) {
						/* try to remove and add */
						int fd1 = open(st->devname, O_RDONLY);
						int fd2 = open(st2->devname, O_RDONLY);
						int dev = -1;
						int d;
						if (fd1 < 0 || fd2 < 0) {
							if (fd1>=0) close(fd1);
							if (fd2>=0) close(fd2);
							continue;
						}
						for (d=st2->raid; d < MaxDisks; d++) {
							if (st2->devid[d] > 0 &&
							    st2->devstate[d] == 0) {
								dev = st2->devid[d];
								break;
							}
						}
						if (dev > 0) {
							struct mddev_dev_s devlist;
							char devname[20];
							devlist.next = NULL;
							devlist.used = 0;
							devlist.re_add = 0;
							devlist.writemostly = 0;
							devlist.devname = devname;
							sprintf(devname, "%d:%d", major(dev), minor(dev));

							devlist.disposition = 'r';
							if (Manage_subdevs(st2->devname, fd2, &devlist, -1, 0) == 0) {
								devlist.disposition = 'a';
								if (Manage_subdevs(st->devname, fd1, &devlist, -1, 0) == 0) {
									alert("MoveSpare", st->devname, st2->devname, mailaddr, mailfrom, alert_cmd, dosyslog);
									close(fd1);
									close(fd2);
									break;
								}
								else Manage_subdevs(st2->devname, fd2, &devlist, -1, 0);
							}
						}
						close(fd1);
						close(fd2);
					}
			}
		if (!new_found) {
			if (oneshot)
				break;
			else
				mdstat_wait(period);
		}
		test = 0;
	}
	if (pidfile)
		unlink(pidfile);
	return 0;
}