Пример #1
0
/**
 * unblock_monitor - unblock every member array of a container
 * @container - container whose member arrays are to be unblocked
 * @unfreeze - flag passed through unchanged to unblock_subarray()
 *
 * Walks the /proc/mdstat entries and, for each entry that
 * is_container_member() reports as belonging to @container, reads its
 * sysfs version/level and calls unblock_subarray() on it.  If at least
 * one member has a level greater than 0 the monitor is pinged once
 * after all members have been processed.
 *
 * Failures are reported on stderr; nothing is returned to the caller.
 */
void unblock_monitor(char *container, const int unfreeze)
{
	struct mdstat_ent *ent, *e;
	struct mdinfo *sra = NULL;
	int to_ping = 0;

	ent = mdstat_read(0, 0);
	if (!ent) {
		fprintf(stderr, Name
			": failed to read /proc/mdstat while unblocking container\n");
		return;
	}

	/* unfreeze container contents */
	for (e = ent; e; e = e->next) {
		if (!is_container_member(e, container))
			continue;
		/* release the info of the previous member (NULL first time) */
		sysfs_free(sra);
		sra = sysfs_read(-1, e->devnum, GET_VERSION|GET_LEVEL);
		if (!sra) {
			/* sysfs_read() returns NULL on failure; without the
			 * array info we can neither inspect the level nor
			 * unblock it, so report and move to the next member.
			 */
			fprintf(stderr, Name ": Failed to unfreeze %s\n", e->dev);
			continue;
		}
		if (sra->array.level > 0)
			to_ping++;
		if (unblock_subarray(sra, unfreeze))
			fprintf(stderr, Name ": Failed to unfreeze %s\n", e->dev);
	}
	if (to_ping)
		ping_monitor(container);

	sysfs_free(sra);
	free_mdstat(ent);
}
Пример #2
0
/*
 * Release the sysfs path strings held by an interface entry.
 * Each path is only handed to sysfs_free() when it is set.
 */
static void iov_cleanup(struct if_entry *entry)
{
	if (entry->pci_path != NULL) {
		sysfs_free(entry->pci_path);
	}

	if (entry->pci_physfn_path != NULL) {
		sysfs_free(entry->pci_physfn_path);
	}
}
Пример #3
0
/*
 * RebuildMap - reconstruct the device map from the currently listed arrays.
 *
 * For every entry returned by mdstat_read() the member devices are read
 * from sysfs and tried in turn until one of them yields loadable
 * metadata.  From that metadata a path name is chosen (an existing
 * /dev/md/ name, a name from the configuration, or a freshly generated
 * unique name) and the array is added to an in-memory map.
 * The map is then written out with map_write(); if that reports a new
 * file was written, a "change" uevent is sent for every listed array.
 */
void RebuildMap(void)
{
	struct mdstat_ent *mdstat = mdstat_read(0, 0);
	struct mdstat_ent *md;
	struct map_ent *map = NULL;
	int require_homehost;
	char sys_hostname[256];
	char *homehost = conf_get_homehost(&require_homehost);

	/* No configured homehost, or the "<system>" placeholder: fall back
	 * to the name reported by gethostname() when that succeeds.
	 */
	if (homehost == NULL || strcmp(homehost, "<system>")==0) {
		if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
			sys_hostname[sizeof(sys_hostname)-1] = 0;
			homehost = sys_hostname;
		}
	}

	for (md = mdstat ; md ; md = md->next) {
		struct mdinfo *sra = sysfs_read(-1, md->devnm, GET_DEVS);
		struct mdinfo *sd;

		if (!sra)
			continue;

		/* Try each member device until one provides usable metadata */
		for (sd = sra->devs ; sd ; sd = sd->next) {
			char namebuf[100];
			char dn[30];
			int dfd;
			int ok;
			int devid;
			struct supertype *st;
			char *subarray = NULL;
			char *path;
			struct mdinfo *info;

			sprintf(dn, "%d:%d", sd->disk.major, sd->disk.minor);
			dfd = dev_open(dn, O_RDONLY);
			if (dfd < 0)
				continue;
			st = guess_super(dfd);
			if ( st == NULL)
				ok = -1;
			else {
				subarray = get_member_info(md);
				ok = st->ss->load_super(st, dfd, NULL);
			}
			close(dfd);
			/* NOTE(review): when st is non-NULL but load_super
			 * failed, st is not released on this path - confirm
			 * whether that leaks.
			 */
			if (ok != 0)
				continue;
			if (subarray)
				info = st->ss->container_content(st, subarray);
			else {
				info = xmalloc(sizeof(*info));
				st->ss->getinfo_super(st, info, NULL);
			}
			/* NOTE(review): free_super() is not called on this
			 * path either - confirm.
			 */
			if (!info)
				continue;

			devid = devnm2devid(md->devnm);
			path = map_dev(major(devid), minor(devid), 0);
			if (path == NULL ||
			    strncmp(path, "/dev/md/", 8) != 0) {
				/* We would really like a name that provides
				 * an MD_DEVNAME for udev.
				 * The name needs to be unique both in /dev/md/
				 * and in this mapfile.
				 * It needs to match what -I or -As would come
				 * up with.
				 * That means:
				 *   Check if array is in mdadm.conf
				 *        - if so use that.
				 *   determine trustworthy from homehost etc
				 *   find a unique name based on metadata name.
				 *
				 */
				struct mddev_ident *match = conf_match(st, info,
								       NULL, 0,
								       NULL);
				struct stat stb;
				if (match && match->devname && match->devname[0] == '/') {
					path = match->devname;
					/* NOTE(review): the enclosing test
					 * already requires devname[0] == '/',
					 * so this branch looks unreachable -
					 * confirm whether the outer test was
					 * meant to be less strict.
					 */
					if (path[0] != '/') {
						strcpy(namebuf, "/dev/md/");
						strcat(namebuf, path);
						path = namebuf;
					}
				} else {
					/* unum >= 0: append a numeric suffix,
					 * starting at that value;
					 * unum == -1: first try the bare name.
					 */
					int unum = 0;
					char *sep = "_";
					const char *name;
					int conflict = 1;
					if ((homehost == NULL ||
					     st->ss->match_home(st, homehost) != 1) &&
					    st->ss->match_home(st, "any") != 1 &&
					    (require_homehost
					     || ! conf_name_is_free(info->name)))
						/* require a numeric suffix */
						unum = 0;
					else
						/* allow name to be used as-is if no conflict */
						unum = -1;
					name = info->name;
					if (!*name) {
						/* Metadata carries no name: use the
						 * metadata handler's name instead.
						 * If it does not end in a digit, the
						 * number is appended without a
						 * separator.
						 */
						name = st->ss->name;
						if (!isdigit(name[strlen(name)-1]) &&
						    unum == -1) {
							unum = 0;
							sep = "";
						}
					}
					if (strchr(name, ':')) {
						/* Probably a uniquifying
						 * hostname prefix.  Allow
						 * without a suffix, and strip
						 * hostname if it is us.
						 */
						if (homehost && unum == -1 &&
						    strncmp(name, homehost,
							    strlen(homehost)) == 0 &&
						    name[strlen(homehost)] == ':')
							name += strlen(homehost)+1;
						unum = -1;
					}

					/* Generate candidates until one neither
					 * exists on disk (lstat fails) nor is
					 * already present in the map being built.
					 */
					while (conflict) {
						if (unum >= 0)
							sprintf(namebuf, "/dev/md/%s%s%d",
								name, sep, unum);
						else
							sprintf(namebuf, "/dev/md/%s",
								name);
						unum++;
						/* namebuf+8 skips the "/dev/md/" prefix */
						if (lstat(namebuf, &stb) != 0 &&
						    (map == NULL ||
						     !map_by_name(&map, namebuf+8)))
							conflict = 0;
					}
					path = namebuf;
				}
			}
			map_add(&map, md->devnm,
				info->text_version,
				info->uuid, path);
			st->ss->free_super(st);
			free(info);
			/* one member with usable metadata is enough for this array */
			break;
		}
		sysfs_free(sra);
	}
	/* Only trigger a change if we wrote a new map file */
	if (map_write(map))
		for (md = mdstat ; md ; md = md->next) {
			struct mdinfo *sra = sysfs_read(-1, md->devnm,
							GET_VERSION);
			if (sra)
				sysfs_uevent(sra, "change");
			sysfs_free(sra);
		}
	map_free(map);
	free_mdstat(mdstat);
}
Пример #4
0
static int mdmon(char *devnm, int must_fork, int takeover)
{
	int mdfd;
	struct mdinfo *mdi, *di;
	struct supertype *container;
	sigset_t set;
	struct sigaction act;
	int pfd[2];
	int status;
	int ignore;
	pid_t victim = -1;
	int victim_sock = -1;

	dprintf("starting mdmon for %s\n", devnm);

	mdfd = open_dev(devnm);
	if (mdfd < 0) {
		pr_err("%s: %s\n", devnm, strerror(errno));
		return 1;
	}
	if (md_get_version(mdfd) < 0) {
		pr_err("%s: Not an md device\n", devnm);
		return 1;
	}

	/* Fork, and have the child tell us when they are ready */
	if (must_fork) {
		if (pipe(pfd) != 0) {
			pr_err("failed to create pipe\n");
			return 1;
		}
		switch(fork()) {
		case -1:
			pr_err("failed to fork: %s\n", strerror(errno));
			return 1;
		case 0: /* child */
			close(pfd[0]);
			break;
		default: /* parent */
			close(pfd[1]);
			if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
				wait(&status);
				status = WEXITSTATUS(status);
			}
			close(pfd[0]);
			return status;
		}
	} else
		pfd[0] = pfd[1] = -1;

	container = xcalloc(1, sizeof(*container));
	strcpy(container->devnm, devnm);
	container->arrays = NULL;
	container->sock = -1;

	mdi = sysfs_read(mdfd, container->devnm, GET_VERSION|GET_LEVEL|GET_DEVS);

	if (!mdi) {
		pr_err("failed to load sysfs info for %s\n", container->devnm);
		exit(3);
	}
	if (mdi->array.level != UnSet) {
		pr_err("%s is not a container - cannot monitor\n", devnm);
		exit(3);
	}
	if (mdi->array.major_version != -1 ||
	    mdi->array.minor_version != -2) {
		pr_err("%s does not use external metadata - cannot monitor\n",
			devnm);
		exit(3);
	}

	container->ss = version_to_superswitch(mdi->text_version);
	if (container->ss == NULL) {
		pr_err("%s uses unsupported metadata: %s\n",
			devnm, mdi->text_version);
		exit(3);
	}

	container->devs = NULL;
	for (di = mdi->devs; di; di = di->next) {
		struct mdinfo *cd = xmalloc(sizeof(*cd));
		*cd = *di;
		cd->next = container->devs;
		container->devs = cd;
	}
	sysfs_free(mdi);

	/* SIGUSR is sent between parent and child.  So both block it
	 * and enable it only with pselect.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGUSR1);
	sigaddset(&set, SIGTERM);
	sigprocmask(SIG_BLOCK, &set, NULL);
	act.sa_handler = wake_me;
	act.sa_flags = 0;
	sigaction(SIGUSR1, &act, NULL);
	act.sa_handler = term;
	sigaction(SIGTERM, &act, NULL);
	act.sa_handler = SIG_IGN;
	sigaction(SIGPIPE, &act, NULL);

	victim = mdmon_pid(container->devnm);
	if (victim >= 0)
		victim_sock = connect_monitor(container->devnm);

	ignore = chdir("/");
	if (!takeover && victim > 0 && victim_sock >= 0) {
		if (fping_monitor(victim_sock) == 0) {
			pr_err("%s already managed\n", container->devnm);
			exit(3);
		}
		close(victim_sock);
		victim_sock = -1;
	}
	if (container->ss->load_container(container, mdfd, devnm)) {
		pr_err("Cannot load metadata for %s\n", devnm);
		exit(3);
	}
	close(mdfd);

	/* Ok, this is close enough.  We can say goodbye to our parent now.
	 */
	if (victim > 0)
		remove_pidfile(devnm);
	if (make_pidfile(devnm) < 0) {
		exit(3);
	}
	container->sock = make_control_sock(devnm);

	status = 0;
	if (pfd[1] >= 0) {
		if (write(pfd[1], &status, sizeof(status)) < 0)
			pr_err("failed to notify our parent: %d\n",
			       getppid());
		close(pfd[1]);
	}

	mlockall(MCL_CURRENT | MCL_FUTURE);

	if (clone_monitor(container) < 0) {
		pr_err("failed to start monitor process: %s\n",
			strerror(errno));
		exit(2);
	}

	if (victim > 0) {
		try_kill_monitor(victim, container->devnm, victim_sock);
		if (victim_sock >= 0)
			close(victim_sock);
	}

	setsid();
	close(0);
	open("/dev/null", O_RDWR);
	close(1);
	ignore = dup(0);
#ifndef DEBUG
	close(2);
	ignore = dup(0);
#endif

	/* This silliness is to stop the compiler complaining
	 * that we ignore 'ignore'
	 */
	if (ignore)
		ignore++;

	do_manager(container);

	exit(0);
}
Пример #5
0
/**
 * block_monitor - prevent mdmon spare assignment
 * @container - container to block
 * @freeze - flag to additionally freeze sync_action
 *
 * This is used by the reshape code to freeze the container, and the
 * auto-rebuild implementation to atomically move spares.
 * In both cases we need to stop mdmon from assigning spares to replace
 * failed devices as we might have other plans for the spare.
 * For the reshape case we also need to 'freeze' sync_action so that
 * no recovery happens until we have fully prepared for the reshape.
 *
 * We tell mdmon that the array is frozen by marking the 'metadata' name
 * with a leading '-'.  This previously told mdmon "Don't make this array
 * read/write, leave it readonly".  Now it means a more general "Don't
 * reconfigure this array at all".
 * As older versions of mdmon (which might run from initrd) don't understand
 * this, we first check that the running mdmon is new enough.
 *
 * Returns 0 when every member array was processed without error, and -1
 * when the running mdmon is too old, /proc/mdstat cannot be read, or any
 * member could not be blocked (in which case the members handled so far
 * are unblocked again).
 */
int block_monitor(char *container, const int freeze)
{
	int devnum = devname2devnum(container);
	struct mdstat_ent *ent, *e, *e2;
	struct mdinfo *sra = NULL;
	char *version = NULL;
	char buf[64];
	int rv = 0;

	if (!mdmon_running(devnum)) {
		/* if mdmon is not active we assume that any instance that is
		 * later started will match the current mdadm version, if this
		 * assumption is violated we may inadvertently rebuild an array
		 * that was meant for reshape, or start rebuild on a spare that
		 * was to be moved to another container
		 */
		/* pass */;
	} else {
		int ver;

		version = ping_monitor_version(container);
		ver = version ? mdadm_version(version) : -1;
		free(version);
		/* no version reply (-1) is treated like a too-old mdmon */
		if (ver < 3002000) {
			fprintf(stderr, Name
				": mdmon instance for %s cannot be disabled\n",
				container);
			return -1;
		}
	}

	ent = mdstat_read(0, 0);
	if (!ent) {
		fprintf(stderr, Name
			": failed to read /proc/mdstat while disabling mdmon\n");
		return -1;
	}

	/* freeze container contents */
	/* Any 'break' below leaves e pointing at the member that failed;
	 * running off the end of the list (e == NULL) means success.
	 */
	for (e = ent; e; e = e->next) {
		if (!is_container_member(e, container))
			continue;
		sysfs_free(sra);
		sra = sysfs_read(-1, e->devnum, GET_VERSION);
		if (!sra) {
			fprintf(stderr, Name
				": failed to read sysfs for subarray%s\n",
				to_subarray(e, container));
			break;
		}
		/* can't reshape an array that we can't monitor */
		if (sra->text_version[0] == '-')
			break;

		if (freeze && sysfs_freeze_array(sra) < 1)
			break;
		/* flag this array to not be modified by mdmon (close race with
		 * takeover in reshape case and spare reassignment in the
		 * auto-rebuild case)
		 */
		if (block_subarray(sra))
			break;
		ping_monitor(container);

		/* check that we did not race with recovery */
		/* NOTE(review): both alternatives require 'freeze', so with
		 * freeze == 0 the else branch always runs and the loop
		 * breaks as a failure - confirm this is intended.
		 */
		if ((freeze &&
		     !sysfs_attribute_available(sra, NULL, "sync_action")) ||
		    (freeze &&
		     sysfs_attribute_available(sra, NULL, "sync_action") &&
		     sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
		     strcmp(buf, "frozen\n") == 0))
			/* pass */;
		else {
			unblock_subarray(sra, 0);
			break;
		}
		/* Double check against races - there should be no spares
		 * or part-spares
		 */
		sysfs_free(sra);
		sra = sysfs_read(-1, e->devnum, GET_DEVS | GET_STATE);
		if (sra && sra->array.spare_disks > 0) {
			unblock_subarray(sra, freeze);
			break;
		}
	}

	/* e != NULL: the loop stopped early on member e */
	if (e) {
		fprintf(stderr, Name ": failed to freeze subarray%s\n",
			to_subarray(e, container));

		/* thaw the partially frozen container */
		/* only the members that precede e in the list are revisited */
		for (e2 = ent; e2 && e2 != e; e2 = e2->next) {
			if (!is_container_member(e2, container))
				continue;
			sysfs_free(sra);
			sra = sysfs_read(-1, e2->devnum, GET_VERSION);
			/* NOTE(review): sra may be NULL here if sysfs_read()
			 * failed - presumably unblock_subarray() copes with
			 * that; verify.
			 */
			if (unblock_subarray(sra, freeze))
				fprintf(stderr, Name ": Failed to unfreeze %s\n", e2->dev);
		}

		ping_monitor(container); /* cleared frozen */
		rv = -1;
	}

	sysfs_free(sra);
	free_mdstat(ent);

	return rv;
}
Пример #6
0
/*
 * Pick one spare device out of container 'from' for use by 'to'.
 *
 * Like choose_spare, except that devstate cannot be trusted here, so
 * the container metadata is loaded and consulted instead.
 *
 * Returns 0 when the container cannot be opened or loaded, when the
 * metadata handler has no getinfo_super_disks method, or when no
 * suitable spare exists.  When 'from' and 'to' are the same container
 * and either the disk list cannot be read or more disks are active than
 * 'active', 1 is returned so the caller stops searching.  Otherwise the
 * device number of the chosen spare is returned.
 */
static dev_t container_choose_spare(struct state *from, struct state *to,
				    struct domainlist *domlist,
				    unsigned long long min_size, int active)
{
	struct supertype *st = from->metadata;
	struct mdinfo *candidates;
	dev_t chosen = 0;
	int load_failed;
	int fd;

	fd = open(from->devname, O_RDONLY);
	if (fd < 0)
		return 0;
	if (st->ss->getinfo_super_disks == NULL) {
		close(fd);
		return 0;
	}

	load_failed = st->ss->load_container(st, fd, NULL);
	close(fd);
	if (load_failed)
		return 0;

	if (from == to) {
		/* The number of active disks may have grown since the ioctl
		 * in the main loop: mdmon may already have added a spare to
		 * the subarray.  In that case no further spare is needed, so
		 * report a non-zero value.
		 */
		struct mdinfo *disks = st->ss->getinfo_super_disks(st);
		struct mdinfo *d;
		int in_sync = 0;

		if (disks == NULL) {
			chosen = 1;
			goto release;
		}
		for (d = disks->devs; d != NULL; d = d->next) {
			if (d->disk.state & (1<<MD_DISK_FAULTY))
				continue;
			if (d->disk.state & (1<<MD_DISK_SYNC))
				in_sync++;
		}
		sysfs_free(disks);
		if (in_sync > active) {
			/* a spare has just been activated */
			chosen = 1;
			goto release;
		}
	}

	/* A single spare is all we want, so ask for at most one */
	candidates = container_choose_spares(st, min_size, domlist,
					     from->spare_group,
					     to->metadata->ss->name, 1);
	if (candidates != NULL) {
		if (candidates->devs != NULL)
			chosen = makedev(candidates->devs->disk.major,
					 candidates->devs->disk.minor);
		sysfs_free(candidates);
	}

release:
	st->ss->free_super(st);
	return chosen;
}
Пример #7
0
/*
 * WaitClean - wait for an array to reach a clean state.
 *
 * dev:     path of the array device to open.
 * sock:    monitor socket handed to fping_monitor() once the array is clean.
 * verbose: when non-zero, failures are reported with pr_err().
 *
 * Returns 1 if the device cannot be opened or waiting fails, and 0 if
 * sysfs cannot be read, if no wait is needed, or if the array became
 * clean and the monitor answered a ping.
 */
int WaitClean(char *dev, int sock, int verbose)
{
	struct mdinfo *mdi;
	char devnm[32];
	int rv = 1;
	int fd = open(dev, O_RDONLY);

	if (fd < 0) {
		if (verbose)
			pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
		return 1;
	}

	strcpy(devnm, fd2devnm(fd));
	mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
	if (mdi == NULL) {
		if (verbose)
			pr_err("Failed to read sysfs attributes for %s\n", dev);
		close(fd);
		return 0;
	}

	/* the safemode delay plays no role for these levels */
	if (mdi->array.level == LEVEL_LINEAR ||
	    mdi->array.level == LEVEL_MULTIPATH ||
	    mdi->array.level == 0)
		rv = 0;

	/* with internal metadata the kernel performs the final clean
	 * transition itself, and a container is never dirty
	 */
	if (!is_subarray(mdi->text_version))
		rv = 0;

	/* nothing to wait for when safemode is switched off */
	if (mdi->safe_mode_delay == 0)
		rv = 0;

	if (rv != 0) {
		char state[20];
		int timeout = 5000;
		int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");

		/* shrink safe_mode_delay to the minimum and allow writes
		 * up to 5s to settle
		 */
		sysfs_set_safemode(mdi, 1);

		/* poll array_state until it reports a clean state */
		for (;;) {
			rv = read(state_fd, state, sizeof(state));
			if (rv < 0 ||
			    sysfs_match_word(state, clean_states) <= 4)
				break;
			rv = sysfs_wait(state_fd, &timeout);
			if (rv < 0 && errno != EINTR)
				break;
			lseek(state_fd, 0, SEEK_SET);
		}

		/* A ping is required to close the window between array_state
		 * turning clean and the metadata being marked clean.
		 */
		if (rv >= 0 &&
		    (fping_monitor(sock) == 0 ||
		     ping_monitor(mdi->text_version) == 0))
			rv = 0;
		else
			rv = 1;

		if (verbose && rv)
			pr_err("Error waiting for %s to be clean\n",
				dev);

		/* put the original safe_mode_delay back */
		sysfs_set_safemode(mdi, mdi->safe_mode_delay);
		close(state_fd);
	}

	sysfs_free(mdi);
	close(fd);

	return rv;
}
Пример #8
0
/*
 * sysfs_read - collect information about an md array from sysfs.
 *
 * fd, devnm: identify the array; both are handed to sysfs_init().
 * options:   bitmask of GET_* flags selecting which attributes under
 *            /sys/block/<name>/md/ are read.  With GET_DEVS the
 *            "dev-*" sub-directories are scanned as well and one
 *            mdinfo per member device is linked on ->devs; the
 *            per-device flags (GET_OFFSET, GET_SIZE, GET_STATE,
 *            GET_ERROR) only take effect together with GET_DEVS.
 *
 * Returns a newly allocated mdinfo, or NULL if the array name cannot be
 * determined or a requested attribute cannot be read.  On the failure
 * path everything collected so far is released with sysfs_free().
 */
struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
{
    char fname[PATH_MAX];
    char buf[PATH_MAX];
    char *base;
    char *dbase;
    struct mdinfo *sra;
    struct mdinfo *dev, **devp;
    DIR *dir = NULL;
    struct dirent *de;

    sra = xcalloc(1, sizeof(*sra));
    sysfs_init(sra, fd, devnm);
    if (sra->sys_name[0] == 0) {
        free(sra);
        return NULL;
    }

    /* 'base' marks where attribute names get appended to the path */
    sprintf(fname, "/sys/block/%s/md/", sra->sys_name);
    base = fname + strlen(fname);

    sra->devs = NULL;
    if (options & GET_VERSION) {
        strcpy(base, "metadata_version");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        if (strncmp(buf, "none", 4) == 0) {
            sra->array.major_version =
                sra->array.minor_version = -1;
            strcpy(sra->text_version, "");
        } else if (strncmp(buf, "external:", 9) == 0) {
            /* external metadata is recorded as version -1.-2 */
            sra->array.major_version = -1;
            sra->array.minor_version = -2;
            strcpy(sra->text_version, buf+9);
        } else {
            sscanf(buf, "%d.%d",
                   &sra->array.major_version,
                   &sra->array.minor_version);
            strcpy(sra->text_version, buf);
        }
    }
    if (options & GET_LEVEL) {
        strcpy(base, "level");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->array.level = map_name(pers, buf);
    }
    if (options & GET_LAYOUT) {
        strcpy(base, "layout");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->array.layout = strtoul(buf, NULL, 0);
    }
    if (options & GET_DISKS) {
        strcpy(base, "raid_disks");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->array.raid_disks = strtoul(buf, NULL, 0);
    }
    if (options & GET_DEGRADED) {
        strcpy(base, "degraded");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->array.failed_disks = strtoul(buf, NULL, 0);
    }
    if (options & GET_COMPONENT) {
        strcpy(base, "component_size");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->component_size = strtoull(buf, NULL, 0);
        /* sysfs reports "K", but we want sectors */
        sra->component_size *= 2;
    }
    if (options & GET_CHUNK) {
        strcpy(base, "chunk_size");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->array.chunk_size = strtoul(buf, NULL, 0);
    }
    if (options & GET_CACHE) {
        strcpy(base, "stripe_cache_size");
        if (load_sys(fname, buf, sizeof(buf)))
            /* Probably level doesn't support it */
            sra->cache_size = 0;
        else
            sra->cache_size = strtoul(buf, NULL, 0);
    }
    if (options & GET_MISMATCH) {
        strcpy(base, "mismatch_cnt");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        sra->mismatch_cnt = strtoul(buf, NULL, 0);
    }
    if (options & GET_SAFEMODE) {
        int scale = 1;
        int dot = 0;
        unsigned i;
        unsigned long msec;
        size_t len;

        strcpy(base, "safe_mode_delay");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;

        /* remove a period, and count digits after it */
        len = strlen(buf);
        for (i = 0; i < len; i++) {
            if (dot) {
                /* shift each fractional digit one place left, over
                 * the removed period; cast keeps isdigit() defined
                 * for bytes with the high bit set
                 */
                if (isdigit((unsigned char)buf[i])) {
                    buf[i-1] = buf[i];
                    scale *= 10;
                }
                buf[i] = 0;
            } else if (buf[i] == '.') {
                dot=1;
                buf[i] = 0;
            }
        }
        /* buf now holds the value scaled by 'scale'; store milliseconds */
        msec = strtoul(buf, NULL, 10);
        msec = (msec * 1000) / scale;
        sra->safe_mode_delay = msec;
    }
    if (options & GET_BITMAP_LOCATION) {
        strcpy(base, "bitmap/location");
        if (load_sys(fname, buf, sizeof(buf)))
            goto abort;
        if (strncmp(buf, "file", 4) == 0)
            sra->bitmap_offset = 1;
        else if (strncmp(buf, "none", 4) == 0)
            sra->bitmap_offset = 0;
        else if (buf[0] == '+')
            sra->bitmap_offset = strtol(buf+1, NULL, 10);
        else
            goto abort;
    }

    if (options & GET_ARRAY_STATE) {
        strcpy(base, "array_state");
        if (load_sys(fname, sra->sysfs_array_state,
                     sizeof(sra->sysfs_array_state)))
            goto abort;
    } else
        sra->sysfs_array_state[0] = 0;

    if (! (options & GET_DEVS))
        return sra;

    /* Get all the devices as well */
    *base = 0;
    dir = opendir(fname);
    if (!dir)
        goto abort;
    sra->array.spare_disks = 0;

    /* devp always points at the link where the next device goes */
    devp = &sra->devs;
    sra->devs = NULL;
    while ((de = readdir(dir)) != NULL) {
        char *ep;
        if (de->d_ino == 0 ||
                strncmp(de->d_name, "dev-", 4) != 0)
            continue;
        /* 'dbase' marks where per-device attribute names are appended */
        strcpy(base, de->d_name);
        dbase = base + strlen(base);
        *dbase++ = '/';

        dev = xcalloc(1, sizeof(*dev));

        /* Always get slot, major, minor */
        strcpy(dbase, "slot");
        if (load_sys(fname, buf, sizeof(buf))) {
            /* hmm... unable to read 'slot' maybe the device
             * is going away?
             */
            strcpy(dbase, "block");
            if (readlink(fname, buf, sizeof(buf)) < 0 &&
                    errno != ENAMETOOLONG) {
                /* ...yup device is gone */
                free(dev);
                continue;
            } else {
                /* slot is unreadable but 'block' link
                 * still intact... something bad is happening
                 * so abort
                 */
                free(dev);
                goto abort;
            }
        }
        strcpy(dev->sys_name, de->d_name);
        /* a slot that is not purely numeric is stored as -1 */
        dev->disk.raid_disk = strtoul(buf, &ep, 10);
        if (*ep) dev->disk.raid_disk = -1;

        strcpy(dbase, "block/dev");
        if (load_sys(fname, buf, sizeof(buf))) {
            /* assume this is a stale reference to a hot
             * removed device
             */
            free(dev);
            continue;
        }
        sra->array.nr_disks++;
        sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);

        /* special case check for block devices that can go 'offline' */
        strcpy(dbase, "block/device/state");
        if (load_sys(fname, buf, sizeof(buf)) == 0 &&
                strncmp(buf, "offline", 7) == 0) {
            free(dev);
            continue;
        }

        /* finally add this disk to the array */
        /* once linked in, 'dev' is released by sysfs_free(sra) on abort */
        *devp = dev;
        devp = & dev->next;
        dev->next = NULL;

        if (options & GET_OFFSET) {
            strcpy(dbase, "offset");
            if (load_sys(fname, buf, sizeof(buf)))
                goto abort;
            dev->data_offset = strtoull(buf, NULL, 0);
            /* new_offset is optional: fall back to the current offset */
            strcpy(dbase, "new_offset");
            if (load_sys(fname, buf, sizeof(buf)) == 0)
                dev->new_data_offset = strtoull(buf, NULL, 0);
            else
                dev->new_data_offset = dev->data_offset;
        }
        if (options & GET_SIZE) {
            strcpy(dbase, "size");
            if (load_sys(fname, buf, sizeof(buf)))
                goto abort;
            dev->component_size = strtoull(buf, NULL, 0) * 2;
        }
        if (options & GET_STATE) {
            dev->disk.state = 0;
            strcpy(dbase, "state");
            if (load_sys(fname, buf, sizeof(buf)))
                goto abort;
            if (strstr(buf, "in_sync"))
                dev->disk.state |= (1<<MD_DISK_SYNC);
            if (strstr(buf, "faulty"))
                dev->disk.state |= (1<<MD_DISK_FAULTY);
            /* neither in_sync nor faulty: count it as a spare */
            if (dev->disk.state == 0)
                sra->array.spare_disks++;
        }
        if (options & GET_ERROR) {
            /* The attribute name must be appended to the path (dbase),
             * not written into the result buffer; otherwise load_sys()
             * re-reads whichever attribute fname named last.
             */
            strcpy(dbase, "errors");
            if (load_sys(fname, buf, sizeof(buf)))
                goto abort;
            dev->errors = strtoul(buf, NULL, 0);
        }
    }
    closedir(dir);
    return sra;

abort:
    if (dir)
        closedir(dir);
    sysfs_free(sra);
    return NULL;
}
Пример #9
0
int Manage_subdevs(char *devname, int fd,
		   struct mddev_dev *devlist, int verbose, int test,
		   char *update)
{
	/* do something to each dev.
	 * devmode can be
	 *  'a' - add the device
	 *	   try HOT_ADD_DISK
	 *         If that fails EINVAL, try ADD_NEW_DISK
	 *  'r' - remove the device HOT_REMOVE_DISK
	 *        device can be 'faulty' or 'detached' in which case all
	 *	  matching devices are removed.
	 *  'f' - set the device faulty SET_DISK_FAULTY
	 *        device can be 'detached' in which case any device that
	 *	  is inaccessible will be marked faulty.
	 * For 'f' and 'r', the device can also be a kernel-internal
	 * name such as 'sdb'.
	 */
	struct mddev_dev *add_devlist = NULL;
	mdu_array_info_t array;
	mdu_disk_info_t disc;
	unsigned long long array_size;
	struct mddev_dev *dv, *next = NULL;
	struct stat stb;
	int j, jnext = 0;
	int tfd = -1;
	struct supertype *st, *tst;
	char *subarray = NULL;
	int duuid[4];
	int ouuid[4];
	int lfd = -1;
	int sysfd = -1;
	int count = 0; /* number of actions taken */

	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
		fprintf(stderr, Name ": cannot get array info for %s\n",
			devname);
		return 1;
	}

	/* array.size is only 32 bit and may be truncated.
	 * So read from sysfs if possible, and record number of sectors
	 */

	array_size = get_component_size(fd);
	if (array_size <= 0)
		array_size = array.size * 2;

	tst = super_by_fd(fd, &subarray);
	if (!tst) {
		fprintf(stderr, Name ": unsupport array - version %d.%d\n",
			array.major_version, array.minor_version);
		return 1;
	}

	stb.st_rdev = 0;
	for (dv = devlist, j=0 ; dv; dv = next, j = jnext) {
		unsigned long long ldsize;
		char dvname[20];
		char *dnprintable = dv->devname;
		char *add_dev = dv->devname;
		int err;
		int re_add_failed = 0;

		next = dv->next;
		jnext = 0;

		if (strcmp(dv->devname, "failed")==0 ||
		    strcmp(dv->devname, "faulty")==0) {
			int remaining_disks = array.nr_disks;
			if (dv->disposition != 'r') {
				fprintf(stderr, Name ": %s only meaningful "
					"with -r, not -%c\n",
					dv->devname, dv->disposition);
				return 1;
			}
			for (; j < 1024 && remaining_disks > 0; j++) {
				unsigned dev;
				disc.number = j;
				if (ioctl(fd, GET_DISK_INFO, &disc))
					continue;
				if (disc.major == 0 && disc.minor == 0)
					continue;
				remaining_disks --;
				if ((disc.state & 1) == 0) /* faulty */
					continue;
				dev = makedev(disc.major, disc.minor);
				if (stb.st_rdev == dev)
					/* already did that one */
					continue;
				stb.st_rdev = dev;
				next = dv;
				/* same slot again next time - things might
				 * have reshuffled */
				jnext = j;
				sprintf(dvname,"%d:%d", disc.major, disc.minor);
				dnprintable = dvname;
				break;
			}
			if (next != dv)
				continue;
		} else if (strcmp(dv->devname, "detached") == 0) {
			int remaining_disks = array.nr_disks;
			if (dv->disposition != 'r' && dv->disposition != 'f') {
				fprintf(stderr, Name ": %s only meaningful "
					"with -r of -f, not -%c\n",
					dv->devname, dv->disposition);
				return 1;
			}
			for (; j < 1024 && remaining_disks > 0; j++) {
				int sfd;
				unsigned dev;
				disc.number = j;
				if (ioctl(fd, GET_DISK_INFO, &disc))
					continue;
				if (disc.major == 0 && disc.minor == 0)
					continue;
				remaining_disks --;
				sprintf(dvname,"%d:%d", disc.major, disc.minor);
				sfd = dev_open(dvname, O_RDONLY);
				if (sfd >= 0) {
					close(sfd);
					continue;
				}
				if (dv->disposition == 'f' &&
				    (disc.state & 1) == 1) /* already faulty */
					continue;
				if (errno != ENXIO)
					continue;
				dev = makedev(disc.major, disc.minor);
				if (stb.st_rdev == dev)
					/* already did that one */
					continue;
				stb.st_rdev = dev;
				next = dv;
				/* same slot again next time - things might
				 * have reshuffled */
				jnext = j;
				dnprintable = dvname;
				break;
			}
			if (next != dv)
				continue;
		} else if (strcmp(dv->devname, "missing") == 0) {
			if (dv->disposition != 'a' || dv->re_add == 0) {
				fprintf(stderr, Name ": 'missing' only meaningful "
					"with --re-add\n");
				return 1;
			}
			if (add_devlist == NULL)
				add_devlist = conf_get_devs();
			if (add_devlist == NULL) {
				fprintf(stderr, Name ": no devices to scan for missing members.");
				continue;
			}
			add_dev = add_devlist->devname;
			add_devlist = add_devlist->next;
			if (add_devlist != NULL)
				next = dv;
			if (stat(add_dev, &stb) < 0)
				continue;
		} else if (strchr(dv->devname, '/') == NULL &&
			   strchr(dv->devname, ':') == NULL &&
			   strlen(dv->devname) < 50) {
			/* Assume this is a kernel-internal name like 'sda1' */
			int found = 0;
			char dname[55];
			if (dv->disposition != 'r' && dv->disposition != 'f') {
				fprintf(stderr, Name ": %s only meaningful "
					"with -r or -f, not -%c\n",
					dv->devname, dv->disposition);
				return 1;
			}

			sprintf(dname, "dev-%s", dv->devname);
			sysfd = sysfs_open(fd2devnum(fd), dname, "block/dev");
			if (sysfd >= 0) {
				char dn[20];
				int mj,mn;
				if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
				    sscanf(dn, "%d:%d", &mj,&mn) == 2) {
					stb.st_rdev = makedev(mj,mn);
					found = 1;
				}
				close(sysfd);
				sysfd = -1;
			}
			if (!found) {
				sysfd = sysfs_open(fd2devnum(fd), dname, "state");
				if (sysfd < 0) {
					fprintf(stderr, Name ": %s does not appear "
						"to be a component of %s\n",
						dv->devname, devname);
					return 1;
				}
			}
		} else {
			j = 0;

			tfd = dev_open(dv->devname, O_RDONLY);
			if (tfd < 0 && dv->disposition == 'r' &&
			    lstat(dv->devname, &stb) == 0)
				/* Be happy, the lstat worked, that is
				 * enough for --remove
				 */
				;
			else {
				if (tfd < 0 || fstat(tfd, &stb) != 0) {
					fprintf(stderr, Name ": cannot find %s: %s\n",
						dv->devname, strerror(errno));
					if (tfd >= 0)
						close(tfd);
					return 1;
				}
				close(tfd);
				tfd = -1;
			}
			if ((stb.st_mode & S_IFMT) != S_IFBLK) {
				fprintf(stderr, Name ": %s is not a "
					"block device.\n",
					dv->devname);
				return 1;
			}
		}
		switch(dv->disposition){
		default:
			fprintf(stderr, Name ": internal error - devmode[%s]=%d\n",
				dv->devname, dv->disposition);
			return 1;
		case 'a':
			/* add the device */
			if (subarray) {
				fprintf(stderr, Name ": Cannot add disks to a"
					" \'member\' array, perform this"
					" operation on the parent container\n");
				return 1;
			}
			/* Make sure it isn't in use (in 2.6 or later) */
			tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT);
			if (tfd < 0 && add_dev != dv->devname)
				continue;
			if (tfd < 0) {
				fprintf(stderr, Name ": Cannot open %s: %s\n",
					dv->devname, strerror(errno));
				return 1;
			}

			st = dup_super(tst);

			if (array.not_persistent==0)
				st->ss->load_super(st, tfd, NULL);

			if (add_dev == dv->devname) {
				if (!get_dev_size(tfd, dv->devname, &ldsize)) {
					close(tfd);
					return 1;
				}
			} else if (!get_dev_size(tfd, NULL, &ldsize)) {
				close(tfd);
				tfd = -1;
				continue;
			}

			if (!tst->ss->external &&
			    array.major_version == 0 &&
			    md_get_version(fd)%100 < 2) {
				close(tfd);
				tfd = -1;
				if (ioctl(fd, HOT_ADD_DISK,
					  (unsigned long)stb.st_rdev)==0) {
					if (verbose >= 0)
						fprintf(stderr, Name ": hot added %s\n",
							add_dev);
					continue;
				}

				fprintf(stderr, Name ": hot add failed for %s: %s\n",
					add_dev, strerror(errno));
				return 1;
			}

			if (array.not_persistent == 0 || tst->ss->external) {

				/* need to find a sample superblock to copy, and
				 * a spare slot to use.
				 * For 'external' array (well, container based),
				 * We can just load the metadata for the array.
				 */
				if (tst->sb)
					/* already loaded */;
				else if (tst->ss->external) {
					tst->ss->load_container(tst, fd, NULL);
				} else for (j = 0; j < tst->max_devs; j++) {
					char *dev;
					int dfd;
					disc.number = j;
					if (ioctl(fd, GET_DISK_INFO, &disc))
						continue;
					if (disc.major==0 && disc.minor==0)
						continue;
					if ((disc.state & 4)==0) continue; /* sync */
					/* Looks like a good device to try */
					dev = map_dev(disc.major, disc.minor, 1);
					if (!dev) continue;
					dfd = dev_open(dev, O_RDONLY);
					if (dfd < 0) continue;
					if (tst->ss->load_super(tst, dfd,
								NULL)) {
						close(dfd);
						continue;
					}
					close(dfd);
					break;
				}
				/* FIXME this is a bad test to be using */
				if (!tst->sb) {
					close(tfd);
					fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
					return 1;
				}

				/* Make sure device is large enough */
				if (tst->ss->avail_size(tst, ldsize/512) <
				    array_size) {
					close(tfd);
					tfd = -1;
					if (add_dev != dv->devname)
						continue;
					fprintf(stderr, Name ": %s not large enough to join array\n",
						dv->devname);
					return 1;
				}

				/* Possibly this device was recently part of the array
				 * and was temporarily removed, and is now being re-added.
				 * If so, we can simply re-add it.
				 */
				tst->ss->uuid_from_super(tst, duuid);

				if (st->sb) {
					struct mdinfo mdi;
					st->ss->getinfo_super(st, &mdi, NULL);
					st->ss->uuid_from_super(st, ouuid);
					if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
					    !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
					    memcmp(duuid, ouuid, sizeof(ouuid))==0) {
						/* look like it is worth a try.  Need to
						 * make sure kernel will accept it though.
						 */
						/* re-add doesn't work for version-1 superblocks
						 * before 2.6.18 :-(
						 */
						if (array.major_version == 1 &&
						    get_linux_version() <= 2006018)
							goto skip_re_add;
						disc.number = mdi.disk.number;
						if (ioctl(fd, GET_DISK_INFO, &disc) != 0
						    || disc.major != 0 || disc.minor != 0
						    || !enough_fd(fd))
							goto skip_re_add;
						disc.major = major(stb.st_rdev);
						disc.minor = minor(stb.st_rdev);
						disc.number = mdi.disk.number;
						disc.raid_disk = mdi.disk.raid_disk;
						disc.state = mdi.disk.state;
						if (dv->writemostly == 1)
							disc.state |= 1 << MD_DISK_WRITEMOSTLY;
						if (dv->writemostly == 2)
							disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
						remove_partitions(tfd);
						close(tfd);
						tfd = -1;
						if (update) {
							int rv = -1;
							tfd = dev_open(dv->devname, O_RDWR);

							if (tfd >= 0)
								rv = st->ss->update_super(
									st, NULL, update,
									devname, verbose, 0, NULL);
							if (rv == 0)
								rv = st->ss->store_super(st, tfd);
							close(tfd);
							tfd = -1;
							if (rv != 0) {
								fprintf(stderr, Name ": failed to update"
									" superblock during re-add\n");
								return 1;
							}
						}
						/* don't even try if disk is marked as faulty */
						errno = 0;
						if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
							if (verbose >= 0)
								fprintf(stderr, Name ": re-added %s\n", add_dev);
							count++;
							continue;
						}
						if (errno == ENOMEM || errno == EROFS) {
							fprintf(stderr, Name ": add new device failed for %s: %s\n",
								add_dev, strerror(errno));
							if (add_dev != dv->devname)
								continue;
							return 1;
						}
					skip_re_add:
						re_add_failed = 1;
					}
					st->ss->free_super(st);
				}
				if (add_dev != dv->devname) {
					if (verbose > 0)
						fprintf(stderr, Name
							": --re-add for %s to %s is not possible\n",
							add_dev, devname);
					if (tfd >= 0) {
						close(tfd);
						tfd = -1;
					}
					continue;
				}
				if (dv->re_add) {
					if (tfd >= 0)
						close(tfd);
					fprintf(stderr, Name
						": --re-add for %s to %s is not possible\n",
						dv->devname, devname);
					return 1;
				}
				if (re_add_failed) {
					fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
						dv->devname, devname);
					fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
						dv->devname);
					fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",	
						dv->devname);
					if (tfd >= 0)
						close(tfd);
					return 1;
				}
			} else {
				/* non-persistent. Must ensure that new drive
				 * is at least array.size big.
				 */
				if (ldsize/512 < array_size) {
					fprintf(stderr, Name ": %s not large enough to join array\n",
						dv->devname);
					if (tfd >= 0)
						close(tfd);
					return 1;
				}
			}
			/* committed to really trying this device now*/
			if (tfd >= 0) {
				remove_partitions(tfd);
				close(tfd);
				tfd = -1;
			}
			/* in 2.6.17 and earlier, version-1 superblocks won't
			 * use the number we write, but will choose a free number.
			 * we must choose the same free number, which requires
			 * starting at 'raid_disks' and counting up
			 */
			for (j = array.raid_disks; j< tst->max_devs; j++) {
				disc.number = j;
				if (ioctl(fd, GET_DISK_INFO, &disc))
					break;
				if (disc.major==0 && disc.minor==0)
					break;
				if (disc.state & 8) /* removed */
					break;
			}
			disc.major = major(stb.st_rdev);
			disc.minor = minor(stb.st_rdev);
			disc.number =j;
			disc.state = 0;
			if (array.not_persistent==0) {
				int dfd;
				if (dv->writemostly == 1)
					disc.state |= 1 << MD_DISK_WRITEMOSTLY;
				dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
				if (tst->ss->add_to_super(tst, &disc, dfd,
							  dv->devname)) {
					close(dfd);
					return 1;
				}
				if (tst->ss->write_init_super(tst)) {
					close(dfd);
					return 1;
				}
			} else if (dv->re_add) {
				/*  this had better be raid1.
				 * As we are "--re-add"ing we must find a spare slot
				 * to fill.
				 */
				char *used = malloc(array.raid_disks);
				memset(used, 0, array.raid_disks);
				for (j=0; j< tst->max_devs; j++) {
					mdu_disk_info_t disc2;
					disc2.number = j;
					if (ioctl(fd, GET_DISK_INFO, &disc2))
						continue;
					if (disc2.major==0 && disc2.minor==0)
						continue;
					if (disc2.state & 8) /* removed */
						continue;
					if (disc2.raid_disk < 0)
						continue;
					if (disc2.raid_disk > array.raid_disks)
						continue;
					used[disc2.raid_disk] = 1;
				}
				for (j=0 ; j<array.raid_disks; j++)
					if (!used[j]) {
						disc.raid_disk = j;
						disc.state |= (1<<MD_DISK_SYNC);
						break;
					}
				free(used);
			}
			if (dv->writemostly == 1)
				disc.state |= (1 << MD_DISK_WRITEMOSTLY);
			if (tst->ss->external) {
				/* add a disk
				 * to an external metadata container */
				struct mdinfo new_mdi;
				struct mdinfo *sra;
				int container_fd;
				int devnum = fd2devnum(fd);
				int dfd;

				container_fd = open_dev_excl(devnum);
				if (container_fd < 0) {
					fprintf(stderr, Name ": add failed for %s:"
						" could not get exclusive access to container\n",
						dv->devname);
					tst->ss->free_super(tst);
					return 1;
				}

				dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
				if (mdmon_running(tst->container_dev))
					tst->update_tail = &tst->updates;
				if (tst->ss->add_to_super(tst, &disc, dfd,
							  dv->devname)) {
					close(dfd);
					close(container_fd);
					return 1;
				}
				if (tst->update_tail)
					flush_metadata_updates(tst);
				else
					tst->ss->sync_metadata(tst);

				sra = sysfs_read(container_fd, -1, 0);
				if (!sra) {
					fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n",
						dv->devname);
					close(container_fd);
					tst->ss->free_super(tst);
					return 1;
				}
				sra->array.level = LEVEL_CONTAINER;
				/* Need to set data_offset and component_size */
				tst->ss->getinfo_super(tst, &new_mdi, NULL);
				new_mdi.disk.major = disc.major;
				new_mdi.disk.minor = disc.minor;
				new_mdi.recovery_start = 0;
				/* Make sure fds are closed as they are O_EXCL which
				 * would block add_disk */
				tst->ss->free_super(tst);
				if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
					fprintf(stderr, Name ": add new device to external metadata"
						" failed for %s\n", dv->devname);
					close(container_fd);
					sysfs_free(sra);
					return 1;
				}
				ping_monitor_by_id(devnum);
				sysfs_free(sra);
				close(container_fd);
			} else {
				tst->ss->free_super(tst);
				if (ioctl(fd, ADD_NEW_DISK, &disc)) {
					fprintf(stderr, Name ": add new device failed for %s as %d: %s\n",
						dv->devname, j, strerror(errno));
					return 1;
				}
			}
			if (verbose >= 0)
				fprintf(stderr, Name ": added %s\n", dv->devname);
			break;

		case 'r':
			/* hot remove */
			if (subarray) {
				fprintf(stderr, Name ": Cannot remove disks from a"
					" \'member\' array, perform this"
					" operation on the parent container\n");
				if (sysfd >= 0)
					close(sysfd);
				return 1;
			}
			if (tst->ss->external) {
				/* To remove a device from a container, we must
				 * check that it isn't in use in an array.
				 * This involves looking in the 'holders'
				 * directory - there must be just one entry,
				 * the container.
				 * To ensure that it doesn't get used as a
				 * hold spare while we are checking, we
				 * get an O_EXCL open on the container
				 */
				int dnum = fd2devnum(fd);
				lfd = open_dev_excl(dnum);
				if (lfd < 0) {
					fprintf(stderr, Name
						": Cannot get exclusive access "
						" to container - odd\n");
					if (sysfd >= 0)
						close(sysfd);
					return 1;
				}
				/* in the detached case it is not possible to
				 * check if we are the unique holder, so just
				 * rely on the 'detached' checks
				 */
				if (strcmp(dv->devname, "detached") == 0 ||
				    sysfd >= 0 ||
				    sysfs_unique_holder(dnum, stb.st_rdev))
					/* pass */;
				else {
					fprintf(stderr, Name
						": %s is %s, cannot remove.\n",
						dnprintable,
						errno == EEXIST ? "still in use":
						"not a member");
					close(lfd);
					return 1;
				}
			}
			/* FIXME check that it is a current member */
			if (sysfd >= 0) {
				/* device has been removed and we don't know
				 * the major:minor number
				 */
				int n = write(sysfd, "remove", 6);
				if (n != 6)
					err = -1;
				else
					err = 0;
				close(sysfd);
				sysfd = -1;
			} else {
				err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
				if (err && errno == ENODEV) {
					/* Old kernels rejected this if no personality
					 * registered */
					struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
					struct mdinfo *dv = NULL;
					if (sra)
						dv = sra->devs;
					for ( ; dv ; dv=dv->next)
						if (dv->disk.major == (int)major(stb.st_rdev) &&
						    dv->disk.minor == (int)minor(stb.st_rdev))
							break;
					if (dv)
						err = sysfs_set_str(sra, dv,
								    "state", "remove");
					else
						err = -1;
					if (sra)
						sysfs_free(sra);
				}
			}
			if (err) {
				fprintf(stderr, Name ": hot remove failed "
					"for %s: %s\n",	dnprintable,
					strerror(errno));
				if (lfd >= 0)
					close(lfd);
				return 1;
			}
			if (tst->ss->external) {
				/*
				 * Before dropping our exclusive open we make an
				 * attempt at preventing mdmon from seeing an
				 * 'add' event before reconciling this 'remove'
				 * event.
				 */
				char *name = devnum2devname(fd2devnum(fd));

				if (!name) {
					fprintf(stderr, Name ": unable to get container name\n");
					return 1;
				}

				ping_manager(name);
				free(name);
			}
			if (lfd >= 0)
				close(lfd);
			count++;
			if (verbose >= 0)
				fprintf(stderr, Name ": hot removed %s from %s\n",
					dnprintable, devname);
			break;

		case 'f': /* set faulty */
			/* FIXME check current member */
			if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
			    (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
						(unsigned long) stb.st_rdev))) {
				fprintf(stderr, Name ": set device faulty failed for %s:  %s\n",
					dnprintable, strerror(errno));
				if (sysfd >= 0)
					close(sysfd);
				return 1;
			}
			if (sysfd >= 0)
				close(sysfd);
			sysfd = -1;
			count++;
			if (verbose >= 0)
				fprintf(stderr, Name ": set %s faulty in %s\n",
					dnprintable, devname);
			break;
		}
	}
	if (test && count == 0)
		return 2;
	return 0;
}
Пример #10
0
/*
 * Manage_runstop - run or stop an md array that is already configured.
 *
 * devname: path of the md device; used in messages and to re-open the
 *          device with O_EXCL when stopping.
 * fd:      open descriptor on the array.  When stopping, this descriptor
 *          is closed and the local copy is replaced by a fresh exclusive
 *          open; the replacement is not closed here.
 * runstop: > 0 runs the array (RUN_ARRAY), < 0 stops it (STOP_ARRAY),
 *          0 only performs the driver version check.
 * quiet:   == 0 prints failure messages,
 *          >  0 means really be quiet,
 *          <  0 means the caller will try again if this fails.
 *
 * Requires md driver >= 0.90.0.
 * Returns 0 on success, 1 on failure.
 */
int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
	mdu_param_t param; /* unused */
	struct mdinfo *mdi = NULL;
	int rv = 0;

	if (runstop == -1 && md_get_version(fd) < 9000) {
		if (ioctl(fd, STOP_MD, 0)) {
			if (quiet == 0) fprintf(stderr,
						Name ": stopping device %s "
						"failed: %s\n",
						devname, strerror(errno));
			return 1;
		}
	}

	if (md_get_version(fd) < 9000) {
		fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
		return 1;
	}

	if (runstop > 0) {
		if (ioctl(fd, RUN_ARRAY, &param)) {
			fprintf(stderr, Name ": failed to run array %s: %s\n",
				devname, strerror(errno));
			return 1;
		}
		if (quiet <= 0)
			fprintf(stderr, Name ": started %s\n", devname);
	} else if (runstop < 0) {
		struct map_ent *map = NULL;
		struct stat stb;
		int devnum;
		int err = 0;
		int count;

		/* If this is an mdmon managed array, just write 'inactive'
		 * to the array state and let mdmon clear up.
		 */
		devnum = fd2devnum(fd);
		/* Get EXCL access first.  If this fails, then attempting
		 * to stop is probably a bad idea.
		 */
		close(fd);
		fd = open(devname, O_RDONLY|O_EXCL);
		if (fd < 0 || fd2devnum(fd) != devnum) {
			if (fd >= 0)
				close(fd);
			fprintf(stderr,
				Name ": Cannot get exclusive access to %s:"
				"Perhaps a running "
				"process, mounted filesystem "
				"or active volume group?\n",
				devname);
			return 1;
		}
		mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
		if (mdi &&
		    mdi->array.level > 0 &&
		    is_subarray(mdi->text_version)) {
			/* This is mdmon managed. */
			close(fd);

			/* Retry while the array is busy: 25 * 200ms = 5 seconds */
			count = 25;
			while (count &&
			       (err = sysfs_set_str(mdi, NULL,
						    "array_state",
						    "inactive")) < 0
			       && errno == EBUSY) {
				usleep(200000);
				count--;
			}
			if (err) {
				/* A failure is a failure even when we are
				 * asked not to report it.
				 */
				if (!quiet)
					fprintf(stderr, Name
						": failed to stop array %s: %s\n",
						devname, strerror(errno));
				rv = 1;
				goto out;
			}

			/* Give monitor a chance to act */
			ping_monitor(mdi->text_version);

			fd = open_dev_excl(devnum);
			if (fd < 0) {
				fprintf(stderr, Name
					": failed to completely stop %s"
					": Device is busy\n",
					devname);
				rv = 1;
				goto out;
			}
		} else if (mdi &&
			   mdi->array.major_version == -1 &&
			   mdi->array.minor_version == -2 &&
			   !is_subarray(mdi->text_version)) {
			struct mdstat_ent *mds, *m;
			/* container, possibly mdmon-managed.
			 * Make sure mdmon isn't opening it, which
			 * would interfere with the 'stop'
			 */
			ping_monitor(mdi->sys_name);

			/* now check that there are no existing arrays
			 * which are members of this array
			 */
			mds = mdstat_read(0, 0);
			for (m = mds; m; m = m->next)
				if (m->metadata_version &&
				    strncmp(m->metadata_version, "external:", 9)==0 &&
				    is_subarray(m->metadata_version+9) &&
				    devname2devnum(m->metadata_version+10) == devnum) {
					if (!quiet)
						fprintf(stderr, Name
							": Cannot stop container %s: "
							"member %s still active\n",
							devname, m->dev);
					free_mdstat(mds);
					rv = 1;
					goto out;
				}
			/* No active member found: the list is no longer needed */
			if (mds)
				free_mdstat(mds);
		}

		/* As we have an O_EXCL open, any use of the device
		 * which blocks STOP_ARRAY is probably a transient use,
		 * so it is reasonable to retry for a while - 5 seconds.
		 */
		count = 25;
		err = 0;
		while (count && fd >= 0
		       && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
		       && errno == EBUSY) {
			usleep(200000);
			count--;
		}
		if (fd >= 0 && err) {
			if (quiet == 0) {
				/* fprintf may clobber errno, so keep a copy
				 * for the EBUSY hint below.
				 */
				int saved_errno = errno;

				fprintf(stderr, Name
					": failed to stop array %s: %s\n",
					devname, strerror(saved_errno));
				if (saved_errno == EBUSY)
					fprintf(stderr, "Perhaps a running "
						"process, mounted filesystem "
						"or active volume group?\n");
			}
			rv = 1;
			goto out;
		}
		/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
		 * was stopped, so We'll do it here just to be sure.  Drop any
		 * partitions as well...
		 */
		if (fd >= 0)
			ioctl(fd, BLKRRPART, 0);
		if (mdi)
			sysfs_uevent(mdi, "change");

		/* Remove the device nodes ourselves when /dev/.udev is
		 * missing or MDADM_NO_UDEV is set.
		 */
		if (devnum != NoMdDev &&
		    (stat("/dev/.udev", &stb) != 0 ||
		     check_env("MDADM_NO_UDEV"))) {
			struct map_ent *mp = map_by_devnum(&map, devnum);
			remove_devices(devnum, mp ? mp->path : NULL);
		}

		if (quiet <= 0)
			fprintf(stderr, Name ": stopped %s\n", devname);
		map_lock(&map);
		map_remove(&map, devnum);
		map_unlock(&map);
	}
out:
	/* Single exit point for the stop path so the sysfs info is always freed */
	if (mdi)
		sysfs_free(mdi);
	return rv;
}