void unblock_monitor(char *container, const int unfreeze) { struct mdstat_ent *ent, *e; struct mdinfo *sra = NULL; int to_ping = 0; ent = mdstat_read(0, 0); if (!ent) { fprintf(stderr, Name ": failed to read /proc/mdstat while unblocking container\n"); return; } /* unfreeze container contents */ for (e = ent; e; e = e->next) { if (!is_container_member(e, container)) continue; sysfs_free(sra); sra = sysfs_read(-1, e->devnum, GET_VERSION|GET_LEVEL); if (sra->array.level > 0) to_ping++; if (unblock_subarray(sra, unfreeze)) fprintf(stderr, Name ": Failed to unfreeze %s\n", e->dev); } if (to_ping) ping_monitor(container); sysfs_free(sra); free_mdstat(ent); }
/*
 * Release the PCI path objects attached to an interface entry.
 * Each of the two paths is handed to sysfs_free() only when it is set.
 */
static void iov_cleanup(struct if_entry *entry)
{
	if (entry->pci_path != NULL) {
		sysfs_free(entry->pci_path);
	}

	if (entry->pci_physfn_path != NULL) {
		sysfs_free(entry->pci_physfn_path);
	}
}
void RebuildMap(void) { struct mdstat_ent *mdstat = mdstat_read(0, 0); struct mdstat_ent *md; struct map_ent *map = NULL; int require_homehost; char sys_hostname[256]; char *homehost = conf_get_homehost(&require_homehost); if (homehost == NULL || strcmp(homehost, "<system>")==0) { if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) { sys_hostname[sizeof(sys_hostname)-1] = 0; homehost = sys_hostname; } } for (md = mdstat ; md ; md = md->next) { struct mdinfo *sra = sysfs_read(-1, md->devnm, GET_DEVS); struct mdinfo *sd; if (!sra) continue; for (sd = sra->devs ; sd ; sd = sd->next) { char namebuf[100]; char dn[30]; int dfd; int ok; int devid; struct supertype *st; char *subarray = NULL; char *path; struct mdinfo *info; sprintf(dn, "%d:%d", sd->disk.major, sd->disk.minor); dfd = dev_open(dn, O_RDONLY); if (dfd < 0) continue; st = guess_super(dfd); if ( st == NULL) ok = -1; else { subarray = get_member_info(md); ok = st->ss->load_super(st, dfd, NULL); } close(dfd); if (ok != 0) continue; if (subarray) info = st->ss->container_content(st, subarray); else { info = xmalloc(sizeof(*info)); st->ss->getinfo_super(st, info, NULL); } if (!info) continue; devid = devnm2devid(md->devnm); path = map_dev(major(devid), minor(devid), 0); if (path == NULL || strncmp(path, "/dev/md/", 8) != 0) { /* We would really like a name that provides * an MD_DEVNAME for udev. * The name needs to be unique both in /dev/md/ * and in this mapfile. * It needs to match what -I or -As would come * up with. * That means: * Check if array is in mdadm.conf * - if so use that. * determine trustworthy from homehost etc * find a unique name based on metadata name. 
* */ struct mddev_ident *match = conf_match(st, info, NULL, 0, NULL); struct stat stb; if (match && match->devname && match->devname[0] == '/') { path = match->devname; if (path[0] != '/') { strcpy(namebuf, "/dev/md/"); strcat(namebuf, path); path = namebuf; } } else { int unum = 0; char *sep = "_"; const char *name; int conflict = 1; if ((homehost == NULL || st->ss->match_home(st, homehost) != 1) && st->ss->match_home(st, "any") != 1 && (require_homehost || ! conf_name_is_free(info->name))) /* require a numeric suffix */ unum = 0; else /* allow name to be used as-is if no conflict */ unum = -1; name = info->name; if (!*name) { name = st->ss->name; if (!isdigit(name[strlen(name)-1]) && unum == -1) { unum = 0; sep = ""; } } if (strchr(name, ':')) { /* Probably a uniquifying * hostname prefix. Allow * without a suffix, and strip * hostname if it is us. */ if (homehost && unum == -1 && strncmp(name, homehost, strlen(homehost)) == 0 && name[strlen(homehost)] == ':') name += strlen(homehost)+1; unum = -1; } while (conflict) { if (unum >= 0) sprintf(namebuf, "/dev/md/%s%s%d", name, sep, unum); else sprintf(namebuf, "/dev/md/%s", name); unum++; if (lstat(namebuf, &stb) != 0 && (map == NULL || !map_by_name(&map, namebuf+8))) conflict = 0; } path = namebuf; } } map_add(&map, md->devnm, info->text_version, info->uuid, path); st->ss->free_super(st); free(info); break; } sysfs_free(sra); } /* Only trigger a change if we wrote a new map file */ if (map_write(map)) for (md = mdstat ; md ; md = md->next) { struct mdinfo *sra = sysfs_read(-1, md->devnm, GET_VERSION); if (sra) sysfs_uevent(sra, "change"); sysfs_free(sra); } map_free(map); free_mdstat(mdstat); }
static int mdmon(char *devnm, int must_fork, int takeover) { int mdfd; struct mdinfo *mdi, *di; struct supertype *container; sigset_t set; struct sigaction act; int pfd[2]; int status; int ignore; pid_t victim = -1; int victim_sock = -1; dprintf("starting mdmon for %s\n", devnm); mdfd = open_dev(devnm); if (mdfd < 0) { pr_err("%s: %s\n", devnm, strerror(errno)); return 1; } if (md_get_version(mdfd) < 0) { pr_err("%s: Not an md device\n", devnm); return 1; } /* Fork, and have the child tell us when they are ready */ if (must_fork) { if (pipe(pfd) != 0) { pr_err("failed to create pipe\n"); return 1; } switch(fork()) { case -1: pr_err("failed to fork: %s\n", strerror(errno)); return 1; case 0: /* child */ close(pfd[0]); break; default: /* parent */ close(pfd[1]); if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) { wait(&status); status = WEXITSTATUS(status); } close(pfd[0]); return status; } } else pfd[0] = pfd[1] = -1; container = xcalloc(1, sizeof(*container)); strcpy(container->devnm, devnm); container->arrays = NULL; container->sock = -1; mdi = sysfs_read(mdfd, container->devnm, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { pr_err("failed to load sysfs info for %s\n", container->devnm); exit(3); } if (mdi->array.level != UnSet) { pr_err("%s is not a container - cannot monitor\n", devnm); exit(3); } if (mdi->array.major_version != -1 || mdi->array.minor_version != -2) { pr_err("%s does not use external metadata - cannot monitor\n", devnm); exit(3); } container->ss = version_to_superswitch(mdi->text_version); if (container->ss == NULL) { pr_err("%s uses unsupported metadata: %s\n", devnm, mdi->text_version); exit(3); } container->devs = NULL; for (di = mdi->devs; di; di = di->next) { struct mdinfo *cd = xmalloc(sizeof(*cd)); *cd = *di; cd->next = container->devs; container->devs = cd; } sysfs_free(mdi); /* SIGUSR is sent between parent and child. So both block it * and enable it only with pselect. 
*/ sigemptyset(&set); sigaddset(&set, SIGUSR1); sigaddset(&set, SIGTERM); sigprocmask(SIG_BLOCK, &set, NULL); act.sa_handler = wake_me; act.sa_flags = 0; sigaction(SIGUSR1, &act, NULL); act.sa_handler = term; sigaction(SIGTERM, &act, NULL); act.sa_handler = SIG_IGN; sigaction(SIGPIPE, &act, NULL); victim = mdmon_pid(container->devnm); if (victim >= 0) victim_sock = connect_monitor(container->devnm); ignore = chdir("/"); if (!takeover && victim > 0 && victim_sock >= 0) { if (fping_monitor(victim_sock) == 0) { pr_err("%s already managed\n", container->devnm); exit(3); } close(victim_sock); victim_sock = -1; } if (container->ss->load_container(container, mdfd, devnm)) { pr_err("Cannot load metadata for %s\n", devnm); exit(3); } close(mdfd); /* Ok, this is close enough. We can say goodbye to our parent now. */ if (victim > 0) remove_pidfile(devnm); if (make_pidfile(devnm) < 0) { exit(3); } container->sock = make_control_sock(devnm); status = 0; if (pfd[1] >= 0) { if (write(pfd[1], &status, sizeof(status)) < 0) pr_err("failed to notify our parent: %d\n", getppid()); close(pfd[1]); } mlockall(MCL_CURRENT | MCL_FUTURE); if (clone_monitor(container) < 0) { pr_err("failed to start monitor process: %s\n", strerror(errno)); exit(2); } if (victim > 0) { try_kill_monitor(victim, container->devnm, victim_sock); if (victim_sock >= 0) close(victim_sock); } setsid(); close(0); open("/dev/null", O_RDWR); close(1); ignore = dup(0); #ifndef DEBUG close(2); ignore = dup(0); #endif /* This silliness is to stop the compiler complaining * that we ignore 'ignore' */ if (ignore) ignore++; do_manager(container); exit(0); }
/** * block_monitor - prevent mdmon spare assignment * @container - container to block * @freeze - flag to additionally freeze sync_action * * This is used by the reshape code to freeze the container, and the * auto-rebuild implementation to atomically move spares. * In both cases we need to stop mdmon from assigning spares to replace * failed devices as we might have other plans for the spare. * For the reshape case we also need to 'freeze' sync_action so that * no recovery happens until we have fully prepared for the reshape. * * We tell mdmon that the array is frozen by marking the 'metadata' name * with a leading '-'. The previously told mdmon "Don't make this array * read/write, leave it readonly". Now it means a more general "Don't * reconfigure this array at all". * As older versions of mdmon (which might run from initrd) don't understand * this, we first check that the running mdmon is new enough. */ int block_monitor(char *container, const int freeze) { int devnum = devname2devnum(container); struct mdstat_ent *ent, *e, *e2; struct mdinfo *sra = NULL; char *version = NULL; char buf[64]; int rv = 0; if (!mdmon_running(devnum)) { /* if mdmon is not active we assume that any instance that is * later started will match the current mdadm version, if this * assumption is violated we may inadvertantly rebuild an array * that was meant for reshape, or start rebuild on a spare that * was to be moved to another container */ /* pass */; } else { int ver; version = ping_monitor_version(container); ver = version ? 
mdadm_version(version) : -1; free(version); if (ver < 3002000) { fprintf(stderr, Name ": mdmon instance for %s cannot be disabled\n", container); return -1; } } ent = mdstat_read(0, 0); if (!ent) { fprintf(stderr, Name ": failed to read /proc/mdstat while disabling mdmon\n"); return -1; } /* freeze container contents */ for (e = ent; e; e = e->next) { if (!is_container_member(e, container)) continue; sysfs_free(sra); sra = sysfs_read(-1, e->devnum, GET_VERSION); if (!sra) { fprintf(stderr, Name ": failed to read sysfs for subarray%s\n", to_subarray(e, container)); break; } /* can't reshape an array that we can't monitor */ if (sra->text_version[0] == '-') break; if (freeze && sysfs_freeze_array(sra) < 1) break; /* flag this array to not be modified by mdmon (close race with * takeover in reshape case and spare reassignment in the * auto-rebuild case) */ if (block_subarray(sra)) break; ping_monitor(container); /* check that we did not race with recovery */ if ((freeze && !sysfs_attribute_available(sra, NULL, "sync_action")) || (freeze && sysfs_attribute_available(sra, NULL, "sync_action") && sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 && strcmp(buf, "frozen\n") == 0)) /* pass */; else { unblock_subarray(sra, 0); break; } /* Double check against races - there should be no spares * or part-spares */ sysfs_free(sra); sra = sysfs_read(-1, e->devnum, GET_DEVS | GET_STATE); if (sra && sra->array.spare_disks > 0) { unblock_subarray(sra, freeze); break; } } if (e) { fprintf(stderr, Name ": failed to freeze subarray%s\n", to_subarray(e, container)); /* thaw the partially frozen container */ for (e2 = ent; e2 && e2 != e; e2 = e2->next) { if (!is_container_member(e2, container)) continue; sysfs_free(sra); sra = sysfs_read(-1, e2->devnum, GET_VERSION); if (unblock_subarray(sra, freeze)) fprintf(stderr, Name ": Failed to unfreeze %s\n", e2->dev); } ping_monitor(container); /* cleared frozen */ rv = -1; } sysfs_free(sra); free_mdstat(ent); return rv; }
static dev_t container_choose_spare(struct state *from, struct state *to, struct domainlist *domlist, unsigned long long min_size, int active) { /* This is similar to choose_spare, but we cannot trust devstate, * so we need to read the metadata instead */ struct mdinfo *list; struct supertype *st = from->metadata; int fd = open(from->devname, O_RDONLY); int err; dev_t dev = 0; if (fd < 0) return 0; if (!st->ss->getinfo_super_disks) { close(fd); return 0; } err = st->ss->load_container(st, fd, NULL); close(fd); if (err) return 0; if (from == to) { /* We must check if number of active disks has not increased * since ioctl in main loop. mdmon may have added spare * to subarray. If so we do not need to look for more spares * so return non zero value */ int active_cnt = 0; struct mdinfo *dp; list = st->ss->getinfo_super_disks(st); if (!list) { st->ss->free_super(st); return 1; } dp = list->devs; while (dp) { if (dp->disk.state & (1<<MD_DISK_SYNC) && !(dp->disk.state & (1<<MD_DISK_FAULTY))) active_cnt++; dp = dp->next; } sysfs_free(list); if (active < active_cnt) { /* Spare just activated.*/ st->ss->free_super(st); return 1; } } /* We only need one spare so full list not needed */ list = container_choose_spares(st, min_size, domlist, from->spare_group, to->metadata->ss->name, 1); if (list) { struct mdinfo *disks = list->devs; if (disks) dev = makedev(disks->disk.major, disks->disk.minor); sysfs_free(list); } st->ss->free_super(st); return dev; }
int WaitClean(char *dev, int sock, int verbose) { int fd; struct mdinfo *mdi; int rv = 1; char devnm[32]; fd = open(dev, O_RDONLY); if (fd < 0) { if (verbose) pr_err("Couldn't open %s: %s\n", dev, strerror(errno)); return 1; } strcpy(devnm, fd2devnm(fd)); mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE); if (!mdi) { if (verbose) pr_err("Failed to read sysfs attributes for %s\n", dev); close(fd); return 0; } switch(mdi->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: case 0: /* safemode delay is irrelevant for these levels */ rv = 0; } /* for internal metadata the kernel handles the final clean * transition, containers can never be dirty */ if (!is_subarray(mdi->text_version)) rv = 0; /* safemode disabled ? */ if (mdi->safe_mode_delay == 0) rv = 0; if (rv) { int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state"); char buf[20]; int delay = 5000; /* minimize the safe_mode_delay and prepare to wait up to 5s * for writes to quiesce */ sysfs_set_safemode(mdi, 1); /* wait for array_state to be clean */ while (1) { rv = read(state_fd, buf, sizeof(buf)); if (rv < 0) break; if (sysfs_match_word(buf, clean_states) <= 4) break; rv = sysfs_wait(state_fd, &delay); if (rv < 0 && errno != EINTR) break; lseek(state_fd, 0, SEEK_SET); } if (rv < 0) rv = 1; else if (fping_monitor(sock) == 0 || ping_monitor(mdi->text_version) == 0) { /* we need to ping to close the window between array * state transitioning to clean and the metadata being * marked clean */ rv = 0; } else rv = 1; if (rv && verbose) pr_err("Error waiting for %s to be clean\n", dev); /* restore the original safe_mode_delay */ sysfs_set_safemode(mdi, mdi->safe_mode_delay); close(state_fd); } sysfs_free(mdi); close(fd); return rv; }
struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options) { char fname[PATH_MAX]; char buf[PATH_MAX]; char *base; char *dbase; struct mdinfo *sra; struct mdinfo *dev, **devp; DIR *dir = NULL; struct dirent *de; sra = xcalloc(1, sizeof(*sra)); sysfs_init(sra, fd, devnm); if (sra->sys_name[0] == 0) { free(sra); return NULL; } sprintf(fname, "/sys/block/%s/md/", sra->sys_name); base = fname + strlen(fname); sra->devs = NULL; if (options & GET_VERSION) { strcpy(base, "metadata_version"); if (load_sys(fname, buf, sizeof(buf))) goto abort; if (strncmp(buf, "none", 4) == 0) { sra->array.major_version = sra->array.minor_version = -1; strcpy(sra->text_version, ""); } else if (strncmp(buf, "external:", 9) == 0) { sra->array.major_version = -1; sra->array.minor_version = -2; strcpy(sra->text_version, buf+9); } else { sscanf(buf, "%d.%d", &sra->array.major_version, &sra->array.minor_version); strcpy(sra->text_version, buf); } } if (options & GET_LEVEL) { strcpy(base, "level"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->array.level = map_name(pers, buf); } if (options & GET_LAYOUT) { strcpy(base, "layout"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->array.layout = strtoul(buf, NULL, 0); } if (options & GET_DISKS) { strcpy(base, "raid_disks"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->array.raid_disks = strtoul(buf, NULL, 0); } if (options & GET_DEGRADED) { strcpy(base, "degraded"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->array.failed_disks = strtoul(buf, NULL, 0); } if (options & GET_COMPONENT) { strcpy(base, "component_size"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->component_size = strtoull(buf, NULL, 0); /* sysfs reports "K", but we want sectors */ sra->component_size *= 2; } if (options & GET_CHUNK) { strcpy(base, "chunk_size"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->array.chunk_size = strtoul(buf, NULL, 0); } if (options & GET_CACHE) { strcpy(base, "stripe_cache_size"); if 
(load_sys(fname, buf, sizeof(buf))) /* Probably level doesn't support it */ sra->cache_size = 0; else sra->cache_size = strtoul(buf, NULL, 0); } if (options & GET_MISMATCH) { strcpy(base, "mismatch_cnt"); if (load_sys(fname, buf, sizeof(buf))) goto abort; sra->mismatch_cnt = strtoul(buf, NULL, 0); } if (options & GET_SAFEMODE) { int scale = 1; int dot = 0; unsigned i; unsigned long msec; size_t len; strcpy(base, "safe_mode_delay"); if (load_sys(fname, buf, sizeof(buf))) goto abort; /* remove a period, and count digits after it */ len = strlen(buf); for (i = 0; i < len; i++) { if (dot) { if (isdigit(buf[i])) { buf[i-1] = buf[i]; scale *= 10; } buf[i] = 0; } else if (buf[i] == '.') { dot=1; buf[i] = 0; } } msec = strtoul(buf, NULL, 10); msec = (msec * 1000) / scale; sra->safe_mode_delay = msec; } if (options & GET_BITMAP_LOCATION) { strcpy(base, "bitmap/location"); if (load_sys(fname, buf, sizeof(buf))) goto abort; if (strncmp(buf, "file", 4) == 0) sra->bitmap_offset = 1; else if (strncmp(buf, "none", 4) == 0) sra->bitmap_offset = 0; else if (buf[0] == '+') sra->bitmap_offset = strtol(buf+1, NULL, 10); else goto abort; } if (options & GET_ARRAY_STATE) { strcpy(base, "array_state"); if (load_sys(fname, sra->sysfs_array_state, sizeof(sra->sysfs_array_state))) goto abort; } else sra->sysfs_array_state[0] = 0; if (! (options & GET_DEVS)) return sra; /* Get all the devices as well */ *base = 0; dir = opendir(fname); if (!dir) goto abort; sra->array.spare_disks = 0; devp = &sra->devs; sra->devs = NULL; while ((de = readdir(dir)) != NULL) { char *ep; if (de->d_ino == 0 || strncmp(de->d_name, "dev-", 4) != 0) continue; strcpy(base, de->d_name); dbase = base + strlen(base); *dbase++ = '/'; dev = xcalloc(1, sizeof(*dev)); /* Always get slot, major, minor */ strcpy(dbase, "slot"); if (load_sys(fname, buf, sizeof(buf))) { /* hmm... unable to read 'slot' maybe the device * is going away? 
*/ strcpy(dbase, "block"); if (readlink(fname, buf, sizeof(buf)) < 0 && errno != ENAMETOOLONG) { /* ...yup device is gone */ free(dev); continue; } else { /* slot is unreadable but 'block' link * still intact... something bad is happening * so abort */ free(dev); goto abort; } } strcpy(dev->sys_name, de->d_name); dev->disk.raid_disk = strtoul(buf, &ep, 10); if (*ep) dev->disk.raid_disk = -1; strcpy(dbase, "block/dev"); if (load_sys(fname, buf, sizeof(buf))) { /* assume this is a stale reference to a hot * removed device */ free(dev); continue; } sra->array.nr_disks++; sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); /* special case check for block devices that can go 'offline' */ strcpy(dbase, "block/device/state"); if (load_sys(fname, buf, sizeof(buf)) == 0 && strncmp(buf, "offline", 7) == 0) { free(dev); continue; } /* finally add this disk to the array */ *devp = dev; devp = & dev->next; dev->next = NULL; if (options & GET_OFFSET) { strcpy(dbase, "offset"); if (load_sys(fname, buf, sizeof(buf))) goto abort; dev->data_offset = strtoull(buf, NULL, 0); strcpy(dbase, "new_offset"); if (load_sys(fname, buf, sizeof(buf)) == 0) dev->new_data_offset = strtoull(buf, NULL, 0); else dev->new_data_offset = dev->data_offset; } if (options & GET_SIZE) { strcpy(dbase, "size"); if (load_sys(fname, buf, sizeof(buf))) goto abort; dev->component_size = strtoull(buf, NULL, 0) * 2; } if (options & GET_STATE) { dev->disk.state = 0; strcpy(dbase, "state"); if (load_sys(fname, buf, sizeof(buf))) goto abort; if (strstr(buf, "in_sync")) dev->disk.state |= (1<<MD_DISK_SYNC); if (strstr(buf, "faulty")) dev->disk.state |= (1<<MD_DISK_FAULTY); if (dev->disk.state == 0) sra->array.spare_disks++; } if (options & GET_ERROR) { strcpy(buf, "errors"); if (load_sys(fname, buf, sizeof(buf))) goto abort; dev->errors = strtoul(buf, NULL, 0); } } closedir(dir); return sra; abort: if (dir) closedir(dir); sysfs_free(sra); return NULL; }
int Manage_subdevs(char *devname, int fd, struct mddev_dev *devlist, int verbose, int test, char *update) { /* do something to each dev. * devmode can be * 'a' - add the device * try HOT_ADD_DISK * If that fails EINVAL, try ADD_NEW_DISK * 'r' - remove the device HOT_REMOVE_DISK * device can be 'faulty' or 'detached' in which case all * matching devices are removed. * 'f' - set the device faulty SET_DISK_FAULTY * device can be 'detached' in which case any device that * is inaccessible will be marked faulty. * For 'f' and 'r', the device can also be a kernel-internal * name such as 'sdb'. */ struct mddev_dev *add_devlist = NULL; mdu_array_info_t array; mdu_disk_info_t disc; unsigned long long array_size; struct mddev_dev *dv, *next = NULL; struct stat stb; int j, jnext = 0; int tfd = -1; struct supertype *st, *tst; char *subarray = NULL; int duuid[4]; int ouuid[4]; int lfd = -1; int sysfd = -1; int count = 0; /* number of actions taken */ if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": cannot get array info for %s\n", devname); return 1; } /* array.size is only 32 bit and may be truncated. 
* So read from sysfs if possible, and record number of sectors */ array_size = get_component_size(fd); if (array_size <= 0) array_size = array.size * 2; tst = super_by_fd(fd, &subarray); if (!tst) { fprintf(stderr, Name ": unsupport array - version %d.%d\n", array.major_version, array.minor_version); return 1; } stb.st_rdev = 0; for (dv = devlist, j=0 ; dv; dv = next, j = jnext) { unsigned long long ldsize; char dvname[20]; char *dnprintable = dv->devname; char *add_dev = dv->devname; int err; int re_add_failed = 0; next = dv->next; jnext = 0; if (strcmp(dv->devname, "failed")==0 || strcmp(dv->devname, "faulty")==0) { int remaining_disks = array.nr_disks; if (dv->disposition != 'r') { fprintf(stderr, Name ": %s only meaningful " "with -r, not -%c\n", dv->devname, dv->disposition); return 1; } for (; j < 1024 && remaining_disks > 0; j++) { unsigned dev; disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) continue; if (disc.major == 0 && disc.minor == 0) continue; remaining_disks --; if ((disc.state & 1) == 0) /* faulty */ continue; dev = makedev(disc.major, disc.minor); if (stb.st_rdev == dev) /* already did that one */ continue; stb.st_rdev = dev; next = dv; /* same slot again next time - things might * have reshuffled */ jnext = j; sprintf(dvname,"%d:%d", disc.major, disc.minor); dnprintable = dvname; break; } if (next != dv) continue; } else if (strcmp(dv->devname, "detached") == 0) { int remaining_disks = array.nr_disks; if (dv->disposition != 'r' && dv->disposition != 'f') { fprintf(stderr, Name ": %s only meaningful " "with -r of -f, not -%c\n", dv->devname, dv->disposition); return 1; } for (; j < 1024 && remaining_disks > 0; j++) { int sfd; unsigned dev; disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) continue; if (disc.major == 0 && disc.minor == 0) continue; remaining_disks --; sprintf(dvname,"%d:%d", disc.major, disc.minor); sfd = dev_open(dvname, O_RDONLY); if (sfd >= 0) { close(sfd); continue; } if (dv->disposition == 'f' && (disc.state & 1) == 
1) /* already faulty */ continue; if (errno != ENXIO) continue; dev = makedev(disc.major, disc.minor); if (stb.st_rdev == dev) /* already did that one */ continue; stb.st_rdev = dev; next = dv; /* same slot again next time - things might * have reshuffled */ jnext = j; dnprintable = dvname; break; } if (next != dv) continue; } else if (strcmp(dv->devname, "missing") == 0) { if (dv->disposition != 'a' || dv->re_add == 0) { fprintf(stderr, Name ": 'missing' only meaningful " "with --re-add\n"); return 1; } if (add_devlist == NULL) add_devlist = conf_get_devs(); if (add_devlist == NULL) { fprintf(stderr, Name ": no devices to scan for missing members."); continue; } add_dev = add_devlist->devname; add_devlist = add_devlist->next; if (add_devlist != NULL) next = dv; if (stat(add_dev, &stb) < 0) continue; } else if (strchr(dv->devname, '/') == NULL && strchr(dv->devname, ':') == NULL && strlen(dv->devname) < 50) { /* Assume this is a kernel-internal name like 'sda1' */ int found = 0; char dname[55]; if (dv->disposition != 'r' && dv->disposition != 'f') { fprintf(stderr, Name ": %s only meaningful " "with -r or -f, not -%c\n", dv->devname, dv->disposition); return 1; } sprintf(dname, "dev-%s", dv->devname); sysfd = sysfs_open(fd2devnum(fd), dname, "block/dev"); if (sysfd >= 0) { char dn[20]; int mj,mn; if (sysfs_fd_get_str(sysfd, dn, 20) > 0 && sscanf(dn, "%d:%d", &mj,&mn) == 2) { stb.st_rdev = makedev(mj,mn); found = 1; } close(sysfd); sysfd = -1; } if (!found) { sysfd = sysfs_open(fd2devnum(fd), dname, "state"); if (sysfd < 0) { fprintf(stderr, Name ": %s does not appear " "to be a component of %s\n", dv->devname, devname); return 1; } } } else { j = 0; tfd = dev_open(dv->devname, O_RDONLY); if (tfd < 0 && dv->disposition == 'r' && lstat(dv->devname, &stb) == 0) /* Be happy, the lstat worked, that is * enough for --remove */ ; else { if (tfd < 0 || fstat(tfd, &stb) != 0) { fprintf(stderr, Name ": cannot find %s: %s\n", dv->devname, strerror(errno)); if (tfd >= 0) 
close(tfd); return 1; } close(tfd); tfd = -1; } if ((stb.st_mode & S_IFMT) != S_IFBLK) { fprintf(stderr, Name ": %s is not a " "block device.\n", dv->devname); return 1; } } switch(dv->disposition){ default: fprintf(stderr, Name ": internal error - devmode[%s]=%d\n", dv->devname, dv->disposition); return 1; case 'a': /* add the device */ if (subarray) { fprintf(stderr, Name ": Cannot add disks to a" " \'member\' array, perform this" " operation on the parent container\n"); return 1; } /* Make sure it isn't in use (in 2.6 or later) */ tfd = dev_open(add_dev, O_RDONLY|O_EXCL|O_DIRECT); if (tfd < 0 && add_dev != dv->devname) continue; if (tfd < 0) { fprintf(stderr, Name ": Cannot open %s: %s\n", dv->devname, strerror(errno)); return 1; } st = dup_super(tst); if (array.not_persistent==0) st->ss->load_super(st, tfd, NULL); if (add_dev == dv->devname) { if (!get_dev_size(tfd, dv->devname, &ldsize)) { close(tfd); return 1; } } else if (!get_dev_size(tfd, NULL, &ldsize)) { close(tfd); tfd = -1; continue; } if (!tst->ss->external && array.major_version == 0 && md_get_version(fd)%100 < 2) { close(tfd); tfd = -1; if (ioctl(fd, HOT_ADD_DISK, (unsigned long)stb.st_rdev)==0) { if (verbose >= 0) fprintf(stderr, Name ": hot added %s\n", add_dev); continue; } fprintf(stderr, Name ": hot add failed for %s: %s\n", add_dev, strerror(errno)); return 1; } if (array.not_persistent == 0 || tst->ss->external) { /* need to find a sample superblock to copy, and * a spare slot to use. * For 'external' array (well, container based), * We can just load the metadata for the array. 
*/ if (tst->sb) /* already loaded */; else if (tst->ss->external) { tst->ss->load_container(tst, fd, NULL); } else for (j = 0; j < tst->max_devs; j++) { char *dev; int dfd; disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) continue; if (disc.major==0 && disc.minor==0) continue; if ((disc.state & 4)==0) continue; /* sync */ /* Looks like a good device to try */ dev = map_dev(disc.major, disc.minor, 1); if (!dev) continue; dfd = dev_open(dev, O_RDONLY); if (dfd < 0) continue; if (tst->ss->load_super(tst, dfd, NULL)) { close(dfd); continue; } close(dfd); break; } /* FIXME this is a bad test to be using */ if (!tst->sb) { close(tfd); fprintf(stderr, Name ": cannot load array metadata from %s\n", devname); return 1; } /* Make sure device is large enough */ if (tst->ss->avail_size(tst, ldsize/512) < array_size) { close(tfd); tfd = -1; if (add_dev != dv->devname) continue; fprintf(stderr, Name ": %s not large enough to join array\n", dv->devname); return 1; } /* Possibly this device was recently part of the array * and was temporarily removed, and is now being re-added. * If so, we can simply re-add it. */ tst->ss->uuid_from_super(tst, duuid); if (st->sb) { struct mdinfo mdi; st->ss->getinfo_super(st, &mdi, NULL); st->ss->uuid_from_super(st, ouuid); if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) && !(mdi.disk.state & (1<<MD_DISK_FAULTY)) && memcmp(duuid, ouuid, sizeof(ouuid))==0) { /* look like it is worth a try. Need to * make sure kernel will accept it though. 
*/ /* re-add doesn't work for version-1 superblocks * before 2.6.18 :-( */ if (array.major_version == 1 && get_linux_version() <= 2006018) goto skip_re_add; disc.number = mdi.disk.number; if (ioctl(fd, GET_DISK_INFO, &disc) != 0 || disc.major != 0 || disc.minor != 0 || !enough_fd(fd)) goto skip_re_add; disc.major = major(stb.st_rdev); disc.minor = minor(stb.st_rdev); disc.number = mdi.disk.number; disc.raid_disk = mdi.disk.raid_disk; disc.state = mdi.disk.state; if (dv->writemostly == 1) disc.state |= 1 << MD_DISK_WRITEMOSTLY; if (dv->writemostly == 2) disc.state &= ~(1 << MD_DISK_WRITEMOSTLY); remove_partitions(tfd); close(tfd); tfd = -1; if (update) { int rv = -1; tfd = dev_open(dv->devname, O_RDWR); if (tfd >= 0) rv = st->ss->update_super( st, NULL, update, devname, verbose, 0, NULL); if (rv == 0) rv = st->ss->store_super(st, tfd); close(tfd); tfd = -1; if (rv != 0) { fprintf(stderr, Name ": failed to update" " superblock during re-add\n"); return 1; } } /* don't even try if disk is marked as faulty */ errno = 0; if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) { if (verbose >= 0) fprintf(stderr, Name ": re-added %s\n", add_dev); count++; continue; } if (errno == ENOMEM || errno == EROFS) { fprintf(stderr, Name ": add new device failed for %s: %s\n", add_dev, strerror(errno)); if (add_dev != dv->devname) continue; return 1; } skip_re_add: re_add_failed = 1; } st->ss->free_super(st); } if (add_dev != dv->devname) { if (verbose > 0) fprintf(stderr, Name ": --re-add for %s to %s is not possible\n", add_dev, devname); if (tfd >= 0) { close(tfd); tfd = -1; } continue; } if (dv->re_add) { if (tfd >= 0) close(tfd); fprintf(stderr, Name ": --re-add for %s to %s is not possible\n", dv->devname, devname); return 1; } if (re_add_failed) { fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n", dv->devname, devname); fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n", dv->devname); fprintf(stderr, Name ": To 
make this a spare, use \"mdadm --zero-superblock %s\" first.\n", dv->devname); if (tfd >= 0) close(tfd); return 1; } } else { /* non-persistent. Must ensure that new drive * is at least array.size big. */ if (ldsize/512 < array_size) { fprintf(stderr, Name ": %s not large enough to join array\n", dv->devname); if (tfd >= 0) close(tfd); return 1; } } /* committed to really trying this device now*/ if (tfd >= 0) { remove_partitions(tfd); close(tfd); tfd = -1; } /* in 2.6.17 and earlier, version-1 superblocks won't * use the number we write, but will choose a free number. * we must choose the same free number, which requires * starting at 'raid_disks' and counting up */ for (j = array.raid_disks; j< tst->max_devs; j++) { disc.number = j; if (ioctl(fd, GET_DISK_INFO, &disc)) break; if (disc.major==0 && disc.minor==0) break; if (disc.state & 8) /* removed */ break; } disc.major = major(stb.st_rdev); disc.minor = minor(stb.st_rdev); disc.number =j; disc.state = 0; if (array.not_persistent==0) { int dfd; if (dv->writemostly == 1) disc.state |= 1 << MD_DISK_WRITEMOSTLY; dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); if (tst->ss->add_to_super(tst, &disc, dfd, dv->devname)) { close(dfd); return 1; } if (tst->ss->write_init_super(tst)) { close(dfd); return 1; } } else if (dv->re_add) { /* this had better be raid1. * As we are "--re-add"ing we must find a spare slot * to fill. 
*/ char *used = malloc(array.raid_disks); memset(used, 0, array.raid_disks); for (j=0; j< tst->max_devs; j++) { mdu_disk_info_t disc2; disc2.number = j; if (ioctl(fd, GET_DISK_INFO, &disc2)) continue; if (disc2.major==0 && disc2.minor==0) continue; if (disc2.state & 8) /* removed */ continue; if (disc2.raid_disk < 0) continue; if (disc2.raid_disk > array.raid_disks) continue; used[disc2.raid_disk] = 1; } for (j=0 ; j<array.raid_disks; j++) if (!used[j]) { disc.raid_disk = j; disc.state |= (1<<MD_DISK_SYNC); break; } free(used); } if (dv->writemostly == 1) disc.state |= (1 << MD_DISK_WRITEMOSTLY); if (tst->ss->external) { /* add a disk * to an external metadata container */ struct mdinfo new_mdi; struct mdinfo *sra; int container_fd; int devnum = fd2devnum(fd); int dfd; container_fd = open_dev_excl(devnum); if (container_fd < 0) { fprintf(stderr, Name ": add failed for %s:" " could not get exclusive access to container\n", dv->devname); tst->ss->free_super(tst); return 1; } dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); if (mdmon_running(tst->container_dev)) tst->update_tail = &tst->updates; if (tst->ss->add_to_super(tst, &disc, dfd, dv->devname)) { close(dfd); close(container_fd); return 1; } if (tst->update_tail) flush_metadata_updates(tst); else tst->ss->sync_metadata(tst); sra = sysfs_read(container_fd, -1, 0); if (!sra) { fprintf(stderr, Name ": add failed for %s: sysfs_read failed\n", dv->devname); close(container_fd); tst->ss->free_super(tst); return 1; } sra->array.level = LEVEL_CONTAINER; /* Need to set data_offset and component_size */ tst->ss->getinfo_super(tst, &new_mdi, NULL); new_mdi.disk.major = disc.major; new_mdi.disk.minor = disc.minor; new_mdi.recovery_start = 0; /* Make sure fds are closed as they are O_EXCL which * would block add_disk */ tst->ss->free_super(tst); if (sysfs_add_disk(sra, &new_mdi, 0) != 0) { fprintf(stderr, Name ": add new device to external metadata" " failed for %s\n", dv->devname); close(container_fd); 
sysfs_free(sra); return 1; } ping_monitor_by_id(devnum); sysfs_free(sra); close(container_fd); } else { tst->ss->free_super(tst); if (ioctl(fd, ADD_NEW_DISK, &disc)) { fprintf(stderr, Name ": add new device failed for %s as %d: %s\n", dv->devname, j, strerror(errno)); return 1; } } if (verbose >= 0) fprintf(stderr, Name ": added %s\n", dv->devname); break; case 'r': /* hot remove */ if (subarray) { fprintf(stderr, Name ": Cannot remove disks from a" " \'member\' array, perform this" " operation on the parent container\n"); if (sysfd >= 0) close(sysfd); return 1; } if (tst->ss->external) { /* To remove a device from a container, we must * check that it isn't in use in an array. * This involves looking in the 'holders' * directory - there must be just one entry, * the container. * To ensure that it doesn't get used as a * hold spare while we are checking, we * get an O_EXCL open on the container */ int dnum = fd2devnum(fd); lfd = open_dev_excl(dnum); if (lfd < 0) { fprintf(stderr, Name ": Cannot get exclusive access " " to container - odd\n"); if (sysfd >= 0) close(sysfd); return 1; } /* in the detached case it is not possible to * check if we are the unique holder, so just * rely on the 'detached' checks */ if (strcmp(dv->devname, "detached") == 0 || sysfd >= 0 || sysfs_unique_holder(dnum, stb.st_rdev)) /* pass */; else { fprintf(stderr, Name ": %s is %s, cannot remove.\n", dnprintable, errno == EEXIST ? 
"still in use": "not a member"); close(lfd); return 1; } } /* FIXME check that it is a current member */ if (sysfd >= 0) { /* device has been removed and we don't know * the major:minor number */ int n = write(sysfd, "remove", 6); if (n != 6) err = -1; else err = 0; close(sysfd); sysfd = -1; } else { err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev); if (err && errno == ENODEV) { /* Old kernels rejected this if no personality * registered */ struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS); struct mdinfo *dv = NULL; if (sra) dv = sra->devs; for ( ; dv ; dv=dv->next) if (dv->disk.major == (int)major(stb.st_rdev) && dv->disk.minor == (int)minor(stb.st_rdev)) break; if (dv) err = sysfs_set_str(sra, dv, "state", "remove"); else err = -1; if (sra) sysfs_free(sra); } } if (err) { fprintf(stderr, Name ": hot remove failed " "for %s: %s\n", dnprintable, strerror(errno)); if (lfd >= 0) close(lfd); return 1; } if (tst->ss->external) { /* * Before dropping our exclusive open we make an * attempt at preventing mdmon from seeing an * 'add' event before reconciling this 'remove' * event. */ char *name = devnum2devname(fd2devnum(fd)); if (!name) { fprintf(stderr, Name ": unable to get container name\n"); return 1; } ping_manager(name); free(name); } if (lfd >= 0) close(lfd); count++; if (verbose >= 0) fprintf(stderr, Name ": hot removed %s from %s\n", dnprintable, devname); break; case 'f': /* set faulty */ /* FIXME check current member */ if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) || (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY, (unsigned long) stb.st_rdev))) { fprintf(stderr, Name ": set device faulty failed for %s: %s\n", dnprintable, strerror(errno)); if (sysfd >= 0) close(sysfd); return 1; } if (sysfd >= 0) close(sysfd); sysfd = -1; count++; if (verbose >= 0) fprintf(stderr, Name ": set %s faulty in %s\n", dnprintable, devname); break; } } if (test && count == 0) return 2; return 0; }
int Manage_runstop(char *devname, int fd, int runstop, int quiet) { /* Run or stop the array. array must already be configured * required >= 0.90.0 * Only print failure messages if quiet == 0; * quiet > 0 means really be quiet * quiet < 0 means we will try again if it fails. */ mdu_param_t param; /* unused */ if (runstop == -1 && md_get_version(fd) < 9000) { if (ioctl(fd, STOP_MD, 0)) { if (quiet == 0) fprintf(stderr, Name ": stopping device %s " "failed: %s\n", devname, strerror(errno)); return 1; } } if (md_get_version(fd) < 9000) { fprintf(stderr, Name ": need md driver version 0.90.0 or later\n"); return 1; } /* if (ioctl(fd, GET_ARRAY_INFO, &array)) { fprintf(stderr, Name ": %s does not appear to be active.\n", devname); return 1; } */ if (runstop>0) { if (ioctl(fd, RUN_ARRAY, ¶m)) { fprintf(stderr, Name ": failed to run array %s: %s\n", devname, strerror(errno)); return 1; } if (quiet <= 0) fprintf(stderr, Name ": started %s\n", devname); } else if (runstop < 0){ struct map_ent *map = NULL; struct stat stb; struct mdinfo *mdi; int devnum; int err; int count; /* If this is an mdmon managed array, just write 'inactive' * to the array state and let mdmon clear up. */ devnum = fd2devnum(fd); /* Get EXCL access first. If this fails, then attempting * to stop is probably a bad idea. */ close(fd); fd = open(devname, O_RDONLY|O_EXCL); if (fd < 0 || fd2devnum(fd) != devnum) { if (fd >= 0) close(fd); fprintf(stderr, Name ": Cannot get exclusive access to %s:" "Perhaps a running " "process, mounted filesystem " "or active volume group?\n", devname); return 1; } mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION); if (mdi && mdi->array.level > 0 && is_subarray(mdi->text_version)) { int err; /* This is mdmon managed. 
*/ close(fd); count = 25; while (count && (err = sysfs_set_str(mdi, NULL, "array_state", "inactive")) < 0 && errno == EBUSY) { usleep(200000); count--; } if (err && !quiet) { fprintf(stderr, Name ": failed to stop array %s: %s\n", devname, strerror(errno)); return 1; } /* Give monitor a chance to act */ ping_monitor(mdi->text_version); fd = open_dev_excl(devnum); if (fd < 0) { fprintf(stderr, Name ": failed to completely stop %s" ": Device is busy\n", devname); return 1; } } else if (mdi && mdi->array.major_version == -1 && mdi->array.minor_version == -2 && !is_subarray(mdi->text_version)) { struct mdstat_ent *mds, *m; /* container, possibly mdmon-managed. * Make sure mdmon isn't opening it, which * would interfere with the 'stop' */ ping_monitor(mdi->sys_name); /* now check that there are no existing arrays * which are members of this array */ mds = mdstat_read(0, 0); for (m=mds; m; m=m->next) if (m->metadata_version && strncmp(m->metadata_version, "external:", 9)==0 && is_subarray(m->metadata_version+9) && devname2devnum(m->metadata_version+10) == devnum) { if (!quiet) fprintf(stderr, Name ": Cannot stop container %s: " "member %s still active\n", devname, m->dev); free_mdstat(mds); if (mdi) sysfs_free(mdi); return 1; } } /* As we have an O_EXCL open, any use of the device * which blocks STOP_ARRAY is probably a transient use, * so it is reasonable to retry for a while - 5 seconds. */ count = 25; err = 0; while (count && fd >= 0 && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) { usleep(200000); count --; } if (fd >= 0 && err) { if (quiet == 0) { fprintf(stderr, Name ": failed to stop array %s: %s\n", devname, strerror(errno)); if (errno == EBUSY) fprintf(stderr, "Perhaps a running " "process, mounted filesystem " "or active volume group?\n"); } if (mdi) sysfs_free(mdi); return 1; } /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array * was stopped, so We'll do it here just to be sure. Drop any * partitions as well... 
*/ if (fd >= 0) ioctl(fd, BLKRRPART, 0); if (mdi) sysfs_uevent(mdi, "change"); if (devnum != NoMdDev && (stat("/dev/.udev", &stb) != 0 || check_env("MDADM_NO_UDEV"))) { struct map_ent *mp = map_by_devnum(&map, devnum); remove_devices(devnum, mp ? mp->path : NULL); } if (quiet <= 0) fprintf(stderr, Name ": stopped %s\n", devname); map_lock(&map); map_remove(&map, devnum); map_unlock(&map); } return 0; }