void vinumstrategy(struct buf *bp) { int volno; struct volume *vol = NULL; switch (DEVTYPE(bp->b_dev)) { case VINUM_SD_TYPE: case VINUM_RAWSD_TYPE: sdio(bp); return; /* * In fact, vinum doesn't handle drives: they're * handled directly by the disk drivers */ case VINUM_DRIVE_TYPE: default: bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; biodone(bp); return; case VINUM_VOLUME_TYPE: /* volume I/O */ volno = Volno(bp->b_dev); vol = &VOL[volno]; if (vol->state != volume_up) { /* can't access this volume */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; biodone(bp); return; } if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */ biodone(bp); /* have nothing to do with this */ return; } /* FALLTHROUGH */ /* * Plex I/O is pretty much the same as volume I/O * for a single plex. Indicate this by passing a NULL * pointer (set above) for the volume */ case VINUM_PLEX_TYPE: case VINUM_RAWPLEX_TYPE: bp->b_resid = bp->b_bcount; /* transfer everything */ vinumstart(bp, 0); return; } }
/*
 * ioctl routine.
 *
 * Dispatches first on the type of vinum device that dev refers to and
 * then on the command:
 *
 *  - Super device: configuration and management commands.  Several of
 *    them report their real status in the struct _ioctl_reply that
 *    overlays data and return 0 from the ioctl itself.
 *  - Subdisk and plex devices: disk label queries only.
 *  - Volume devices: media size, sector size and label-writeable flag.
 *  - Drive devices, unknown device types and unknown super device
 *    commands are logged and rejected with EINVAL.
 *
 * dev:  device the ioctl was issued on.
 * cmd:  ioctl command.
 * data: in/out argument buffer; its layout depends on cmd.
 * flag: open flags; only consulted (FWRITE) for DIOCWLABEL.
 * td:   calling thread (not used here).
 *
 * Returns 0 or an errno value.
 */
int
vinumioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
    unsigned int objno;
    int error = 0;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;
    unsigned int index;                             /* for transferring config info */
    unsigned int sdno;                              /* for transferring config info */
    int fe;                                         /* free list element number */
    struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */

    /* First, decide what we're looking at */
    switch (DEVTYPE(dev)) {
    case VINUM_SUPERDEV_TYPE:                       /* ordinary super device */
        switch (cmd) {
#ifdef VINUMDEBUG
        case VINUM_DEBUG:
            if (((struct debuginfo *) data)->changeit) { /* change debug settings */
                debug = (((struct debuginfo *) data)->param);
            } else {
                if (debug & DEBUG_REMOTEGDB) {
                    boothowto |= RB_GDB;            /* serial debug line */
                } else {
                    boothowto &= ~RB_GDB;           /* local ddb */
                }
                Debugger("vinum debug");
            }
            ioctl_reply->error = 0;
            return 0;
#endif

        case VINUM_CREATE:                          /* create a vinum object */
            error = lock_config();                  /* get the config for us alone */
            if (error) {                            /* can't do it, */
                return error;                       /* give up */
            }
            error = setjmp(command_fail);           /* come back here on error */
            if (error == 0) {                       /* first time, */
                ioctl_reply->error = parse_user_config((char *) data, /* update the config */
                    &keyword_set);
            } else if (ioctl_reply->error == 0) {   /* longjmp, but no error status */
                ioctl_reply->error = EINVAL;        /* note that something's up */
                ioctl_reply->msg[0] = '\0';         /* no message? */
            }
            unlock_config();
            return 0;                               /* must be 0 to return the real error info */

        case VINUM_GETCONFIG:                       /* get the configuration information */
            bcopy(&vinum_conf, data, sizeof(vinum_conf));
            return 0;

            /* start configuring the subsystem */
        case VINUM_STARTCONFIG:
            return start_config(*(int *) data);     /* just lock it.  Parameter is 'force' */

            /*
             * Move the individual parts of the config to user space.
             *
             * Specify the index of the object in the first word of data,
             * and return the object there
             */
        case VINUM_DRIVECONFIG:
            index = *(int *) data;                  /* get the index */
            if (index >= (unsigned) vinum_conf.drives_allocated) { /* can't do it */
                return ENXIO;
            }
            bcopy(&DRIVE[index], data, sizeof(struct _drive)); /* copy the config item out */
            return 0;

        case VINUM_SDCONFIG:
            index = *(int *) data;                  /* get the index */
            if (index >= (unsigned) vinum_conf.subdisks_allocated) { /* can't do it */
                return ENXIO;
            }
            bcopy(&SD[index], data, sizeof(struct _sd)); /* copy the config item out */
            return 0;

        case VINUM_PLEXCONFIG:
            index = *(int *) data;                  /* get the index */
            if (index >= (unsigned) vinum_conf.plexes_allocated) { /* can't do it */
                return ENXIO;
            }
            bcopy(&PLEX[index], data, sizeof(struct _plex)); /* copy the config item out */
            return 0;

        case VINUM_VOLCONFIG:
            index = *(int *) data;                  /* get the index */
            if (index >= (unsigned) vinum_conf.volumes_allocated) { /* can't do it */
                return ENXIO;
            }
            bcopy(&VOL[index], data, sizeof(struct _volume)); /* copy the config item out */
            return 0;

        case VINUM_PLEXSDCONFIG:
            index = *(int *) data;                  /* get the plex index */
            sdno = ((int *) data)[1];               /* and the sd index */
            if ((index >= (unsigned) vinum_conf.plexes_allocated) /* plex doesn't exist */
                || (sdno >= PLEX[index].subdisks)) { /* or it doesn't have this many subdisks */
                return ENXIO;
            }
            bcopy(&SD[PLEX[index].sdnos[sdno]],     /* copy the config item out */
                data,
                sizeof(struct _sd));
            return 0;

            /*
             * We get called in two places: one from the
             * userland config routines, which call us
             * to complete the config and save it.  This
             * call supplies the value 0 as a parameter.
             *
             * The other place is from the user "saveconfig"
             * routine, which can only work if we're *not*
             * configuring.  In this case, supply parameter 1.
             */
        case VINUM_SAVECONFIG:
            if (VFLAGS & VF_CONFIGURING) {          /* must be us, the others are asleep */
                if (*(int *) data == 0) {           /* finish config */
                    finish_config(1);               /* finish the configuration and update it */
                } else {
                    return EBUSY;                   /* can't do it now */
                }
            }
            save_config();                          /* save configuration to disk */
            return 0;

        case VINUM_RELEASECONFIG:                   /* release the config */
            if (VFLAGS & VF_CONFIGURING) {          /* must be us, the others are asleep */
                finish_config(0);                   /* finish the configuration, don't change it */
                save_config();                      /* save configuration to disk */
            } else {
                error = EINVAL;                     /* release what config? */
            }
            return error;

        case VINUM_INIT:
            ioctl_reply->error = 0;
            return 0;

        case VINUM_RESETCONFIG:
            if (vinum_inactive(0)) {                /* if the volumes are not active */
                /*
                 * Note the open count.  We may be called from v, so we'll be open.
                 * Keep the count so we don't underflow
                 */
                free_vinum(1);                      /* clean up everything */
                log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n");
                ioctl_reply->error = 0;
                return 0;
            }
            return EBUSY;

        case VINUM_SETSTATE:
            setstate((struct vinum_ioctl_msg *) data); /* set an object state */
            return 0;

            /*
             * Set state by force, without changing
             * anything else.
             */
        case VINUM_SETSTATE_FORCE:
            setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */
            return 0;

#ifdef VINUMDEBUG
        case VINUM_MEMINFO:
            vinum_meminfo(data);
            return 0;

        case VINUM_MALLOCINFO:
            return vinum_mallocinfo(data);

        case VINUM_RQINFO:
            return vinum_rqinfo(data);
#endif

        case VINUM_LABEL:                           /* label a volume */
            ioctl_reply->error = write_volume_label(*(int *) data); /* index of the volume to label */
            ioctl_reply->msg[0] = '\0';             /* no message */
            return 0;

        case VINUM_REMOVE:
            remove((struct vinum_ioctl_msg *) data); /* remove an object */
            return 0;

        case VINUM_GETFREELIST:                     /* get a drive free list element */
            index = *(int *) data;                  /* get the drive index */
            fe = ((int *) data)[1];                 /* and the free list element */
            if ((index >= (unsigned) vinum_conf.drives_allocated) /* drive doesn't exist */
                || (DRIVE[index].state == drive_unallocated)) {
                return ENODEV;
            }
            /*
             * fe is signed and comes straight from the caller: reject
             * negative values as well, otherwise we would copy out
             * whatever lies in front of the free list.
             */
            if ((fe < 0)
                || (fe >= DRIVE[index].freelist_entries)) { /* no such entry */
                return ENOENT;
            }
            bcopy(&DRIVE[index].freelist[fe],
                data,
                sizeof(struct drive_freelist));
            return 0;

        case VINUM_RESETSTATS:
            resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
            return 0;

            /* attach an object to a superordinate object */
        case VINUM_ATTACH:
            attachobject((struct vinum_ioctl_msg *) data);
            return 0;

            /* detach an object from a superordinate object */
        case VINUM_DETACH:
            detachobject((struct vinum_ioctl_msg *) data);
            return 0;

            /* rename an object */
        case VINUM_RENAME:
            renameobject((struct vinum_rename_msg *) data);
            return 0;

            /* replace an object */
        case VINUM_REPLACE:
            replaceobject((struct vinum_ioctl_msg *) data);
            return 0;

        case VINUM_DAEMON:
            vinum_daemon();                         /* perform the daemon */
            return 0;

        case VINUM_FINDDAEMON:                      /* check for presence of daemon */
            return vinum_finddaemon();

        case VINUM_SETDAEMON:                       /* set daemon flags */
            return vinum_setdaemonopts(*(int *) data);

        case VINUM_GETDAEMON:                       /* get daemon flags */
            *(int *) data = daemon_options;
            return 0;

        case VINUM_PARITYOP:                        /* check/rebuild RAID-4/5 parity */
            parityops((struct vinum_ioctl_msg *) data);
            return 0;

            /* move an object */
        case VINUM_MOVE:
            moveobject((struct vinum_ioctl_msg *) data);
            return 0;

        default:
            break;                                  /* unknown command: rejected below */
        }
        /* FALLTHROUGH: unknown super device command */

    case VINUM_DRIVE_TYPE:
    default:
        log(LOG_WARNING,
            "vinumioctl: invalid ioctl from process %d (%s): %lx\n",
            curthread->td_proc->p_pid,
            curthread->td_proc->p_comm,
            cmd);
        return EINVAL;

    case VINUM_SD_TYPE:
    case VINUM_RAWSD_TYPE:
        objno = Sdno(dev);
        sd = &SD[objno];

        switch (cmd) {
        case DIOCGDINFO:                            /* get disk label */
            get_volume_label(sd->name, 1, sd->sectors, (struct disklabel *) data);
            break;

            /*
             * We don't have this stuff on hardware,
             * so just pretend to do it so that
             * utilities don't get upset.
             */
        case DIOCWDINFO:                            /* write partition info */
        case DIOCSDINFO:                            /* set partition info */
            return 0;                               /* accepted, but nothing to do */

        default:
            return ENOTTY;                          /* not my kind of ioctl */
        }

        return 0;                                   /* pretend we did it */

    case VINUM_RAWPLEX_TYPE:
    case VINUM_PLEX_TYPE:
        objno = Plexno(dev);
        plex = &PLEX[objno];

        switch (cmd) {
        case DIOCGDINFO:                            /* get disk label */
            get_volume_label(plex->name, 1, plex->length, (struct disklabel *) data);
            break;

            /*
             * We don't have this stuff on hardware,
             * so just pretend to do it so that
             * utilities don't get upset.
             */
        case DIOCWDINFO:                            /* write partition info */
        case DIOCSDINFO:                            /* set partition info */
            return 0;                               /* accepted, but nothing to do */

        default:
            return ENOTTY;                          /* not my kind of ioctl */
        }

        return 0;                                   /* pretend we did it */

    case VINUM_VOLUME_TYPE:
        objno = Volno(dev);

        if ((unsigned) objno >= (unsigned) vinum_conf.volumes_allocated) { /* not a valid volume */
            return ENXIO;
        }
        vol = &VOL[objno];
        if (vol->state != volume_up) {              /* not up, */
            return EIO;                             /* I/O error */
        }

        switch (cmd) {
        case DIOCGMEDIASIZE:
            *(off_t *) data = vol->size << DEV_BSHIFT;
            break;

        case DIOCGSECTORSIZE:
            *(u_int *) data = DEV_BSIZE;
            break;

            /*
             * We don't have this stuff on hardware,
             * so just pretend to do it so that
             * utilities don't get upset.
             */
        case DIOCWDINFO:                            /* write partition info */
        case DIOCSDINFO:                            /* set partition info */
            return 0;                               /* accepted, but nothing to do */

        case DIOCWLABEL:                            /* set or reset label writeable */
            if ((flag & FWRITE) == 0) {             /* not writeable? */
                return EACCES;                      /* no, die */
            }
            if (*(int *) data != 0) {               /* set it? */
                vol->flags |= VF_WLABEL;            /* yes */
            } else {
                vol->flags &= ~VF_WLABEL;           /* no, reset */
            }
            break;

        default:
            return ENOTTY;                          /* not my kind of ioctl */
        }
        break;
    }
    return 0;                                       /* XXX */
}
/* ioctl routine */ int vinumioctl(struct dev_ioctl_args *ap) { cdev_t dev = ap->a_head.a_dev; u_long cmd = ap->a_cmd; caddr_t data = ap->a_data; int error; unsigned int index; /* for transferring config info */ unsigned int sdno; /* for transferring config info */ unsigned int objno; struct volume *vol; struct partinfo *dpart; int fe; /* free list element number */ struct _ioctl_reply *ioctl_reply; /* struct to return */ error = 0; /* First, decide what we're looking at */ switch (DEVTYPE(dev)) { case VINUM_SUPERDEV_TYPE: /* ordinary super device */ ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */ switch (cmd) { #ifdef VINUMDEBUG case VINUM_DEBUG: if (((struct debuginfo *) data)->changeit) /* change debug settings */ debug = (((struct debuginfo *) data)->param); else { if (debug & DEBUG_REMOTEGDB) boothowto |= RB_GDB; /* serial debug line */ else boothowto &= ~RB_GDB; /* local ddb */ Debugger("vinum debug"); } ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ ioctl_reply->error = 0; break; #endif case VINUM_CREATE: /* create a vinum object */ error = lock_config(); /* get the config for us alone */ if (error) /* can't do it, */ break; error = setjmp(command_fail); /* come back here on error */ if (error == 0) /* first time, */ ioctl_reply->error = parse_user_config((char *) data, /* update the config */ &keyword_set); else if (ioctl_reply->error == 0) { /* longjmp, but no error status */ error = 0; ioctl_reply->error = EINVAL; /* note that something's up */ ioctl_reply->msg[0] = '\0'; /* no message? */ } unlock_config(); break; case VINUM_GETCONFIG: /* get the configuration information */ bcopy(&vinum_conf, data, sizeof(vinum_conf)); break; /* start configuring the subsystem */ case VINUM_STARTCONFIG: error = start_config(*(int *) data); /* just lock it. Parameter is 'force' */ break; case VINUM_DRIVECONFIG: /* * Move the individual parts of the config to user space. 
* * Specify the index of the object in the first word of data, * and return the object there */ index = *(int *) data; if (index >= (unsigned)vinum_conf.drives_allocated) { error = ENXIO; } else { bcopy(&DRIVE[index], data, sizeof(struct drive)); } break; case VINUM_SDCONFIG: index = *(int *) data; if (index >= (unsigned) vinum_conf.subdisks_allocated) { error = ENXIO; } else { bcopy(&SD[index], data, sizeof(struct sd)); } break; case VINUM_PLEXCONFIG: index = *(int *) data; if (index >= (unsigned) vinum_conf.plexes_allocated) { error = ENXIO; } else { bcopy(&PLEX[index], data, sizeof(struct plex)); } break; case VINUM_VOLCONFIG: index = *(int *) data; if (index >= (unsigned) vinum_conf.volumes_allocated) { error = ENXIO; } else { bcopy(&VOL[index], data, sizeof(struct volume)); } break; case VINUM_PLEXSDCONFIG: index = ((int *)data)[0]; /* get the plex index */ sdno = ((int *)data)[1]; /* and the sd index */ if ((index >= (unsigned) vinum_conf.plexes_allocated) ||(sdno >= PLEX[index].subdisks)) { error = ENXIO; } else { bcopy(&SD[PLEX[index].sdnos[sdno]], data, sizeof(struct sd)); } break; case VINUM_SAVECONFIG: /* * We get called in two places: one from the * userland config routines, which call us * to complete the config and save it. This * call supplies the value 0 as a parameter. * * The other place is from the user "saveconfig" * routine, which can only work if we're *not* * configuring. In this case, supply parameter 1. 
*/ if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */ if (*(int *) data == 0) /* finish config */ finish_config(1); /* finish the configuration and update it */ else error = EBUSY; } if (error == 0) save_config(); /* save configuration to disk */ break; case VINUM_RELEASECONFIG: /* release the config */ if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */ finish_config(0); /* finish the configuration, don't change it */ save_config(); /* save configuration to disk */ } else { error = EINVAL; /* release what config? */ } break; case VINUM_INIT: ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ ioctl_reply->error = 0; break; case VINUM_RESETCONFIG: if (vinum_inactive(0)) { /* if the volumes are not active */ /* * Note the open count. We may be called from v, so we'll be open. * Keep the count so we don't underflow */ free_vinum(1); /* clean up everything */ log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n"); ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */ ioctl_reply->error = 0; } else { error = EBUSY; } case VINUM_SETSTATE: setstate((struct vinum_ioctl_msg *) data); /* set an object state */ break; /* * Set state by force, without changing * anything else. 
*/ case VINUM_SETSTATE_FORCE: setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */ break; #ifdef VINUMDEBUG case VINUM_MEMINFO: vinum_meminfo(data); break; case VINUM_MALLOCINFO: error = vinum_mallocinfo(data); break; case VINUM_RQINFO: error = vinum_rqinfo(data); break; #endif case VINUM_REMOVE: remove((struct vinum_ioctl_msg *) data); /* remove an object */ break; case VINUM_GETFREELIST: /* get a drive free list element */ index = *(int *) data; /* get the drive index */ fe = ((int *) data)[1]; /* and the free list element */ if ((index >= (unsigned) vinum_conf.drives_allocated) /* plex doesn't exist */ ||(DRIVE[index].state == drive_unallocated)) { error = ENODEV; } else if (fe >= DRIVE[index].freelist_entries) { error = ENOENT; } else { bcopy(&DRIVE[index].freelist[fe], data, sizeof(struct drive_freelist)); } break; case VINUM_RESETSTATS: resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */ break; /* attach an object to a superordinate object */ case VINUM_ATTACH: attachobject((struct vinum_ioctl_msg *) data); break; /* detach an object from a superordinate object */ case VINUM_DETACH: detachobject((struct vinum_ioctl_msg *) data); break; /* rename an object */ case VINUM_RENAME: renameobject((struct vinum_rename_msg *) data); break; /* replace an object */ case VINUM_REPLACE: replaceobject((struct vinum_ioctl_msg *) data); break; case VINUM_DAEMON: vinum_daemon(); /* perform the daemon */ break; case VINUM_FINDDAEMON: /* check for presence of daemon */ error = vinum_finddaemon(); break; case VINUM_SETDAEMON: /* set daemon flags */ error = vinum_setdaemonopts(*(int *) data); break; case VINUM_GETDAEMON: /* get daemon flags */ *(int *) data = daemon_options; break; case VINUM_PARITYOP: /* check/rebuild RAID-4/5 parity */ parityops((struct vinum_ioctl_msg *) data); break; /* move an object */ case VINUM_MOVE: moveobject((struct vinum_ioctl_msg *) data); break; default: error = EINVAL; break; } break; case VINUM_LABEL: 
case VINUM_DRIVE_TYPE: case VINUM_SD_TYPE: case VINUM_RAWSD_TYPE: case VINUM_RAWPLEX_TYPE: case VINUM_PLEX_TYPE: error = EINVAL; break; case VINUM_VOLUME_TYPE: objno = Volno(dev); if ((unsigned)objno >= (unsigned)vinum_conf.volumes_allocated) { error = ENXIO; break; } vol = &VOL[objno]; if (vol->state != volume_up) { error = EIO; break; } switch(cmd) { case DIOCGPART: dpart = (void *)data; bzero(dpart, sizeof(*dpart)); dpart->media_offset = 0; dpart->media_size = (u_int64_t)vol->size * DEV_BSIZE; dpart->media_blocks = vol->size; dpart->media_blksize = DEV_BSIZE; dpart->fstype = FS_BSDFFS; break; default: error = EINVAL; } break; default: error = EINVAL; break; } if (error) { log(LOG_WARNING, "vinumioctl: invalid ioctl from process %d (%s): %lx\n", curproc->p_pid, curproc->p_comm, cmd); } return error; }
/* * Start a transfer. Return -1 on error, * 0 if OK, 1 if we need to retry. * Parameter reviveok is set when doing * transfers for revives: it allows transfers to * be started immediately when a revive is in * progress. During revive, normal transfers * are queued if they share address space with * a currently active revive operation. */ int vinumstart(struct buf *bp, int reviveok) { int plexno; int maxplex; /* maximum number of plexes to handle */ struct volume *vol; struct request *rq; /* build up our request here */ enum requeststatus status; #if VINUMDEBUG if (debug & DEBUG_LASTREQS) logrq(loginfo_user_bp, (union rqinfou) bp, bp); #endif if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */ bp->b_error = EINVAL; /* invalid size */ bp->b_flags |= B_ERROR; biodone(bp); return -1; } rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */ if (rq == NULL) { /* can't do it */ bp->b_error = ENOMEM; /* can't get memory */ bp->b_flags |= B_ERROR; biodone(bp); return -1; } bzero(rq, sizeof(struct request)); /* * Note the volume ID. This can be NULL, which * the request building functions use as an * indication for single plex I/O */ rq->bp = bp; /* and the user buffer struct */ if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */ rq->volplex.volno = Volno(bp->b_dev); /* get the volume number */ vol = &VOL[rq->volplex.volno]; /* and point to it */ vol->active++; /* one more active request */ maxplex = vol->plexes; /* consider all its plexes */ } else { vol = NULL; /* no volume */ rq->volplex.plexno = Plexno(bp->b_dev); /* point to the plex */ rq->isplex = 1; /* note that it's a plex */ maxplex = 1; /* just the one plex */ } if (bp->b_flags & B_READ) { /* * This is a read request. Decide * which plex to read from. * * There's a potential race condition here, * since we're not locked, and we could end * up multiply incrementing the round-robin * counter. This doesn't have any serious * effects, however. 
*/ if (vol != NULL) { vol->reads++; plexno = vol->preferred_plex; /* get the plex to use */ if (plexno < 0) { /* round robin */ plexno = vol->last_plex_read; vol->last_plex_read++; if (vol->last_plex_read >= vol->plexes) /* got the the end? */ vol->last_plex_read = 0; /* wrap around */ } status = build_read_request(rq, plexno); /* build a request */ } else { daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */ status = bre(rq, /* build a request list */ rq->volplex.plexno, &diskaddr, diskaddr + (bp->b_bcount / DEV_BSIZE)); } if ((status > REQUEST_RECOVERED) /* can't satisfy it */ ||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */ if (status == REQUEST_DOWN) { /* not enough subdisks */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; } biodone(bp); freerq(rq); return -1; } return launch_requests(rq, reviveok); /* now start the requests if we can */ } else /* * This is a write operation. We write to all plexes. If this is * a RAID-4 or RAID-5 plex, we must also update the parity stripe. */ { if (vol != NULL) { vol->writes++; status = build_write_request(rq); /* Not all the subdisks are up */ } else { /* plex I/O */ daddr_t diskstart; diskstart = bp->b_blkno; /* start offset of transfer */ status = bre(rq, Plexno(bp->b_dev), &diskstart, bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */ } if ((status > REQUEST_RECOVERED) /* can't satisfy it */ ||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */ if (status == REQUEST_DOWN) { /* not enough subdisks */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; } if ((bp->b_flags & B_DONE) == 0) biodone(bp); freerq(rq); return -1; } return launch_requests(rq, reviveok); /* now start the requests if we can */ } }