/*
 * Make sure a vinum daemon is available.
 *
 * If daemonpid is non-zero we believe a daemon exists: queue a ping
 * request for it and sleep on &vinum_finddaemon with a timeout of
 * 2 * hz.  A zero return from tsleep() is taken to mean the daemon
 * is up and running.  If daemonpid is zero, or the sleep returns
 * anything other than 0, vinum_daemon() is called to start it.
 *
 * Always returns 0 (no path in this function returns an error code).
 */
int
vinum_finddaemon()
{
    int alive = 0;                                  /* set once a ping has been answered */

    if (daemonpid != 0) {                           /* there should be a daemon out there */
        queue_daemon_request(daemonrq_ping, (union daemoninfo) NULL); /* ask it to respond */
        alive = (tsleep(&vinum_finddaemon, PUSER, "reap", 2 * hz) == 0);
    }
    if (!alive)                                     /* none known, or no answer to the ping */
        vinum_daemon();                             /* start the daemon */
    return 0;
}
/* * Take a completed buffer, transfer the data back if * it's a read, and complete the high-level request * if this is the last subrequest. * * The bp parameter is in fact a struct rqelement, which * includes a couple of extras at the end. */ void complete_rqe(struct bio *bio) { union daemoninfo di; struct buf *bp = bio->bio_buf; struct rqelement *rqe; struct request *rq; struct rqgroup *rqg; struct bio *ubio; /* user buffer */ struct drive *drive; struct sd *sd; char *gravity; /* for error messages */ get_mplock(); rqe = (struct rqelement *) bp; /* point to the element that completed */ rqg = rqe->rqg; /* and the request group */ rq = rqg->rq; /* and the complete request */ ubio = rq->bio; /* user buffer */ #ifdef VINUMDEBUG if (debug & DEBUG_LASTREQS) logrq(loginfo_iodone, (union rqinfou) rqe, ubio); #endif drive = &DRIVE[rqe->driveno]; drive->active--; /* one less outstanding I/O on this drive */ vinum_conf.active--; /* one less outstanding I/O globally */ if ((drive->active == (DRIVE_MAXACTIVE - 1)) /* we were at the drive limit */ ||(vinum_conf.active == VINUM_MAXACTIVE)) /* or the global limit */ wakeup(&launch_requests); /* let another one at it */ if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */ gravity = ""; sd = &SD[rqe->sdno]; if (bp->b_error != 0) /* did it return a number? */ rq->error = bp->b_error; /* yes, put it in. */ else if (rq->error == 0) /* no: do we have one already? 
*/ rq->error = EIO; /* no: catchall "I/O error" */ sd->lasterror = rq->error; if (bp->b_cmd == BUF_CMD_READ) { if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) { gravity = " fatal"; set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */ } log(LOG_ERR, "%s:%s read error, offset %lld for %d bytes\n", gravity, sd->name, (long long)bio->bio_offset, bp->b_bcount); } else { /* write operation */ if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) { gravity = "fatal "; set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */ } log(LOG_ERR, "%s:%s write error, offset %lld for %d bytes\n", gravity, sd->name, (long long)bio->bio_offset, bp->b_bcount); } log(LOG_ERR, "%s: user buffer offset %lld for %d bytes\n", sd->name, (long long)ubio->bio_offset, ubio->bio_buf->b_bcount); if (rq->error == ENXIO) { /* the drive's down too */ log(LOG_ERR, "%s: fatal drive I/O error, offset %lld for %d bytes\n", DRIVE[rqe->driveno].label.name, (long long)bio->bio_offset, bp->b_bcount); DRIVE[rqe->driveno].lasterror = rq->error; set_drive_state(rqe->driveno, /* take the drive down */ drive_down, setstate_force); } } /* Now update the statistics */ if (bp->b_cmd == BUF_CMD_READ) { /* read operation */ DRIVE[rqe->driveno].reads++; DRIVE[rqe->driveno].bytes_read += bp->b_bcount; SD[rqe->sdno].reads++; SD[rqe->sdno].bytes_read += bp->b_bcount; PLEX[rqe->rqg->plexno].reads++; PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount; if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */ VOL[PLEX[rqe->rqg->plexno].volno].reads++; VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount; } } else { /* write operation */ DRIVE[rqe->driveno].writes++; DRIVE[rqe->driveno].bytes_written += bp->b_bcount; SD[rqe->sdno].writes++; SD[rqe->sdno].bytes_written += bp->b_bcount; PLEX[rqe->rqg->plexno].writes++; PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount; if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */ 
VOL[PLEX[rqe->rqg->plexno].volno].writes++; VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount; } } if (rqg->flags & XFR_RECOVERY_READ) { /* recovery read, */ int *sdata; /* source */ int *data; /* and group data */ int length; /* and count involved */ int count; /* loop counter */ struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */ /* XOR destination is the user data */ sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */ data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */ length = urqe->grouplen * (DEV_BSIZE / sizeof(int)); /* and number of ints */ for (count = 0; count < length; count++) data[count] ^= sdata[count]; /* * In a normal read, we will normally read directly * into the user buffer. This doesn't work if * we're also doing a recovery, so we have to * copy it */ if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */ char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */ char *dst; dst = (char *) ubio->bio_buf->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */ length = rqe->datalen << DEV_BSHIFT; /* and count involved */ bcopy(src, dst, length); /* move it */ } } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 4/5 group write operation */ &&(rqg->active == 1)) /* and this is the last active request */ complete_raid5_write(rqe); /* * This is the earliest place where we can be * sure that the request has really finished, * since complete_raid5_write can issue new * requests. */ rqg->active--; /* this request now finished */ if (rqg->active == 0) { /* request group finished, */ rq->active--; /* one less */ if (rqg->lock) { /* got a lock? 
*/ unlockrange(rqg->plexno, rqg->lock); /* yes, free it */ rqg->lock = 0; } } if (rq->active == 0) { /* request finished, */ #ifdef VINUMDEBUG if (debug & DEBUG_RESID) { if (ubio->bio_buf->b_resid != 0) /* still something to transfer? */ Debugger("resid"); } #endif if (rq->error) { /* did we have an error? */ if (rq->isplex) { /* plex operation, */ ubio->bio_buf->b_flags |= B_ERROR; /* yes, propagate to user */ ubio->bio_buf->b_error = rq->error; } else { /* try to recover */ di.rq = rq; queue_daemon_request(daemonrq_ioerror, di); /* let the daemon complete */ } } else { ubio->bio_buf->b_resid = 0; /* completed our transfer */ if (rq->isplex == 0) /* volume request, */ VOL[rq->volplex.volno].active--; /* another request finished */ biodone(ubio); /* top level buffer completed */ freerq(rq); /* return the request storage */ } } rel_mplock(); }