static void freeblock(struct buf *bp)
{
	ASSERT(bp->lmfs_count == 0);

	/* If the block taken is dirty, make it clean by writing it to the disk.
	 * Avoid hysteresis by flushing all other dirty blocks for the same
	 * device.
	 */
	if (bp->lmfs_dev != NO_DEV) {
		if (!lmfs_isclean(bp)) lmfs_flushdev(bp->lmfs_dev);
		assert(bp->lmfs_bytes > 0);
		bp->lmfs_dev = NO_DEV;
	}

	/* Fill in block's parameters and add it to the hash chain where it goes. */
	MARKCLEAN(bp);		/* NO_DEV blocks may be marked dirty */
	if (bp->lmfs_bytes > 0) {
		assert(bp->data);
		munmap_t(bp->data, bp->lmfs_bytes);
		bp->lmfs_bytes = 0;
		bp->data = NULL;
	} else assert(!bp->data);
}
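/* Illustrative only: a minimal sketch of how an eviction path might use
 * freeblock() before reusing a buffer for another block. The helper below is
 * hypothetical (not part of cache.c), and 'front' is assumed to be the head
 * of the LRU chain, i.e. the least recently used buffer.
 */
static struct buf *evict_lru_example(void)
{
	struct buf *bp = front;		/* assumed LRU head */

	assert(bp != NULL);
	assert(bp->lmfs_count == 0);	/* must not be held by any user */

	/* Flush it (and its device's other dirty blocks) if needed, then
	 * detach it from its device and release its data page.
	 */
	freeblock(bp);

	return bp;	/* caller fills in new dev/blocknr and fresh data */
}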
/*===========================================================================*
 *				lmfs_rw_scattered			     *
 *===========================================================================*/
void lmfs_rw_scattered(
  dev_t dev,			/* major-minor device number */
  struct buf **bufq,		/* pointer to array of buffers */
  int bufqsize,			/* number of buffers */
  int rw_flag			/* READING or WRITING */
)
{
/* Read or write scattered data from a device. */

  register struct buf *bp;
  int gap;
  register int i;
  register iovec_t *iop;
  static iovec_t iovec[NR_IOREQS];
  off_t pos;
  int iov_per_block;
  int start_in_use = bufs_in_use, start_bufqsize = bufqsize;

  assert(bufqsize >= 0);
  if (bufqsize == 0) return;

  /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
  if (rw_flag == READING) {
	for (i = 0; i < bufqsize; i++) {
		assert(bufq[i] != NULL);
		assert(bufq[i]->lmfs_count > 0);
	}

	/* therefore they are all 'in use' and must be at least this many */
	assert(start_in_use >= start_bufqsize);
  }

  assert(dev != NO_DEV);
  assert(fs_block_size > 0);
  iov_per_block = roundup(fs_block_size, PAGE_SIZE) / PAGE_SIZE;
  assert(iov_per_block < NR_IOREQS);

  /* (Shell) sort buffers on lmfs_blocknr. */
  gap = 1;
  do
	gap = 3 * gap + 1;
  while (gap <= bufqsize);
  while (gap != 1) {
	int j;
	gap /= 3;
	for (j = gap; j < bufqsize; j++) {
		for (i = j - gap;
		     i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
		     i -= gap) {
			bp = bufq[i];
			bufq[i] = bufq[i + gap];
			bufq[i + gap] = bp;
		}
	}
  }

  /* Set up I/O vector and do I/O.  The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
  while (bufqsize > 0) {
	int nblocks = 0, niovecs = 0;
	int r;
	for (iop = iovec; nblocks < bufqsize; nblocks++) {
		int p;
		vir_bytes vdata, blockrem;
		bp = bufq[nblocks];
		if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks)
			break;
		if (niovecs >= NR_IOREQS - iov_per_block) break;
		vdata = (vir_bytes) bp->data;
		blockrem = fs_block_size;
		for (p = 0; p < iov_per_block; p++) {
			vir_bytes chunk =
			    blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
			iop->iov_addr = vdata;
			iop->iov_size = chunk;
			vdata += PAGE_SIZE;
			blockrem -= chunk;
			iop++;
			niovecs++;
		}
		assert(p == iov_per_block);
		assert(blockrem == 0);
	}

	assert(nblocks > 0);
	assert(niovecs > 0);

	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
	if (rw_flag == READING)
		r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS);
	else
		r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS);

	/* Harvest the results.  The driver may have returned an error, or it
	 * may have done less than what we asked for.
	 */
	if (r < 0) {
		printf("fs cache: I/O error %d on device %d/%d, block %u\n",
			r, major(dev), minor(dev), bufq[0]->lmfs_blocknr);
	}
	for (i = 0; i < nblocks; i++) {
		bp = bufq[i];
		if (r < (ssize_t) fs_block_size) {
			/* Transfer failed. */
			if (i == 0) {
				bp->lmfs_dev = NO_DEV;	/* Invalidate block */
			}
			break;
		}
		if (rw_flag == READING) {
			bp->lmfs_dev = dev;	/* validate block */
			lmfs_put_block(bp, PARTIAL_DATA_BLOCK);
		} else {
			MARKCLEAN(bp);
		}
		r -= fs_block_size;
	}

	bufq += i;
	bufqsize -= i;

	if (rw_flag == READING) {
		/* Don't bother reading more than the device is willing to
		 * give at this time.  Don't forget to release those extras.
		 */
		while (bufqsize > 0) {
			lmfs_put_block(*bufq++, PARTIAL_DATA_BLOCK);
			bufqsize--;
		}
	}
	if (rw_flag == WRITING && i == 0) {
		/* We're not making progress, this means we might keep
		 * looping.  Buffers remain dirty if un-written.  Buffers are
		 * lost if invalidate()d or LRU-removed while dirty.  This
		 * is better than keeping unwritable blocks around forever..
		 */
		break;
	}
  }

  if (rw_flag == READING) {
	assert(start_in_use >= start_bufqsize);

	/* READING callers assume all bufs are released. */
	assert(start_in_use - start_bufqsize == bufs_in_use);
  }
}
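/* Illustrative only: a minimal sketch of how a file system might drive
 * lmfs_rw_scattered() for read-ahead, assuming the older three-argument
 * lmfs_get_block(dev, block, PREFETCH) interface that matches this version
 * of the cache. With PREFETCH, a block that was not resident comes back with
 * lmfs_dev == NO_DEV, flagging that it still needs to be read; blocks that
 * were already valid are simply put back. The function name is hypothetical.
 */
static void prefetch_example(dev_t dev, block_t base, unsigned int nblocks)
{
	struct buf *bufq[NR_IOREQS];
	struct buf *bp;
	unsigned int i, count = 0;

	for (i = 0; i < nblocks && count < NR_IOREQS; i++) {
		bp = lmfs_get_block(dev, base + i, PREFETCH);
		if (bp->lmfs_dev != NO_DEV) {
			/* Already in the cache and valid; no I/O needed. */
			lmfs_put_block(bp, FULL_DATA_BLOCK);
			continue;
		}
		bufq[count++] = bp;
	}

	if (count > 0)
		lmfs_rw_scattered(dev, bufq, count, READING);
	/* For READING, lmfs_rw_scattered() releases every buffer it was
	 * given, including any it decided not to read.
	 */
}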
/*===========================================================================*
 *				rw_scattered				     *
 *===========================================================================*/
static void rw_scattered(
  dev_t dev,			/* major-minor device number */
  struct buf **bufq,		/* pointer to array of buffers */
  unsigned int bufqsize,	/* number of buffers */
  int rw_flag			/* READING or WRITING */
)
{
/* Read or write scattered data from a device. */

  register struct buf *bp;
  register iovec_t *iop;
  static iovec_t iovec[NR_IOREQS];
  off_t pos;
  unsigned int i, iov_per_block;
#if !defined(NDEBUG)
  unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize;
#endif /* !defined(NDEBUG) */

  if (bufqsize == 0) return;

#if !defined(NDEBUG)
  /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
  if (rw_flag == READING) {
	assert(bufqsize <= LMFS_MAX_PREFETCH);

	for (i = 0; i < bufqsize; i++) {
		assert(bufq[i] != NULL);
		assert(bufq[i]->lmfs_count > 0);
	}

	/* therefore they are all 'in use' and must be at least this many */
	assert(start_in_use >= start_bufqsize);
  }

  assert(dev != NO_DEV);
  assert(fs_block_size > 0);
  assert(howmany(fs_block_size, PAGE_SIZE) <= NR_IOREQS);
#endif /* !defined(NDEBUG) */

  /* For WRITING, (Shell) sort buffers on lmfs_blocknr.
   * For READING, the buffers are already sorted.
   */
  if (rw_flag == WRITING)
	sort_blocks(bufq, bufqsize);

  /* Set up I/O vector and do I/O.  The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
  while (bufqsize > 0) {
	unsigned int p, nblocks = 0, niovecs = 0;
	int r;
	for (iop = iovec; nblocks < bufqsize; nblocks++) {
		vir_bytes vdata, blockrem;
		bp = bufq[nblocks];
		if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks)
			break;
		blockrem = bp->lmfs_bytes;
		iov_per_block = howmany(blockrem, PAGE_SIZE);
		if (niovecs > NR_IOREQS - iov_per_block) break;
		vdata = (vir_bytes) bp->data;
		for (p = 0; p < iov_per_block; p++) {
			vir_bytes chunk =
			    blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
			iop->iov_addr = vdata;
			iop->iov_size = chunk;
			vdata += PAGE_SIZE;
			blockrem -= chunk;
			iop++;
			niovecs++;
		}
		assert(p == iov_per_block);
		assert(blockrem == 0);
	}

	assert(nblocks > 0);
	assert(niovecs > 0 && niovecs <= NR_IOREQS);

	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
	if (rw_flag == READING)
		r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS);
	else
		r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS);

	/* Harvest the results.  The driver may have returned an error, or it
	 * may have done less than what we asked for.
	 */
	if (r < 0) {
		printf("fs cache: I/O error %d on device %d/%d, "
		    "block %"PRIu64"\n",
		    r, major(dev), minor(dev), bufq[0]->lmfs_blocknr);
	}
	for (i = 0; i < nblocks; i++) {
		bp = bufq[i];
		if (r < (ssize_t) bp->lmfs_bytes) {
			/* Transfer failed. */
			if (i == 0) {
				bp->lmfs_dev = NO_DEV;	/* Invalidate block */
			}
			break;
		}
		if (rw_flag == READING) {
			lmfs_put_block(bp);
		} else {
			MARKCLEAN(bp);
		}
		r -= bp->lmfs_bytes;
	}

	bufq += i;
	bufqsize -= i;

	if (rw_flag == READING) {
		/* Don't bother reading more than the device is willing to
		 * give at this time.  Don't forget to release those extras.
		 */
		while (bufqsize > 0) {
			bp = *bufq++;
			bp->lmfs_dev = NO_DEV;	/* invalidate block */
			lmfs_put_block(bp);
			bufqsize--;
		}
	}
	if (rw_flag == WRITING && i == 0) {
		/* We're not making progress, this means we might keep
		 * looping.  Buffers remain dirty if un-written.  Buffers are
		 * lost if invalidate()d or LRU-removed while dirty.  This
		 * is better than keeping unwritable blocks around forever..
		 */
		break;
	}
  }

#if !defined(NDEBUG)
  if (rw_flag == READING) {
	assert(start_in_use >= start_bufqsize);

	/* READING callers assume all bufs are released. */
	assert(start_in_use - start_bufqsize == bufs_in_use);
  }
#endif /* !defined(NDEBUG) */
}
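/* sort_blocks() is called by rw_scattered() above but is not shown in this
 * excerpt. Below is a sketch consistent with the Shell sort that the older
 * lmfs_rw_scattered() performed inline: it orders the queue by ascending
 * lmfs_blocknr so that consecutive blocks can be merged into one transfer.
 * Treat it as an assumption about the helper, not its authoritative source.
 */
static void sort_blocks(struct buf **bufq, unsigned int bufqsize)
{
	struct buf *bp;
	int i, j, gap;

	/* Knuth's gap sequence: 1, 4, 13, 40, ... */
	gap = 1;
	do
		gap = 3 * gap + 1;
	while ((unsigned int)gap <= bufqsize);

	while (gap != 1) {
		gap /= 3;
		for (j = gap; (unsigned int)j < bufqsize; j++) {
			for (i = j - gap;
			     i >= 0 &&
			     bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
			     i -= gap) {
				/* Swap out-of-order entries gap apart. */
				bp = bufq[i];
				bufq[i] = bufq[i + gap];
				bufq[i + gap] = bp;
			}
		}
	}
}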