/*===========================================================================* * read_block * *===========================================================================*/ static void read_block( struct buf *bp /* buffer pointer */ ) { /* Read or write a disk block. This is the only routine in which actual disk * I/O is invoked. If an error occurs, a message is printed here, but the error * is not reported to the caller. If the error occurred while purging a block * from the cache, it is not clear what the caller could do about it anyway. */ int r, op_failed; off_t pos; dev_t dev = bp->lmfs_dev; op_failed = 0; assert(dev != NO_DEV); ASSERT(bp->lmfs_bytes == fs_block_size); ASSERT(fs_block_size > 0); pos = (off_t)bp->lmfs_blocknr * fs_block_size; if(fs_block_size > PAGE_SIZE) { #define MAXPAGES 20 vir_bytes blockrem, vaddr = (vir_bytes) bp->data; int p = 0; static iovec_t iovec[MAXPAGES]; blockrem = fs_block_size; while(blockrem > 0) { vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem; iovec[p].iov_addr = vaddr; iovec[p].iov_size = chunk; vaddr += chunk; blockrem -= chunk; p++; } r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS); } else { r = bdev_read(dev, pos, bp->data, fs_block_size, BDEV_NOFLAGS); } if (r < 0) { printf("fs cache: I/O error on device %d/%d, block %u\n", major(dev), minor(dev), bp->lmfs_blocknr); op_failed = 1; } else if (r != (ssize_t) fs_block_size) { r = END_OF_FILE; op_failed = 1; } if (op_failed) { bp->lmfs_dev = NO_DEV; /* invalidate block */ /* Report read errors to interested parties. */ rdwt_err = r; } }
/*===========================================================================* * read_block * *===========================================================================*/ static int read_block(struct buf *bp, size_t block_size) { /* Read a disk block of 'size' bytes. The given size is always the FS block * size, except for the last block of a device. If an I/O error occurs, * invalidate the block and return an error code. */ ssize_t r; off_t pos; dev_t dev = bp->lmfs_dev; assert(dev != NO_DEV); ASSERT(bp->lmfs_bytes == block_size); ASSERT(fs_block_size > 0); pos = (off_t)bp->lmfs_blocknr * fs_block_size; if (block_size > PAGE_SIZE) { #define MAXPAGES 20 vir_bytes blockrem, vaddr = (vir_bytes) bp->data; int p = 0; static iovec_t iovec[MAXPAGES]; blockrem = block_size; while(blockrem > 0) { vir_bytes chunk = blockrem >= PAGE_SIZE ? PAGE_SIZE : blockrem; iovec[p].iov_addr = vaddr; iovec[p].iov_size = chunk; vaddr += chunk; blockrem -= chunk; p++; } r = bdev_gather(dev, pos, iovec, p, BDEV_NOFLAGS); } else { r = bdev_read(dev, pos, bp->data, block_size, BDEV_NOFLAGS); } if (r != (ssize_t)block_size) { /* Aesthetics: do not report EOF errors on superblock reads, because * this is a fairly common occurrence, e.g. during system installation. */ if (bp->lmfs_blocknr != 0 /*first block*/ || r != 0 /*EOF*/) printf("fs cache: I/O error on device %d/%d, block %"PRIu64 " (%zd)\n", major(dev), minor(dev), bp->lmfs_blocknr, r); if (r >= 0) r = EIO; /* TODO: retry retrieving (just) the remaining part */ bp->lmfs_dev = NO_DEV; /* invalidate block */ return r; } return OK; }
/*===========================================================================* * lmfs_rw_scattered * *===========================================================================*/ void lmfs_rw_scattered( dev_t dev, /* major-minor device number */ struct buf **bufq, /* pointer to array of buffers */ int bufqsize, /* number of buffers */ int rw_flag /* READING or WRITING */ ) { /* Read or write scattered data from a device. */ register struct buf *bp; int gap; register int i; register iovec_t *iop; static iovec_t iovec[NR_IOREQS]; off_t pos; int iov_per_block; int start_in_use = bufs_in_use, start_bufqsize = bufqsize; assert(bufqsize >= 0); if(bufqsize == 0) return; /* for READING, check all buffers on the list are obtained and held * (count > 0) */ if (rw_flag == READING) { for(i = 0; i < bufqsize; i++) { assert(bufq[i] != NULL); assert(bufq[i]->lmfs_count > 0); } /* therefore they are all 'in use' and must be at least this many */ assert(start_in_use >= start_bufqsize); } assert(dev != NO_DEV); assert(fs_block_size > 0); iov_per_block = roundup(fs_block_size, PAGE_SIZE) / PAGE_SIZE; assert(iov_per_block < NR_IOREQS); /* (Shell) sort buffers on lmfs_blocknr. */ gap = 1; do gap = 3 * gap + 1; while (gap <= bufqsize); while (gap != 1) { int j; gap /= 3; for (j = gap; j < bufqsize; j++) { for (i = j - gap; i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr; i -= gap) { bp = bufq[i]; bufq[i] = bufq[i + gap]; bufq[i + gap] = bp; } } } /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything * went fine, otherwise the error code for the first failed transfer. */ while (bufqsize > 0) { int nblocks = 0, niovecs = 0; int r; for (iop = iovec; nblocks < bufqsize; nblocks++) { int p; vir_bytes vdata, blockrem; bp = bufq[nblocks]; if (bp->lmfs_blocknr != (block_t) bufq[0]->lmfs_blocknr + nblocks) break; if(niovecs >= NR_IOREQS-iov_per_block) break; vdata = (vir_bytes) bp->data; blockrem = fs_block_size; for(p = 0; p < iov_per_block; p++) { vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE; iop->iov_addr = vdata; iop->iov_size = chunk; vdata += PAGE_SIZE; blockrem -= chunk; iop++; niovecs++; } assert(p == iov_per_block); assert(blockrem == 0); } assert(nblocks > 0); assert(niovecs > 0); pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size; if (rw_flag == READING) r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); else r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); /* Harvest the results. The driver may have returned an error, or it * may have done less than what we asked for. */ if (r < 0) { printf("fs cache: I/O error %d on device %d/%d, block %u\n", r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); } for (i = 0; i < nblocks; i++) { bp = bufq[i]; if (r < (ssize_t) fs_block_size) { /* Transfer failed. */ if (i == 0) { bp->lmfs_dev = NO_DEV; /* Invalidate block */ } break; } if (rw_flag == READING) { bp->lmfs_dev = dev; /* validate block */ lmfs_put_block(bp, PARTIAL_DATA_BLOCK); } else { MARKCLEAN(bp); } r -= fs_block_size; } bufq += i; bufqsize -= i; if (rw_flag == READING) { /* Don't bother reading more than the device is willing to * give at this time. Don't forget to release those extras. */ while (bufqsize > 0) { lmfs_put_block(*bufq++, PARTIAL_DATA_BLOCK); bufqsize--; } } if (rw_flag == WRITING && i == 0) { /* We're not making progress, this means we might keep * looping. Buffers remain dirty if un-written. Buffers are * lost if invalidate()d or LRU-removed while dirty. This * is better than keeping unwritable blocks around forever.. */ break; } } if(rw_flag == READING) { assert(start_in_use >= start_bufqsize); /* READING callers assume all bufs are released. */ assert(start_in_use - start_bufqsize == bufs_in_use); } }
/*===========================================================================* * rw_scattered * *===========================================================================*/ static void rw_scattered( dev_t dev, /* major-minor device number */ struct buf **bufq, /* pointer to array of buffers */ unsigned int bufqsize, /* number of buffers */ int rw_flag /* READING or WRITING */ ) { /* Read or write scattered data from a device. */ register struct buf *bp; register iovec_t *iop; static iovec_t iovec[NR_IOREQS]; off_t pos; unsigned int i, iov_per_block; #if !defined(NDEBUG) unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize; #endif /* !defined(NDEBUG) */ if(bufqsize == 0) return; #if !defined(NDEBUG) /* for READING, check all buffers on the list are obtained and held * (count > 0) */ if (rw_flag == READING) { assert(bufqsize <= LMFS_MAX_PREFETCH); for(i = 0; i < bufqsize; i++) { assert(bufq[i] != NULL); assert(bufq[i]->lmfs_count > 0); } /* therefore they are all 'in use' and must be at least this many */ assert(start_in_use >= start_bufqsize); } assert(dev != NO_DEV); assert(fs_block_size > 0); assert(howmany(fs_block_size, PAGE_SIZE) <= NR_IOREQS); #endif /* !defined(NDEBUG) */ /* For WRITING, (Shell) sort buffers on lmfs_blocknr. * For READING, the buffers are already sorted. */ if (rw_flag == WRITING) sort_blocks(bufq, bufqsize); /* Set up I/O vector and do I/O. The result of bdev I/O is OK if everything * went fine, otherwise the error code for the first failed transfer. */ while (bufqsize > 0) { unsigned int p, nblocks = 0, niovecs = 0; int r; for (iop = iovec; nblocks < bufqsize; nblocks++) { vir_bytes vdata, blockrem; bp = bufq[nblocks]; if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks) break; blockrem = bp->lmfs_bytes; iov_per_block = howmany(blockrem, PAGE_SIZE); if (niovecs > NR_IOREQS - iov_per_block) break; vdata = (vir_bytes) bp->data; for(p = 0; p < iov_per_block; p++) { vir_bytes chunk = blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE; iop->iov_addr = vdata; iop->iov_size = chunk; vdata += PAGE_SIZE; blockrem -= chunk; iop++; niovecs++; } assert(p == iov_per_block); assert(blockrem == 0); } assert(nblocks > 0); assert(niovecs > 0 && niovecs <= NR_IOREQS); pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size; if (rw_flag == READING) r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS); else r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS); /* Harvest the results. The driver may have returned an error, or it * may have done less than what we asked for. */ if (r < 0) { printf("fs cache: I/O error %d on device %d/%d, " "block %"PRIu64"\n", r, major(dev), minor(dev), bufq[0]->lmfs_blocknr); } for (i = 0; i < nblocks; i++) { bp = bufq[i]; if (r < (ssize_t)bp->lmfs_bytes) { /* Transfer failed. */ if (i == 0) { bp->lmfs_dev = NO_DEV; /* Invalidate block */ } break; } if (rw_flag == READING) { lmfs_put_block(bp); } else { MARKCLEAN(bp); } r -= bp->lmfs_bytes; } bufq += i; bufqsize -= i; if (rw_flag == READING) { /* Don't bother reading more than the device is willing to * give at this time. Don't forget to release those extras. */ while (bufqsize > 0) { bp = *bufq++; bp->lmfs_dev = NO_DEV; /* invalidate block */ lmfs_put_block(bp); bufqsize--; } } if (rw_flag == WRITING && i == 0) { /* We're not making progress, this means we might keep * looping. Buffers remain dirty if un-written. Buffers are * lost if invalidate()d or LRU-removed while dirty. This * is better than keeping unwritable blocks around forever.. */ break; } } #if !defined(NDEBUG) if(rw_flag == READING) { assert(start_in_use >= start_bufqsize); /* READING callers assume all bufs are released. */ assert(start_in_use - start_bufqsize == bufs_in_use); } #endif /* !defined(NDEBUG) */ }
/*===========================================================================* * rw_scattered * *===========================================================================*/ void rw_scattered( dev_t dev, /* major-minor device number */ struct buf **bufq, /* pointer to array of buffers */ int bufqsize, /* number of buffers */ int rw_flag /* READING or WRITING */ ) { /* Read or write scattered data from a device. */ register struct buf *bp; int gap; register int i; register iovec_t *iop; static iovec_t *iovec = NULL; u64_t pos; int j, r; STATICINIT(iovec, NR_IOREQS); assert(bufq != NULL); /* (Shell) sort buffers on b_blocknr. */ gap = 1; do gap = 3 * gap + 1; while (gap <= bufqsize); while (gap != 1) { gap /= 3; for (j = gap; j < bufqsize; j++) { for (i = j - gap; i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr; i -= gap) { bp = bufq[i]; bufq[i] = bufq[i + gap]; bufq[i + gap] = bp; } } } /* Set up I/O vector and do I/O. The result of dev_io is OK if everything * went fine, otherwise the error code for the first failed transfer. */ while (bufqsize > 0) { for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) { bp = bufq[j]; if (bp->b_blocknr != (block_t) bufq[0]->b_blocknr + j) break; iop->iov_addr = (vir_bytes) bp->b_data; iop->iov_size = (vir_bytes) fs_block_size; } pos = mul64u(bufq[0]->b_blocknr, fs_block_size); if (rw_flag == READING) r = bdev_gather(dev, pos, iovec, j, BDEV_NOFLAGS); else r = bdev_scatter(dev, pos, iovec, j, BDEV_NOFLAGS); /* Harvest the results. The driver may have returned an error, or it * may have done less than what we asked for. */ if (r < 0) { printf("ext2: I/O error %d on device %d/%d, block %u\n", r, major(dev), minor(dev), bufq[0]->b_blocknr); } for (i = 0; i < j; i++) { bp = bufq[i]; if (r < (ssize_t) fs_block_size) { /* Transfer failed. */ if (i == 0) { bp->b_dev = NO_DEV; /* invalidate block */ vm_forgetblocks(); } break; } if (rw_flag == READING) { bp->b_dev = dev; /* validate block */ put_block(bp, PARTIAL_DATA_BLOCK); } else { bp->b_dirt = CLEAN; } r -= fs_block_size; } bufq += i; bufqsize -= i; if (rw_flag == READING) { /* Don't bother reading more than the device is willing to * give at this time. Don't forget to release those extras. */ while (bufqsize > 0) { put_block(*bufq++, PARTIAL_DATA_BLOCK); bufqsize--; } } if (rw_flag == WRITING && i == 0) { /* We're not making progress, this means we might keep * looping. Buffers remain dirty if un-written. Buffers are * lost if invalidate()d or LRU-removed while dirty. This * is better than keeping unwritable blocks around forever.. */ break; } } }