/* * Perform block I/O, on "dev", starting from offset "pos", for a total of * "bytes" bytes. Reading, writing, and peeking are highly similar, and thus, * this function implements all of them. The "call" parameter indicates the * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls, * "data" will identify the user buffer to use; for peek calls, "data" is set * to NULL. In all cases, this function returns the number of bytes * successfully transferred, 0 on end-of-file conditions, and a negative error * code if no bytes could be transferred due to an error. Dirty data is not * flushed immediately, and thus, a successful write only indicates that the * data have been taken in by the cache (for immediate I/O, a character device * would have to be used, but MINIX3 no longer supports this), which may be * follwed later by silent failures, including undetected end-of-file cases. * In particular, write requests may or may not return 0 (EOF) immediately when * writing at or beyond the block device's size. i Since block I/O takes place * at block granularity, block-unaligned writes have to read a block from disk * before updating it, and that is the only possible source of actual I/O * errors for write calls. * TODO: reconsider the buffering-only approach, or see if we can at least * somehow throw accurate EOF errors without reading in each block first. */ ssize_t lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos, int call) { block_t block, blocks_left; size_t block_size, off, block_off, chunk; struct buf *bp; int r, write, how; if (dev == NO_DEV) return EINVAL; block_size = lmfs_fs_block_size(); write = (call == FSC_WRITE); assert(block_size > 0); /* FIXME: block_t is 32-bit, so we have to impose a limit here. 
*/ if (pos < 0 || pos / block_size > UINT32_MAX || bytes > SSIZE_MAX) return EINVAL; off = 0; block = pos / block_size; block_off = (size_t)(pos % block_size); blocks_left = howmany(block_off + bytes, block_size); lmfs_reset_rdwt_err(); r = OK; for (off = 0; off < bytes; off += chunk) { chunk = block_size - block_off; if (chunk > bytes - off) chunk = bytes - off; /* * For read requests, help the block driver form larger I/O * requests. */ if (!write) block_prefetch(dev, block, blocks_left); /* * Do not read the block from disk if we will end up * overwriting all of its contents. */ how = (write && chunk == block_size) ? NO_READ : NORMAL; bp = lmfs_get_block(dev, block, how); assert(bp); r = lmfs_rdwt_err(); if (r == OK && data != NULL) { assert(lmfs_dev(bp) != NO_DEV); if (write) { r = fsdriver_copyin(data, off, (char *)bp->data + block_off, chunk); /* * Mark the block as dirty even if the copy * failed, since the copy may in fact have * succeeded partially. This is an interface * issue that should be resolved at some point, * but for now we do not want the cache to be * desynchronized from the disk contents. */ lmfs_markdirty(bp); } else r = fsdriver_copyout(data, off, (char *)bp->data + block_off, chunk); } lmfs_put_block(bp, FULL_DATA_BLOCK); if (r != OK) break; block++; block_off = 0; blocks_left--; } /* * If we were not able to do any I/O, return the error (or EOF, even * for writes). Otherwise, return how many bytes we did manage to * transfer. */ if (r != OK && off == 0) return (r == END_OF_FILE) ? 0 : r; return off; }
/*
 * Read file data into the caller's buffer.  "ino_nr" selects the inode,
 * "data" identifies the destination buffer, "bytes" is the requested length
 * and "pos" the starting file offset.  "call" is unused.
 *
 * Returns EINVAL if the inode cannot be found, 0 if "pos" is at or beyond
 * the file size, otherwise the number of bytes copied out, or the error code
 * if the copy or the underlying block read failed.  An END_OF_FILE condition
 * from the block layer is not treated as an error.
 */
ssize_t
fs_read(ino_t ino_nr, struct fsdriver_data *data, size_t bytes, off_t pos,
	int __unused call)
{
	struct inode *node;
	struct buf *bp;
	off_t size;
	size_t blk_size, in_blk, chunk, done;
	int r;

	/* Look up the inode by number. */
	node = find_inode(ino_nr);
	if (node == NULL)
		return EINVAL;

	size = node->i_stat.st_size;
	if (pos >= size)
		return 0;	/* EOF */

	/* Never read past the end of the file. */
	if ((off_t)bytes > size - pos)
		bytes = (size_t)(size - pos);

	blk_size = v_pri.logical_block_size_l;
	done = 0;
	r = OK;

	lmfs_reset_rdwt_err();

	/*
	 * Transfer one chunk per iteration; a chunk never spans two blocks.
	 * The counters in the loop header are only updated after a chunk has
	 * been transferred successfully (a "break" skips them).
	 */
	for (; bytes > 0; bytes -= chunk, done += chunk, pos += chunk) {
		in_blk = pos % blk_size;
		chunk = blk_size - in_blk;
		if (bytes < chunk)
			chunk = bytes;

		bp = read_extent_block(node->extent, pos / blk_size);
		if (bp == NULL)
			panic("bp not valid in rw_chunk; this can't happen");

		r = fsdriver_copyout(data, done, b_data(bp)+in_blk, chunk);

		lmfs_put_block(bp, FULL_DATA_BLOCK);

		/* Stop on a copy failure or on a block layer error/EOF. */
		if (r != OK || lmfs_rdwt_err() < 0)
			break;
	}

	/* A block layer error overrides the copy result, except for EOF. */
	if (lmfs_rdwt_err() != OK)
		r = lmfs_rdwt_err();

	if (lmfs_rdwt_err() == END_OF_FILE)
		r = OK;

	return (r != OK) ? r : done;
}
/*===========================================================================* * fs_readwrite * *===========================================================================*/ int fs_readwrite(void) { int r, rw_flag, block_spec; int regular; cp_grant_id_t gid; off_t position, f_size, bytes_left; unsigned int off, cum_io, block_size, chunk; mode_t mode_word; int completed; struct inode *rip; size_t nrbytes; r = OK; /* Find the inode referred */ if ((rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) return(EINVAL); mode_word = rip->i_mode & I_TYPE; regular = (mode_word == I_REGULAR || mode_word == I_NAMED_PIPE); block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0); /* Determine blocksize */ if (block_spec) { block_size = get_block_size( (dev_t) rip->i_zone[0]); f_size = MAX_FILE_POS; } else { block_size = rip->i_sp->s_block_size; f_size = rip->i_size; } /* Get the values from the request message */ rw_flag = (fs_m_in.m_type == REQ_READ ? READING : WRITING); gid = (cp_grant_id_t) fs_m_in.REQ_GRANT; position = (off_t) fs_m_in.REQ_SEEK_POS_LO; nrbytes = (size_t) fs_m_in.REQ_NBYTES; lmfs_reset_rdwt_err(); /* If this is file i/o, check we can write */ if (rw_flag == WRITING && !block_spec) { if(rip->i_sp->s_rd_only) return EROFS; /* Check in advance to see if file will grow too big. */ if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) return(EFBIG); /* Clear the zone containing present EOF if hole about * to be created. This is necessary because all unwritten * blocks prior to the EOF must read as zeros. */ if(position > f_size) clear_zone(rip, f_size, 0); } /* If this is block i/o, check we can write */ if(block_spec && rw_flag == WRITING && (dev_t) rip->i_zone[0] == superblock.s_dev && superblock.s_rd_only) return EROFS; cum_io = 0; /* Split the transfer into chunks that don't span two blocks. 
*/ while (nrbytes > 0) { off = ((unsigned int) position) % block_size; /* offset in blk*/ chunk = min(nrbytes, block_size - off); if (rw_flag == READING) { bytes_left = f_size - position; if (position >= f_size) break; /* we are beyond EOF */ if (chunk > (unsigned int) bytes_left) chunk = bytes_left; } /* Read or write 'chunk' bytes. */ r = rw_chunk(rip, cvul64((unsigned long) position), off, chunk, nrbytes, rw_flag, gid, cum_io, block_size, &completed); if (r != OK) break; /* EOF reached */ if (lmfs_rdwt_err() < 0) break; /* Update counters and pointers. */ nrbytes -= chunk; /* bytes yet to be read */ cum_io += chunk; /* bytes read so far */ position += (off_t) chunk; /* position within the file */ } fs_m_out.RES_SEEK_POS_LO = position; /* It might change later and the VFS has to know this value */ /* On write, update file size and access time. */ if (rw_flag == WRITING) { if (regular || mode_word == I_DIRECTORY) { if (position > f_size) rip->i_size = position; } } rip->i_seek = NO_SEEK; if (lmfs_rdwt_err() != OK) r = lmfs_rdwt_err(); /* check for disk error */ if (lmfs_rdwt_err() == END_OF_FILE) r = OK; /* even on a ROFS, writing to a device node on it is fine, * just don't update the inode stats for it. And dito for reading. */ if (r == OK && !rip->i_sp->s_rd_only) { if (rw_flag == READING) rip->i_update |= ATIME; if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME; IN_MARKDIRTY(rip); /* inode is thus now dirty */ } fs_m_out.RES_NBYTES = cum_io; return(r); }
/*===========================================================================* * fs_readwrite * *===========================================================================*/ ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes, off_t position, int call) { int r; int regular; off_t f_size, bytes_left; size_t off, cum_io, block_size, chunk; mode_t mode_word; int completed; struct inode *rip; r = OK; /* Find the inode referred */ if ((rip = find_inode(fs_dev, ino_nr)) == NULL) return(EINVAL); mode_word = rip->i_mode & I_TYPE; regular = (mode_word == I_REGULAR); /* Determine blocksize */ block_size = rip->i_sp->s_block_size; f_size = rip->i_size; lmfs_reset_rdwt_err(); /* If this is file i/o, check we can write */ if (call == FSC_WRITE) { if(rip->i_sp->s_rd_only) return EROFS; /* Check in advance to see if file will grow too big. */ if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) return(EFBIG); /* Clear the zone containing present EOF if hole about * to be created. This is necessary because all unwritten * blocks prior to the EOF must read as zeros. */ if(position > f_size) clear_zone(rip, f_size, 0); } cum_io = 0; /* Split the transfer into chunks that don't span two blocks. */ while (nrbytes > 0) { off = ((unsigned int) position) % block_size; /* offset in blk*/ chunk = block_size - off; if (chunk > nrbytes) chunk = nrbytes; if (call == FSC_READ) { bytes_left = f_size - position; if (position >= f_size) break; /* we are beyond EOF */ if (chunk > (unsigned int) bytes_left) chunk = bytes_left; } /* Read or write 'chunk' bytes. */ r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk, nrbytes, call, data, cum_io, block_size, &completed); if (r != OK) break; /* EOF reached */ if (lmfs_rdwt_err() < 0) break; /* Update counters and pointers. 
*/ nrbytes -= chunk; /* bytes yet to be read */ cum_io += chunk; /* bytes read so far */ position += (off_t) chunk; /* position within the file */ } /* On write, update file size and access time. */ if (call == FSC_WRITE) { if (regular || mode_word == I_DIRECTORY) { if (position > f_size) rip->i_size = position; } } rip->i_seek = NO_SEEK; if (lmfs_rdwt_err() != OK) r = lmfs_rdwt_err(); /* check for disk error */ if (lmfs_rdwt_err() == END_OF_FILE) r = OK; if (r != OK) return r; /* even on a ROFS, writing to a device node on it is fine, * just don't update the inode stats for it. And dito for reading. */ if (!rip->i_sp->s_rd_only) { if (call == FSC_READ) rip->i_update |= ATIME; if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME; IN_MARKDIRTY(rip); /* inode is thus now dirty */ } return cum_io; }
/*===========================================================================* * fs_breadwrite * *===========================================================================*/ int fs_breadwrite(void) { int r, rw_flag, completed; cp_grant_id_t gid; u64_t position; unsigned int off, cum_io, chunk, block_size; size_t nrbytes; dev_t target_dev; /* Pseudo inode for rw_chunk */ struct inode rip; r = OK; target_dev = (dev_t) fs_m_in.REQ_DEV2; /* Get the values from the request message */ rw_flag = (fs_m_in.m_type == REQ_BREAD ? READING : WRITING); gid = (cp_grant_id_t) fs_m_in.REQ_GRANT; position = make64((unsigned long) fs_m_in.REQ_SEEK_POS_LO, (unsigned long) fs_m_in.REQ_SEEK_POS_HI); nrbytes = (size_t) fs_m_in.REQ_NBYTES; block_size = get_block_size(target_dev); /* Don't block-write to a RO-mounted filesystem. */ if(superblock.s_dev == target_dev && superblock.s_rd_only) return EROFS; rip.i_zone[0] = (zone_t) target_dev; rip.i_mode = I_BLOCK_SPECIAL; rip.i_size = 0; lmfs_reset_rdwt_err(); cum_io = 0; /* Split the transfer into chunks that don't span two blocks. */ while (nrbytes > 0) { off = rem64u(position, block_size); /* offset in blk*/ chunk = min(nrbytes, block_size - off); /* Read or write 'chunk' bytes. */ r = rw_chunk(&rip, position, off, chunk, nrbytes, rw_flag, gid, cum_io, block_size, &completed); if (r != OK) break; /* EOF reached */ if (lmfs_rdwt_err() < 0) break; /* Update counters and pointers. */ nrbytes -= chunk; /* bytes yet to be read */ cum_io += chunk; /* bytes read so far */ position = add64ul(position, chunk); /* position within the file */ } fs_m_out.RES_SEEK_POS_LO = ex64lo(position); fs_m_out.RES_SEEK_POS_HI = ex64hi(position); if (lmfs_rdwt_err() != OK) r = lmfs_rdwt_err(); /* check for disk error */ if (lmfs_rdwt_err() == END_OF_FILE) r = OK; fs_m_out.RES_NBYTES = cum_io; return(r); }