/*
 * Fragment of a strace regression test for preadv(2): reads from /dev/zero
 * on fd 0 at a large offset and checks the decoded output, then exercises
 * the EINVAL paths (negative offset, NULL iov, zero iovcnt), and begins
 * setting up an unlinked temp file plus iovec buffers for a content test.
 * NOTE(review): this snippet is truncated mid-function (no closing brace
 * visible); kept byte-identical.
 */
int main(void) { const off_t offset = 0xdefaceddeadbeefLL; char *buf = tail_alloc(LEN); struct iovec *iov = tail_alloc(sizeof(*iov)); iov->iov_base = buf; iov->iov_len = LEN; (void) close(0); if (open("/dev/zero", O_RDONLY)) perror_msg_and_fail("open"); if (preadv(0, iov, 1, offset) != LEN) perror_msg_and_fail("preadv"); printf("preadv(0, "); print_iovec(iov, 1); printf(", 1, %lld) = %u\n", (long long) offset, LEN); if (preadv(0, iov, 1, -1) != -1) perror_msg_and_fail("preadv"); printf("preadv(0, [{iov_base=%p, iov_len=%zu}], 1, -1) = " "-1 EINVAL (%m)\n", iov->iov_base, iov->iov_len); if (preadv(0, NULL, 1, -2) != -1) perror_msg_and_fail("preadv"); printf("preadv(0, NULL, 1, -2) = -1 EINVAL (%m)\n"); if (preadv(0, iov, 0, -3) != -1) perror_msg_and_fail("preadv"); printf("preadv(0, [], 0, -3) = -1 EINVAL (%m)\n"); static const char tmp[] = "preadv-tmpfile"; int fd = open(tmp, O_RDWR | O_CREAT | O_TRUNC, 0600); if (fd < 0) perror_msg_and_fail("open"); if (unlink(tmp)) perror_msg_and_fail("unlink"); static const char w[] = "0123456789abcde"; if (write(fd, w, LENGTH_OF(w)) != LENGTH_OF(w)) perror_msg_and_fail("write"); static const char r0_c[] = "01234567"; static const char r1_c[] = "89abcde"; const unsigned int r_len = (LENGTH_OF(w) + 1) / 2; void *r0 = tail_alloc(r_len); const struct iovec r0_iov_[] = { { .iov_base = r0, .iov_len = r_len } };
/*
 * Fill this buffer list from @fd starting at @offset with a single
 * positioned scatter read over the first _index iovecs (_buffers).
 *
 * Returns 0 when at least _total_bytes were read, otherwise an errno
 * value describing the failure.
 *
 * Fixes: the original fell off the end of a non-void function on the
 * success path (undefined behavior), and a failing preadv() result of -1
 * could be missed entirely when implicitly converted for comparison
 * against an unsigned _total_bytes.
 */
int BufferList::read_fd(int fd, uint64_t offset)
{
    ssize_t nread = preadv(fd, _buffers, _index, offset);

    if (nread < 0)
        return errno;  // syscall failed; errno was set by preadv
    if (nread < _total_bytes)
        return errno;  // NOTE(review): short read — errno may be stale here; consider a dedicated error code
    return 0;
}
static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_req *br; int err; br = be->be_req; err = 0; switch (be->be_op) { case BOP_READ: if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset) < 0) err = errno; break; case BOP_WRITE: if (bc->bc_rdonly) err = EROFS; else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset) < 0) err = errno; break; case BOP_FLUSH: break; case BOP_CANCEL: err = EINTR; break; default: err = EINVAL; break; } (*br->br_callback)(br, err); }
/*
 * Issue a positioned vector read of up to @count bytes at @offset through
 * the file-scope iovec table (`iov`, `vectors` entries).  When @count is
 * smaller than @buffer_size the table is temporarily trimmed so exactly
 * @count bytes are requested, and the clipped entry is restored afterwards.
 *
 * Returns the preadv(2) result: bytes read, or -1 with errno set
 * (narrowed to int, matching the original interface).
 *
 * Fixes: the trim test referenced the undeclared name `buffersize` (the
 * parameter is `buffer_size`), and the trimmed path still passed the full
 * `vectors` count to preadv, requesting more than @count bytes.
 */
static int do_preadv( int fd, off64_t offset, ssize_t count, ssize_t buffer_size)
{
	int vecs = 0;
	ssize_t oldlen = 0;
	ssize_t bytes = 0;

	/* trim the iovec if necessary */
	if (count < buffer_size) {
		size_t len = 0;
		while (len + iov[vecs].iov_len < count) {
			len += iov[vecs].iov_len;
			vecs++;
		}
		oldlen = iov[vecs].iov_len;
		iov[vecs].iov_len = count - len;
		vecs++;
	} else {
		vecs = vectors;
	}

	/* use the (possibly trimmed) vector count, not the full table size */
	bytes = preadv(fd, iov, vecs, offset);

	/* restore trimmed iov */
	if (oldlen)
		iov[vecs - 1].iov_len = oldlen;

	return bytes;
}
/*
 * tbox positioned vector read: forwards to preadv(2) when the platform
 * provides it; otherwise emulates it by saving the current file offset,
 * seeking to `offset`, doing a plain vector read, and seeking back.  The
 * static/runtime asserts verify that tb_iovec_t is layout-compatible with
 * struct iovec so the pointer cast is safe.  Returns bytes read or -1.
 * NOTE(review): the emulation path is not atomic with respect to other
 * users of the fd — see the existing FIXME about locking.
 */
tb_long_t tb_file_preadv(tb_file_ref_t file, tb_iovec_t const* list, tb_size_t size, tb_hize_t offset) { // check tb_assert_and_check_return_val(file && list && size, -1); // check iovec tb_assert_static(sizeof(tb_iovec_t) == sizeof(struct iovec)); tb_assert(tb_memberof_eq(tb_iovec_t, data, struct iovec, iov_base)); tb_assert(tb_memberof_eq(tb_iovec_t, size, struct iovec, iov_len)); // read it #ifdef TB_CONFIG_POSIX_HAVE_PREADV return preadv(tb_file2fd(file), (struct iovec const*)list, size, offset); #else // FIXME: lock it // save offset tb_hong_t current = tb_file_offset(file); tb_assert_and_check_return_val(current >= 0, -1); // seek it if (current != offset && tb_file_seek(file, offset, TB_FILE_SEEK_BEG) != offset) return -1; // read it tb_long_t real = tb_file_readv(file, list, size); // restore offset if (current != offset && tb_file_seek(file, current, TB_FILE_SEEK_BEG) != current) return -1; // ok return real; #endif }
/*
 * rr regression test: writes `data` (file-scope) to an unlinked temp file,
 * then reads it back into two guard-protected buffers either with a single
 * preadv(2) or with lseek(2)+readv(2), and verifies both the bytes read and
 * that the guard byte immediately past the read data in part2 was left
 * untouched ('y').
 */
static void test(int use_preadv) { char name[] = "/tmp/rr-readv-XXXXXX"; int fd = mkstemp(name); struct { char ch[7]; }* part1; struct { char ch[10]; }* part2; struct iovec iovs[2]; test_assert(fd >= 0); test_assert(0 == unlink(name)); test_assert(sizeof(data) == write(fd, data, sizeof(data))); ALLOCATE_GUARD(part1, 'x'); ALLOCATE_GUARD(part2, 'y'); iovs[0].iov_base = part1; iovs[0].iov_len = sizeof(*part1); iovs[1].iov_base = part2; iovs[1].iov_len = sizeof(*part2); if (use_preadv) { test_assert(sizeof(data) == preadv(fd, iovs, 2, 0)); } else { test_assert(0 == lseek(fd, 0, SEEK_SET)); test_assert(sizeof(data) == readv(fd, iovs, 2)); } test_assert(0 == memcmp(part1, data, sizeof(*part1))); test_assert( 0 == memcmp(part2, data + sizeof(*part1), sizeof(data) - sizeof(*part1))); test_assert(part2->ch[sizeof(data) - sizeof(*part1)] == 'y'); VERIFY_GUARD(part1); VERIFY_GUARD(part2); }
/*
 * Erlang/OTP efile driver: positioned scatter read that loops until EOF or
 * error, retrying on EINTR and advancing the iovec list with shift_iov().
 * Uses preadv(2) when available, else per-element pread(2); with neither,
 * falls back to lseek+read and restores the original file position on
 * success (legacy callers rely on that).  Returns total bytes read when
 * the final result was a clean EOF with data, otherwise the last result
 * (-1 leaves posix_errno set).
 */
Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) { efile_unix_t *u = (efile_unix_t*)d; Uint64 bytes_read; Sint64 result; #if !defined(HAVE_PREADV) && !defined(HAVE_PREAD) /* This function is documented as leaving the file position undefined, but * the old driver always reset it so there's probably code in the wild that * relies on this behavior. */ off_t original_position = lseek(u->fd, 0, SEEK_CUR); if(original_position < 0 || lseek(u->fd, offset, SEEK_SET) < 0) { u->common.posix_errno = errno; return -1; } #endif bytes_read = 0; do { if(iovlen < 1) { result = 0; break; } #if defined(HAVE_PREADV) result = preadv(u->fd, iov, MIN(IOV_MAX, iovlen), offset); #elif defined(HAVE_PREAD) result = pread(u->fd, iov->iov_base, iov->iov_len, offset); #else result = read(u->fd, iov->iov_base, iov->iov_len); #endif if(result > 0) { shift_iov(&iov, &iovlen, result); bytes_read += result; offset += result; } } while(result > 0 || (result < 0 && errno == EINTR)); u->common.posix_errno = errno; #if !defined(HAVE_PREADV) && !defined(HAVE_PREAD) if(result >= 0) { if(lseek(u->fd, original_position, SEEK_SET) < 0) { u->common.posix_errno = errno; return -1; } } #endif if(result == 0 && bytes_read > 0) { return bytes_read; } return result; }
/* Emulate a read of @nbytes into @buf at file offset 0 by issuing a
 * single-element preadv(2); returns the syscall result unchanged. */
static ssize_t
preadv_wrapper(int d, void *buf, size_t nbytes)
{
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = nbytes,
	};

	return preadv(d, &iov, 1, 0);
}
/*
 * libuv worker: performs the read described by uv_fs_t — plain read/readv
 * at the current position when no offset was given (off < 0), otherwise
 * pread for a single buffer or preadv for several, with a Linux runtime
 * fallback to pread when the preadv syscall is missing (ENOSYS caches
 * no_preadv).  Clamps nbufs to the platform iov maximum, then frees the
 * heap-allocated buffer array before returning.  NOTE(review): the
 * non-HAVE_PREADV fallback reads only bufs[0]; kept byte-identical.
 */
static ssize_t uv__fs_read(uv_fs_t* req) { #if defined(__linux__) static int no_preadv; #endif unsigned int iovmax; ssize_t result; iovmax = uv__getiovmax(); if (req->nbufs > iovmax) req->nbufs = iovmax; if (req->off < 0) { if (req->nbufs == 1) result = read(req->file, req->bufs[0].base, req->bufs[0].len); else result = readv(req->file, (struct iovec*) req->bufs, req->nbufs); } else { if (req->nbufs == 1) { result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); goto done; } #if HAVE_PREADV result = preadv(req->file, (struct iovec*) req->bufs, req->nbufs, req->off); #else # if defined(__linux__) if (no_preadv) retry: # endif { result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); } # if defined(__linux__) else { result = uv__preadv(req->file, (struct iovec*)req->bufs, req->nbufs, req->off); if (result == -1 && errno == ENOSYS) { no_preadv = 1; goto retry; } } # endif #endif } done: /* Early cleanup of bufs allocation, since we're done with it. */ if (req->bufs != req->bufsml) uv__free(req->bufs); req->bufs = NULL; req->nbufs = 0; return result; }
/*
 * Like preadv(2), but transparently retries the call when it is
 * interrupted (EINTR) or would block (EAGAIN), so callers never observe
 * those two transient errors.
 */
ssize_t xpreadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
	ssize_t n;

	do {
		n = preadv(fd, iov, iovcnt, offset);
	} while (n < 0 && (errno == EAGAIN || errno == EINTR));

	return n;
}
/* Minimal preadv2 shim: no preadv2 flags are implemented, so any nonzero
   flags argument fails with ENOTSUP and zero flags degrade to preadv.  */
ssize_t
preadv2 (int fd, const struct iovec *vector, int count, off_t offset,
	 int flags)
{
  if (flags == 0)
    return preadv (fd, vector, count, offset);

  __set_errno (ENOTSUP);
  return -1;
}
/*
 * libuv worker (variant): same read dispatch as the other uv__fs_read in
 * this file — read/readv at the current position when off < 0, otherwise
 * pread/preadv with a cached Linux ENOSYS fallback — plus an AIX guard
 * that rejects directory fds with EISDIR before reading.  NOTE(review):
 * the non-HAVE_PREADV fallback reads only bufs[0]; kept byte-identical.
 */
static ssize_t uv__fs_read(uv_fs_t* req) { #if defined(__linux__) static int no_preadv; #endif ssize_t result; #if defined(_AIX) struct stat buf; if(fstat(req->file, &buf)) return -1; if(S_ISDIR(buf.st_mode)) { errno = EISDIR; return -1; } #endif /* defined(_AIX) */ if (req->off < 0) { if (req->nbufs == 1) result = read(req->file, req->bufs[0].base, req->bufs[0].len); else result = readv(req->file, (struct iovec*) req->bufs, req->nbufs); } else { if (req->nbufs == 1) { result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); goto done; } #if HAVE_PREADV result = preadv(req->file, (struct iovec*) req->bufs, req->nbufs, req->off); #else # if defined(__linux__) if (no_preadv) retry: # endif { result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); } # if defined(__linux__) else { result = uv__preadv(req->file, (struct iovec*)req->bufs, req->nbufs, req->off); if (result == -1 && errno == ENOSYS) { no_preadv = 1; goto retry; } } # endif #endif } done: return result; }
/* Fill @buf with @sz bytes taken from the start of @fd in one positioned
   vector read.  Returns 0 only when the buffer was filled completely;
   any failure or short read yields -1. */
int file_read(int fd, void *buf, size_t sz)
{
  struct iovec iov = { .iov_base = buf, .iov_len = sz };

  /* Returns -1 on read failure */
  if (preadv (fd, &iov, 1, 0) != sz)
    return -1;

  /* Returns 0 on a successful buffer fill. */
  return 0;
}
/* Positioned vector read for the local 9p backend: use preadv(2) when the
 * build has it, otherwise emulate with lseek(2) followed by readv(2).
 * (ctx is unused; kept for the backend function-table signature.) */
static ssize_t local_preadv(FsContext *ctx, int fd, const struct iovec *iov,
                            int iovcnt, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, iovcnt, offset);
#else
    if (lseek(fd, offset, SEEK_SET) == -1) {
        return -1;
    }
    return readv(fd, iov, iovcnt);
#endif
}
/*
 * Fragment of a strace preadv/pwritev decode test: arranges fds 0 (read)
 * and 1 (write) on an unlinked temp file, builds hexdumped source buffers
 * placed so that `efault` points just past mapped memory, then checks that
 * pwritev/preadv with that faulting iovec pointer fail and are decoded as
 * expected.  NOTE(review): truncated mid-initializer at the end; kept
 * byte-identical.
 */
int main(void) { tprintf("%s", ""); static char tmp[] = "preadv-pwritev-tmpfile"; if (open(tmp, O_CREAT|O_RDONLY|O_TRUNC, 0600) != 0) perror_msg_and_fail("creat: %s", tmp); if (open(tmp, O_WRONLY) != 1) perror_msg_and_fail("open: %s", tmp); if (unlink(tmp)) perror_msg_and_fail("unlink: %s", tmp); static const char w0_c[] = "012"; const char *w0_d = hexdump_strdup(w0_c); void *w0 = tail_memdup(w0_c, LENGTH_OF(w0_c)); const void *efault = w0 + LENGTH_OF(w0_c); static const char w1_c[] = "34567"; const char *w1_d = hexdump_strdup(w1_c); void *w1 = tail_memdup(w1_c, LENGTH_OF(w1_c)); static const char w2_c[] = "89abcde"; const char *w2_d = hexdump_strdup(w2_c); void *w2 = tail_memdup(w2_c, LENGTH_OF(w2_c)); long rc; rc = pwritev(1, efault, 42, 0); tprintf("pwritev(1, %p, 42, 0) = %ld %s (%m)\n", efault, rc, errno2name()); rc = preadv(0, efault, 42, 0); tprintf("preadv(0, %p, 42, 0) = %ld %s (%m)\n", efault, rc, errno2name()); static const char r0_c[] = "01234567"; const char *r0_d = hexdump_strdup(r0_c); static const char r1_c[] = "89abcde"; const char *r1_d = hexdump_strdup(r1_c); const struct iovec w_iov_[] = { { .iov_base = w0, .iov_len = LENGTH_OF(w0_c) }, { .iov_base = w1,
static void verify_preadv(unsigned int n) { struct tcase *tc = &tcases[n]; TEST(preadv(*tc->fd, tc->name, tc->count, tc->offset)); if (TEST_RETURN == 0) { tst_res(TFAIL, "preadv() succeeded unexpectedly"); return; } if (TEST_ERRNO == tc->exp_err) { tst_res(TPASS | TTERRNO, "preadv() failed as expected"); return; } tst_res(TFAIL | TTERRNO, "preadv() failed unexpectedly, expected %s", tst_strerrno(tc->exp_err)); }
/*
 * LTP success-path test: zeroes the first CHUNK-sized read buffer, rewinds
 * the file, issues preadv(2) with the table-driven count/offset, then
 * verifies the returned byte count, that every byte matches the expected
 * fill character, and that preadv did not move the file offset.
 */
void verify_preadv(unsigned int n) { int i; char *vec; struct tcase *tc = &tcases[n]; vec = rd_iovec[0].iov_base; memset(vec, 0x00, CHUNK); SAFE_LSEEK(fd, 0, SEEK_SET); TEST(preadv(fd, rd_iovec, tc->count, tc->offset)); if (TST_RET < 0) { tst_res(TFAIL | TTERRNO, "Preadv(2) failed"); return; } if (TST_RET != tc->size) { tst_res(TFAIL, "Preadv(2) read %li bytes, expected %zi", TST_RET, tc->size); return; } for (i = 0; i < tc->size; i++) { if (vec[i] != tc->content) break; } if (i < tc->size) { tst_res(TFAIL, "Buffer wrong at %i have %02x expected %02x", i, vec[i], tc->content); return; } if (SAFE_LSEEK(fd, 0, SEEK_CUR) != 0) { tst_res(TFAIL, "Preadv(2) has changed file offset"); return; } tst_res(TPASS, "Preadv(2) read %zi bytes successfully " "with content '%c' expectedly", tc->size, tc->content); }
/*
 * Chapel runtime wrapper: performs a full positioned scatter read, slicing
 * the iovec array into IOV_MAX-sized batches and accumulating the total
 * through *num_read_out.  Stops early on error or on a batch that returned
 * fewer bytes than requested (EOF); treats Cygwin's ENODATA as a zero-byte
 * read, and maps a zero-byte total for a nonempty request to EEOF.
 */
err_t sys_preadv(fd_t fd, const struct iovec* iov, int iovcnt, off_t seek_to_offset, ssize_t* num_read_out) { ssize_t got; ssize_t got_total; err_t err_out; int i; int niovs = IOV_MAX; STARTING_SLOW_SYSCALL; err_out = 0; got_total = 0; for( i = 0; i < iovcnt; i += niovs ) { niovs = iovcnt - i; if( niovs > IOV_MAX ) niovs = IOV_MAX; // Some systems preadv doesn't take a const struct iovec*, hence the cast got = preadv(fd, (struct iovec*) &iov[i], niovs, seek_to_offset + got_total); #ifdef __CYGWIN__ if( got == -1 && errno == ENODATA ) got = 0; #endif if( got != -1 ) { got_total += got; } else { err_out = errno; break; } if( got != sys_iov_total_bytes(&iov[i], niovs) ) { break; } } if( err_out == 0 && got_total == 0 && sys_iov_total_bytes(iov, iovcnt) != 0 ) err_out = EEOF; *num_read_out = got_total; DONE_SLOW_SYSCALL; return err_out; }
/*
 * gfs2 utility: reads n consecutive filesystem blocks starting at `block`
 * into freshly bget()-ed buffer heads with a single preadv(2), timing the
 * syscall and logging the duration to brdfp when profiling is enabled.
 * A short or failed read prints the caller's location and exits.
 * NOTE(review): on a mid-loop bget() failure the earlier heads are not
 * released here — presumably the caller/exit path copes; verify.
 */
int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller) { struct iovec *iov = alloca(n * sizeof(struct iovec)); struct iovec *iovbase = iov; struct timeval st, et, tt; uint64_t b = block; size_t size = 0; size_t i; int ret; for (i = 0; i < n; i++) { bhs[i] = bget(sdp, b++); if (bhs[i] == NULL) return -1; *iov++ = bhs[i]->iov; size += bhs[i]->iov.iov_len; } gettimeofday(&st, NULL); ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize); gettimeofday(&et, NULL); if (ret != size) { fprintf(stderr, "bad read: %s from %s:%d: block " "%llu (0x%llx)\n", strerror(errno), caller, line, (unsigned long long)block, (unsigned long long)block); exit(-1); } if (brdfp) { unsigned long usecs; timersub(&et, &st, &tt); usecs = tt.tv_usec + 1000000*tt.tv_sec; fprintf(brdfp, "%lu\t%d\t%s\n", usecs, line, caller); } return 0; }
/*
 * Read one slot of an on-disk table: packs up to three iovecs — item body
 * (when p), zero-fill padding (when p && f), slot footer (when f) — and
 * advances the starting offset past any part the caller did not request,
 * then issues a single preadv(2) and asserts the full expected byte count
 * arrived.  NOTE(review): pad is asserted to be zero just above, so the
 * zerobuf iovec is packed with length 0 — presumably retained for layout
 * symmetry with formats that do pad; confirm.
 */
void pfl_odt_read(struct pfl_odt *t, int64_t n, void *p, struct pfl_odt_slotftr *f) { ssize_t expect = 0; struct pfl_odt_hdr *h; struct iovec iov[3]; ssize_t rc, pad; int nio = 0; off_t off; memset(iov, 0, sizeof(iov)); h = t->odt_hdr; pad = h->odth_slotsz - h->odth_itemsz - sizeof(*f); pfl_assert(!pad); pfl_odt_zerobuf_ensurelen(pad); off = h->odth_start + n * h->odth_slotsz; if (p) PACK_IOV(p, h->odth_itemsz); else off += h->odth_itemsz; if (p && f) PACK_IOV(pfl_odt_zerobuf, pad); else off += pad; if (f) PACK_IOV(f, sizeof(*f)); rc = preadv(t->odt_fd, iov, nio, off); pfl_assert(rc == expect); }
static int fio_pvsyncio_queue(struct thread_data *td, struct io_u *io_u) { struct syncio_data *sd = td->io_ops->data; struct iovec *iov = &sd->iovecs[0]; struct fio_file *f = io_u->file; int ret; fio_ro_check(td, io_u); iov->iov_base = io_u->xfer_buf; iov->iov_len = io_u->xfer_buflen; if (io_u->ddir == DDIR_READ) ret = preadv(f->fd, iov, 1, io_u->offset); else if (io_u->ddir == DDIR_WRITE) ret = pwritev(f->fd, iov, 1, io_u->offset); else if (io_u->ddir == DDIR_TRIM) { do_io_u_trim(td, io_u); return FIO_Q_COMPLETED; } else ret = do_io_u_sync(td, io_u); return fio_io_end(td, io_u, ret); }
/*
 * extstore I/O worker thread: sleeps on its condition variable, detaches a
 * batch of up to io_depth obj_io entries from its queue under the mutex,
 * and services each one.  Reads validate the page version under the page
 * lock, are satisfied from the in-memory write buffer when the offset is
 * past what has been flushed, or bump the page refcount and issue
 * pread/preadv against the backing fd; writes use pwrite.  The completion
 * callback runs before the page refcount is dropped.  NOTE(review): this
 * collapsed blob contains mid-line `//` comments; kept byte-identical.
 */
// FIXME: protect from reading past page static void *extstore_io_thread(void *arg) { store_io_thread *me = (store_io_thread *)arg; store_engine *e = me->e; while (1) { obj_io *io_stack = NULL; pthread_mutex_lock(&me->mutex); if (me->queue == NULL) { pthread_cond_wait(&me->cond, &me->mutex); } // Pull and disconnect a batch from the queue if (me->queue != NULL) { int i; obj_io *end = NULL; io_stack = me->queue; end = io_stack; for (i = 1; i < e->io_depth; i++) { if (end->next) { end = end->next; } else { break; } } me->depth -= i; me->queue = end->next; end->next = NULL; } pthread_mutex_unlock(&me->mutex); obj_io *cur_io = io_stack; while (cur_io) { // We need to note next before the callback in case the obj_io // gets reused. obj_io *next = cur_io->next; int ret = 0; int do_op = 1; store_page *p = &e->pages[cur_io->page_id]; // TODO: loop if not enough bytes were read/written. switch (cur_io->mode) { case OBJ_IO_READ: // Page is currently open. deal if read is past the end. pthread_mutex_lock(&p->mutex); if (!p->free && !p->closed && p->version == cur_io->page_version) { if (p->active && cur_io->offset >= p->written) { ret = _read_from_wbuf(p, cur_io); do_op = 0; } else { p->refcount++; } STAT_L(e); e->stats.bytes_read += cur_io->len; e->stats.objects_read++; STAT_UL(e); } else { do_op = 0; ret = -2; // TODO: enum in IO for status? } pthread_mutex_unlock(&p->mutex); if (do_op) { if (cur_io->iov == NULL) { ret = pread(p->fd, cur_io->buf, cur_io->len, p->offset + cur_io->offset); } else { ret = preadv(p->fd, cur_io->iov, cur_io->iovcnt, p->offset + cur_io->offset); } } break; case OBJ_IO_WRITE: do_op = 0; // FIXME: Should hold refcount during write. doesn't // currently matter since page can't free while active. 
ret = pwrite(p->fd, cur_io->buf, cur_io->len, p->offset + cur_io->offset); break; } if (ret == 0) { E_DEBUG("read returned nothing\n"); } #ifdef EXTSTORE_DEBUG if (ret == -1) { perror("read/write op failed"); } #endif cur_io->cb(e, cur_io, ret); if (do_op) { pthread_mutex_lock(&p->mutex); p->refcount--; pthread_mutex_unlock(&p->mutex); } cur_io = next; } } return NULL; }
/*
 * bhyve virtio-block (indirect-descriptor variant): pulls one request from
 * the hardware queue and asserts the guest used a single indirect
 * descriptor holding a read-only fixed header, one or more data segments,
 * and a trailing writable status byte.  Builds an iovec from the data
 * segments, issues pwritev/preadv at the sector-derived byte offset,
 * stores the virtio status, and returns the descriptor to the used ring.
 */
static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) { struct iovec iov[VTBLK_MAXSEGS]; struct virtio_blk_hdr *vbh; struct virtio_desc *vd, *vid; struct virtio_used *vu; uint8_t *status; int i; int err; int iolen; int nsegs; int uidx, aidx, didx; int writeop; off_t offset; uidx = *hq->hq_used_idx; aidx = hq->hq_cur_aidx; didx = hq->hq_avail_ring[aidx % hq->hq_size]; assert(didx >= 0 && didx < hq->hq_size); vd = &hq->hq_dtable[didx]; /* * Verify that the descriptor is indirect, and obtain * the pointer to the indirect descriptor. * There has to be space for at least 3 descriptors * in the indirect descriptor array: the block header, * 1 or more data descriptors, and a status byte. */ assert(vd->vd_flags & VRING_DESC_F_INDIRECT); nsegs = vd->vd_len / sizeof(struct virtio_desc); assert(nsegs >= 3); assert(nsegs < VTBLK_MAXSEGS + 2); vid = paddr_guest2host(vd->vd_addr); assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0); /* * The first descriptor will be the read-only fixed header */ vbh = paddr_guest2host(vid[0].vd_addr); assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr)); assert(vid[0].vd_flags & VRING_DESC_F_NEXT); assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0); writeop = (vbh->vbh_type == VBH_OP_WRITE); offset = vbh->vbh_sector * DEV_BSIZE; /* * Build up the iovec based on the guest's data descriptors */ for (i = 1, iolen = 0; i < nsegs - 1; i++) { iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr); iov[i-1].iov_len = vid[i].vd_len; iolen += vid[i].vd_len; assert(vid[i].vd_flags & VRING_DESC_F_NEXT); assert((vid[i].vd_flags & VRING_DESC_F_INDIRECT) == 0); /* * - write op implies read-only descriptor, * - read op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. 
*/ assert(((vid[i].vd_flags & VRING_DESC_F_WRITE) == 0) == writeop); } /* Lastly, get the address of the status byte */ status = paddr_guest2host(vid[nsegs - 1].vd_addr); assert(vid[nsegs - 1].vd_len == 1); assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0); assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE); DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", writeop ? "write" : "read", iolen, nsegs - 2, offset)); if (writeop){ err = pwritev(sc->vbsc_fd, iov, nsegs - 2, offset); } else { err = preadv(sc->vbsc_fd, iov, nsegs - 2, offset); } *status = err < 0 ? VTBLK_S_IOERR : VTBLK_S_OK; /* * Return the single indirect descriptor back to the host */ vu = &hq->hq_used_ring[uidx % hq->hq_size]; vu->vu_idx = didx; vu->vu_tlen = 1; hq->hq_cur_aidx++; *hq->hq_used_idx += 1; }
/*
 * bhyve virtio-block: fetches one descriptor chain (read-only header,
 * data iovecs, trailing writable status byte), validates each data
 * descriptor's WRITE flag against the operation direction, then dispatches
 * on the request type — pwritev/preadv for data, ident-string copy,
 * fsync for flush — maps the result to a virtio status code, and releases
 * the chain reporting the 1 status byte as written.
 */
static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { struct virtio_blk_hdr *vbh; uint8_t *status; int i, n; int err; int iolen; int writeop, type; off_t offset; struct iovec iov[VTBLK_MAXSEGS + 2]; uint16_t flags[VTBLK_MAXSEGS + 2]; n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); /* * The first descriptor will be the read-only fixed header, * and the last is for status (hence +2 above and below). * The remaining iov's are the actual data I/O vectors. * * XXX - note - this fails on crash dump, which does a * VIRTIO_BLK_T_FLUSH with a zero transfer length */ assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); assert((flags[0] & VRING_DESC_F_WRITE) == 0); assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); vbh = iov[0].iov_base; status = iov[--n].iov_base; assert(iov[n].iov_len == 1); assert(flags[n] & VRING_DESC_F_WRITE); /* * XXX * The guest should not be setting the BARRIER flag because * we don't advertise the capability. */ type = vbh->vbh_type & ~VBH_FLAG_BARRIER; writeop = (type == VBH_OP_WRITE); offset = vbh->vbh_sector * DEV_BSIZE; iolen = 0; for (i = 1; i < n; i++) { /* * - write op implies read-only descriptor, * - read/ident op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. */ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); iolen += iov[i].iov_len; } DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", writeop ? 
"write" : "read/ident", iolen, i - 1, offset)); switch (type) { case VBH_OP_WRITE: err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); break; case VBH_OP_READ: err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); break; case VBH_OP_IDENT: /* Assume a single buffer */ strlcpy(iov[1].iov_base, sc->vbsc_ident, MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); err = 0; break; case VBH_OP_FLUSH: case VBH_OP_FLUSH_OUT: err = fsync(sc->vbsc_fd); break; default: err = -ENOSYS; break; } /* convert errno into a virtio block error return */ if (err < 0) { if (err == -ENOSYS) *status = VTBLK_S_UNSUPP; else *status = VTBLK_S_IOERR; } else *status = VTBLK_S_OK; /* * Return the descriptor back to the host. * We wrote 1 byte (our status) to host. */ vq_relchain(vq, 1); }
/*
 * Fragment of a strace preadv test: reads the first half of the test file
 * into a one-element iovec and checks both the syscall result and the
 * decoded output line, then begins setting up a two-element iovec for the
 * next case.  NOTE(review): truncated at both ends (no enclosing function
 * header or closing brace visible); kept byte-identical.
 */
static const char r0_c[] = "01234567"; static const char r1_c[] = "89abcde"; const unsigned int r_len = (LENGTH_OF(w) + 1) / 2; void *r0 = tail_alloc(r_len); const struct iovec r0_iov_[] = { { .iov_base = r0, .iov_len = r_len } }; const struct iovec *r_iov = tail_memdup(r0_iov_, sizeof(r0_iov_)); long rc; rc = preadv(fd, r_iov, ARRAY_SIZE(r0_iov_), 0); if (rc != (int) r_len) perror_msg_and_fail("preadv: expected %u, returned %ld", r_len, rc); printf("preadv(%d, [{iov_base=\"%s\", iov_len=%u}], %u, 0) = %u\n", fd, r0_c, r_len, ARRAY_SIZE(r0_iov_), r_len); void *r1 = tail_alloc(r_len); void *r2 = tail_alloc(LENGTH_OF(w)); const struct iovec r1_iov_[] = { { .iov_base = r1, .iov_len = r_len }, { .iov_base = r2,
/* Thin indirection over preadv(2) so the surrounding AIO code can call
 * positioned vector reads and writes through uniformly-named helpers. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t len = preadv(fd, iov, nr_iov, offset);
    return len;
}
/*
 * libuv worker (oldest variant): read/readv at the current position when
 * off < 0, otherwise pread/preadv.  Without HAVE_PREADV it emulates the
 * vectored positioned read with a pread loop over every buffer, and on
 * Linux first tries the raw preadv syscall, falling back to that loop when
 * the kernel returns ENOSYS (cached in no_preadv).  Also rejects directory
 * fds with EISDIR on AIX, and frees the heap-allocated buffer array.
 */
static ssize_t uv__fs_read(uv_fs_t* req) { ssize_t result; #if defined(_AIX) struct stat buf; if(fstat(req->file, &buf)) return -1; if(S_ISDIR(buf.st_mode)) { errno = EISDIR; return -1; } #endif /* defined(_AIX) */ if (req->off < 0) { if (req->nbufs == 1) result = read(req->file, req->bufs[0].base, req->bufs[0].len); else result = readv(req->file, (struct iovec*) req->bufs, req->nbufs); } else { if (req->nbufs == 1) { result = pread(req->file, req->bufs[0].base, req->bufs[0].len, req->off); goto done; } #if HAVE_PREADV result = preadv(req->file, (struct iovec*) req->bufs, req->nbufs, req->off); #else # if defined(__linux__) static int no_preadv; if (no_preadv) # endif { off_t nread; size_t index; # if defined(__linux__) retry: # endif nread = 0; index = 0; result = 1; do { if (req->bufs[index].len > 0) { result = pread(req->file, req->bufs[index].base, req->bufs[index].len, req->off + nread); if (result > 0) nread += result; } index++; } while (index < req->nbufs && result > 0); if (nread > 0) result = nread; } # if defined(__linux__) else { result = uv__preadv(req->file, (struct iovec*)req->bufs, req->nbufs, req->off); if (result == -1 && errno == ENOSYS) { no_preadv = 1; goto retry; } } # endif #endif } done: if (req->bufs != req->bufsml) free(req->bufs); return result; }
/*
 * systemd test for the sd_memfd API: exercises mapping and writing through
 * fd offsets, sealing semantics (cannot seal while mapped writable, cannot
 * unseal while mapped), size get/set and truncation, fd duplication into a
 * fresh sd_memfd, toggling the seal once the mapping is gone, and finally
 * pwritev/preadv round-trips of three 3-byte vectors.  Skips when memfds
 * are unavailable (-ENOENT).  NOTE(review): `assert_se(sz = page_size())`
 * uses assignment rather than comparison, so it only checks that
 * page_size() is nonzero — almost certainly meant `==`; left byte-identical
 * here because the intended value cannot be confirmed from this view.
 */
int main(int argc, char *argv[]) { sd_memfd *m; char *s, *name; uint64_t sz; int r, fd; FILE *f; char buf[3] = {}; struct iovec iov[3] = {}; char bufv[3][3] = {}; log_set_max_level(LOG_DEBUG); r = sd_memfd_new(NULL, &m); if (r == -ENOENT) return EXIT_TEST_SKIP; assert_se(r >= 0); assert_se(sd_memfd_get_name(m, &name) >= 0); log_info("name: %s", name); free(name); r = sd_memfd_map(m, 0, 12, (void**) &s); assert_se(r >= 0); strcpy(s, "----- world"); r = sd_memfd_set_sealed(m, 1); assert_se(r == -ETXTBSY); assert_se(write(sd_memfd_get_fd(m), "he", 2) == 2); assert_se(write(sd_memfd_get_fd(m), "XXX", 3) == 3); assert_se(streq(s, "heXXX world")); /* fix "hello" */ assert_se(lseek(sd_memfd_get_fd(m), 2, SEEK_SET) == 2); assert_se(write(sd_memfd_get_fd(m), "ll", 2) == 2); assert_se(sd_memfd_get_file(m, &f) >= 0); fputc('o', f); fflush(f); /* check content */ assert_se(streq(s, "hello world")); assert_se(munmap(s, 12) == 0); r = sd_memfd_get_sealed(m); assert_se(r == 0); r = sd_memfd_get_size(m, &sz); assert_se(r >= 0); assert_se(sz = page_size()); /* truncate it */ r = sd_memfd_set_size(m, 6); assert_se(r >= 0); /* get back new value */ r = sd_memfd_get_size(m, &sz); assert_se(r >= 0); assert_se(sz == 6); r = sd_memfd_set_sealed(m, 1); assert_se(r >= 0); r = sd_memfd_get_sealed(m); assert_se(r == 1); fd = sd_memfd_dup_fd(m); assert_se(fd >= 0); sd_memfd_free(m); /* new sd_memfd, same underlying memfd */ r = sd_memfd_make(fd, &m); assert_se(r >= 0); /* we did truncate it to 6 */ r = sd_memfd_get_size(m, &sz); assert_se(r >= 0 && sz == 6); /* map it, check content */ r = sd_memfd_map(m, 0, 12, (void **)&s); assert_se(r >= 0); /* we only see the truncated size */ assert_se(streq(s, "hello ")); /* it was already sealed */ r = sd_memfd_set_sealed(m, 1); assert_se(r == -EALREADY); /* we cannot break the seal, it is mapped */ r = sd_memfd_set_sealed(m, 0); assert_se(r == -ETXTBSY); /* unmap it; become the single owner */ assert_se(munmap(s, 12) == 0); /* now we can do flip the 
sealing */ r = sd_memfd_set_sealed(m, 0); assert_se(r == 0); r = sd_memfd_get_sealed(m); assert_se(r == 0); r = sd_memfd_set_sealed(m, 1); assert_se(r == 0); r = sd_memfd_get_sealed(m); assert_se(r == 1); r = sd_memfd_set_sealed(m, 0); assert_se(r == 0); r = sd_memfd_get_sealed(m); assert_se(r == 0); /* seek at 2, read() 2 bytes */ assert_se(lseek(fd, 2, SEEK_SET) == 2); assert_se(read(fd, buf, 2) == 2); /* check content */ assert_se(memcmp(buf, "ll", 2) == 0); /* writev it out*/ iov[0].iov_base = (char *)"ABC"; iov[0].iov_len = 3; iov[1].iov_base = (char *)"DEF"; iov[1].iov_len = 3; iov[2].iov_base = (char *)"GHI"; iov[2].iov_len = 3; assert_se(pwritev(fd, iov, 3, 0) == 9); /* readv it back */ iov[0].iov_base = bufv[0]; iov[0].iov_len = 3; iov[1].iov_base = bufv[1]; iov[1].iov_len = 3; iov[2].iov_base = bufv[2]; iov[2].iov_len = 3; assert_se(preadv(fd, iov, 3, 0) == 9); /* check content */ assert_se(memcmp(bufv[0], "ABC", 3) == 0); assert_se(memcmp(bufv[1], "DEF", 3) == 0); assert_se(memcmp(bufv[2], "GHI", 3) == 0); sd_memfd_free(m); return 0; }