/**
 * qemu_co_queue_run_restart:
 *
 * Enter each coroutine that was previously marked for restart by
 * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
 * invoked by the core coroutine code when the current coroutine yields or
 * terminates.
 */
void qemu_co_queue_run_restart(Coroutine *co)
{
    Coroutine *next;

    trace_qemu_co_queue_run_restart(co);
    while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
        QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next);
        qemu_coroutine_enter(next);
    }
}
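/* A hedged sketch (not part of the excerpt above) of where the core
 * coroutine code might invoke qemu_co_queue_run_restart(): after control
 * returns from the entered coroutine, so queued wake-ups happen outside
 * that coroutine's stack. The helper name, qemu_coroutine_switch() and
 * coroutine_delete() are assumptions based on the doc comment above.
 */
static void coroutine_swap(Coroutine *from, Coroutine *to)
{
    CoroutineAction ret;

    /* Switch to 'to'; we regain control when it yields or terminates. */
    ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD);

    /* Enter everything 'to' queued for wake-up while it was running. */
    qemu_co_queue_run_restart(to);

    switch (ret) {
    case COROUTINE_YIELD:
        return;
    case COROUTINE_TERMINATE:
        coroutine_delete(to);
        return;
    default:
        abort();
    }
}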
/* Free all pooled requests, then close and free the export. */
void nbd_export_close(NBDExport *exp)
{
    while (!QSIMPLEQ_EMPTY(&exp->requests)) {
        NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);

        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
        qemu_vfree(first->data);
        g_free(first);
    }

    bdrv_close(exp->bs);
    g_free(exp);
}
/* Unplug allocating writes and restart the first queued request, if any. */
static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
{
    QEDAIOCB *acb;

    assert(s->allocating_write_reqs_plugged);
    s->allocating_write_reqs_plugged = false;

    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
    if (acb) {
        qed_aio_next_io(acb, 0);
    }
}
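/* A hedged sketch of the matching plug helper that the assert above and
 * qed_need_check_timer_cb() below rely on; assumed to simply set the flag
 * so that new allocating writes queue up instead of starting immediately.
 */
static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
{
    assert(!s->allocating_write_reqs_plugged);
    s->allocating_write_reqs_plugged = true;
}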
/* Enter the next queued coroutine, if any. Returns true if one was entered. */
bool qemu_co_enter_next(CoQueue *queue)
{
    Coroutine *next;

    next = QSIMPLEQ_FIRST(&queue->entries);
    if (!next) {
        return false;
    }

    QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
    qemu_coroutine_enter(next);
    return true;
}
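/* Hypothetical usage sketch (not from the excerpts above): unlike
 * qemu_co_queue_next(), qemu_co_enter_next() enters the coroutine directly
 * rather than deferring to the wake-up list, so it can be called from
 * non-coroutine context. The helper name below is illustrative only.
 */
static void wake_all_waiters(CoQueue *queue)
{
    /* Each call removes and enters one queued coroutine. */
    while (qemu_co_enter_next(queue)) {
        /* keep going until the queue is drained */
    }
}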
static void qed_need_check_timer_cb(void *opaque)
{
    BDRVQEDState *s = opaque;

    /* The timer should only fire when allocating writes have drained */
    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));

    trace_qed_need_check_timer_cb(s);

    qed_plug_allocating_write_reqs(s);

    /* Ensure writes are on disk before clearing flag */
    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
}
/* Free every bitmap in the list, then the list itself. */
static void bitmap_list_free(Qcow2BitmapList *bm_list)
{
    Qcow2Bitmap *bm;

    if (bm_list == NULL) {
        return;
    }

    while ((bm = QSIMPLEQ_FIRST(bm_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(bm_list, entry);
        bitmap_free(bm);
    }

    g_free(bm_list);
}
/* Take a request from the export's free list, or allocate a fresh one. */
static NBDRequest *nbd_request_get(NBDClient *client)
{
    NBDRequest *req;
    NBDExport *exp = client->exp;

    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
    client->nb_requests++;

    if (QSIMPLEQ_EMPTY(&exp->requests)) {
        req = g_malloc0(sizeof(NBDRequest));
        req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
    } else {
        req = QSIMPLEQ_FIRST(&exp->requests);
        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
    }

    nbd_client_get(client);
    req->client = client;
    return req;
}
/* Move waiters from the CoQueue onto the current coroutine's wake-up list;
 * they are actually entered later by qemu_co_queue_run_restart().
 */
static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
{
    Coroutine *self = qemu_coroutine_self();
    Coroutine *next;

    if (QSIMPLEQ_EMPTY(&queue->entries)) {
        return false;
    }

    while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
        trace_qemu_co_queue_next(next);
        if (single) {
            break;
        }
    }
    return true;
}
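/* A hedged sketch of the two thin wrappers named in the
 * qemu_co_queue_run_restart() doc comment; assumed to do nothing more than
 * select the 'single' mode of qemu_co_queue_do_restart() above.
 */
bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
{
    /* Wake up exactly one waiter. */
    return qemu_co_queue_do_restart(queue, true);
}

void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
{
    /* Wake up every waiter. */
    qemu_co_queue_do_restart(queue, false);
}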
static void entropy_available(void *opaque)
{
    RndRandom *s = RNG_RANDOM(opaque);

    while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
        RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
        ssize_t len;

        len = read(s->fd, req->data, req->size);
        if (len < 0 && errno == EAGAIN) {
            return;
        }
        g_assert(len != -1);

        req->receive_entropy(req->opaque, req->data, len);

        rng_backend_finalize_request(&s->parent, req);
    }

    /* We've drained all requests, the fd handler can be reset. */
    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
}
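/* A hedged sketch of the submission side that re-arms the fd handler which
 * entropy_available() clears above once the queue is drained. The function
 * name and hook signature are assumptions, not taken from the excerpt.
 */
static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
{
    RndRandom *s = RNG_RANDOM(b);

    if (QSIMPLEQ_EMPTY(&s->parent.requests)) {
        /* First pending request: start watching the fd for readability. */
        qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
    }
}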
/* Submit async read while handling COW.
 * Returns: The number of bytes copied after and including offset,
 *          excluding any bytes copied prior to offset due to alignment.
 *          This will be @bytes if no alignment is necessary, or
 *          (new_end - offset) if tail is rounded up or down due to
 *          alignment or buffer limit.
 */
static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
                               uint64_t bytes)
{
    BlockBackend *source = s->common.blk;
    int nb_chunks;
    uint64_t ret;
    MirrorOp *op;
    uint64_t max_bytes;

    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
    bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
    assert(bytes);
    assert(bytes < BDRV_REQUEST_MAX_BYTES);
    ret = bytes;

    if (s->cow_bitmap) {
        ret += mirror_cow_align(s, &offset, &bytes);
    }
    assert(bytes <= s->buf_size);
    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
    assert(QEMU_IS_ALIGNED(offset, s->granularity));
    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
    nb_chunks = DIV_ROUND_UP(bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    /* Allocate a MirrorOp that is used as an AIO callback. */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->offset = offset;
    op->bytes = bytes;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
    }

    /* Copy the dirty cluster. */
    s->in_flight++;
    s->bytes_in_flight += bytes;
    trace_mirror_one_iteration(s, offset, bytes);

    blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
    return ret;
}
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    uint64_t delay_ns = 0;
    MirrorOp *op;
    int pnum;
    int64_t ret;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->bdev_length / BDRV_SECTOR_SIZE;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large. Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely. Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        s->waiting_for_io = true;
        qemu_coroutine_yield();
        s->waiting_for_io = false;
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster. Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            s->waiting_for_io = true;
            qemu_coroutine_yield();
            s->waiting_for_io = false;
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }
        if (IOV_MAX < nb_chunks + added_chunks) {
            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
            break;
        }

        /* We have enough free space to copy these sectors. */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
        if (!s->synced && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
        }
    } while (delay_ns == 0 && next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback. */
    op = g_new(MirrorOp, 1);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector &&
            bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);

    /* Copy the dirty cluster. */
    s->in_flight++;
    s->sectors_in_flight += nb_sectors;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);

    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_sectors, &pnum);
    if (ret < 0 || pnum < nb_sectors ||
            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                       mirror_read_complete, op);
    } else if (ret & BDRV_BLOCK_ZERO) {
        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    } else {
        assert(!(ret & BDRV_BLOCK_DATA));
        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
                         mirror_write_complete, op);
    }
    return delay_ns;
}
static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
    BlockDriverState *source = s->common.bs;
    int nb_sectors, sectors_per_chunk, nb_chunks;
    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
    MirrorOp *op;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }

    hbitmap_next_sector = s->sector_num;
    sector_num = s->sector_num;
    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    end = s->common.len >> BDRV_SECTOR_BITS;

    /* Extend the QEMUIOVector to include all adjacent blocks that will
     * be copied in this operation.
     *
     * We have to do this if we have no backing file yet in the destination,
     * and the cluster size is very large. Then we need to do COW ourselves.
     * The first time a cluster is copied, copy it entirely. Note that,
     * because both the granularity and the cluster size are powers of two,
     * the number of sectors to copy cannot exceed one cluster.
     *
     * We also want to extend the QEMUIOVector to include more adjacent
     * dirty blocks if possible, to limit the number of I/O operations and
     * run efficiently even with a small granularity.
     */
    nb_chunks = 0;
    nb_sectors = 0;
    next_sector = sector_num;
    next_chunk = sector_num / sectors_per_chunk;

    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
    while (test_bit(next_chunk, s->in_flight_bitmap)) {
        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
        qemu_coroutine_yield();
    }

    do {
        int added_sectors, added_chunks;

        if (!bdrv_get_dirty(source, next_sector) ||
            test_bit(next_chunk, s->in_flight_bitmap)) {
            assert(nb_sectors > 0);
            break;
        }

        added_sectors = sectors_per_chunk;
        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
            bdrv_round_to_clusters(s->target,
                                   next_sector, added_sectors,
                                   &next_sector, &added_sectors);

            /* On the first iteration, the rounding may make us copy
             * sectors before the first dirty one.
             */
            if (next_sector < sector_num) {
                assert(nb_sectors == 0);
                sector_num = next_sector;
                next_chunk = next_sector / sectors_per_chunk;
            }
        }

        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;

        /* When doing COW, it may happen that there is not enough space for
         * a full cluster. Wait if that is the case.
         */
        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
            qemu_coroutine_yield();
        }
        if (s->buf_free_count < nb_chunks + added_chunks) {
            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
            break;
        }

        /* We have enough free space to copy these sectors. */
        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);

        nb_sectors += added_sectors;
        nb_chunks += added_chunks;
        next_sector += added_sectors;
        next_chunk += added_chunks;
    } while (next_sector < end);

    /* Allocate a MirrorOp that is used as an AIO callback. */
    op = g_slice_new(MirrorOp);
    op->s = s;
    op->sector_num = sector_num;
    op->nb_sectors = nb_sectors;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
     */
    qemu_iovec_init(&op->qiov, nb_chunks);
    next_sector = sector_num;
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
        qemu_iovec_add(&op->qiov, buf, s->granularity);

        /* Advance the HBitmapIter in parallel, so that we do not examine
         * the same sector twice.
         */
        if (next_sector > hbitmap_next_sector &&
            bdrv_get_dirty(source, next_sector)) {
            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
        }

        next_sector += sectors_per_chunk;
    }

    bdrv_reset_dirty(source, sector_num, nb_sectors);

    /* Copy the dirty cluster. */
    s->in_flight++;
    trace_mirror_one_iteration(s, sector_num, nb_sectors);
    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
                   mirror_read_complete, op);
}
bool qemu_co_queue_empty(CoQueue *queue)
{
    return QSIMPLEQ_FIRST(&queue->entries) == NULL;
}