/*
 * Queue @size bytes starting at @buf for output on @f.
 *
 * The data is copied into the file's internal buffer in pieces that never
 * exceed the room left in that buffer.  Each piece is accounted in
 * f->bytes_xfer and handed to add_to_iovec(); once the internal buffer is
 * completely filled it is flushed with qemu_fflush().
 *
 * Nothing is done if the file already has last_error set, and the copy
 * stops as soon as qemu_file_get_error() reports an error.
 */
void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
{
    size_t chunk;

    if (f->last_error) {
        return;
    }

    for (; size > 0; buf += chunk, size -= chunk) {
        /* Clamp the piece to whatever still fits in the internal buffer. */
        chunk = IO_BUF_SIZE - f->buf_index;
        if (size < chunk) {
            chunk = size;
        }

        memcpy(f->buf + f->buf_index, buf, chunk);
        f->bytes_xfer += chunk;
        add_to_iovec(f, f->buf + f->buf_index, chunk);
        f->buf_index += chunk;

        if (f->buf_index == IO_BUF_SIZE) {
            qemu_fflush(f);
        }

        /* Leave without advancing: the caller's data is abandoned on error. */
        if (qemu_file_get_error(f)) {
            break;
        }
    }
}
static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { MigrationState *s = opaque; ssize_t error; DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); error = qemu_file_get_error(s->file); if (error) { DPRINTF("flush when error, bailing: %s\n", strerror(-error)); return error; } if (size <= 0) { return size; } if (size > (s->buffer_capacity - s->buffer_size)) { DPRINTF("increasing buffer capacity from %zu by %zu\n", s->buffer_capacity, size + 1024); s->buffer_capacity += size + 1024; s->buffer = g_realloc(s->buffer, s->buffer_capacity); } memcpy(s->buffer + s->buffer_size, buf, size); s->buffer_size += size; return size; }
static int buffered_close(void *opaque) { QEMUFileBuffered *s = opaque; ssize_t ret = 0; int ret2; DPRINTF("closing\n"); s->xfer_limit = INT_MAX; while (!qemu_file_get_error(s->file) && s->buffer_size) { ret = buffered_flush(s); if (ret < 0) { break; } if (s->freeze_output) { ret = migrate_fd_wait_for_unfreeze(s->migration_state); if (ret < 0) { break; } } } ret2 = migrate_fd_close(s->migration_state); if (ret >= 0) { ret = ret2; } qemu_del_timer(s->timer); qemu_free_timer(s->timer); g_free(s->buffer); g_free(s); return ret; }
static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { QEMUFileBuffered *s = opaque; int offset = 0, error; ssize_t ret; DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); error = qemu_file_get_error(s->file); if (error) { DPRINTF("flush when error, bailing: %s\n", strerror(-error)); return error; } DPRINTF("unfreezing output\n"); s->freeze_output = 0; buffered_flush(s); while (!s->freeze_output && offset < size) { if (s->bytes_xfer > s->xfer_limit) { DPRINTF("transfer limit exceeded when putting\n"); break; } ret = s->put_buffer(s->opaque, buf + offset, size - offset); if (ret == -EAGAIN) { DPRINTF("backend not ready, freezing\n"); s->freeze_output = 1; break; } if (ret <= 0) { DPRINTF("error putting\n"); qemu_file_set_error(s->file, ret); offset = -EINVAL; break; } DPRINTF("put %zd byte(s)\n", ret); offset += ret; s->bytes_xfer += ret; } if (offset >= 0) { DPRINTF("buffering %d bytes\n", size - offset); buffered_append(s, buf + offset, size - offset); offset = size; } if (pos == 0 && size == 0) { DPRINTF("file is ready\n"); if (s->bytes_xfer <= s->xfer_limit) { DPRINTF("notifying client\n"); s->put_ready(s->opaque); } } return offset; }
/*
 * Tell whether writing to @f should pause.
 *
 * Returns 1 when the file has an error, or when a positive xfer_limit is
 * set and bytes_xfer has gone beyond it; returns 0 otherwise.
 */
int qemu_file_rate_limit(QEMUFile *f)
{
    return qemu_file_get_error(f) ||
           (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit);
}
/*
 * Test helper: serialize @obj as described by @desc into the test file.
 *
 * The file is opened through open_test_file(true), the state is followed
 * by a QEMU_VM_EOF byte, and the helper asserts that the file reports no
 * error before closing it.
 */
static void save_vmstate(const VMStateDescription *desc, void *obj)
{
    QEMUFile *f = open_test_file(true);

    /* Write the vmstate, then the end-of-stream marker */
    vmstate_save_state(f, desc, obj);
    qemu_put_byte(f, QEMU_VM_EOF);
    /* Checked before qemu_fclose() */
    g_assert(!qemu_file_get_error(f));
    qemu_fclose(f);
}
/*
 * Handler for the migration fd of the MigrationState given as @opaque.
 *
 * Clears the fd handlers, notifies the QEMUFile, and reports a migration
 * error through migrate_fd_error() if the file ended up in error.
 */
static void migrate_fd_put_notify(void *opaque)
{
    MigrationState *s = opaque;

    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
    qemu_file_put_notify(s->file);

    /*
     * s->file is tested again after the notification.
     * NOTE(review): presumably the notification can drop the file; confirm.
     */
    if (!s->file) {
        return;
    }

    if (qemu_file_get_error(s->file)) {
        migrate_fd_error(s);
    }
}
/*
 * Test helper: write @size bytes of @wire to the test file, then load them
 * back into @obj using @desc at the given @version.
 *
 * Asserts that the file's error state agrees with the load result (error
 * set when loading failed, no error when it succeeded) and returns the
 * value produced by vmstate_load_state().
 */
static int load_vmstate_one(const VMStateDescription *desc, void *obj,
                            int version, uint8_t *wire, size_t size)
{
    QEMUFile *f;
    int ret;

    /* First pass: put the raw wire image into the file */
    f = open_test_file(true);
    qemu_put_buffer(f, wire, size);
    qemu_fclose(f);

    /* Second pass: reopen the file and load from it */
    f = open_test_file(false);
    ret = vmstate_load_state(f, desc, obj, version);
    if (ret) {
        g_assert(qemu_file_get_error(f));
    } else{
        g_assert(!qemu_file_get_error(f));
    }
    qemu_fclose(f);
    return ret;
}
/*
 * Set the transfer limit of the MigrationState given as @opaque.
 *
 * Unless the file is in error, @new_rate is capped at SIZE_MAX and the
 * limit becomes @new_rate / XFER_LIMIT_RATIO.  With a file error the limit
 * is left untouched.
 *
 * Returns the limit that is in effect afterwards.
 */
static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
{
    MigrationState *s = opaque;

    if (!qemu_file_get_error(s->file)) {
        if (new_rate > SIZE_MAX) {
            new_rate = SIZE_MAX;
        }
        s->xfer_limit = new_rate / XFER_LIMIT_RATIO;
    }

    return s->xfer_limit;
}
/*
 * Set the transfer limit of the buffered file given as @opaque.
 *
 * When the file reports no error, @new_rate is capped at SIZE_MAX and one
 * tenth of it is stored as the new limit; otherwise the limit is not
 * modified.
 *
 * Returns the limit that is in effect afterwards.
 */
static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
{
    QEMUFileBuffered *s = opaque;
    int failed = qemu_file_get_error(s->file) != 0;

    if (failed) {
        return s->xfer_limit;
    }

    if (new_rate > SIZE_MAX) {
        new_rate = SIZE_MAX;
    }
    s->xfer_limit = new_rate / 10;

    return s->xfer_limit;
}
/*
 * Rate-limit query for the MigrationState given as @opaque.
 *
 * Return values:
 *   0         sending may continue
 *   1         time to stop (bytes_xfer has reached xfer_limit)
 *   negative  there has been an error (the file's error value)
 */
static int buffered_rate_limit(void *opaque)
{
    MigrationState *s = opaque;
    int err = qemu_file_get_error(s->file);

    if (err) {
        return err;
    }

    return s->bytes_xfer >= s->xfer_limit ? 1 : 0;
}
/*
 * Rate-limit query for the buffered file given as @opaque.
 *
 * Returns the file's error value if one is pending, 1 when output is
 * frozen or bytes_xfer exceeds xfer_limit, and 0 otherwise.
 */
static int buffered_rate_limit(void *opaque)
{
    QEMUFileBuffered *s = opaque;
    int err = qemu_file_get_error(s->file);

    if (err) {
        return err;
    }

    if (s->freeze_output || s->bytes_xfer > s->xfer_limit) {
        return 1;
    }

    return 0;
}
/*
 * Timer callback for the buffered file given as @opaque.
 *
 * A file in error is closed through buffered_close() and nothing else is
 * done.  Otherwise the timer is re-armed 100 ms ahead and, unless output
 * is frozen, the transfer counter is reset and an empty write at
 * position 0 is issued through buffered_put_buffer().
 */
static void buffered_rate_tick(void *opaque)
{
    QEMUFileBuffered *s = opaque;

    if (qemu_file_get_error(s->file)) {
        /* buffered_close() releases s, so it must not be touched again. */
        buffered_close(s);
        return;
    }

    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);

    if (!s->freeze_output) {
        s->bytes_xfer = 0;
        buffered_put_buffer(s, NULL, 0, 0);
    }
}
static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) { COLOMessage msg; int ret; msg = qemu_get_be32(f); ret = qemu_file_get_error(f); if (ret < 0) { error_setg_errno(errp, -ret, "Can't receive COLO message"); return msg; } if (msg >= COLO_MESSAGE__MAX) { error_setg(errp, "%s: Invalid message", __func__); return msg; } trace_colo_receive_message(COLOMessage_lookup[msg]); return msg; }
static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { int ret; if (msg >= COLO_MESSAGE__MAX) { error_setg(errp, "%s: Invalid message", __func__); return; } qemu_put_be32(f, msg); qemu_fflush(f); ret = qemu_file_get_error(f); if (ret < 0) { error_setg_errno(errp, -ret, "Can't send COLO message"); } trace_colo_send_message(COLOMessage_lookup[msg]); }
/* * Give a QEMUFile* off the same socket but data in the opposite * direction. */ static QEMUFile *socket_get_return_path(void *opaque) { QEMUFileSocket *forward = opaque; QEMUFileSocket *reverse; if (qemu_file_get_error(forward->file)) { /* If the forward file is in error, don't try and open a return */ return NULL; } reverse = g_malloc0(sizeof(QEMUFileSocket)); reverse->fd = forward->fd; /* I don't think there's a better way to tell which direction 'this' is */ if (forward->file->ops->get_buffer != NULL) { /* being called from the read side, so we need to be able to write */ return qemu_fopen_ops(reverse, &socket_return_write_ops); } else { return qemu_fopen_ops(reverse, &socket_return_read_ops); } }
static void colo_send_message_value(QEMUFile *f, COLOMessage msg, uint64_t value, Error **errp) { Error *local_err = NULL; int ret; colo_send_message(f, msg, &local_err); if (local_err) { error_propagate(errp, local_err); return; } qemu_put_be64(f, value); qemu_fflush(f); ret = qemu_file_get_error(f); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to send value for message:%s", COLOMessage_lookup[msg]); } }
/* * Give a QEMUFile* off the same socket but data in the opposite * direction. */ static QEMUFile *socket_dup_return_path(void *opaque) { QEMUFileSocket *qfs = opaque; int revfd; bool this_is_read; QEMUFile *result; if (qemu_file_get_error(qfs->file)) { /* If the forward file is in error, don't try and open a return */ return NULL; } /* I don't think there's a better way to tell which direction 'this' is */ this_is_read = qfs->file->ops->get_buffer != NULL; revfd = dup(qfs->fd); if (revfd == -1) { error_report("Error duplicating fd for return path: %s", strerror(errno)); return NULL; } result = qemu_fopen_socket(revfd, this_is_read ? "wb" : "rb"); if (!result) { close(revfd); } if (this_is_read) { /* The qemu_fopen_socket "wb" will mark the socket blocking, * which would be OK for the return path, but the semantics * of non-blocking is that it follows the underlying connection * not the fd number, and thus setting the return path non-blocking * ends up setting the forward path blocking, which we don't want */ qemu_set_nonblock(revfd); } return result; }
/*
 * Test helper: check that the test file contains exactly the @size bytes
 * found in @wire.
 *
 * Reads @size bytes back, asserts the read was complete and error free,
 * compares the bytes with @wire, and finally asserts that one further
 * byte read leaves the file with error -EIO.
 */
static void compare_vmstate(uint8_t *wire, size_t size)
{
    QEMUFile *f = open_test_file(false);
    /* Scratch copy of the file contents, sized by the caller's @size */
    uint8_t result[size];

    /* read back as binary */
    g_assert_cmpint(qemu_get_buffer(f, result, sizeof(result)), ==,
                    sizeof(result));
    g_assert(!qemu_file_get_error(f));

    /* The file contents must match what the caller expected to be there */
    SUCCESS(memcmp(result, wire, sizeof(result)));

    /* Must reach EOF: reading one more byte has to fail with -EIO */
    qemu_get_byte(f);
    g_assert_cmpint(qemu_file_get_error(f), ==, -EIO);

    qemu_fclose(f);
}
static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, Error **errp) { Error *local_err = NULL; uint64_t value; int ret; colo_receive_check_message(f, expect_msg, &local_err); if (local_err) { error_propagate(errp, local_err); return 0; } value = qemu_get_be64(f); ret = qemu_file_get_error(f); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", COLOMessage_lookup[expect_msg]); } return value; }
/*
 * Timer callback for the buffered file given as @opaque.
 *
 * A file in error is closed through buffered_close() and nothing else is
 * done.  Otherwise the timer is re-armed 100 ms ahead and, unless output
 * is frozen, the transfer counter is reset, buffered data is flushed and
 * the put_ready callback is invoked.
 */
static void buffered_rate_tick(void *opaque)
{
    QEMUFileBuffered *s = opaque;

    if (qemu_file_get_error(s->file)) {
        /* buffered_close() releases s, so it must not be touched again. */
        buffered_close(s);
        return;
    }

    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);

    if (!s->freeze_output) {
        s->bytes_xfer = 0;
        buffered_flush(s);

        /* Add some checks around this */
        s->put_ready(s->opaque);
    }
}
static int buffered_close(void *opaque) { QEMUFileBuffered *s = opaque; int ret; DPRINTF("closing\n"); while (!qemu_file_get_error(s->file) && s->buffer_size) { buffered_flush(s); if (s->freeze_output) s->wait_for_unfreeze(s->opaque); } ret = s->close(s->opaque); qemu_del_timer(s->timer); qemu_free_timer(s->timer); g_free(s->buffer); g_free(s); return ret; }
/** Closes the file
 *
 * Flushes @f, invokes the backend's close operation when one is provided,
 * and frees the file.
 *
 * Returns negative error value if any error happened on previous
 * operations or while closing the file. Returns 0 or positive number on
 * success.
 *
 * The meaning of return value on success depends on the specific backend
 * being used.
 */
int qemu_fclose(QEMUFile *f)
{
    int result;

    qemu_fflush(f);
    result = qemu_file_get_error(f);

    if (f->ops->close) {
        int close_result = f->ops->close(f->opaque);

        /* The close result only counts when nothing failed earlier. */
        if (result >= 0) {
            result = close_result;
        }
    }

    /*
     * An error recorded on the file at any point takes precedence over
     * whatever the close operation returned.
     */
    if (f->last_error) {
        result = f->last_error;
    }

    g_free(f);
    trace_qemu_file_fclose();

    return result;
}
/*
 * Push the contents of @s's buffer to the put_buffer callback.
 *
 * Does nothing when the file is already in error.  Writing stops early
 * when the callback returns -EAGAIN (output is then marked frozen) or any
 * other value <= 0 (which is recorded as the file's error).  The bytes
 * that were written are removed from the front of the buffer; the rest
 * stays queued.
 */
static void buffered_flush(QEMUFileBuffered *s)
{
    size_t sent = 0;
    int error = qemu_file_get_error(s->file);

    if (error != 0) {
        DPRINTF("flush when error, bailing: %s\n", strerror(-error));
        return;
    }

    DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);

    while (sent < s->buffer_size) {
        ssize_t n = s->put_buffer(s->opaque, s->buffer + sent,
                                  s->buffer_size - sent);

        if (n == -EAGAIN) {
            DPRINTF("backend not ready, freezing\n");
            s->freeze_output = 1;
            break;
        }

        if (n <= 0) {
            DPRINTF("error flushing data, %zd\n", n);
            qemu_file_set_error(s->file, n);
            break;
        }

        DPRINTF("flushed %zd byte(s)\n", n);
        sent += n;
    }

    DPRINTF("flushed %zu of %zu byte(s)\n", sent, s->buffer_size);

    /* Slide the unsent tail down to the start of the buffer. */
    memmove(s->buffer, s->buffer + sent, s->buffer_size - sent);
    s->buffer_size -= sent;
}
/*
 * Drain the buffer of the MigrationState given as @opaque and close its
 * migration fd.
 *
 * The transfer limit is raised to INT_MAX, then the buffer is flushed
 * repeatedly while data remains and the file reports no error.  After
 * migrate_fd_close() the state is marked complete.
 *
 * Returns the negative status of a failed flush, otherwise the result of
 * migrate_fd_close().
 */
static int buffered_close(void *opaque)
{
    MigrationState *s = opaque;
    ssize_t status = 0;
    int close_status;

    DPRINTF("closing\n");

    s->xfer_limit = INT_MAX;

    for (;;) {
        if (qemu_file_get_error(s->file) || !s->buffer_size) {
            break;
        }

        status = buffered_flush(s);
        if (status < 0) {
            break;
        }
    }

    /* The close result is only reported if draining did not already fail. */
    close_status = migrate_fd_close(s);
    if (status >= 0) {
        status = close_status;
    }

    s->complete = true;

    return status;
}
static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { QEMUFileBuffered *s = opaque; ssize_t error; DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); error = qemu_file_get_error(s->file); if (error) { DPRINTF("flush when error, bailing: %s\n", strerror(-error)); return error; } DPRINTF("unfreezing output\n"); s->freeze_output = 0; if (size > 0) { DPRINTF("buffering %d bytes\n", size); buffered_append(s, buf, size); } error = buffered_flush(s); if (error < 0) { DPRINTF("buffered flush error. bailing: %s\n", strerror(-error)); return error; } if (pos == 0 && size == 0) { DPRINTF("file is ready\n"); if (!s->freeze_output && s->bytes_xfer < s->xfer_limit) { DPRINTF("notifying client\n"); migrate_fd_put_ready(s->migration_state); } } return size; }
static void buffered_flush(QEMUFileBuffered *s) { size_t offset = 0; int error; error = qemu_file_get_error(s->file); if (error != 0) { DPRINTF("flush when error, bailing: %s\n", strerror(-error)); return; } DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size); while (offset < s->buffer_size) { ssize_t ret; ret = s->put_buffer(s->opaque, s->buffer + offset, s->buffer_size - offset); if (ret == -EAGAIN) { DPRINTF("backend not ready, freezing\n"); DVERYDETAIL{ printf("bflush : backend not ready, freezing\n"); fflush(stdout);} s->freeze_output = 1; break; } if (ret <= 0) { DPRINTF("error flushing data, %zd\n", ret); DVERYDETAIL{ printf("bflush :error flushing data, %zd\n", ret); fflush(stdout);} qemu_file_set_error(s->file, ret); break; } else {
/*
 * Run one COLO checkpoint transaction for migration state @s.
 *
 * Sequence, as implemented below: request a checkpoint and wait for the
 * reply, reset the channel buffer @bioc, stop the VM, save the VM state
 * into @fb, announce and send the buffered state (size first, then the
 * data) on s->to_dst_file, wait for the "received" and "loaded"
 * acknowledgements, and start the VM again.
 *
 * The transaction is abandoned when a failover state other than
 * FAILOVER_STATUS_NONE is observed around stopping the VM.
 *
 * Returns 0 on success and a negative value on any failure.  Errors
 * collected in an Error object are reported before returning.
 */
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset channel-buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    qemu_mutex_lock_iothread();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("run", "stop");
    /*
     * Failover request bh could be called after vm_stop_force_state(),
     * So we need check failover_request_is_active() again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }

    /* Disable block migration */
    s->params.blk = 0;
    s->params.shared = 0;
    qemu_savevm_state_header(fb);
    qemu_savevm_state_begin(fb, &s->params);
    qemu_mutex_lock_iothread();
    qemu_savevm_state_complete_precopy(fb, false);
    qemu_mutex_unlock_iothread();

    qemu_fflush(fb);

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        goto out;
    }
    /*
     * We need the size of the VMstate data in Secondary side,
     * With which we can decide how much data should be read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    qemu_fflush(s->to_dst_file);
    ret = qemu_file_get_error(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    qemu_mutex_lock_iothread();
    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
        /*
         * ret may have been overwritten with a non-negative value by the
         * file error check above; a reported error must never be returned
         * as success.
         */
        if (ret >= 0) {
            ret = -1;
        }
    }
    return ret;
}
static void *migration_thread(void *opaque) { MigrationState *s = opaque; int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); int64_t initial_bytes = 0; int64_t max_size = 0; int64_t start_time = initial_time; bool old_vm_running = false; DPRINTF("beginning savevm\n"); qemu_savevm_state_begin(s->file, &s->params); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE); DPRINTF("setup complete\n"); while (s->state == MIG_STATE_ACTIVE) { int64_t current_time; uint64_t pending_size; if (!qemu_file_rate_limit(s->file)) { DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %" PRIu64 " max %" PRIu64 "\n", pending_size, max_size); if (pending_size && pending_size >= max_size) { qemu_savevm_state_iterate(s->file); } else { int ret; DPRINTF("done iterating\n"); qemu_mutex_lock_iothread(); start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); if (ret >= 0) { qemu_file_set_rate_limit(s->file, INT64_MAX); qemu_savevm_state_complete(s->file); } qemu_mutex_unlock_iothread(); if (ret < 0) { migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR); break; } if (!qemu_file_get_error(s->file)) { migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED); break; } } } if (qemu_file_get_error(s->file)) { migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR); break; } current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (current_time >= initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes; uint64_t time_spent = current_time - initial_time; double bandwidth = transferred_bytes / time_spent; max_size = bandwidth * migrate_max_downtime() / 1000000; s->mbps = time_spent ? 
(((double) transferred_bytes * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1; DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64 "\n", transferred_bytes, time_spent, bandwidth, max_size); /* if we haven't sent anything, we don't want to recalculate 10000 is a small enough number for our purposes */ if (s->dirty_bytes_rate && transferred_bytes > 10000) { s->expected_downtime = s->dirty_bytes_rate / bandwidth; } qemu_file_reset_rate_limit(s->file); initial_time = current_time; initial_bytes = qemu_ftell(s->file); } if (qemu_file_rate_limit(s->file)) { /* usleep expects microseconds */ g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); } } qemu_mutex_lock_iothread(); if (s->state == MIG_STATE_COMPLETED) { int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); s->total_time = end_time - s->total_time; s->downtime = end_time - start_time; runstate_set(RUN_STATE_POSTMIGRATE); } else { if (old_vm_running) { vm_start(); } } qemu_bh_schedule(s->cleanup_bh); qemu_mutex_unlock_iothread(); return NULL; }
/*
 * Run one COLO checkpoint transaction for migration state @s.
 *
 * Sequence, as implemented below: request a checkpoint and wait for the
 * reply, reset the channel buffer @bioc, stop the VM, notify the compare
 * objects, disable block migration, checkpoint replication, announce the
 * VM state, save the device state into @fb, send the live state directly
 * on s->to_dst_file, send the buffered device state (size first, then the
 * data), wait for the "received" and "loaded" acknowledgements, and start
 * the VM again.
 *
 * The transaction is abandoned when a failover state other than
 * FAILOVER_STATUS_NONE is observed around stopping the VM.  Builds without
 * CONFIG_REPLICATION abort() when this point is reached.
 *
 * Returns 0 on success and a negative value on any failure.  Errors
 * collected in an Error object are reported before returning.
 */
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                    COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset channel-buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    qemu_mutex_lock_iothread();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("run", "stop");
    /*
     * Failover request bh could be called after vm_stop_force_state(),
     * So we need check failover_request_is_active() again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }

    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
    if (local_err) {
        goto out;
    }

    /* Disable block migration */
    migrate_set_block_enabled(false, &local_err);
    if (local_err) {
        /*
         * Stop here: local_err is handed to further calls below and must
         * not already hold an error at that point.
         */
        goto out;
    }

    qemu_mutex_lock_iothread();

#ifdef CONFIG_REPLICATION
    replication_do_checkpoint_all(&local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
#else
    abort();
#endif

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    /* Note: device state is saved into buffer */
    ret = qemu_save_device_state(fb);

    qemu_mutex_unlock_iothread();
    if (ret < 0) {
        goto out;
    }
    /*
     * Only save VM's live state, which not including device state.
     * TODO: We may need a timeout mechanism to prevent COLO process
     * to be blocked here.
     */
    qemu_savevm_live_state(s->to_dst_file);

    qemu_fflush(fb);

    /*
     * We need the size of the VMstate data in Secondary side,
     * With which we can decide how much data should be read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    qemu_fflush(s->to_dst_file);
    ret = qemu_file_get_error(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                       COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    qemu_mutex_lock_iothread();
    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
        /*
         * ret may hold the non-negative result of an earlier successful
         * step; a reported error must never be returned as success.
         */
        if (ret >= 0) {
            ret = -1;
        }
    }
    return ret;
}