static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; migrate_set_state(&s->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); /* * Wake up COLO thread which may blocked in recv() or send(), * The s->rp_state.from_dst_file and s->to_dst_file may use the * same fd, but we still shutdown the fd for twice, it is harmless. */ if (s->to_dst_file) { qemu_file_shutdown(s->to_dst_file); } if (s->rp_state.from_dst_file) { qemu_file_shutdown(s->rp_state.from_dst_file); } old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, FAILOVER_STATUS_COMPLETED); if (old_state != FAILOVER_STATUS_ACTIVE) { error_report("Incorrect state (%s) while doing failover for Primary VM", FailoverStatus_lookup[old_state]); return; } /* Notify COLO thread that failover work is finished */ qemu_sem_post(&s->colo_exit_sem); }
/*
 * Signal that a COLO checkpoint should happen and re-arm the delay timer.
 *
 * @opaque: pointer to the MigrationState to operate on.
 *
 * Posts colo_checkpoint_sem, records the current QEMU_CLOCK_HOST time (ms)
 * in colo_checkpoint_time, and moves colo_delay_timer to that time plus the
 * x_checkpoint_delay parameter.
 */
void colo_checkpoint_notify(void *opaque)
{
    MigrationState *ms = opaque;
    int64_t deadline;

    qemu_sem_post(&ms->colo_checkpoint_sem);

    ms->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);

    /* Next notification is one checkpoint delay after this one */
    deadline = ms->colo_checkpoint_time + ms->parameters.x_checkpoint_delay;
    timer_mod(ms->colo_delay_timer, deadline);
}
static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. */ if (vmstate_loading) { old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, FAILOVER_STATUS_RELAUNCH); if (old_state != FAILOVER_STATUS_ACTIVE) { error_report("Unknown error while do failover for secondary VM," "old_state: %s", FailoverStatus_lookup[old_state]); } return; } migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ autostart = true; } /* * Make sure COLO incoming thread not block in recv or send, * If mis->from_src_file and mis->to_src_file use the same fd, * The second shutdown() will return -1, we ignore this value, * It is harmless. */ if (mis->from_src_file) { qemu_file_shutdown(mis->from_src_file); } if (mis->to_src_file) { qemu_file_shutdown(mis->to_src_file); } old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, FAILOVER_STATUS_COMPLETED); if (old_state != FAILOVER_STATUS_ACTIVE) { error_report("Incorrect state (%s) while doing failover for " "secondary VM", FailoverStatus_lookup[old_state]); return; } /* Notify COLO incoming thread that failover work is finished */ qemu_sem_post(&mis->colo_incoming_sem); /* For Secondary VM, jump to incoming co */ if (mis->migration_incoming_co) { qemu_coroutine_enter(mis->migration_incoming_co); } }
/*
 * Perform failover on the Primary VM side (block-replication aware).
 *
 * Without CONFIG_REPLICATION this function aborts.  With it, the function:
 *  - requests the MIGRATION_STATUS_COLO -> MIGRATION_STATUS_COMPLETED
 *    transition on the current migration state,
 *  - kicks the COLO thread via colo_checkpoint_notify(),
 *  - shuts down both migration channels,
 *  - requests the FAILOVER_STATUS_ACTIVE -> FAILOVER_STATUS_COMPLETED
 *    transition and bails out with an error report if the previous state
 *    was not ACTIVE,
 *  - stops all replication (errors are reported, not propagated),
 *  - posts colo_exit_sem to tell the COLO thread the work is done.
 */
static void primary_vm_do_failover(void)
{
#ifndef CONFIG_REPLICATION
    abort();
#else
    MigrationState *ms = migrate_get_current();
    int prev_state;
    Error *err = NULL;

    migrate_set_state(&ms->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    /*
     * The COLO thread might be waiting at
     * qemu_sem_wait(&s->colo_checkpoint_sem); kick it.
     */
    colo_checkpoint_notify(migrate_get_current());

    /*
     * The COLO thread may also be blocked in recv() or send(); shutting the
     * channels down wakes it up.  to_dst_file and rp_state.from_dst_file
     * may share one fd, in which case it is shut down twice, which is
     * harmless.
     */
    if (ms->to_dst_file != NULL) {
        qemu_file_shutdown(ms->to_dst_file);
    }
    if (ms->rp_state.from_dst_file != NULL) {
        qemu_file_shutdown(ms->rp_state.from_dst_file);
    }

    prev_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                    FAILOVER_STATUS_COMPLETED);
    if (prev_state != FAILOVER_STATUS_ACTIVE) {
        error_report("Incorrect state (%s) while doing failover for Primary VM",
                     FailoverStatus_str(prev_state));
        return;
    }

    replication_stop_all(true, &err);
    if (err != NULL) {
        error_report_err(err);
        err = NULL;
    }

    /* Tell the COLO thread that the failover work is finished */
    qemu_sem_post(&ms->colo_exit_sem);
#endif
}
/* * Handle faults detected by the USERFAULT markings */ static void *postcopy_ram_fault_thread(void *opaque) { MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; size_t index; RAMBlock *rb = NULL; trace_postcopy_ram_fault_thread_entry(); mis->last_rb = NULL; /* last RAMBlock we sent part of */ qemu_sem_post(&mis->fault_thread_sem); struct pollfd *pfd; size_t pfd_len = 2 + mis->postcopy_remote_fds->len; pfd = g_new0(struct pollfd, pfd_len); pfd[0].fd = mis->userfault_fd; pfd[0].events = POLLIN; pfd[1].fd = mis->userfault_event_fd; pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); for (index = 0; index < mis->postcopy_remote_fds->len; index++) { struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, struct PostCopyFD, index); pfd[2 + index].fd = pcfd->fd; pfd[2 + index].events = POLLIN; trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, pcfd->fd); } while (true) { ram_addr_t rb_offset; int poll_result; /* * We're mainly waiting for the kernel to give us a faulting HVA, * however we can be told to quit via userfault_quit_fd which is * an eventfd */ poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); if (poll_result == -1) { error_report("%s: userfault poll: %s", __func__, strerror(errno)); break; } if (pfd[1].revents) { uint64_t tmp64 = 0; /* Consume the signal */ if (read(mis->userfault_event_fd, &tmp64, 8) != 8) { /* Nothing obviously nicer than posting this error. */ error_report("%s: read() failed", __func__); } if (atomic_read(&mis->fault_thread_quit)) { trace_postcopy_ram_fault_thread_quit(); break; } } if (pfd[0].revents) { poll_result--; ret = read(mis->userfault_fd, &msg, sizeof(msg)); if (ret != sizeof(msg)) { if (errno == EAGAIN) { /* * if a wake up happens on the other thread just after * the poll, there is nothing to read. 
*/ continue; } if (ret < 0) { error_report("%s: Failed to read full userfault " "message: %s", __func__, strerror(errno)); break; } else { error_report("%s: Read %d bytes from userfaultfd " "expected %zd", __func__, ret, sizeof(msg)); break; /* Lost alignment, don't know what we'd read next */ } } if (msg.event != UFFD_EVENT_PAGEFAULT) { error_report("%s: Read unexpected event %ud from userfaultfd", __func__, msg.event); continue; /* It's not a page fault, shouldn't happen */ } rb = qemu_ram_block_from_host( (void *)(uintptr_t)msg.arg.pagefault.address, true, &rb_offset); if (!rb) { error_report("postcopy_ram_fault_thread: Fault outside guest: %" PRIx64, (uint64_t)msg.arg.pagefault.address); break; } rb_offset &= ~(qemu_ram_pagesize(rb) - 1); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset, msg.arg.pagefault.feat.ptid); mark_postcopy_blocktime_begin( (uintptr_t)(msg.arg.pagefault.address), msg.arg.pagefault.feat.ptid, rb); /* * Send the request to the source - we want to request one * of our host page sizes (which is >= TPS) */ if (rb != mis->last_rb) { mis->last_rb = rb; migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), rb_offset, qemu_ram_pagesize(rb)); } else { /* Save some space */ migrate_send_rp_req_pages(mis, NULL, rb_offset, qemu_ram_pagesize(rb)); } } /* Now handle any requests from external processes on shared memory */ /* TODO: May need to handle devices deregistering during postcopy */ for (index = 2; index < pfd_len && poll_result; index++) { if (pfd[index].revents) { struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, struct PostCopyFD, index - 2); poll_result--; if (pfd[index].revents & POLLERR) { error_report("%s: POLLERR on poll %zd fd=%d", __func__, index, pcfd->fd); pfd[index].events = 0; continue; } ret = read(pcfd->fd, &msg, sizeof(msg)); if (ret != sizeof(msg)) { if (errno == EAGAIN) { /* * if a wake up happens on the other thread just after * the poll, there is nothing 
to read. */ continue; } if (ret < 0) { error_report("%s: Failed to read full userfault " "message: %s (shared) revents=%d", __func__, strerror(errno), pfd[index].revents); /*TODO: Could just disable this sharer */ break; } else { error_report("%s: Read %d bytes from userfaultfd " "expected %zd (shared)", __func__, ret, sizeof(msg)); /*TODO: Could just disable this sharer */ break; /*Lost alignment,don't know what we'd read next*/ } } if (msg.event != UFFD_EVENT_PAGEFAULT) { error_report("%s: Read unexpected event %ud " "from userfaultfd (shared)", __func__, msg.event); continue; /* It's not a page fault, shouldn't happen */ } /* Call the device handler registered with us */ ret = pcfd->handler(pcfd, &msg); if (ret) { error_report("%s: Failed to resolve shared fault on %zd/%s", __func__, index, pcfd->idstr); /* TODO: Fail? Disable this sharer? */ } } } } trace_postcopy_ram_fault_thread_exit(); g_free(pfd); return NULL; }
/*
 * Perform failover on the Secondary VM side (block-replication aware).
 *
 * Without CONFIG_REPLICATION this function aborts.  With it:
 *  - While vmstate_loading is set, the failover is not carried out; only
 *    the FAILOVER_STATUS_ACTIVE -> FAILOVER_STATUS_RELAUNCH transition is
 *    requested and the function returns.
 *  - Otherwise the incoming migration state is moved from COLO to
 *    COMPLETED, all replication is stopped, the network filters are sent
 *    COLO_EVENT_FAILOVER, autostart is forced on, both channels to the
 *    source are shut down and the ACTIVE -> COMPLETED failover transition
 *    is requested.  Only if the previous failover state was ACTIVE is
 *    colo_incoming_sem posted and the incoming coroutine (if any) entered.
 *
 * Errors from replication_stop_all() and colo_notify_filters_event() are
 * reported but do not stop the failover.
 */
static void secondary_vm_do_failover(void)
{
/* COLO needs enable block-replication */
#ifdef CONFIG_REPLICATION
    int old_state;
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;

    /* Can not do failover during the process of VM's loading VMstate, Or
     * it will break the secondary VM.
     */
    if (vmstate_loading) {
        old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                        FAILOVER_STATUS_RELAUNCH);
        if (old_state != FAILOVER_STATUS_ACTIVE) {
            error_report("Unknown error while do failover for secondary VM,"
                         "old_state: %s", FailoverStatus_str(old_state));
        }
        return;
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                      MIGRATION_STATUS_COMPLETED);

    replication_stop_all(true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        /*
         * error_report_err() consumes the Error object.  The pointer must
         * be cleared before it is handed to the next call and re-tested,
         * otherwise a dangling pointer would be reported again (or trip the
         * "*errp must be NULL" check if the next call sets an error).
         */
        local_err = NULL;
    }

    /* Notify all filters of all NIC to do checkpoint */
    colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
    }

    if (!autostart) {
        error_report("\"-S\" qemu option will be ignored in secondary side");
        /* recover runstate to normal migration finish state */
        autostart = true;
    }

    /*
     * Make sure COLO incoming thread not block in recv or send,
     * If mis->from_src_file and mis->to_src_file use the same fd,
     * The second shutdown() will return -1, we ignore this value,
     * It is harmless.
     */
    if (mis->from_src_file) {
        qemu_file_shutdown(mis->from_src_file);
    }
    if (mis->to_src_file) {
        qemu_file_shutdown(mis->to_src_file);
    }

    old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                   FAILOVER_STATUS_COMPLETED);
    if (old_state != FAILOVER_STATUS_ACTIVE) {
        error_report("Incorrect state (%s) while doing failover for "
                     "secondary VM", FailoverStatus_str(old_state));
        return;
    }

    /* Notify COLO incoming thread that failover work is finished */
    qemu_sem_post(&mis->colo_incoming_sem);

    /* For Secondary VM, jump to incoming co */
    if (mis->migration_incoming_co) {
        qemu_coroutine_enter(mis->migration_incoming_co);
    }
#else
    abort();
#endif
}