static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
{
    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
    BlockDriverState *bs = acb->common.bs;
    BDRVGlusterState *s = bs->opaque;
    int retval;

    acb->ret = ret;
    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
    if (retval != sizeof(acb)) {
        /*
         * Gluster AIO callback thread failed to notify the waiting
         * QEMU thread about IO completion.
         *
         * Complete this IO request and make the disk inaccessible for
         * subsequent reads and writes.
         */
        error_report("Gluster failed to notify QEMU about IO completion");

        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
        acb->common.cb(acb->common.opaque, -EIO);
        qemu_aio_release(acb);
        s->qemu_aio_count--;
        close(s->fds[GLUSTER_FD_READ]);
        close(s->fds[GLUSTER_FD_WRITE]);
        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
                                NULL);
        bs->drv = NULL; /* Make the disk inaccessible */
        qemu_mutex_unlock_iothread();
    }
}
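/*
 * For context, the completion path above pairs with a read-side handler on
 * GLUSTER_FD_READ that runs in the main loop under the BQL.  A minimal
 * sketch of that pattern follows; gluster_aio_complete() is a hypothetical
 * stand-in for the driver's completion logic, only the read-the-pointer
 * pattern is the point here.
 */
static void gluster_aio_event_reader(void *opaque)
{
    BDRVGlusterState *s = opaque;
    GlusterAIOCB *acb;
    ssize_t ret;

    /* The writer side sent the acb pointer itself; read it back. */
    ret = read(s->fds[GLUSTER_FD_READ], &acb, sizeof(acb));
    if (ret == sizeof(acb)) {
        gluster_aio_complete(acb); /* hypothetical; runs with the BQL held */
    }
}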
static int os_host_main_loop_wait(uint32_t timeout)
{
    struct timeval tv, *tvarg = NULL;
    int ret;

    glib_select_fill(&nfds, &rfds, &wfds, &xfds, &timeout);

    if (timeout < UINT32_MAX) {
        tvarg = &tv;
        tv.tv_sec = timeout / 1000;
        tv.tv_usec = (timeout % 1000) * 1000;
    }

    if (timeout > 0) {
        qemu_mutex_unlock_iothread();
    }

    ret = select(nfds + 1, &rfds, &wfds, &xfds, tvarg);

    if (timeout > 0) {
        qemu_mutex_lock_iothread();
    }

    glib_select_poll(&rfds, &wfds, &xfds, (ret < 0));
    return ret;
}
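/*
 * All the os_host_main_loop_wait() variants in this collection share one
 * idiom: drop the BQL only when the wait can actually block, so VCPU and
 * other threads can take the lock meanwhile.  A minimal standalone sketch
 * of that idiom; do_blocking_wait() is a hypothetical placeholder for
 * select()/poll()/g_poll().
 */
static void bql_blocking_wait_idiom(bool can_block)
{
    if (can_block) {
        qemu_mutex_unlock_iothread(); /* let other threads acquire the BQL */
    }
    do_blocking_wait();               /* hypothetical blocking call */
    if (can_block) {
        qemu_mutex_lock_iothread();   /* reacquire before touching state */
    }
}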
void HELPER(chsc)(CPUS390XState *env, uint64_t inst)
{
    S390CPU *cpu = s390_env_get_cpu(env);

    qemu_mutex_lock_iothread();
    ioinst_handle_chsc(cpu, inst >> 16);
    qemu_mutex_unlock_iothread();
}
void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num)
{
    uint64_t r;

    switch (num) {
    case 0x500:
        /* KVM hypercall */
        qemu_mutex_lock_iothread();
        r = s390_virtio_hypercall(env);
        qemu_mutex_unlock_iothread();
        break;
    case 0x44:
        /* yield */
        r = 0;
        break;
    case 0x308:
        /* ipl */
        handle_diag_308(env, r1, r3);
        r = 0;
        break;
    default:
        r = -1;
        break;
    }

    if (r) {
        program_interrupt(env, PGM_OPERATION, ILEN_AUTO);
    }
}
void HELPER(rsch)(CPUS390XState *env, uint64_t r1)
{
    S390CPU *cpu = s390_env_get_cpu(env);

    qemu_mutex_lock_iothread();
    ioinst_handle_rsch(cpu, r1);
    qemu_mutex_unlock_iothread();
}
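/*
 * The s390 helpers above all bracket a single ioinst_handle_*() call with
 * the BQL, since I/O instructions touch shared subchannel/device state.
 * A new helper following the same convention would look like this; the
 * xsch wrapper is shown for illustration, its exact shape simply mirrors
 * the chsc/rsch helpers above.
 */
void HELPER(xsch)(CPUS390XState *env, uint64_t r1)
{
    S390CPU *cpu = s390_env_get_cpu(env);

    qemu_mutex_lock_iothread();
    ioinst_handle_xsch(cpu, r1);
    qemu_mutex_unlock_iothread();
}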
static void migrate_fd_cleanup(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    if (s->file) {
        DPRINTF("closing file\n");
        qemu_mutex_unlock_iothread();
        qemu_thread_join(&s->thread);
        qemu_mutex_lock_iothread();

        qemu_fclose(s->file);
        s->file = NULL;
    }

    assert(s->state != MIG_STATE_ACTIVE);

    if (s->state != MIG_STATE_COMPLETED) {
        qemu_savevm_state_cancel();
        if (s->state == MIG_STATE_CANCELLING) {
            migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED);
        }
    }

    notifier_list_notify(&migration_state_notifiers, s);
}
/* Drop the BQL while this VCPU sleeps so other threads can make progress. */
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(freezing_time * 10000);
    qemu_mutex_lock_iothread();
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUArchState *env = arg;
    CPUState *cpu = ENV_GET_CPU(env);
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    cpu_single_env = env;
    while (1) {
        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
/* SCLP service call */
uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    int r;

    qemu_mutex_lock_iothread();
    r = sclp_service_call(env, r1, r2);
    if (r < 0) {
        program_interrupt(env, -r, 4);
        r = 0;
    }
    qemu_mutex_unlock_iothread();
    return r;
}
void migrate_start_colo_process(MigrationState *s)
{
    qemu_mutex_unlock_iothread();
    qemu_sem_init(&s->colo_checkpoint_sem, 0);
    s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
                                       colo_checkpoint_notify, s);

    qemu_sem_init(&s->colo_exit_sem, 0);
    migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COLO);
    colo_process_checkpoint(s);
    qemu_mutex_lock_iothread();
}
void main_loop_wait(int timeout)
{
    fd_set rfds, wfds, xfds;
    int ret, nfds;
    struct timeval tv;

    qemu_bh_update_timeout(&timeout);

    os_host_main_loop_wait(&timeout);

    tv.tv_sec = timeout / 1000;
    tv.tv_usec = (timeout % 1000) * 1000;

    /* poll any events */
    /* XXX: separate device handlers from system ones */
    nfds = -1;
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
    if (slirp_is_inited()) {
        slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
    }

    qemu_mutex_unlock_iothread();
    ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
    qemu_mutex_lock_iothread();
    qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
    if (slirp_is_inited()) {
        if (ret < 0) {
            FD_ZERO(&rfds);
            FD_ZERO(&wfds);
            FD_ZERO(&xfds);
        }
        slirp_select_poll(&rfds, &wfds, &xfds);
    }
    charpipe_poll();

    qemu_clock_run_all_timers();
    qemu_run_alarm_timer();

    /* Check bottom-halves last in case any of the earlier events triggered
       them. */
    qemu_bh_poll();
}
static int whpx_handle_halt(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    int ret = 0;

    qemu_mutex_lock_iothread();
    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->exception_index = EXCP_HLT;
        cpu->halted = true;
        ret = 1;
    }
    qemu_mutex_unlock_iothread();

    return ret;
}
void os_host_main_loop_wait(int *timeout)
{
    int ret, ret2, i;
    PollingEntry *pe;

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    if (ret == 0) {
        int err;
        WaitObjects *w = &wait_objects;

        qemu_mutex_unlock_iothread();
        ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout);
        qemu_mutex_lock_iothread();
        if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) {
            if (w->func[ret - WAIT_OBJECT_0]) {
                w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]);
            }

            /* Check for additional signaled events */
            for (i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) {
                /* Check if event is signaled */
                ret2 = WaitForSingleObject(w->events[i], 0);
                if (ret2 == WAIT_OBJECT_0) {
                    if (w->func[i]) {
                        w->func[i](w->opaque[i]);
                    }
                } else if (ret2 == WAIT_TIMEOUT) {
                    /* Not signaled; nothing to do */
                } else {
                    err = GetLastError();
                    fprintf(stderr, "WaitForSingleObject error %d %d\n",
                            i, err);
                }
            }
        } else if (ret == WAIT_TIMEOUT) {
            /* No object signaled before the timeout expired */
        } else {
            err = GetLastError();
            fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err);
        }
    }
    *timeout = 0;
}
/*
 * We purposely use a thread, so that users are forced to wait for the status
 * register.
 */
static void *edu_fact_thread(void *opaque)
{
    EduState *edu = opaque;

    while (1) {
        uint32_t val, ret = 1;

        qemu_mutex_lock(&edu->thr_mutex);
        while ((atomic_read(&edu->status) & EDU_STATUS_COMPUTING) == 0 &&
               !edu->stopping) {
            qemu_cond_wait(&edu->thr_cond, &edu->thr_mutex);
        }

        if (edu->stopping) {
            qemu_mutex_unlock(&edu->thr_mutex);
            break;
        }

        val = edu->fact;
        qemu_mutex_unlock(&edu->thr_mutex);

        while (val > 0) {
            ret *= val--;
        }

        /*
         * We should sleep for a random period here, so that students are
         * forced to check the status properly.
         */

        qemu_mutex_lock(&edu->thr_mutex);
        edu->fact = ret;
        qemu_mutex_unlock(&edu->thr_mutex);
        atomic_and(&edu->status, ~EDU_STATUS_COMPUTING);

        if (atomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
            qemu_mutex_lock_iothread();
            edu_raise_irq(edu, FACT_IRQ);
            qemu_mutex_unlock_iothread();
        }
    }

    return NULL;
}
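/*
 * Hedged sketch of the producer side: how a guest register write would kick
 * the worker thread above.  The MMIO plumbing is illustrative, not the edu
 * device's actual handler; only edu->fact, edu->status, edu->thr_mutex and
 * edu->thr_cond come from the function above.
 */
static void edu_start_factorial(EduState *edu, uint32_t val)
{
    qemu_mutex_lock(&edu->thr_mutex);
    edu->fact = val;
    atomic_or(&edu->status, EDU_STATUS_COMPUTING);
    qemu_cond_signal(&edu->thr_cond); /* wake edu_fact_thread() */
    qemu_mutex_unlock(&edu->thr_mutex);
}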
static void whpx_vcpu_post_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    WHV_REGISTER_VALUE reg_values[4];
    const WHV_REGISTER_NAME reg_names[4] = {
        WHvX64RegisterRflags,
        WHvX64RegisterCr8,
        WHvRegisterPendingInterruption,
        WHvRegisterInterruptState,
    };

    hr = WHvGetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
                                         reg_names, 4, reg_values);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get interrupt state registers,"
                     " hr=%08lx", hr);
        vcpu->interruptable = false;
        return;
    }

    assert(reg_names[0] == WHvX64RegisterRflags);
    env->eflags = reg_values[0].Reg64;

    assert(reg_names[1] == WHvX64RegisterCr8);
    if (vcpu->tpr != reg_values[1].Reg64) {
        vcpu->tpr = reg_values[1].Reg64;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
        qemu_mutex_unlock_iothread();
    }

    assert(reg_names[2] == WHvRegisterPendingInterruption);
    vcpu->interrupt_in_flight = reg_values[2].PendingInterruption;

    assert(reg_names[3] == WHvRegisterInterruptState);
    vcpu->interruptable = !reg_values[3].InterruptState.InterruptShadow;
}
static int os_host_main_loop_wait(int64_t timeout)
{
    int ret;
    static int spin_counter;

    glib_pollfds_fill(&timeout);

    /* If the I/O thread is very busy or we are incorrectly busy waiting in
     * the I/O thread, this can lead to starvation of the BQL such that the
     * VCPU threads never run.  To make sure we can detect the latter case,
     * print a message to the screen.  If we run into this condition, create
     * a fake timeout in order to give the VCPU threads a chance to run.
     */
    if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
        static bool notified;

        if (!notified) {
            fprintf(stderr,
                    "main-loop: WARNING: I/O thread spun for %d iterations\n",
                    MAX_MAIN_LOOP_SPIN);
            notified = true;
        }

        timeout = SCALE_MS;
    }

    if (timeout) {
        spin_counter = 0;
        qemu_mutex_unlock_iothread();
    } else {
        spin_counter++;
    }

    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);

    if (timeout) {
        qemu_mutex_lock_iothread();
    }

    glib_pollfds_poll();
    return ret;
}
void process_queued_cpu_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        if (wi->exclusive) {
            /* Running work items outside the BQL avoids the following
             * deadlock: 1) start_exclusive() is called with the BQL taken
             * while another CPU is running; 2) cpu_exec in the other CPU
             * tries to take the BQL, so it goes to sleep; start_exclusive()
             * is sleeping too, so neither CPU can proceed.
             */
            qemu_mutex_unlock_iothread();
            start_exclusive();
            wi->func(cpu, wi->data);
            end_exclusive();
            qemu_mutex_lock_iothread();
        } else {
            wi->func(cpu, wi->data);
        }
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}
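/*
 * The work items drained by the loop above are queued by helpers such as
 * run_on_cpu()/async_run_on_cpu().  A minimal sketch of the producer side
 * follows; async_run_on_cpu() and RUN_ON_CPU_NULL are QEMU's API, but the
 * callback body and queue_example() are illustrative.
 */
static void my_cpu_work(CPUState *cpu, run_on_cpu_data data)
{
    /* Runs on @cpu's thread, interleaved safely with its execution. */
}

static void queue_example(CPUState *cpu)
{
    async_run_on_cpu(cpu, my_cpu_work, RUN_ON_CPU_NULL);
}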
int machine_initialize(struct uc_struct *uc)
{
    MachineClass *machine_class;
    MachineState *current_machine;

    module_call_init(uc, MODULE_INIT_QOM);
    register_types_object(uc);
    machine_register_types(uc);
    container_register_types(uc);
    cpu_register_types(uc);
    qdev_register_types(uc);

    // Initialize arch specific.
    uc->init_arch(uc);

    // This will auto-initialize all register objects above.
    module_call_init(uc, MODULE_INIT_MACHINE);

    machine_class = find_default_machine(uc, uc->arch);
    if (machine_class == NULL) {
        //fprintf(stderr, "No machine specified, and there is no default.\n"
        //        "Use -machine help to list supported machines!\n");
        return -2;
    }

    current_machine = MACHINE(uc, object_new(uc, object_class_get_name(
                    OBJECT_CLASS(machine_class))));
    current_machine->uc = uc;
    uc->cpu_exec_init_all(uc);

    machine_class->max_cpus = 1;
    configure_accelerator(current_machine);

    qemu_init_cpu_loop(uc);
    qemu_mutex_lock_iothread(uc);

    current_machine->cpu_model = NULL;

    return machine_class->init(uc, current_machine);
}
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    int ret;

    g_main_context_acquire(context);

    glib_pollfds_fill(&timeout);

    qemu_mutex_unlock_iothread();
    replay_mutex_unlock();

    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);

    replay_mutex_lock();
    qemu_mutex_lock_iothread();

    glib_pollfds_poll();

    g_main_context_release(context);

    return ret;
}
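/*
 * Lock-ordering note for the variant above, reconstructed from its call
 * sequence alone: the replay mutex behaves as an outer lock relative to
 * the BQL.  It is released last before the poll and reacquired first
 * afterwards, which is consistent with a discipline where a thread that
 * needs both calls replay_mutex_lock() before qemu_mutex_lock_iothread().
 */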
static void *buffered_file_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t initial_time = qemu_get_clock_ms(rt_clock);
    int64_t max_size = 0;
    bool last_round = false;
    int ret;

    qemu_mutex_lock_iothread();
    DPRINTF("beginning savevm\n");
    ret = qemu_savevm_state_begin(s->file, &s->params);
    if (ret < 0) {
        DPRINTF("failed, %d\n", ret);
        qemu_mutex_unlock_iothread();
        goto out;
    }
    qemu_mutex_unlock_iothread();

    while (true) {
        int64_t current_time = qemu_get_clock_ms(rt_clock);
        uint64_t pending_size;

        qemu_mutex_lock_iothread();
        if (s->state != MIG_STATE_ACTIVE) {
            DPRINTF("put_ready returning because of non-active state\n");
            qemu_mutex_unlock_iothread();
            break;
        }
        if (s->complete) {
            qemu_mutex_unlock_iothread();
            break;
        }
        if (s->bytes_xfer < s->xfer_limit) {
            DPRINTF("iterate\n");
            pending_size = qemu_savevm_state_pending(s->file, max_size);
            DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
            if (pending_size && pending_size >= max_size) {
                ret = qemu_savevm_state_iterate(s->file);
                if (ret < 0) {
                    qemu_mutex_unlock_iothread();
                    break;
                }
            } else {
                int old_vm_running = runstate_is_running();
                int64_t start_time, end_time;

                DPRINTF("done iterating\n");
                start_time = qemu_get_clock_ms(rt_clock);
                qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                if (old_vm_running) {
                    vm_stop(RUN_STATE_FINISH_MIGRATE);
                } else {
                    vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
                }
                ret = qemu_savevm_state_complete(s->file);
                if (ret < 0) {
                    qemu_mutex_unlock_iothread();
                    break;
                } else {
                    migrate_fd_completed(s);
                }
                end_time = qemu_get_clock_ms(rt_clock);
                s->total_time = end_time - s->total_time;
                s->downtime = end_time - start_time;
                if (s->state != MIG_STATE_COMPLETED) {
                    if (old_vm_running) {
                        vm_start();
                    }
                }
                last_round = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (current_time >= initial_time + BUFFER_DELAY) {
            uint64_t transferred_bytes = s->bytes_xfer;
            uint64_t time_spent = current_time - initial_time;
            double bandwidth = transferred_bytes / time_spent;
            max_size = bandwidth * migrate_max_downtime() / 1000000;

            DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
                    " bandwidth %g max_size %" PRId64 "\n",
                    transferred_bytes, time_spent, bandwidth, max_size);

            s->bytes_xfer = 0;
            initial_time = current_time;
        }
        if (!last_round && (s->bytes_xfer >= s->xfer_limit)) {
            /* usleep expects microseconds */
            g_usleep((initial_time + BUFFER_DELAY - current_time) * 1000);
        }
        ret = buffered_flush(s);
        if (ret < 0) {
            break;
        }
    }

out:
    if (ret < 0) {
        migrate_fd_error(s);
    }
    g_free(s->buffer);
    return NULL;
}
static int os_host_main_loop_wait(uint32_t timeout)
{
    GMainContext *context = g_main_context_default();
    int ret, i;
    PollingEntry *pe;
    WaitObjects *w = &wait_objects;
    gint poll_timeout;
    static struct timeval tv0;

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    if (ret != 0) {
        return ret;
    }

    if (nfds >= 0) {
        ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (ret != 0) {
            timeout = 0;
        }
    }

    g_main_context_prepare(context, &max_priority);
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));

    for (i = 0; i < w->num; i++) {
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
        poll_fds[n_poll_fds + i].events = G_IO_IN;
    }

    if (poll_timeout < 0 || timeout < poll_timeout) {
        poll_timeout = timeout;
    }

    qemu_mutex_unlock_iothread();
    ret = g_poll(poll_fds, n_poll_fds + w->num, poll_timeout);
    qemu_mutex_lock_iothread();
    if (ret > 0) {
        for (i = 0; i < w->num; i++) {
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
        }
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
            }
        }
    }

    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

    /* If an edge-triggered socket event occurred, select will return a
     * positive result on the next iteration.  We do not need to do anything
     * here.
     */

    return ret;
}
void *colo_process_incoming_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    QEMUFile *fb = NULL;
    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
    uint64_t total_size;
    uint64_t value;
    Error *local_err = NULL;

    qemu_sem_init(&mis->colo_incoming_sem, 0);

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COLO);

    failover_init_state();

    mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
    if (!mis->to_src_file) {
        error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
        goto out;
    }
    /*
     * Note: the communication between Primary side and Secondary side
     * should be sequential.  We set the fd to non-blocking in the migration
     * incoming coroutine, and here we are in the COLO incoming thread, so
     * it is OK to set the fd back to blocking.
     */
    qemu_file_set_blocking(mis->from_src_file, true);

    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                      &local_err);
    if (local_err) {
        goto out;
    }

    while (mis->state == MIGRATION_STATUS_COLO) {
        int request = 0;

        colo_wait_handle_message(mis->from_src_file, &request, &local_err);
        if (local_err) {
            goto out;
        }
        assert(request);
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        /* FIXME: This is unnecessary for periodic checkpoint mode */
        colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                          &local_err);
        if (local_err) {
            goto out;
        }

        colo_receive_check_message(mis->from_src_file,
                                   COLO_MESSAGE_VMSTATE_SEND, &local_err);
        if (local_err) {
            goto out;
        }

        value = colo_receive_message_value(mis->from_src_file,
                                           COLO_MESSAGE_VMSTATE_SIZE,
                                           &local_err);
        if (local_err) {
            goto out;
        }

        /*
         * Read the VM device state data into the channel buffer; it is
         * better to reuse the memory already allocated, so we handle the
         * channel buffer directly here.
         */
        if (value > bioc->capacity) {
            bioc->capacity = value;
            bioc->data = g_realloc(bioc->data, bioc->capacity);
        }
        total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
        if (total_size != value) {
            error_report("Got %" PRIu64 " VMState data, less than expected"
                         " %" PRIu64, total_size, value);
            goto out;
        }
        bioc->usage = total_size;
        qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);

        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
                          &local_err);
        if (local_err) {
            goto out;
        }

        qemu_mutex_lock_iothread();
        qemu_system_reset(VMRESET_SILENT);
        vmstate_loading = true;
        if (qemu_loadvm_state(fb) < 0) {
            error_report("COLO: loadvm failed");
            qemu_mutex_unlock_iothread();
            goto out;
        }

        vmstate_loading = false;
        qemu_mutex_unlock_iothread();

        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
            failover_set_state(FAILOVER_STATUS_RELAUNCH,
                               FAILOVER_STATUS_NONE);
            failover_request_active(NULL);
            goto out;
        }

        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
                          &local_err);
        if (local_err) {
            goto out;
        }
    }

out:
    vmstate_loading = false;
    /* Throw the unreported error message after exiting the loop */
    if (local_err) {
        error_report_err(local_err);
    }

    if (fb) {
        qemu_fclose(fb);
    }

    /* Hopefully we do not have to wait here for too long */
    qemu_sem_wait(&mis->colo_incoming_sem);
    qemu_sem_destroy(&mis->colo_incoming_sem);

    /* Must be called after the failover BH has completed */
    if (mis->to_src_file) {
        qemu_fclose(mis->to_src_file);
    }

    migration_incoming_exit_colo();

    return NULL;
}
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int = {0};
    UINT32 reg_count = 0;
    WHV_REGISTER_VALUE reg_values[3] = {0};
    WHV_REGISTER_NAME reg_names[3];

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interrupt_in_flight.InterruptionPending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!vcpu->interrupt_in_flight.InterruptionPending &&
        vcpu->interruptable && (env->eflags & IF_MASK)) {
        assert(!new_int.InterruptionPending);
        if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                new_int.InterruptionType = WHvX64PendingInterrupt;
                new_int.InterruptionPending = 1;
                new_int.InterruptionVector = irq;
            }
        }
    }

    /* Setup interrupt state if new one was prepared */
    if (new_int.InterruptionPending) {
        reg_values[reg_count].PendingInterruption = new_int;
        reg_names[reg_count] = WHvRegisterPendingInterruption;
        reg_count += 1;
    }

    /* Sync the TPR to the CR8 if it was modified during the intercept */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications.InterruptNotification
            = 1;
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();

    if (reg_count) {
        hr = WHvSetVirtualProcessorRegisters(whpx->partition, cpu->cpu_index,
                                             reg_names, reg_count,
                                             reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }
}
static void colo_process_checkpoint(MigrationState *s)
{
    QIOChannelBuffer *bioc;
    QEMUFile *fb = NULL;
    int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    Error *local_err = NULL;
    int ret;

    failover_init_state();

    s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
    if (!s->rp_state.from_dst_file) {
        error_report("Open QEMUFile from_dst_file failed");
        goto out;
    }

    /*
     * Wait for the Secondary side to finish loading the VM state and to
     * enter COLO restore.
     */
    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_CHECKPOINT_READY, &local_err);
    if (local_err) {
        goto out;
    }

    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    qemu_mutex_lock_iothread();
    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

    timer_mod(s->colo_delay_timer,
              current_time + s->parameters.x_checkpoint_delay);

    while (s->state == MIGRATION_STATUS_COLO) {
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        qemu_sem_wait(&s->colo_checkpoint_sem);

        ret = colo_do_checkpoint_transaction(s, bioc, fb);
        if (ret < 0) {
            goto out;
        }
    }

out:
    /* Throw the unreported error message after exiting the loop */
    if (local_err) {
        error_report_err(local_err);
    }

    if (fb) {
        qemu_fclose(fb);
    }

    timer_del(s->colo_delay_timer);

    /* Hopefully we do not have to wait here for too long */
    qemu_sem_wait(&s->colo_exit_sem);
    qemu_sem_destroy(&s->colo_exit_sem);
    /*
     * Must be called after the failover BH has completed, or the failover
     * BH may shut down the wrong fd, one that has been reused by another
     * thread after we release it here.
     */
    if (s->rp_state.from_dst_file) {
        qemu_fclose(s->rp_state.from_dst_file);
    }
}
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }

    /* Reset the channel buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    qemu_mutex_lock_iothread();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("run", "stop");
    /*
     * The failover request BH could run after vm_stop_force_state(),
     * so we need to check the failover state again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }

    /* Disable block migration */
    s->params.blk = 0;
    s->params.shared = 0;
    qemu_savevm_state_header(fb);
    qemu_savevm_state_begin(fb, &s->params);
    qemu_mutex_lock_iothread();
    qemu_savevm_state_complete_precopy(fb, false);
    qemu_mutex_unlock_iothread();

    qemu_fflush(fb);

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        goto out;
    }
    /*
     * The Secondary side needs to know the size of the VMstate data, so
     * that it can decide how much data to read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    qemu_fflush(s->to_dst_file);
    ret = qemu_file_get_error(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    qemu_mutex_lock_iothread();
    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
    }
    return ret;
}
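/*
 * Checkpoint transaction message flow, reconstructed only from the calls
 * in colo_do_checkpoint_transaction() above (the arrows summarize the
 * code; they are not additional protocol):
 *
 *   Primary                            Secondary
 *   CHECKPOINT_REQUEST    ---------->
 *                         <----------  CHECKPOINT_REPLY
 *   (stop VM, save state into fb)
 *   VMSTATE_SEND          ---------->
 *   VMSTATE_SIZE (usage)  ---------->
 *   vmstate bytes         ---------->
 *                         <----------  VMSTATE_RECEIVED
 *                         <----------  VMSTATE_LOADED
 *   (resume VM)
 */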
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
    int select_ret = 0;
    int g_poll_ret, ret, i, n_poll_fds;
    PollingEntry *pe;
    WaitObjects *w = &wait_objects;
    gint poll_timeout;
    int64_t poll_timeout_ns;
    static struct timeval tv0;
    fd_set rfds, wfds, xfds;
    int nfds;

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    if (ret != 0) {
        return ret;
    }

    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

    g_main_context_prepare(context, &max_priority);
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));

    for (i = 0; i < w->num; i++) {
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
        poll_fds[n_poll_fds + i].events = G_IO_IN;
    }

    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
    }

    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

    qemu_mutex_unlock_iothread();
    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
    qemu_mutex_lock_iothread();
    if (g_poll_ret > 0) {
        for (i = 0; i < w->num; i++) {
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
        }
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
            }
        }
    }

    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

    return select_ret || g_poll_ret;
}
void *colo_process_incoming_thread(void *opaque)
{
    MigrationIncomingState *mis = opaque;
    QEMUFile *fb = NULL;
    QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
    uint64_t total_size;
    uint64_t value;
    Error *local_err = NULL;
    int ret;

    rcu_register_thread();
    qemu_sem_init(&mis->colo_incoming_sem, 0);

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COLO);

    failover_init_state();

    mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
    if (!mis->to_src_file) {
        error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
        goto out;
    }
    /*
     * Note: the communication between Primary side and Secondary side
     * should be sequential.  We set the fd to non-blocking in the migration
     * incoming coroutine, and here we are in the COLO incoming thread, so
     * it is OK to set the fd back to blocking.
     */
    qemu_file_set_blocking(mis->from_src_file, true);

    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    qemu_mutex_lock_iothread();
#ifdef CONFIG_REPLICATION
    replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
#else
    abort();
#endif
    vm_start();
    trace_colo_vm_state_change("stop", "run");
    qemu_mutex_unlock_iothread();

    colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                      &local_err);
    if (local_err) {
        goto out;
    }

    while (mis->state == MIGRATION_STATUS_COLO) {
        int request = 0;

        colo_wait_handle_message(mis->from_src_file, &request, &local_err);
        if (local_err) {
            goto out;
        }
        assert(request);
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        qemu_mutex_lock_iothread();
        vm_stop_force_state(RUN_STATE_COLO);
        trace_colo_vm_state_change("run", "stop");
        qemu_mutex_unlock_iothread();

        /* FIXME: This is unnecessary for periodic checkpoint mode */
        colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                          &local_err);
        if (local_err) {
            goto out;
        }

        colo_receive_check_message(mis->from_src_file,
                                   COLO_MESSAGE_VMSTATE_SEND, &local_err);
        if (local_err) {
            goto out;
        }

        qemu_mutex_lock_iothread();
        cpu_synchronize_all_pre_loadvm();
        ret = qemu_loadvm_state_main(mis->from_src_file, mis);
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            error_report("Load VM's live state (ram) error");
            goto out;
        }

        value = colo_receive_message_value(mis->from_src_file,
                                           COLO_MESSAGE_VMSTATE_SIZE,
                                           &local_err);
        if (local_err) {
            goto out;
        }

        /*
         * Read the VM device state data into the channel buffer; it is
         * better to reuse the memory already allocated, so we handle the
         * channel buffer directly here.
         */
        if (value > bioc->capacity) {
            bioc->capacity = value;
            bioc->data = g_realloc(bioc->data, bioc->capacity);
        }
        total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
        if (total_size != value) {
            error_report("Got %" PRIu64 " VMState data, less than expected"
                         " %" PRIu64, total_size, value);
            goto out;
        }
        bioc->usage = total_size;
        qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);

        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
                          &local_err);
        if (local_err) {
            goto out;
        }

        qemu_mutex_lock_iothread();
        vmstate_loading = true;
        ret = qemu_load_device_state(fb);
        if (ret < 0) {
            error_report("COLO: load device state failed");
            qemu_mutex_unlock_iothread();
            goto out;
        }

#ifdef CONFIG_REPLICATION
        replication_get_error_all(&local_err);
        if (local_err) {
            qemu_mutex_unlock_iothread();
            goto out;
        }

        /* discard colo disk buffer */
        replication_do_checkpoint_all(&local_err);
        if (local_err) {
            qemu_mutex_unlock_iothread();
            goto out;
        }
#else
        abort();
#endif
        /* Notify all filters of all NICs to do a checkpoint */
        colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
        if (local_err) {
            qemu_mutex_unlock_iothread();
            goto out;
        }

        vmstate_loading = false;
        vm_start();
        trace_colo_vm_state_change("stop", "run");
        qemu_mutex_unlock_iothread();

        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
            failover_set_state(FAILOVER_STATUS_RELAUNCH,
                               FAILOVER_STATUS_NONE);
            failover_request_active(NULL);
            goto out;
        }

        colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
                          &local_err);
        if (local_err) {
            goto out;
        }
    }

out:
    vmstate_loading = false;
    /* Throw the unreported error message after exiting the loop */
    if (local_err) {
        error_report_err(local_err);
    }

    switch (failover_get_state()) {
    case FAILOVER_STATUS_NONE:
        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
                                  COLO_EXIT_REASON_ERROR);
        break;
    case FAILOVER_STATUS_REQUIRE:
        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
                                  COLO_EXIT_REASON_REQUEST);
        break;
    default:
        abort();
    }

    if (fb) {
        qemu_fclose(fb);
    }

    /* Hopefully we do not have to wait here for too long */
    qemu_sem_wait(&mis->colo_incoming_sem);
    qemu_sem_destroy(&mis->colo_incoming_sem);

    /* Must be called after the failover BH has completed */
    if (mis->to_src_file) {
        qemu_fclose(mis->to_src_file);
    }
    migration_incoming_disable_colo();

    rcu_unregister_thread();
    return NULL;
}
static void colo_process_checkpoint(MigrationState *s)
{
    QIOChannelBuffer *bioc;
    QEMUFile *fb = NULL;
    int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    Error *local_err = NULL;
    int ret;

    failover_init_state();

    s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
    if (!s->rp_state.from_dst_file) {
        error_report("Open QEMUFile from_dst_file failed");
        goto out;
    }

    packets_compare_notifier.notify = colo_compare_notify_checkpoint;
    colo_compare_register_notifier(&packets_compare_notifier);

    /*
     * Wait for the Secondary side to finish loading the VM state and to
     * enter COLO restore.
     */
    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_CHECKPOINT_READY, &local_err);
    if (local_err) {
        goto out;
    }

    bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    qemu_mutex_lock_iothread();
#ifdef CONFIG_REPLICATION
    replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
#else
    abort();
#endif

    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

    timer_mod(s->colo_delay_timer,
              current_time + s->parameters.x_checkpoint_delay);

    while (s->state == MIGRATION_STATUS_COLO) {
        if (failover_get_state() != FAILOVER_STATUS_NONE) {
            error_report("failover request");
            goto out;
        }

        qemu_sem_wait(&s->colo_checkpoint_sem);

        if (s->state != MIGRATION_STATUS_COLO) {
            goto out;
        }
        ret = colo_do_checkpoint_transaction(s, bioc, fb);
        if (ret < 0) {
            goto out;
        }
    }

out:
    /* Throw the unreported error message after exiting the loop */
    if (local_err) {
        error_report_err(local_err);
    }

    if (fb) {
        qemu_fclose(fb);
    }

    /*
     * There are only two reasons we can get here: some error happened,
     * or the user triggered failover.
     */
    switch (failover_get_state()) {
    case FAILOVER_STATUS_NONE:
        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
                                  COLO_EXIT_REASON_ERROR);
        break;
    case FAILOVER_STATUS_REQUIRE:
        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
                                  COLO_EXIT_REASON_REQUEST);
        break;
    default:
        abort();
    }

    /* Hopefully we do not have to wait here for too long */
    qemu_sem_wait(&s->colo_exit_sem);
    qemu_sem_destroy(&s->colo_exit_sem);

    /*
     * It is safe to unregister the notifier only after failover has
     * finished.  Moreover, colo_delay_timer and colo_checkpoint_sem must
     * not be released before the notifier is unregistered, or there will
     * be a use-after-free error.
     */
    colo_compare_unregister_notifier(&packets_compare_notifier);
    timer_del(s->colo_delay_timer);
    timer_free(s->colo_delay_timer);
    qemu_sem_destroy(&s->colo_checkpoint_sem);

    /*
     * Must be called after the failover BH has completed, or the failover
     * BH may shut down the wrong fd, one that has been reused by another
     * thread after we release it here.
     */
    if (s->rp_state.from_dst_file) {
        qemu_fclose(s->rp_state.from_dst_file);
    }
}
static int colo_do_checkpoint_transaction(MigrationState *s,
                                          QIOChannelBuffer *bioc,
                                          QEMUFile *fb)
{
    Error *local_err = NULL;
    int ret = -1;

    colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
                      &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
    if (local_err) {
        goto out;
    }
    /* Reset the channel buffer directly */
    qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
    bioc->usage = 0;

    qemu_mutex_lock_iothread();
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    vm_stop_force_state(RUN_STATE_COLO);
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("run", "stop");
    /*
     * The failover request BH could run after vm_stop_force_state(),
     * so we need to check the failover state again.
     */
    if (failover_get_state() != FAILOVER_STATUS_NONE) {
        goto out;
    }

    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
    if (local_err) {
        goto out;
    }

    /* Disable block migration */
    migrate_set_block_enabled(false, &local_err);
    qemu_mutex_lock_iothread();

#ifdef CONFIG_REPLICATION
    replication_do_checkpoint_all(&local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
#else
    abort();
#endif

    colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
    if (local_err) {
        qemu_mutex_unlock_iothread();
        goto out;
    }
    /* Note: device state is saved into the buffer */
    ret = qemu_save_device_state(fb);

    qemu_mutex_unlock_iothread();
    if (ret < 0) {
        goto out;
    }
    /*
     * Only save the VM's live state, which does not include device state.
     * TODO: we may need a timeout mechanism to prevent the COLO process
     * from being blocked here.
     */
    qemu_savevm_live_state(s->to_dst_file);

    qemu_fflush(fb);

    /*
     * The Secondary side needs to know the size of the VMstate data, so
     * that it can decide how much data to read.
     */
    colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
                            bioc->usage, &local_err);
    if (local_err) {
        goto out;
    }

    qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
    qemu_fflush(s->to_dst_file);
    ret = qemu_file_get_error(s->to_dst_file);
    if (ret < 0) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
    if (local_err) {
        goto out;
    }

    colo_receive_check_message(s->rp_state.from_dst_file,
                               COLO_MESSAGE_VMSTATE_LOADED, &local_err);
    if (local_err) {
        goto out;
    }

    ret = 0;

    qemu_mutex_lock_iothread();
    vm_start();
    qemu_mutex_unlock_iothread();
    trace_colo_vm_state_change("stop", "run");

out:
    if (local_err) {
        error_report_err(local_err);
    }
    return ret;
}
static int whpx_vcpu_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    int ret;

    whpx_vcpu_process_async_events(cpu);
    if (cpu->halted) {
        cpu->exception_index = EXCP_HLT;
        atomic_set(&cpu->exit_request, false);
        return 0;
    }

    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);

    do {
        if (cpu->vcpu_dirty) {
            whpx_set_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        whpx_vcpu_pre_run(cpu);

        if (atomic_read(&cpu->exit_request)) {
            whpx_vcpu_kick(cpu);
        }

        hr = WHvRunVirtualProcessor(whpx->partition, cpu->cpu_index,
                                    &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to exec a virtual processor,"
                         " hr=%08lx", hr);
            ret = -1;
            break;
        }

        whpx_vcpu_post_run(cpu);

        switch (vcpu->exit_ctx.ExitReason) {
        case WHvRunVpExitReasonMemoryAccess:
            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
            break;

        case WHvRunVpExitReasonX64IoPortAccess:
            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
            break;

        case WHvRunVpExitReasonX64InterruptWindow:
            vcpu->window_registered = 0;
            ret = 0; /* keep looping; do not leave ret uninitialized */
            break;

        case WHvRunVpExitReasonX64Halt:
            ret = whpx_handle_halt(cpu);
            break;

        case WHvRunVpExitReasonCanceled:
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;

        case WHvRunVpExitReasonNone:
        case WHvRunVpExitReasonUnrecoverableException:
        case WHvRunVpExitReasonInvalidVpRegisterValue:
        case WHvRunVpExitReasonUnsupportedFeature:
        case WHvRunVpExitReasonX64MsrAccess:
        case WHvRunVpExitReasonX64Cpuid:
        case WHvRunVpExitReasonException:
        default:
            error_report("WHPX: Unexpected VP exit code %d",
                         vcpu->exit_ctx.ExitReason);
            whpx_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            ret = -1; /* exit the loop after a guest panic */
            break;
        }
    } while (!ret);

    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
    current_cpu = cpu;

    atomic_set(&cpu->exit_request, false);

    return ret < 0;
}