//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; // ncInstance *instance = ((ncInstance *) arg); ncInstance *instance = NULL; struct nc_state_t *nc = NULL; virDomainPtr dom = NULL; virConnectPtr conn = NULL; rebooting_thread_params *params = ((rebooting_thread_params *) arg); instance = &(params->instance); nc = &(params->nc); LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { // need to sleep to allow midolman to update the VM interface sleep(10); } dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { char iface[16], cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH]; int rc; snprintf(iface, 16, "vn_%s", instance->instanceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } // Repeat process for secondary interfaces as well for (int i=0; i < EUCA_MAX_NICS; i++) { if (strlen(instance->secNetCfgs[i].interfaceId) == 0) continue; snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } } } } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); EUCA_FREE(params); return NULL; }
//! //! Defines the thread that does the actual migration of an instance off the source. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *migrating_thread(void *arg) { ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; int migration_error = 0; LOGTRACE("invoked for %s\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to connect to hypervisor), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } else { LOGTRACE("[%s] connected to hypervisor\n", instance->instanceId); } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to find domain), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } char duri[1024]; snprintf(duri, sizeof(duri), "qemu+tls://%s/system", instance->migration_dst); virConnectPtr dconn = NULL; LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGWARN("[%s] cannot migrate instance using TLS (failed to connect to remote), retrying using SSH.\n", instance->instanceId); snprintf(duri, sizeof(duri), "qemu+ssh://%s/system", instance->migration_dst); LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGERROR("[%s] cannot migrate instance using TLS or SSH (failed to connect to remote), giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } } LOGINFO("[%s] migrating instance\n", instance->instanceId); virDomain *ddom = virDomainMigrate(dom, dconn, VIR_MIGRATE_LIVE | VIR_MIGRATE_NON_SHARED_DISK, NULL, // new name on destination (optional) NULL, // destination URI as seen from source (optional) 0L); // bandwidth limitation (0 => unlimited) if (ddom == NULL) { LOGERROR("[%s] cannot migrate instance, giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } else { LOGINFO("[%s] instance migrated\n", instance->instanceId); } virDomainFree(ddom); virConnectClose(dconn); out: if (dom) virDomainFree(dom); if (conn != NULL) unlock_hypervisor_conn(); sem_p(inst_sem); LOGDEBUG("%d outgoing migrations still active\n", --outgoing_migrations_in_progress); if (migration_error) { migration_rollback(instance); } else { // If this is set to NOT_MIGRATING here, it's briefly possible for // both the source and destination nodes to report the same instance // as Extant/NOT_MIGRATING, which is confusing! instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); copy_instances(); } sem_v(inst_sem); LOGDEBUG("done\n"); unset_corrid(get_corrid()); return NULL; }
//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); return NULL; }