//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; // ncInstance *instance = ((ncInstance *) arg); ncInstance *instance = NULL; struct nc_state_t *nc = NULL; virDomainPtr dom = NULL; virConnectPtr conn = NULL; rebooting_thread_params *params = ((rebooting_thread_params *) arg); instance = &(params->instance); nc = &(params->nc); LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { // need to sleep to allow midolman to update the VM interface sleep(10); } dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { char iface[16], cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH]; int rc; snprintf(iface, 16, "vn_%s", instance->instanceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } // Repeat process for secondary interfaces as well for (int i=0; i < EUCA_MAX_NICS; i++) { if (strlen(instance->secNetCfgs[i].interfaceId) == 0) continue; snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } } } } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); EUCA_FREE(params); return NULL; }
//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { #define REATTACH_RETRIES 3 int i = 0; int err = 0; int error = 0; int rc = 0; int log_level_for_devstring = EUCATRACE; char *xml = NULL; char *remoteDevStr = NULL; char path[MAX_PATH] = ""; char lpath[MAX_PATH] = ""; char resourceName[1][MAX_SENSOR_NAME_LEN] = { {0} }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { {0} }; ncVolume *volume = NULL; ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr *conn = NULL; logprintfl(EUCADEBUG, "[%s] spawning rebooting thread\n", instance->instanceId); if ((xml = file2str(instance->libvirtFilePath)) == NULL) { logprintfl(EUCAERROR, "[%s] cannot obtain instance XML file %s\n", instance->instanceId, instance->libvirtFilePath); return NULL; } if ((conn = check_hypervisor_conn()) == NULL) { logprintfl(EUCAERROR, "[%s] cannot restart instance %s, abandoning it\n", instance->instanceId, instance->instanceId); change_state(instance, SHUTOFF); EUCA_FREE(xml); return NULL; } sem_p(hyp_sem); { dom = virDomainLookupByName(*conn, instance->instanceId); } sem_v(hyp_sem); if (dom == NULL) { EUCA_FREE(xml); return NULL; } sem_p(hyp_sem); { // for KVM, must stop and restart the instance logprintfl(EUCADEBUG, "[%s] destroying domain\n", instance->instanceId); error = virDomainDestroy(dom); // @todo change to Shutdown? is this synchronous? virDomainFree(dom); } sem_v(hyp_sem); if (error) { EUCA_FREE(xml); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); // domain is now shut down, create a new one with the same XML sem_p(hyp_sem); { logprintfl(EUCAINFO, "[%s] rebooting\n", instance->instanceId); dom = virDomainCreateLinux(*conn, xml, 0); } sem_v(hyp_sem); EUCA_FREE(xml); euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately // re-attach each volume previously attached for (i = 0; i < EUCA_MAX_VOLUMES; ++i) { volume = &instance->volumes[i]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching logprintfl(EUCADEBUG, "[%s] volumes [%d] = '%s'\n", instance->instanceId, i, volume->stateName); // get credentials, decrypt them remoteDevStr = get_iscsi_target(volume->remoteDev); if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) { logprintfl(EUCAERROR, "[%s] failed to get local name of host iscsi device when re-attaching\n", instance->instanceId); rc = 1; } else { // set the path snprintf(path, sizeof(path), EUCALYPTUS_VOLUME_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX.xml snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX-libvirt.xml // read in libvirt XML, which may have been modified by the hook above if ((xml = file2str(lpath)) == NULL) { logprintfl(EUCAERROR, "[%s][%s] failed to read volume XML from %s\n", instance->instanceId, volume->volumeId, lpath); rc = 1; } } EUCA_FREE(remoteDevStr); if (!rc) { // zhill - wrap with retry in case libvirt is dumb. err = 0; for (i = 1; i < REATTACH_RETRIES; i++) { // protect libvirt calls because we've seen problems during concurrent libvirt invocations sem_p(hyp_sem); { err = virDomainAttachDevice(dom, xml); } sem_v(hyp_sem); if (err) { logprintfl(EUCAERROR, "[%s][%s] failed to reattach volume (attempt %d of %d)\n", instance->instanceId, volume->volumeId, i, REATTACH_RETRIES); logprintfl(EUCADEBUG, "[%s][%s] error from virDomainAttachDevice: %d xml: %s\n", instance->instanceId, volume->volumeId, err, xml); sleep(3); // sleep a bit and retry } else { logprintfl(EUCAINFO, "[%s][%s] volume reattached as '%s'\n", instance->instanceId, volume->volumeId, volume->localDevReal); break; } } log_level_for_devstring = EUCATRACE; if (err) log_level_for_devstring = EUCADEBUG; logprintfl(log_level_for_devstring, "[%s][%s] remote device string: %s\n", instance->instanceId, volume->volumeId, volume->remoteDev); } EUCA_FREE(xml); } if (dom == NULL) { logprintfl(EUCAERROR, "[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); return NULL; } sem_p(hyp_sem); { virDomainFree(dom); } sem_v(hyp_sem); return NULL; #undef REATTACH_RETRIES }
//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); return NULL; }