예제 #1
0
//!
//! Defines the thread that does the actual reboot of an instance.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *rebooting_thread(void *arg)
{
    char *xml = NULL;
    char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" };
    char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" };
    //    ncInstance *instance = ((ncInstance *) arg);
    ncInstance *instance = NULL;
    struct nc_state_t *nc = NULL;
    virDomainPtr dom = NULL;
    virConnectPtr conn = NULL;
    rebooting_thread_params *params = ((rebooting_thread_params *) arg);
    instance = &(params->instance);
    nc = &(params->nc);

    LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId);

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    dom = virDomainLookupByName(conn, instance->instanceId);
    if (dom == NULL) {
        LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId);
        unlock_hypervisor_conn();
        EUCA_FREE(params);
        return NULL;
    }
    // obtain the most up-to-date XML for domain from libvirt
    xml = virDomainGetXMLDesc(dom, 0);
    if (xml == NULL) {
        LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId);
        virDomainFree(dom);            // release libvirt resource
        unlock_hypervisor_conn();
        EUCA_FREE(params);
        return NULL;
    }
    virDomainFree(dom);                // release libvirt resource
    unlock_hypervisor_conn();

    // try shutdown first, then kill it if uncooperative
    if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) {
        LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    // Add a shift to values of three of the metrics: ones that
    // drop back to zero after a reboot. The shift, which is based
    // on the latest value, ensures that values sent upstream do
    // not go backwards .
    sensor_shift_metric(instance->instanceId, "CPUUtilization");
    sensor_shift_metric(instance->instanceId, "NetworkIn");
    sensor_shift_metric(instance->instanceId, "NetworkOut");

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    // domain is now shut down, create a new one with the same XML
    LOGINFO("[%s] rebooting\n", instance->instanceId);
    if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
        // need to sleep to allow midolman to update the VM interface
        sleep(10);
    }
    dom = virDomainCreateLinux(conn, xml, 0);
    if (dom == NULL) {
        LOGERROR("[%s] failed to restart instance\n", instance->instanceId);
        change_state(instance, SHUTOFF);
    } else {
        euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN);
        sensor_refresh_resources(resourceName, resourceAlias, 1);   // refresh stats so we set base value accurately
        virDomainFree(dom);

        if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
            char iface[16], cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
            int rc;
            snprintf(iface, 16, "vn_%s", instance->instanceId);
            
            // If this device does not have a 'brport' path, this isn't a bridge device
            snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
            if (!check_directory(sPath)) {
                LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface);
                snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
                rc = timeshell(cmd, obuf, ebuf, 256, 10);
                if (rc) {
                    LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
                }
            }

            // Repeat process for secondary interfaces as well
            for (int i=0; i < EUCA_MAX_NICS; i++) {
                if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
                    continue;

                snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId);

                // If this device does not have a 'brport' path, this isn't a bridge device
                snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
                if (!check_directory(sPath)) {
                    LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface);
                    snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
                    rc = timeshell(cmd, obuf, ebuf, 256, 10);
                    if (rc) {
                        LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
                    }
                }
            }
        }

    }
    EUCA_FREE(xml);

    unlock_hypervisor_conn();
    unset_corrid(get_corrid());
    EUCA_FREE(params);
    return NULL;
}
예제 #2
0
//!
//! Defines the thread that does the actual reboot of an instance.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *rebooting_thread(void *arg)
{
#define REATTACH_RETRIES      3

    int i = 0;
    int err = 0;
    int error = 0;
    int rc = 0;
    int log_level_for_devstring = EUCATRACE;
    char *xml = NULL;
    char *remoteDevStr = NULL;
    char path[MAX_PATH] = "";
    char lpath[MAX_PATH] = "";
    char resourceName[1][MAX_SENSOR_NAME_LEN] = { {0} };
    char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { {0} };
    ncVolume *volume = NULL;
    ncInstance *instance = ((ncInstance *) arg);
    virDomainPtr dom = NULL;
    virConnectPtr *conn = NULL;

    logprintfl(EUCADEBUG, "[%s] spawning rebooting thread\n", instance->instanceId);
    if ((xml = file2str(instance->libvirtFilePath)) == NULL) {
        logprintfl(EUCAERROR, "[%s] cannot obtain instance XML file %s\n", instance->instanceId, instance->libvirtFilePath);
        return NULL;
    }

    if ((conn = check_hypervisor_conn()) == NULL) {
        logprintfl(EUCAERROR, "[%s] cannot restart instance %s, abandoning it\n", instance->instanceId, instance->instanceId);
        change_state(instance, SHUTOFF);
        EUCA_FREE(xml);
        return NULL;
    }

    sem_p(hyp_sem);
    {
        dom = virDomainLookupByName(*conn, instance->instanceId);
    }
    sem_v(hyp_sem);

    if (dom == NULL) {
        EUCA_FREE(xml);
        return NULL;
    }

    sem_p(hyp_sem);
    {
        // for KVM, must stop and restart the instance
        logprintfl(EUCADEBUG, "[%s] destroying domain\n", instance->instanceId);
        error = virDomainDestroy(dom);  // @todo change to Shutdown? is this synchronous?
        virDomainFree(dom);
    }
    sem_v(hyp_sem);

    if (error) {
        EUCA_FREE(xml);
        return NULL;
    }
    // Add a shift to values of three of the metrics: ones that
    // drop back to zero after a reboot. The shift, which is based
    // on the latest value, ensures that values sent upstream do
    // not go backwards .
    sensor_shift_metric(instance->instanceId, "CPUUtilization");
    sensor_shift_metric(instance->instanceId, "NetworkIn");
    sensor_shift_metric(instance->instanceId, "NetworkOut");

    // domain is now shut down, create a new one with the same XML
    sem_p(hyp_sem);
    {
        logprintfl(EUCAINFO, "[%s] rebooting\n", instance->instanceId);
        dom = virDomainCreateLinux(*conn, xml, 0);
    }
    sem_v(hyp_sem);
    EUCA_FREE(xml);

    euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN);
    sensor_refresh_resources(resourceName, resourceAlias, 1);   // refresh stats so we set base value accurately

    // re-attach each volume previously attached
    for (i = 0; i < EUCA_MAX_VOLUMES; ++i) {
        volume = &instance->volumes[i];
        if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING))
            continue;           // skip the entry unless attached or attaching

        logprintfl(EUCADEBUG, "[%s] volumes [%d] = '%s'\n", instance->instanceId, i, volume->stateName);

        // get credentials, decrypt them
        remoteDevStr = get_iscsi_target(volume->remoteDev);
        if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) {
            logprintfl(EUCAERROR, "[%s] failed to get local name of host iscsi device when re-attaching\n", instance->instanceId);
            rc = 1;
        } else {
            // set the path
            snprintf(path, sizeof(path), EUCALYPTUS_VOLUME_XML_PATH_FORMAT, instance->instancePath, volume->volumeId);  // vol-XXX.xml
            snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId);    // vol-XXX-libvirt.xml

            // read in libvirt XML, which may have been modified by the hook above
            if ((xml = file2str(lpath)) == NULL) {
                logprintfl(EUCAERROR, "[%s][%s] failed to read volume XML from %s\n", instance->instanceId, volume->volumeId, lpath);
                rc = 1;
            }
        }

        EUCA_FREE(remoteDevStr);

        if (!rc) {
            // zhill - wrap with retry in case libvirt is dumb.
            err = 0;
            for (i = 1; i < REATTACH_RETRIES; i++) {
                // protect libvirt calls because we've seen problems during concurrent libvirt invocations
                sem_p(hyp_sem);
                {
                    err = virDomainAttachDevice(dom, xml);
                }
                sem_v(hyp_sem);

                if (err) {
                    logprintfl(EUCAERROR, "[%s][%s] failed to reattach volume (attempt %d of %d)\n", instance->instanceId, volume->volumeId, i,
                               REATTACH_RETRIES);
                    logprintfl(EUCADEBUG, "[%s][%s] error from virDomainAttachDevice: %d xml: %s\n", instance->instanceId, volume->volumeId, err,
                               xml);
                    sleep(3);   // sleep a bit and retry
                } else {
                    logprintfl(EUCAINFO, "[%s][%s] volume reattached as '%s'\n", instance->instanceId, volume->volumeId, volume->localDevReal);
                    break;
                }
            }

            log_level_for_devstring = EUCATRACE;
            if (err)
                log_level_for_devstring = EUCADEBUG;
            logprintfl(log_level_for_devstring, "[%s][%s] remote device string: %s\n", instance->instanceId, volume->volumeId, volume->remoteDev);
        }

        EUCA_FREE(xml);
    }

    if (dom == NULL) {
        logprintfl(EUCAERROR, "[%s] failed to restart instance\n", instance->instanceId);
        change_state(instance, SHUTOFF);
        return NULL;
    }

    sem_p(hyp_sem);
    {
        virDomainFree(dom);
    }
    sem_v(hyp_sem);
    return NULL;

#undef REATTACH_RETRIES
}
예제 #3
0
//!
//! Defines the thread that does the actual reboot of an instance.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *rebooting_thread(void *arg)
{
    char *xml = NULL;
    char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" };
    char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" };
    ncInstance *instance = ((ncInstance *) arg);
    virDomainPtr dom = NULL;
    virConnectPtr conn = NULL;

    LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId);

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        return NULL;
    }
    dom = virDomainLookupByName(conn, instance->instanceId);
    if (dom == NULL) {
        LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId);
        unlock_hypervisor_conn();
        return NULL;
    }
    // obtain the most up-to-date XML for domain from libvirt
    xml = virDomainGetXMLDesc(dom, 0);
    if (xml == NULL) {
        LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId);
        virDomainFree(dom);            // release libvirt resource
        unlock_hypervisor_conn();
        return NULL;
    }
    virDomainFree(dom);                // release libvirt resource
    unlock_hypervisor_conn();

    // try shutdown first, then kill it if uncooperative
    if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) {
        LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId);
        return NULL;
    }
    // Add a shift to values of three of the metrics: ones that
    // drop back to zero after a reboot. The shift, which is based
    // on the latest value, ensures that values sent upstream do
    // not go backwards .
    sensor_shift_metric(instance->instanceId, "CPUUtilization");
    sensor_shift_metric(instance->instanceId, "NetworkIn");
    sensor_shift_metric(instance->instanceId, "NetworkOut");

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        return NULL;
    }
    // domain is now shut down, create a new one with the same XML
    LOGINFO("[%s] rebooting\n", instance->instanceId);
    dom = virDomainCreateLinux(conn, xml, 0);
    if (dom == NULL) {
        LOGERROR("[%s] failed to restart instance\n", instance->instanceId);
        change_state(instance, SHUTOFF);
    } else {
        euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN);
        sensor_refresh_resources(resourceName, resourceAlias, 1);   // refresh stats so we set base value accurately
        virDomainFree(dom);
    }
    EUCA_FREE(xml);

    unlock_hypervisor_conn();
    unset_corrid(get_corrid());
    return NULL;
}