Example #1
0
//!
//! Defines the thread that does the actual reboot of an instance.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *rebooting_thread(void *arg)
{
    char *xml = NULL;
    char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" };
    char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" };
    //    ncInstance *instance = ((ncInstance *) arg);
    ncInstance *instance = NULL;
    struct nc_state_t *nc = NULL;
    virDomainPtr dom = NULL;
    virConnectPtr conn = NULL;
    rebooting_thread_params *params = ((rebooting_thread_params *) arg);
    instance = &(params->instance);
    nc = &(params->nc);

    LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId);

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    dom = virDomainLookupByName(conn, instance->instanceId);
    if (dom == NULL) {
        LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId);
        unlock_hypervisor_conn();
        EUCA_FREE(params);
        return NULL;
    }
    // obtain the most up-to-date XML for domain from libvirt
    xml = virDomainGetXMLDesc(dom, 0);
    if (xml == NULL) {
        LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId);
        virDomainFree(dom);            // release libvirt resource
        unlock_hypervisor_conn();
        EUCA_FREE(params);
        return NULL;
    }
    virDomainFree(dom);                // release libvirt resource
    unlock_hypervisor_conn();

    // try shutdown first, then kill it if uncooperative
    if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) {
        LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    // Add a shift to values of three of the metrics: ones that
    // drop back to zero after a reboot. The shift, which is based
    // on the latest value, ensures that values sent upstream do
    // not go backwards .
    sensor_shift_metric(instance->instanceId, "CPUUtilization");
    sensor_shift_metric(instance->instanceId, "NetworkIn");
    sensor_shift_metric(instance->instanceId, "NetworkOut");

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        EUCA_FREE(params);
        return NULL;
    }
    // domain is now shut down, create a new one with the same XML
    LOGINFO("[%s] rebooting\n", instance->instanceId);
    if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
        // need to sleep to allow midolman to update the VM interface
        sleep(10);
    }
    dom = virDomainCreateLinux(conn, xml, 0);
    if (dom == NULL) {
        LOGERROR("[%s] failed to restart instance\n", instance->instanceId);
        change_state(instance, SHUTOFF);
    } else {
        euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN);
        sensor_refresh_resources(resourceName, resourceAlias, 1);   // refresh stats so we set base value accurately
        virDomainFree(dom);

        if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) {
            char iface[16], cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH];
            int rc;
            snprintf(iface, 16, "vn_%s", instance->instanceId);
            
            // If this device does not have a 'brport' path, this isn't a bridge device
            snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
            if (!check_directory(sPath)) {
                LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface);
                snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
                rc = timeshell(cmd, obuf, ebuf, 256, 10);
                if (rc) {
                    LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
                }
            }

            // Repeat process for secondary interfaces as well
            for (int i=0; i < EUCA_MAX_NICS; i++) {
                if (strlen(instance->secNetCfgs[i].interfaceId) == 0)
                    continue;

                snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId);

                // If this device does not have a 'brport' path, this isn't a bridge device
                snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface);
                if (!check_directory(sPath)) {
                    LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface);
                    snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface);
                    rc = timeshell(cmd, obuf, ebuf, 256, 10);
                    if (rc) {
                        LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n");
                    }
                }
            }
        }

    }
    EUCA_FREE(xml);

    unlock_hypervisor_conn();
    unset_corrid(get_corrid());
    EUCA_FREE(params);
    return NULL;
}
Example #2
0
//!
//! Defines the thread that does the actual migration of an instance off the source.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *migrating_thread(void *arg)
{
    ncInstance *instance = ((ncInstance *) arg);
    virDomainPtr dom = NULL;
    virConnectPtr conn = NULL;
    int migration_error = 0;

    LOGTRACE("invoked for %s\n", instance->instanceId);

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot migrate instance %s (failed to connect to hypervisor), giving up and rolling back.\n", instance->instanceId, instance->instanceId);
        migration_error++;
        goto out;
    } else {
        LOGTRACE("[%s] connected to hypervisor\n", instance->instanceId);
    }

    dom = virDomainLookupByName(conn, instance->instanceId);
    if (dom == NULL) {
        LOGERROR("[%s] cannot migrate instance %s (failed to find domain), giving up and rolling back.\n", instance->instanceId, instance->instanceId);
        migration_error++;
        goto out;
    }

    char duri[1024];
    snprintf(duri, sizeof(duri), "qemu+tls://%s/system", instance->migration_dst);

    virConnectPtr dconn = NULL;

    LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri);
    dconn = virConnectOpen(duri);
    if (dconn == NULL) {
        LOGWARN("[%s] cannot migrate instance using TLS (failed to connect to remote), retrying using SSH.\n", instance->instanceId);
        snprintf(duri, sizeof(duri), "qemu+ssh://%s/system", instance->migration_dst);
        LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri);
        dconn = virConnectOpen(duri);
        if (dconn == NULL) {
            LOGERROR("[%s] cannot migrate instance using TLS or SSH (failed to connect to remote), giving up and rolling back.\n", instance->instanceId);
            migration_error++;
            goto out;
        }
    }

    LOGINFO("[%s] migrating instance\n", instance->instanceId);
    virDomain *ddom = virDomainMigrate(dom,
                                       dconn,
                                       VIR_MIGRATE_LIVE | VIR_MIGRATE_NON_SHARED_DISK,
                                       NULL,    // new name on destination (optional)
                                       NULL,    // destination URI as seen from source (optional)
                                       0L); // bandwidth limitation (0 => unlimited)
    if (ddom == NULL) {
        LOGERROR("[%s] cannot migrate instance, giving up and rolling back.\n", instance->instanceId);
        migration_error++;
        goto out;
    } else {
        LOGINFO("[%s] instance migrated\n", instance->instanceId);
    }
    virDomainFree(ddom);
    virConnectClose(dconn);

out:
    if (dom)
        virDomainFree(dom);

    if (conn != NULL)
        unlock_hypervisor_conn();

    sem_p(inst_sem);
    LOGDEBUG("%d outgoing migrations still active\n", --outgoing_migrations_in_progress);
    if (migration_error) {
        migration_rollback(instance);
    } else {
        // If this is set to NOT_MIGRATING here, it's briefly possible for
        // both the source and destination nodes to report the same instance
        // as Extant/NOT_MIGRATING, which is confusing!
        instance->migration_state = MIGRATION_CLEANING;
        save_instance_struct(instance);
        copy_instances();
    }
    sem_v(inst_sem);

    LOGDEBUG("done\n");
    unset_corrid(get_corrid());
    return NULL;
}
Example #3
0
//!
//! Defines the thread that does the actual reboot of an instance.
//!
//! @param[in] arg a transparent pointer to the argument passed to this thread handler
//!
//! @return Always return NULL
//!
static void *rebooting_thread(void *arg)
{
    char *xml = NULL;
    char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" };
    char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" };
    ncInstance *instance = ((ncInstance *) arg);
    virDomainPtr dom = NULL;
    virConnectPtr conn = NULL;

    LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId);

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        return NULL;
    }
    dom = virDomainLookupByName(conn, instance->instanceId);
    if (dom == NULL) {
        LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId);
        unlock_hypervisor_conn();
        return NULL;
    }
    // obtain the most up-to-date XML for domain from libvirt
    xml = virDomainGetXMLDesc(dom, 0);
    if (xml == NULL) {
        LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId);
        virDomainFree(dom);            // release libvirt resource
        unlock_hypervisor_conn();
        return NULL;
    }
    virDomainFree(dom);                // release libvirt resource
    unlock_hypervisor_conn();

    // try shutdown first, then kill it if uncooperative
    if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) {
        LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId);
        return NULL;
    }
    // Add a shift to values of three of the metrics: ones that
    // drop back to zero after a reboot. The shift, which is based
    // on the latest value, ensures that values sent upstream do
    // not go backwards .
    sensor_shift_metric(instance->instanceId, "CPUUtilization");
    sensor_shift_metric(instance->instanceId, "NetworkIn");
    sensor_shift_metric(instance->instanceId, "NetworkOut");

    if ((conn = lock_hypervisor_conn()) == NULL) {
        LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId);
        return NULL;
    }
    // domain is now shut down, create a new one with the same XML
    LOGINFO("[%s] rebooting\n", instance->instanceId);
    dom = virDomainCreateLinux(conn, xml, 0);
    if (dom == NULL) {
        LOGERROR("[%s] failed to restart instance\n", instance->instanceId);
        change_state(instance, SHUTOFF);
    } else {
        euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN);
        sensor_refresh_resources(resourceName, resourceAlias, 1);   // refresh stats so we set base value accurately
        virDomainFree(dom);
    }
    EUCA_FREE(xml);

    unlock_hypervisor_conn();
    unset_corrid(get_corrid());
    return NULL;
}