//! //! Handles the reboot request of an instance. //! //! @param[in] nc a pointer to the NC state structure to initialize //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instanceId the instance identifier string (i-XXXXXXXX) //! //! @return EUCA_OK on success or proper error code. Known error code returned include: //! EUCA_ERROR, EUCA_NOT_FOUND_ERROR, and EUCA_FATAL_ERROR. //! static int doRebootInstance(struct nc_state_t *nc, ncMetadata * pMeta, char *instanceId) { pthread_t tcb = { 0 }; ncInstance *instance = NULL; rebooting_thread_params *params = NULL; sem_p(inst_sem); { instance = find_instance(&global_instances, instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instanceId); return (EUCA_NOT_FOUND_ERROR); } params = EUCA_ZALLOC(1, sizeof(rebooting_thread_params)); memcpy(&(params->instance), instance, sizeof(ncInstance)); memcpy(&(params->nc), nc, sizeof(struct nc_state_t)); // since shutdown/restart may take a while, we do them in a thread if (pthread_create(&tcb, NULL, rebooting_thread, params)) { LOGERROR("[%s] failed to spawn a reboot thread\n", instanceId); return (EUCA_FATAL_ERROR); } set_corrid_pthread(get_corrid() != NULL ? get_corrid()->correlation_id : NULL, tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the rebooting thread\n", instanceId); return (EUCA_FATAL_ERROR); } return (EUCA_OK); }
//! //! Refactored logic for detaching a local iSCSI device and unexporting the volume. To be invoked only by other functions in this file after acquiring the necessary lock. //! //! @param[in] sc_url - The URL to reach the cluster's SC at. //! @param[in] use_ws_sec - boolean to determine use of WS-SEC. //! @param[in] ws_sec_policy_file - Policy file path for WS-SEC //! @param[in] vol_data - The ebs_volume_data struct pointer //! @param[in] connect_string - The connection string to use for local connection //! @param[in] local_ip - The local host's external IP //! @param[in] local_iqn - The local host's IQN //! @param[in] do_rescan - Set to false to indicate no rescan should be done on disconnect, or true to use rescan //! //! @return EUCA_OK on success, EUCA_ERROR on failure //! //! @pre //! //! @post //! //! @note should be invoked only by functions in this file that acquired the necessary lock. //! static int cleanup_volume_attachment(char *sc_url, int use_ws_sec, char *ws_sec_policy_file, ebs_volume_data * vol_data, char *connect_string, char *local_ip, char *local_iqn, int do_rescan) { int rc = 0; char *reencrypted_token = NULL; char *refreshedDev = NULL; if (vol_data == NULL || connect_string == NULL || local_ip == NULL || local_iqn == NULL) { LOGERROR("Cannot cleanup volume attachment. Got NULL input parameters.\n"); return EUCA_ERROR; } LOGDEBUG("[%s] attempting to disconnect iscsi target\n", vol_data->volumeId); if (disconnect_iscsi_target(connect_string, do_rescan) != 0) { LOGERROR("[%s] failed to disconnect iscsi target\n", vol_data->volumeId); LOGDEBUG("Skipping SC Call due to previous errors\n"); return EUCA_ERROR; } if (sc_url == NULL || strlen(sc_url) <= 0) { LOGERROR("[%s] Cannot invoke SC UnexportVolume, SC URL is invalid\n", vol_data->volumeId); return EUCA_ERROR; } rc = re_encrypt_token(vol_data->token, &reencrypted_token); if (rc != EUCA_OK || reencrypted_token == NULL || strlen(reencrypted_token) <= 0) { LOGERROR("Failed on re-encryption of token for call to SC\n"); if (reencrypted_token != NULL) { EUCA_FREE(reencrypted_token); } return EUCA_ERROR; } else { LOGTRACE("Re-encrypted token for %s is %s\n", vol_data->volumeId, reencrypted_token); } threadCorrelationId* corr_id = get_corrid(); LOGTRACE("Calling scClientCall with url: %s and token %s\n", sc_url, vol_data->token); if (scClientCall( corr_id == NULL ? NULL : corr_id->correlation_id, NULL, use_ws_sec, ws_sec_policy_file, request_timeout_sec, sc_url, "UnexportVolume", vol_data->volumeId, reencrypted_token, local_ip, local_iqn) != EUCA_OK) { EUCA_FREE(reencrypted_token); LOGERROR("ERROR unexporting volume %s\n", vol_data->volumeId); return EUCA_ERROR; } else { EUCA_FREE(reencrypted_token); //Ok, now refresh local session to be sure it's gone. //Should return error of not found. refreshedDev = get_iscsi_target(connect_string); if (refreshedDev) { //Failure, should have NULL. return EUCA_ERROR; } else { //We're good return EUCA_OK; } } }
//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials, char ** resourceLocations, int resourceLocationsLen) { int ret = EUCA_OK; int credentials_prepared = 0; char *libvirt_xml_modified = NULL; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread(get_corrid() != NULL ? get_corrid()->correlation_id : NULL, tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; instance->migrationTime = time(NULL); //In preparing state, so set migrationTime. euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; //Fix for EUCA-10433, need instance struct in global_instances prior to doing volume ops //The monitor thread will now pick up the instance, so the migrationTime must be set sem_p(inst_sem); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char brname[IF_NAME_LEN] = ""; if (!strcmp(nc->pEucaNet->sMode, NETMODE_MANAGED)) { snprintf(brname, IF_NAME_LEN, "%s", instance->groupIds[0]); } else { snprintf(brname, IF_NAME_LEN, "%s", nc->pEucaNet->sBridgeDevice); } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); ebs_volume_data *vol_data = NULL; char *libvirt_xml = NULL; char serial[128]; char bus[16]; set_serial_and_bus(volume->volumeId, volume->devName, serial, sizeof(serial), bus, sizeof(bus)); if ((ret = connect_ebs(volume->devName, serial, bus, nc, instance->instanceId, volume->volumeId, volume->attachmentToken, &libvirt_xml, &vol_data)) != EUCA_OK) { goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); // save volume info into vol-XXX-libvirt.xml for future detach if (create_vol_xml(instance->instanceId, volume->volumeId, libvirt_xml, &libvirt_xml_modified) != EUCA_OK) { goto unroll; } continue; unroll: ret = EUCA_ERROR; // @TODO: unroll all previous ones // for (int uv = v - 1; uv >= 0; uv--) { // disconnect_ebs(nc, instance->instanceId, volume->volumeId, ) // } goto failed_dest; } // build any secondary network interface xml files for (int w=0; w < EUCA_MAX_NICS; w++) { if (strlen(instance->secNetCfgs[w].interfaceId) == 0) continue; gen_libvirt_nic_xml(instance->instancePath, instance->secNetCfgs[w].interfaceId); } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->migrationTime = 0; //Reset the timer, to ensure monitoring thread handles this properly. This is required when setting BOOTING state instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }
//! //! Defines the thread that does the actual migration of an instance off the source. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *migrating_thread(void *arg) { ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; int migration_error = 0; LOGTRACE("invoked for %s\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to connect to hypervisor), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } else { LOGTRACE("[%s] connected to hypervisor\n", instance->instanceId); } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to find domain), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } char duri[1024]; snprintf(duri, sizeof(duri), "qemu+tls://%s/system", instance->migration_dst); virConnectPtr dconn = NULL; LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGWARN("[%s] cannot migrate instance using TLS (failed to connect to remote), retrying using SSH.\n", instance->instanceId); snprintf(duri, sizeof(duri), "qemu+ssh://%s/system", instance->migration_dst); LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGERROR("[%s] cannot migrate instance using TLS or SSH (failed to connect to remote), giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } } LOGINFO("[%s] migrating instance\n", instance->instanceId); virDomain *ddom = virDomainMigrate(dom, dconn, VIR_MIGRATE_LIVE | VIR_MIGRATE_NON_SHARED_DISK, NULL, // new name on destination (optional) NULL, // destination URI as seen from source (optional) 0L); // bandwidth limitation (0 => unlimited) if (ddom == NULL) { LOGERROR("[%s] cannot migrate instance, giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } else { LOGINFO("[%s] instance migrated\n", instance->instanceId); } virDomainFree(ddom); virConnectClose(dconn); out: if (dom) virDomainFree(dom); if (conn != NULL) unlock_hypervisor_conn(); sem_p(inst_sem); LOGDEBUG("%d outgoing migrations still active\n", --outgoing_migrations_in_progress); if (migration_error) { migration_rollback(instance); } else { // If this is set to NOT_MIGRATING here, it's briefly possible for // both the source and destination nodes to report the same instance // as Extant/NOT_MIGRATING, which is confusing! instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); copy_instances(); } sem_v(inst_sem); LOGDEBUG("done\n"); unset_corrid(get_corrid()); return NULL; }
//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; // ncInstance *instance = ((ncInstance *) arg); ncInstance *instance = NULL; struct nc_state_t *nc = NULL; virDomainPtr dom = NULL; virConnectPtr conn = NULL; rebooting_thread_params *params = ((rebooting_thread_params *) arg); instance = &(params->instance); nc = &(params->nc); LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); EUCA_FREE(params); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); EUCA_FREE(params); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { // need to sleep to allow midolman to update the VM interface sleep(10); } dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); if (!strcmp(nc->pEucaNet->sMode, NETMODE_VPCMIDO)) { char iface[16], cmd[EUCA_MAX_PATH], obuf[256], ebuf[256], sPath[EUCA_MAX_PATH]; int rc; snprintf(iface, 16, "vn_%s", instance->instanceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } // Repeat process for secondary interfaces as well for (int i=0; i < EUCA_MAX_NICS; i++) { if (strlen(instance->secNetCfgs[i].interfaceId) == 0) continue; snprintf(iface, 16, "vn_%s", instance->secNetCfgs[i].interfaceId); // If this device does not have a 'brport' path, this isn't a bridge device snprintf(sPath, EUCA_MAX_PATH, "/sys/class/net/%s/brport/", iface); if (!check_directory(sPath)) { LOGDEBUG("[%s] removing instance interface %s from host bridge\n", instance->instanceId, iface); snprintf(cmd, EUCA_MAX_PATH, "%s brctl delif %s %s", nc->rootwrap_cmd_path, instance->params.guestNicDeviceName, iface); rc = timeshell(cmd, obuf, ebuf, 256, 10); if (rc) { LOGERROR("unable to remove instance interface from bridge after launch: instance will not be able to connect to midonet (will not connect to network): check bridge/libvirt/kvm health\n"); } } } } } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); EUCA_FREE(params); return NULL; }
//! //! Connects an EBS volume to the local host as an ISCSI block device. The resulting device is placed in *result_device //! //! @param[in] sc_url - The URL to reach the cluster's SC at. //! @param[in] attachment_token - The attachment token/volume string received in the attach request (or vbr) //! @param[in] use_ws_sec - Boolean to use WS-SEC on SC call. //! @param[in] ws_sec_policy_file - Policy file for WS-SEC on SC call. //! @param[in] local_ip - External IP of the local host. Will be used to send to SC for authorization //! @param[in] local_iqn - IQN of local host for sending to SC for authorization. //! @param[out] result_device - The block device that is the EBS volume connected to local host, will be populated on success. //! @param[out] vol_data - The populated ebs_volume_data struct that hold volume info (id, connect string, token, etc) //! //! @return EUCA_OK on success, EUCA_ERROR on failure. //! //! @pre //! //! @post //! //! @note //! int connect_ebs_volume(char *sc_url, char *attachment_token, int use_ws_sec, char *ws_sec_policy_file, char *local_ip, char *local_iqn, char **result_device, ebs_volume_data ** vol_data) { int ret = EUCA_OK; char *reencrypted_token = NULL; char *connect_string = NULL; char *dev = NULL; int do_rescan = 1; if (sc_url == NULL || strlen(sc_url) == 0 || attachment_token == NULL || local_ip == NULL || local_iqn == NULL) { LOGERROR("Cannont connect ebs volume. Got NULL input parameters.\n"); return EUCA_ERROR; } if (deserialize_volume(attachment_token, vol_data) != EUCA_OK || *vol_data == NULL) { LOGERROR("Failed parsing volume string %s\n", attachment_token); ret = EUCA_ERROR; return ret; } if (strlen((*vol_data)->volumeId) == 0 || (*vol_data)->token == NULL || strlen((*vol_data)->token) == 0) { LOGERROR("After deserializing volume string, still found null or empty volumeId or token"); ret = EUCA_ERROR; return ret; } LOGTRACE("Parsed volume info: volumeId=%s, encrypted token=%s\n", (*vol_data)->volumeId, (*vol_data)->token); if (re_encrypt_token((*vol_data)->token, &reencrypted_token) != EUCA_OK || reencrypted_token == NULL || strlen(reencrypted_token) <= 0) { LOGERROR("Failed on re-encryption of token for call to SC\n"); if (reencrypted_token != NULL) { EUCA_FREE(reencrypted_token); } return EUCA_ERROR; } LOGTRACE("Requesting volume lock\n"); sem_p(vol_sem); //Acquire the lock, after this, failure requires 'goto release' for release of lock LOGTRACE("Got volume lock\n"); LOGTRACE("Calling ExportVolume on SC at %s\n", sc_url); threadCorrelationId* corr_id = get_corrid(); if (scClientCall(corr_id!=NULL ? corr_id->correlation_id: NULL, NULL, use_ws_sec, ws_sec_policy_file, request_timeout_sec, sc_url, "ExportVolume", (*vol_data)->volumeId, reencrypted_token, local_ip, local_iqn, &connect_string) != EUCA_OK) { LOGERROR("Failed to get connection information for volume %s from storage controller at: %s\n", (*vol_data)->volumeId, sc_url); ret = EUCA_ERROR; goto release; } else { if (euca_strncpy((*vol_data)->connect_string, connect_string, EBS_CONNECT_STRING_MAX_LENGTH) == NULL) { LOGERROR("Failed to copy connect string from SC response: %s\n", connect_string); ret = EUCA_ERROR; goto release; } } //copy the connection info from the SC return to the resourceLocation. dev = connect_iscsi_target((*vol_data)->connect_string); if (!dev || !strstr(dev, "/dev")) { LOGERROR("Failed to connect to iSCSI target: %s\n", (*vol_data)->connect_string); //disconnect the volume if (cleanup_volume_attachment(sc_url, use_ws_sec, ws_sec_policy_file, (*vol_data), (*vol_data)->connect_string, local_ip, local_iqn, do_rescan) != EUCA_OK) { LOGTRACE("cleanup_volume_attachment returned failure on cleanup from connection failure\n"); } ret = EUCA_ERROR; goto release; } *result_device = dev; release: LOGTRACE("Releasing volume lock\n"); sem_v(vol_sem); LOGTRACE("Released volume lock\n"); if (reencrypted_token != NULL) { EUCA_FREE(reencrypted_token); } if (ret != EUCA_OK && (*vol_data) != NULL) { //Free the vol data struct too EUCA_FREE(*vol_data); } return ret; }
//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials) { int ret = EUCA_OK; int credentials_prepared = 0; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread( get_corrid()!=NULL ? get_corrid()->correlation_id : NULL , tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); save_instance_struct(instance); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char *brname = NULL; if ((error = vnetStartNetwork(nc->vnetconfig, instance->ncnet.vlan, NULL, NULL, NULL, &brname)) != EUCA_OK) { LOGERROR("[%s] start network failed for instance, terminating it\n", instance->instanceId); EUCA_FREE(brname); goto failed_dest; } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); EUCA_FREE(brname); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); // TODO: factor what the following out of here and doAttachVolume() in handlers_default.c int have_remote_device = 0; char *xml = NULL; char *remoteDevStr = NULL; char scUrl[512]; char localDevReal[32], localDevTag[256], remoteDevReal[132]; char *tagBuf = localDevTag; ebs_volume_data *vol_data = NULL; ret = convert_dev_names(volume->localDev, localDevReal, tagBuf); if (ret) goto unroll; //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); get_service_url("storage", nc, scUrl); if (strlen(scUrl) == 0) { LOGERROR("[%s][%s] Failed to lookup enabled Storage Controller. Cannot attach volume %s\n", instance->instanceId, volume->volumeId, scUrl); have_remote_device = 0; goto unroll; } else { LOGTRACE("[%s][%s] Using SC URL: %s\n", instance->instanceId, volume->volumeId, scUrl); } //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); int rc = connect_ebs_volume(scUrl, volume->attachmentToken, nc->config_use_ws_sec, nc->config_sc_policy_file, nc->ip, nc->iqn, &remoteDevStr, &vol_data); if (rc) { LOGERROR("Error connecting ebs volume %s\n", volume->attachmentToken); have_remote_device = 0; ret = EUCA_ERROR; goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) { LOGERROR("[%s][%s] failed to connect to iscsi target\n", instance->instanceId, volume->volumeId); remoteDevReal[0] = '\0'; } else { LOGDEBUG("[%s][%s] attached iSCSI target of host device '%s'\n", instance->instanceId, volume->volumeId, remoteDevStr); snprintf(remoteDevReal, sizeof(remoteDevReal), "%s", remoteDevStr); have_remote_device = 1; } EUCA_FREE(remoteDevStr); // something went wrong above, abort if (!have_remote_device) { goto unroll; } // make sure there is a block device if (check_block(remoteDevReal)) { LOGERROR("[%s][%s] cannot verify that host device '%s' is available for hypervisor attach\n", instance->instanceId, volume->volumeId, remoteDevReal); goto unroll; } // generate XML for libvirt attachment request if (gen_volume_xml(volume->volumeId, instance, localDevReal, remoteDevReal) // creates vol-XXX.xml || gen_libvirt_volume_xml(volume->volumeId, instance)) { // creates vol-XXX-libvirt.xml via XSLT transform LOGERROR("[%s][%s] could not produce attach device xml\n", instance->instanceId, volume->volumeId); goto unroll; } // invoke hooks char path[EUCA_MAX_PATH]; char lpath[EUCA_MAX_PATH]; snprintf(path, sizeof(path), EUCALYPTUS_VOLUME_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX.xml snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX-libvirt.xml if (call_hooks(NC_EVENT_PRE_ATTACH, lpath)) { LOGERROR("[%s][%s] cancelled volume attachment via hooks\n", instance->instanceId, volume->volumeId); goto unroll; } // read in libvirt XML, which may have been modified by the hook above if ((xml = file2str(lpath)) == NULL) { LOGERROR("[%s][%s] failed to read volume XML from %s\n", instance->instanceId, volume->volumeId, lpath); goto unroll; } continue; unroll: ret = EUCA_ERROR; // TODO: unroll all volume attachments goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }
//! //! Defines the thread that does the actual reboot of an instance. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *rebooting_thread(void *arg) { char *xml = NULL; char resourceName[1][MAX_SENSOR_NAME_LEN] = { "" }; char resourceAlias[1][MAX_SENSOR_NAME_LEN] = { "" }; ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; LOGDEBUG("[%s] spawning rebooting thread\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot locate instance to reboot, giving up\n", instance->instanceId); unlock_hypervisor_conn(); return NULL; } // obtain the most up-to-date XML for domain from libvirt xml = virDomainGetXMLDesc(dom, 0); if (xml == NULL) { LOGERROR("[%s] cannot obtain metadata for instance to reboot, giving up\n", instance->instanceId); virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); return NULL; } virDomainFree(dom); // release libvirt resource unlock_hypervisor_conn(); // try shutdown first, then kill it if uncooperative if (shutdown_then_destroy_domain(instance->instanceId, TRUE) != EUCA_OK) { LOGERROR("[%s] failed to shutdown and destroy the instance to reboot, giving up\n", instance->instanceId); return NULL; } // Add a shift to values of three of the metrics: ones that // drop back to zero after a reboot. The shift, which is based // on the latest value, ensures that values sent upstream do // not go backwards . sensor_shift_metric(instance->instanceId, "CPUUtilization"); sensor_shift_metric(instance->instanceId, "NetworkIn"); sensor_shift_metric(instance->instanceId, "NetworkOut"); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot connect to hypervisor to restart instance, giving up\n", instance->instanceId); return NULL; } // domain is now shut down, create a new one with the same XML LOGINFO("[%s] rebooting\n", instance->instanceId); dom = virDomainCreateLinux(conn, xml, 0); if (dom == NULL) { LOGERROR("[%s] failed to restart instance\n", instance->instanceId); change_state(instance, SHUTOFF); } else { euca_strncpy(resourceName[0], instance->instanceId, MAX_SENSOR_NAME_LEN); sensor_refresh_resources(resourceName, resourceAlias, 1); // refresh stats so we set base value accurately virDomainFree(dom); } EUCA_FREE(xml); unlock_hypervisor_conn(); unset_corrid(get_corrid()); return NULL; }
//! //! Main log-printing function, which will dump a line into a log, with a prefix appropriate for //! the log level, given that the log level is above the threshold. //! //! @param[in] func the caller function name (i.e. __FUNCTION__) //! @param[in] file the file in which the caller function reside (i.e. __FILE__) //! @param[in] line the line at which this function was called (i.e. __LINE__) //! @param[in] level the log level for this message //! @param[in] format the format string of the message //! @param[in] ... the variable argument part of the format //! //! @return 0 on success, -1 on failure with this function or 1 if log_line() failed. //! //! @see log_line() //! //! @pre \li The func field must not be null if the prefix spec contains 'm'. //! \li The file field must not be null if the prefix spec contains 'F'. //! \li The log level must be valid and greather than or equal to the configured log level //! \li The format string must not be null. //! //! @post If the given level if greater or equal to the configured log level, the message will be //! printed into our log file or syslog. //! //! @todo evaluate if we cannot standardize the error code returned. //! int logprintfl(const char *func, const char *file, int line, log_level_e level, const char *format, ...) { int rc = -1; int left = 0; int size = 0; int offset = 0; char *s = NULL; char c = '\0'; char cn = '\0'; boolean custom_spec = FALSE; char buf[LOGLINEBUF] = ""; va_list ap = { {0} }; const char *prefix_spec = NULL; boolean is_corrid = FALSE; char buf_corrid[128] = ""; // return if level is invalid or below the threshold if (level < log_level) { return (0); } if ((level < 0) || (level > EUCA_LOG_OFF)) { // unexpected log level return (-1); } threadCorrelationId *corr_id = get_corrid(); if (corr_id != NULL && corr_id->correlation_id != NULL && strlen(corr_id->correlation_id) >= 74) { is_corrid = TRUE; } if (strcmp(log_custom_prefix, USE_STANDARD_PREFIX) == 0) { prefix_spec = log_level_prefix[log_level]; custom_spec = FALSE; } else { prefix_spec = log_custom_prefix; custom_spec = TRUE; } // go over prefix format for the log level (defined in log.h or custom) for (; *prefix_spec != '\0'; prefix_spec++) { s = buf + offset; if ((left = sizeof(buf) - offset - 1) < 1) { // not enough room in internal buffer for a prefix return -1; } // see if we have a formatting character or a regular one c = prefix_spec[0]; cn = prefix_spec[1]; if ((c != '%') // not a special formatting char || (c == '%' && cn == '%') // formatting char, escaped || (c == '%' && cn == '\0')) { // formatting char at the end s[0] = c; s[1] = '\0'; offset++; if ((c == '%') && (cn == '%')) { // swallow the one extra '%' in input prefix_spec++; } continue; } // move past the '%' to the formatting char prefix_spec++; size = 0; switch (*prefix_spec) { case 'T': // timestamp size = fill_timestamp(s, left); break; case 'L': { // log-level char l[6]; euca_strncpy(l, log_level_names[level], 6); // we want hard truncation size = snprintf(s, left, "%5s", l); break; } case 'p': { // process ID char p[11]; snprintf(p, sizeof(p), "%010d", getpid()); // 10 chars is enough for max 32-bit unsigned integer size = print_field_truncated(&prefix_spec, s, left, p); break; } case 't': { // thread ID char t[21]; snprintf(t, sizeof(t), "%020d", (pid_t) syscall(SYS_gettid)); // 20 chars is enough for max 64-bit unsigned integer size = print_field_truncated(&prefix_spec, s, left, t); break; } case 'm': // method size = print_field_truncated(&prefix_spec, s, left, func); break; case 'F': { // file-and-line char file_and_line[64]; snprintf(file_and_line, sizeof(file_and_line), "%s:%d", file, line); size = print_field_truncated(&prefix_spec, s, left, file_and_line); break; } case 's': { // max RSS of the process struct rusage u; bzero(&u, sizeof(struct rusage)); getrusage(RUSAGE_SELF, &u); // unfortunately, many fields in 'struct rusage' aren't supported on Linux (notably: ru_ixrss, ru_idrss, ru_isrss) char size_str[64]; snprintf(size_str, sizeof(size_str), "%05ld", u.ru_maxrss / 1024); size = print_field_truncated(&prefix_spec, s, left, size_str); break; } case '?': // not supported currently s[0] = '?'; s[1] = '\0'; size = 1; break; default: s[0] = *prefix_spec; s[1] = '\0'; size = 1; break; } if (size < 0) { // something went wrong in the snprintf()s above logprintf("error in prefix construction in logprintfl()\n"); return -1; } offset += size; } if (is_corrid && log_file_path_req_track != NULL) { buf[offset++] = ' '; buf[offset++] = '['; for (int i = 0; i < 8; i++) { buf[offset++] = corr_id->correlation_id[i]; } buf[offset++] = '-'; for (int i = 0; i < 4; i++) { buf[offset++] = corr_id->correlation_id[i + 47]; } buf[offset++] = ']'; } // add a space between the prefix and the message proper if ((offset > 0) && ((sizeof(buf) - offset - 1) > 0)) { buf[offset++] = ' '; buf[offset] = '\0'; } // append the log message passed via va_list va_start(ap, format); { rc = vsnprintf(buf + offset, sizeof(buf) - offset - 1, format, ap); } va_end(ap); if (rc < 0) return (rc); if (syslog_facility != -1) { // log to syslog, at the appropriate level: euca DEBUG, TRACE, and EXTREME use syslog's DEBUG int l = LOG_DEBUG; if (level == EUCA_LOG_ERROR) l = LOG_ERR; else if (level == EUCA_LOG_WARN) l = LOG_WARNING; else if (level == EUCA_LOG_INFO) l = LOG_INFO; if (custom_spec) syslog(l, buf); else syslog(l, buf + offset); } if (is_corrid && log_file_path_req_track != NULL) { log_line(log_file_path_req_track, buf); sprintf(buf_corrid, "[%.8s-%.4s] ", corr_id->correlation_id, corr_id->correlation_id + 47); if ((s = strstr(buf, buf_corrid)) != NULL) { offset = 16; while (TRUE) { s[offset - 16] = s[offset]; c = s[offset++]; if (c == '\0' || offset >= LOGLINEBUF) break; } s[offset - 16] = '\0'; } } if ((rc = log_line(log_file_path, buf)) != EUCA_OK) return (rc); return EUCA_OK; }