//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials, char ** resourceLocations, int resourceLocationsLen) { int ret = EUCA_OK; int credentials_prepared = 0; char *libvirt_xml_modified = NULL; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread(get_corrid() != NULL ? get_corrid()->correlation_id : NULL, tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; instance->migrationTime = time(NULL); //In preparing state, so set migrationTime. euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; //Fix for EUCA-10433, need instance struct in global_instances prior to doing volume ops //The monitor thread will now pick up the instance, so the migrationTime must be set sem_p(inst_sem); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char brname[IF_NAME_LEN] = ""; if (!strcmp(nc->pEucaNet->sMode, NETMODE_MANAGED)) { snprintf(brname, IF_NAME_LEN, "%s", instance->groupIds[0]); } else { snprintf(brname, IF_NAME_LEN, "%s", nc->pEucaNet->sBridgeDevice); } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); ebs_volume_data *vol_data = NULL; char *libvirt_xml = NULL; char serial[128]; char bus[16]; set_serial_and_bus(volume->volumeId, volume->devName, serial, sizeof(serial), bus, sizeof(bus)); if ((ret = connect_ebs(volume->devName, serial, bus, nc, instance->instanceId, volume->volumeId, volume->attachmentToken, &libvirt_xml, &vol_data)) != EUCA_OK) { goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); // save volume info into vol-XXX-libvirt.xml for future detach if (create_vol_xml(instance->instanceId, volume->volumeId, libvirt_xml, &libvirt_xml_modified) != EUCA_OK) { goto unroll; } continue; unroll: ret = EUCA_ERROR; // @TODO: unroll all previous ones // for (int uv = v - 1; uv >= 0; uv--) { // disconnect_ebs(nc, instance->instanceId, volume->volumeId, ) // } goto failed_dest; } // build any secondary network interface xml files for (int w=0; w < EUCA_MAX_NICS; w++) { if (strlen(instance->secNetCfgs[w].interfaceId) == 0) continue; gen_libvirt_nic_xml(instance->instancePath, instance->secNetCfgs[w].interfaceId); } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->migrationTime = 0; //Reset the timer, to ensure monitoring thread handles this properly. This is required when setting BOOTING state instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }
//! //! Defines the thread that does the actual migration of an instance off the source. //! //! @param[in] arg a transparent pointer to the argument passed to this thread handler //! //! @return Always return NULL //! static void *migrating_thread(void *arg) { ncInstance *instance = ((ncInstance *) arg); virDomainPtr dom = NULL; virConnectPtr conn = NULL; int migration_error = 0; LOGTRACE("invoked for %s\n", instance->instanceId); if ((conn = lock_hypervisor_conn()) == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to connect to hypervisor), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } else { LOGTRACE("[%s] connected to hypervisor\n", instance->instanceId); } dom = virDomainLookupByName(conn, instance->instanceId); if (dom == NULL) { LOGERROR("[%s] cannot migrate instance %s (failed to find domain), giving up and rolling back.\n", instance->instanceId, instance->instanceId); migration_error++; goto out; } char duri[1024]; snprintf(duri, sizeof(duri), "qemu+tls://%s/system", instance->migration_dst); virConnectPtr dconn = NULL; LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGWARN("[%s] cannot migrate instance using TLS (failed to connect to remote), retrying using SSH.\n", instance->instanceId); snprintf(duri, sizeof(duri), "qemu+ssh://%s/system", instance->migration_dst); LOGDEBUG("[%s] connecting to remote hypervisor at '%s'\n", instance->instanceId, duri); dconn = virConnectOpen(duri); if (dconn == NULL) { LOGERROR("[%s] cannot migrate instance using TLS or SSH (failed to connect to remote), giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } } LOGINFO("[%s] migrating instance\n", instance->instanceId); virDomain *ddom = virDomainMigrate(dom, dconn, VIR_MIGRATE_LIVE | VIR_MIGRATE_NON_SHARED_DISK, NULL, // new name on destination (optional) NULL, // destination URI as seen from source (optional) 0L); // bandwidth limitation (0 => unlimited) if (ddom == NULL) { LOGERROR("[%s] cannot migrate instance, giving up and rolling back.\n", instance->instanceId); migration_error++; goto out; } else { LOGINFO("[%s] instance migrated\n", instance->instanceId); } virDomainFree(ddom); virConnectClose(dconn); out: if (dom) virDomainFree(dom); if (conn != NULL) unlock_hypervisor_conn(); sem_p(inst_sem); LOGDEBUG("%d outgoing migrations still active\n", --outgoing_migrations_in_progress); if (migration_error) { migration_rollback(instance); } else { // If this is set to NOT_MIGRATING here, it's briefly possible for // both the source and destination nodes to report the same instance // as Extant/NOT_MIGRATING, which is confusing! instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); copy_instances(); } sem_v(inst_sem); LOGDEBUG("done\n"); unset_corrid(get_corrid()); return NULL; }
static int instance_migrate_thread(void *param) { if (!param) return -1; struct instance_migrate *instmig = (struct instance_migrate*)param; unsigned long flags = VIR_MIGRATE_LIVE | VIR_MIGRATE_PEER2PEER | VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_UNSAFE; char uri[CHAR_BUFFER_SIZE] = {0}; pid_t pid; int ret = 0; /* char cmd[CHAR_BUFFER_SIZE] = {0}; snprintf(cmd, sizeof(CHAR_BUFFER_SIZE), "virsh migrate --live --p2p %s qemu+tcp://%s/system --tunnelled --unsafe --verbose", instanceId, targetIp);*/ snprintf(uri, sizeof(uri), "qemu+tcp://%s/system", instmig->targetNc); { //virDomainJobInfo jobinfo; // http://libvirt.org/html/libvirt-libvirt.html#virDomainMigrateToURI if (-1 == long_task_live_migrate_create(instmig->taskId, instmig->instanceId, 0)) { logprintfl(EUCAERROR, HANF_DEBUG"%s: create live migrate task id[%s] for instance [%s] failed\n", __func__, instmig->taskId, instmig->instanceId); ret = -1; goto OUT; } ret = virDomainMigrateToURI(instmig->dom, uri, flags, NULL, 0/*0 means no bandwidth limitation*/); logprintfl(EUCAINFO, HANF_DEBUG"%s: virDomainMigrateToURI ret code is [%d]\n", __func__, ret); if (ret) { logprintfl(EUCAERROR, HANF_DEBUG"%s: uri[%s] migrate instance [%s] to [%s] failed\n", __func__, uri, instmig->instanceId, instmig->targetNc); long_task_set_failed(instmig->taskId, task_progress_names[FAILED_MIGRATE]); change_task_state(instmig->instance, FAILED_MIGRATE); ret = 1; } else { logprintfl(EUCAERROR, HANF_DEBUG"%s: uri[%s] migrate instance [%s] to [%s] DONE\n", __func__, uri, instmig->instanceId, instmig->targetNc); long_task_set_success(instmig->taskId, task_progress_names[SUCCESS_MIGRATE]); change_task_state(instmig->instance, SUCCESS_MIGRATE); /* after migrating, the instance in source NC will go disappear. then the thread monitoring_thread() will change_state(TEARDOWN) if instance not found, then TEARDOWN instance will be clean up in cache. And at the almost same time, owner of the migrated instance will be change in targetNC, which will cause cache to be clean up too. */ change_state(instmig->instance, TEARDOWN); ret = 0; } OUT: sem_p(inst_sem); copy_instances (); sem_v(inst_sem); sem_p(hyp_sem); virDomainFree(instmig->dom); sem_v(hyp_sem); free(instmig->targetNc); free(instmig->instanceId); free(instmig->taskId); free(instmig); } return ret; }
/* thread that does the actual reboot */ static void * rebooting_thread (void *arg) { virConnectPtr *conn; ncInstance * instance = (ncInstance *)arg; struct stat statbuf; int rc = 0; // RESTARTING, // FAILED_RESTART, // SUCCESS_RESTART logprintfl (EUCADEBUG, "{%u} spawning rebooting thread\n", (unsigned int)pthread_self()); char * xml = file2str (instance->libvirtFilePath); if (xml == NULL) { logprintfl (EUCAERROR, "cannot obtain XML file %s\n", instance->libvirtFilePath); return NULL; } conn = check_hypervisor_conn(); if (! conn) { logprintfl (EUCAERROR, "cannot restart instance %s, abandoning it\n", instance->instanceId); change_state (instance, SHUTOFF); free (xml); return NULL; } sem_p(inst_sem); change_task_state (instance, REBOOTING); copy_instances (); sem_v(inst_sem); sem_p (hyp_sem); virDomainPtr dom = virDomainLookupByName(*conn, instance->instanceId); sem_v (hyp_sem); if (dom == NULL) { free (xml); sem_p(inst_sem); change_task_state (instance, FAILED_REBOOT); sem_v(inst_sem); return NULL; } sem_p (hyp_sem); // for KVM, must stop and restart the instance int error = virDomainDestroy (dom); // TODO: change to Shutdown? TODO: is this synchronous? virDomainFree(dom); sem_v (hyp_sem); if (error) { free (xml); sem_p(inst_sem); change_task_state (instance, FAILED_REBOOT); sem_v(inst_sem); return NULL; } #if 0 // domain is now shut down, create a new one with the same XML sem_p (hyp_sem); dom = virDomainCreateLinux (*conn, xml, 0); sem_v (hyp_sem); free (xml); char *remoteDevStr=NULL; // re-attach each volume previously attached for (int i=0; i < EUCA_MAX_VOLUMES; ++i) { ncVolume * volume = &instance->volumes[i]; if (strcmp (volume->stateName, VOL_STATE_ATTACHED) && strcmp (volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching char attach_xml[1024]; int rc; // get credentials, decrypt them remoteDevStr = get_iscsi_target (volume->remoteDev); if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) { logprintfl(EUCAERROR, "Reattach-volume: failed to get local name of host iscsi device\n"); rc = 1; } else { rc = gen_libvirt_attach_xml (volume->volumeId, instance, volume->localDevReal, remoteDevStr, attach_xml, sizeof(attach_xml)); } if (remoteDevStr) free (remoteDevStr); if (!rc) { int err; sem_p (hyp_sem); err = virDomainAttachDevice (dom, attach_xml); sem_v (hyp_sem); if (err) { logprintfl (EUCAERROR, "virDomainAttachDevice() failed (err=%d) XML=%s\n", err, attach_xml); } else { logprintfl (EUCAINFO, "reattached '%s' to '%s' in domain %s\n", volume->remoteDev, volume->localDevReal, instance->instanceId); } } } #endif int ret=restore_and_volume_instance(instance->instanceId); //20130122-th if (ret==1) { logprintfl (EUCAERROR, "Failed to REBOOT instance %s\n", instance->instanceId); change_state (instance, SHUTOFF); sem_p(inst_sem); change_task_state (instance, FAILED_REBOOT); sem_v(inst_sem); return NULL; } sem_p(inst_sem); change_task_state (instance, SUCCESS_REBOOT); save_instance_struct (instance); copy_instances(); sem_v (inst_sem); return NULL; }
static void * shutdowning_thread (void *arg) { struct thread_params_t * params = (struct thread_params_t *)arg; if(params==NULL) { logprintfl (EUCADEBUG, "testtest\n"); return NULL; } ncInstance * instance = params->instance; char * taskId = params->taskId; int err = 0; /* { long_task *shutdowning_task=malloc(sizeof(long_task)); strncpy(shutdowning_task->taskId, taskId, sizeof(shutdowning_task->taskId)); shutdowning_task->type=INST_SHUTDOWNING; logprintfl(EUCAINFO, "line:%d: taskId=%s \n", __LINE__, shutdowning_task->taskId); sem_p(long_task_sem); long_task_add(shutdowning_task); sem_v(long_task_sem); long_task_status_set(taskId, "shutdowning"); } */ if(instance ==NULL || taskId==NULL) { logprintfl(EUCAERROR,"shutdownInstance failed: instance=NULL||taskId=NULL in shutdowning_thread \n"); return NULL; } err=long_task_general_create(taskId, instance->instanceId, 0); if(err !=0) { logprintfl(EUCAERROR,"create long task for shutdownInstance in shutdowning_thread failed err=%d taskId=%s ! try change taskId \n",err,taskId); return NULL; } /* long_task *shutdownTask = NULL; shutdownTask = long_task_find(taskId); if(shutdownTask == NULL) { logprintfl(EUCAINFO,"long_task_find failed in shutdowning_thread, taskId=%s \n",taskId); return NULL; } err=long_task_status_set(shutdownTask, task_progress_names[SHUTDOWNING]); if(err != 0) { logprintfl(EUCAINFO,"long_task_status_set failed in shutdowning_thread \n"); return NULL; } */ sem_p(inst_sem); change_task_state(instance, SHUTDOWNING); copy_instances(); sem_v(inst_sem); err=shutdown_or_destroy_instance(instance->instanceId,0); if(err != 0) { long_task_set_failed(taskId, task_progress_names[FAILED_SHUTDOWN]); sem_p(inst_sem); change_task_state(instance, FAILED_SHUTDOWN); copy_instances(); sem_v(inst_sem); //err=shutdown_or_destroy_instance(instance->instanceId,1); } else { long_task_set_success(taskId, task_progress_names[SUCCESS_SHUTDOWN]); sem_p(inst_sem); change_task_state(instance, SUCCESS_SHUTDOWN); copy_instances(); sem_v(inst_sem); } if(params->taskId) free(params->taskId); free(params); return NULL; }
//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials) { int ret = EUCA_OK; int credentials_prepared = 0; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread( get_corrid()!=NULL ? get_corrid()->correlation_id : NULL , tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); save_instance_struct(instance); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char *brname = NULL; if ((error = vnetStartNetwork(nc->vnetconfig, instance->ncnet.vlan, NULL, NULL, NULL, &brname)) != EUCA_OK) { LOGERROR("[%s] start network failed for instance, terminating it\n", instance->instanceId); EUCA_FREE(brname); goto failed_dest; } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); EUCA_FREE(brname); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); // TODO: factor what the following out of here and doAttachVolume() in handlers_default.c int have_remote_device = 0; char *xml = NULL; char *remoteDevStr = NULL; char scUrl[512]; char localDevReal[32], localDevTag[256], remoteDevReal[132]; char *tagBuf = localDevTag; ebs_volume_data *vol_data = NULL; ret = convert_dev_names(volume->localDev, localDevReal, tagBuf); if (ret) goto unroll; //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); get_service_url("storage", nc, scUrl); if (strlen(scUrl) == 0) { LOGERROR("[%s][%s] Failed to lookup enabled Storage Controller. Cannot attach volume %s\n", instance->instanceId, volume->volumeId, scUrl); have_remote_device = 0; goto unroll; } else { LOGTRACE("[%s][%s] Using SC URL: %s\n", instance->instanceId, volume->volumeId, scUrl); } //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); int rc = connect_ebs_volume(scUrl, volume->attachmentToken, nc->config_use_ws_sec, nc->config_sc_policy_file, nc->ip, nc->iqn, &remoteDevStr, &vol_data); if (rc) { LOGERROR("Error connecting ebs volume %s\n", volume->attachmentToken); have_remote_device = 0; ret = EUCA_ERROR; goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) { LOGERROR("[%s][%s] failed to connect to iscsi target\n", instance->instanceId, volume->volumeId); remoteDevReal[0] = '\0'; } else { LOGDEBUG("[%s][%s] attached iSCSI target of host device '%s'\n", instance->instanceId, volume->volumeId, remoteDevStr); snprintf(remoteDevReal, sizeof(remoteDevReal), "%s", remoteDevStr); have_remote_device = 1; } EUCA_FREE(remoteDevStr); // something went wrong above, abort if (!have_remote_device) { goto unroll; } // make sure there is a block device if (check_block(remoteDevReal)) { LOGERROR("[%s][%s] cannot verify that host device '%s' is available for hypervisor attach\n", instance->instanceId, volume->volumeId, remoteDevReal); goto unroll; } // generate XML for libvirt attachment request if (gen_volume_xml(volume->volumeId, instance, localDevReal, remoteDevReal) // creates vol-XXX.xml || gen_libvirt_volume_xml(volume->volumeId, instance)) { // creates vol-XXX-libvirt.xml via XSLT transform LOGERROR("[%s][%s] could not produce attach device xml\n", instance->instanceId, volume->volumeId); goto unroll; } // invoke hooks char path[EUCA_MAX_PATH]; char lpath[EUCA_MAX_PATH]; snprintf(path, sizeof(path), EUCALYPTUS_VOLUME_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX.xml snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX-libvirt.xml if (call_hooks(NC_EVENT_PRE_ATTACH, lpath)) { LOGERROR("[%s][%s] cancelled volume attachment via hooks\n", instance->instanceId, volume->volumeId); goto unroll; } // read in libvirt XML, which may have been modified by the hook above if ((xml = file2str(lpath)) == NULL) { LOGERROR("[%s][%s] failed to read volume XML from %s\n", instance->instanceId, volume->volumeId, lpath); goto unroll; } continue; unroll: ret = EUCA_ERROR; // TODO: unroll all volume attachments goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }