//! //! Save the instance structure data in the instance.checkpoint file under the instance's //! work blobstore path. //! //! @param[in] instance pointer to the instance to save //! //! @return EUCA_OK on success of the following error codes: //! \li EUCA_INVALID_ERROR: if any parameter does not meet the preconditions //! \li EUCA_PERMISSION_ERROR: if we fail to open/create the checkpoint file //! \li EUCA_IO_ERROR: if we fail to save the instance in the checkpoint file //! //! @pre The instance variable must not be NULL. //! //! @post On success, the checkpoint file is created and contains the instance information //! int save_instance_struct(const ncInstance * instance) { if (instance->state == TEARDOWN) { return EUCA_OK; // instance is without disk state => nowhere to write metadata } else { return gen_instance_xml(instance); } }
//! //! Loads an instance structure data from the instance.xml file under the instance's //! work blobstore path. //! //! @param[in] instanceId the instance identifier string (i-XXXXXXXX) //! //! @return A pointer to the instance structure if successful or otherwise NULL. //! //! @pre The instanceId parameter must not be NULL. //! //! @post On success, a newly allocated pointer to the instance is returned where the stateCode //! is set to NO_STATE. //! ncInstance *load_instance_struct(const char *instanceId) { DIR *insts_dir = NULL; char tmp_path[EUCA_MAX_PATH] = ""; char user_paths[EUCA_MAX_PATH] = ""; char checkpoint_path[EUCA_MAX_PATH] = ""; ncInstance *instance = NULL; struct dirent *dir_entry = NULL; struct stat mystat = { 0 }; // Allocate memory for our instance if ((instance = EUCA_ZALLOC(1, sizeof(ncInstance))) == NULL) { LOGERROR("out of memory (for instance struct)\n"); return (NULL); } // We know the instance indentifier euca_strncpy(instance->instanceId, instanceId, sizeof(instance->instanceId)); // we don't know userId, so we'll look for instanceId in every user's // directory (we're assuming that instanceIds are unique in the system) set_path(user_paths, sizeof(user_paths), NULL, NULL); if ((insts_dir = opendir(user_paths)) == NULL) { LOGERROR("failed to open %s\n", user_paths); goto free; } // Scan every path under the user path for one that conaints our instance while ((dir_entry = readdir(insts_dir)) != NULL) { snprintf(tmp_path, sizeof(tmp_path), "%s/%s/%s", user_paths, dir_entry->d_name, instance->instanceId); if (stat(tmp_path, &mystat) == 0) { // found it. Now save our user identifier euca_strncpy(instance->userId, dir_entry->d_name, sizeof(instance->userId)); break; } } // Done with the directory closedir(insts_dir); insts_dir = NULL; // Did we really find one? if (strlen(instance->userId) < 1) { LOGERROR("didn't find instance %s\n", instance->instanceId); goto free; } // set various instance-directory-relative paths in the instance struct set_instance_paths(instance); // Check if there is a binary checkpoint file, used by versions up to 3.3, // and load metadata from it (as part of a "warm" upgrade from 3.3.0 and 3.3.1). set_path(checkpoint_path, sizeof(checkpoint_path), instance, "instance.checkpoint"); set_path(instance->xmlFilePath, sizeof(instance->xmlFilePath), instance, INSTANCE_FILE_NAME); if (check_file(checkpoint_path) == 0) { ncInstance33 instance33; { // read in the checkpoint int fd = open(checkpoint_path, O_RDONLY); if (fd < 0) { LOGERROR("failed to load metadata for %s from %s: %s\n", instance->instanceId, checkpoint_path, strerror(errno)); goto free; } size_t meta_size = (size_t) sizeof(ncInstance33); assert(meta_size <= SSIZE_MAX); // beyond that read() behavior is unspecified ssize_t bytes_read = read(fd, &instance33, meta_size); close(fd); if (bytes_read < meta_size) { LOGERROR("metadata checkpoint for %s (%ld bytes) in %s is too small (< %ld)\n", instance->instanceId, bytes_read, checkpoint_path, meta_size); goto free; } } // Convert the 3.3 struct into the current struct. // Currently, a copy is sufficient, but if ncInstance // ever changes so that its beginning differs from ncInstanc33, // we may have to write something more elaborate or to break // the ability to upgrade from 3.3. We attempt to detect such a // change with the following if-statement, which compares offsets // in the structs of the last member in the 3.3 version. if (((unsigned long)&(instance->last_stat) - (unsigned long)instance) != ((unsigned long)&(instance33.last_stat) - (unsigned long)&instance33)) { LOGERROR("BUG: upgrade from v3.3 is not possible due to changes to instance struct\n"); goto free; } memcpy(instance, &instance33, sizeof(ncInstance33)); LOGINFO("[%s] upgraded instance checkpoint from v3.3\n", instance->instanceId); } else { // no binary checkpoint, so we expect an XML-formatted checkpoint if (read_instance_xml(instance->xmlFilePath, instance) != EUCA_OK) { LOGERROR("failed to read instance XML\n"); goto free; } } // Reset some fields for safety since they would now be wrong instance->stateCode = NO_STATE; instance->params.root = NULL; instance->params.kernel = NULL; instance->params.ramdisk = NULL; instance->params.swap = NULL; instance->params.ephemeral0 = NULL; // fix up the pointers vbr_parse(&(instance->params), NULL); // perform any upgrade-related manipulations to bring the struct up to date // save the struct back to disk after the upgrade routine had a chance to modify it if (gen_instance_xml(instance) != EUCA_OK) { LOGERROR("failed to create instance XML in %s\n", instance->xmlFilePath); goto free; } // remove the binary checkpoint because it is no longer needed and not used past 3.3 unlink(checkpoint_path); return (instance); free: EUCA_FREE(instance); return (NULL); }
//! //! Save the instance structure data in the instance.checkpoint file under the instance's //! work blobstore path. //! //! @param[in] instance pointer to the instance to save //! //! @return EUCA_OK on success of the following error codes: //! \li EUCA_INVALID_ERROR: if any parameter does not meet the preconditions //! \li EUCA_PERMISSION_ERROR: if we fail to open/create the checkpoint file //! \li EUCA_IO_ERROR: if we fail to save the instance in the checkpoint file //! //! @pre The instance variable must not be NULL. //! //! @post On success, the checkpoint file is created and contains the instance information //! int save_instance_struct(const ncInstance * instance) { return gen_instance_xml(instance); }
//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials, char ** resourceLocations, int resourceLocationsLen) { int ret = EUCA_OK; int credentials_prepared = 0; char *libvirt_xml_modified = NULL; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread(get_corrid() != NULL ? get_corrid()->correlation_id : NULL, tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; instance->migrationTime = time(NULL); //In preparing state, so set migrationTime. euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); update_resource_locations(&(instance->params), resourceLocations, resourceLocationsLen); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; //Fix for EUCA-10433, need instance struct in global_instances prior to doing volume ops //The monitor thread will now pick up the instance, so the migrationTime must be set sem_p(inst_sem); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char brname[IF_NAME_LEN] = ""; if (!strcmp(nc->pEucaNet->sMode, NETMODE_MANAGED)) { snprintf(brname, IF_NAME_LEN, "%s", instance->groupIds[0]); } else { snprintf(brname, IF_NAME_LEN, "%s", nc->pEucaNet->sBridgeDevice); } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); ebs_volume_data *vol_data = NULL; char *libvirt_xml = NULL; char serial[128]; char bus[16]; set_serial_and_bus(volume->volumeId, volume->devName, serial, sizeof(serial), bus, sizeof(bus)); if ((ret = connect_ebs(volume->devName, serial, bus, nc, instance->instanceId, volume->volumeId, volume->attachmentToken, &libvirt_xml, &vol_data)) != EUCA_OK) { goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); // save volume info into vol-XXX-libvirt.xml for future detach if (create_vol_xml(instance->instanceId, volume->volumeId, libvirt_xml, &libvirt_xml_modified) != EUCA_OK) { goto unroll; } continue; unroll: ret = EUCA_ERROR; // @TODO: unroll all previous ones // for (int uv = v - 1; uv >= 0; uv--) { // disconnect_ebs(nc, instance->instanceId, volume->volumeId, ) // } goto failed_dest; } // build any secondary network interface xml files for (int w=0; w < EUCA_MAX_NICS; w++) { if (strlen(instance->secNetCfgs[w].interfaceId) == 0) continue; gen_libvirt_nic_xml(instance->instancePath, instance->secNetCfgs[w].interfaceId); } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->migrationTime = 0; //Reset the timer, to ensure monitoring thread handles this properly. This is required when setting BOOTING state instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }
//! //! Handles the instance migration request. //! //! @param[in] nc a pointer to the node controller (NC) state //! @param[in] pMeta a pointer to the node controller (NC) metadata structure //! @param[in] instances metadata for the instance to migrate to destination //! @param[in] instancesLen number of instances in the instance list //! @param[in] action IP of the destination Node Controller //! @param[in] credentials credentials that enable the migration //! //! @return EUCA_OK on success or EUCA_*ERROR on failure //! //! @pre //! //! @post static int doMigrateInstances(struct nc_state_t *nc, ncMetadata * pMeta, ncInstance ** instances, int instancesLen, char *action, char *credentials) { int ret = EUCA_OK; int credentials_prepared = 0; if (instancesLen <= 0) { LOGERROR("called with invalid instancesLen (%d)\n", instancesLen); pMeta->replyString = strdup("internal error (invalid instancesLen)"); return (EUCA_INVALID_ERROR); } LOGDEBUG("verifying %d instance[s] for migration...\n", instancesLen); for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { LOGDEBUG("verifying instance # %d...\n", inst_idx); if (instances[inst_idx]) { ncInstance *instance_idx = instances[inst_idx]; LOGDEBUG("[%s] proposed migration action '%s' (%s > %s) [creds=%s]\n", SP(instance_idx->instanceId), SP(action), SP(instance_idx->migration_src), SP(instance_idx->migration_dst), (instance_idx->migration_credentials == NULL) ? "UNSET" : "present"); } else { pMeta->replyString = strdup("internal error (instance count mismatch)"); LOGERROR("Mismatch between migration instance count (%d) and length of instance list\n", instancesLen); return (EUCA_ERROR); } } // TO-DO: Optimize the location of this loop, placing it inside various conditionals below? for (int inst_idx = 0; inst_idx < instancesLen; inst_idx++) { ncInstance *instance_req = instances[inst_idx]; char *sourceNodeName = instance_req->migration_src; char *destNodeName = instance_req->migration_dst; LOGDEBUG("[%s] processing instance # %d (%s > %s)\n", instance_req->instanceId, inst_idx, instance_req->migration_src, instance_req->migration_dst); // this is a call to the source of migration if (!strcmp(pMeta->nodeName, sourceNodeName)) { // locate the instance structure ncInstance *instance; sem_p(inst_sem); { instance = find_instance(&global_instances, instance_req->instanceId); } sem_v(inst_sem); if (instance == NULL) { LOGERROR("[%s] cannot find instance\n", instance_req->instanceId); pMeta->replyString = strdup("failed to locate instance to migrate"); return (EUCA_NOT_FOUND_ERROR); } if (strcmp(action, "prepare") == 0) { sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); instance->migrationTime = time(NULL); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // Establish migration-credential keys if this is the first instance preparation for this host. LOGINFO("[%s] migration source preparing %s > %s [creds=%s]\n", SP(instance->instanceId), SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); if (!credentials_prepared) { if (generate_migration_keys(sourceNodeName, credentials, TRUE, instance) != EUCA_OK) { pMeta->replyString = strdup("internal error (migration credentials generation failed)"); return (EUCA_SYSTEM_ERROR); } else { credentials_prepared++; } } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; save_instance_struct(instance); copy_instances(); sem_v(inst_sem); } else if (strcmp(action, "commit") == 0) { sem_p(inst_sem); if (instance->migration_state == MIGRATION_IN_PROGRESS) { LOGWARN("[%s] duplicate request to migration source to initiate %s > %s (already migrating)\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_v(inst_sem); return (EUCA_DUPLICATE_ERROR); } else if (instance->migration_state != MIGRATION_READY) { LOGERROR("[%s] request to commit migration %s > %s when source migration_state='%s' (not 'ready')\n", instance->instanceId, SP(sourceNodeName), SP(destNodeName), migration_state_names[instance->migration_state]); sem_v(inst_sem); return (EUCA_UNSUPPORTED_ERROR); } instance->migration_state = MIGRATION_IN_PROGRESS; outgoing_migrations_in_progress++; LOGINFO("[%s] migration source initiating %s > %s [creds=%s] (1 of %d active outgoing migrations)\n", instance->instanceId, instance->migration_src, instance->migration_dst, (instance->migration_credentials == NULL) ? "UNSET" : "present", outgoing_migrations_in_progress); save_instance_struct(instance); copy_instances(); sem_v(inst_sem); // since migration may take a while, we do them in a thread pthread_t tcb = { 0 }; if (pthread_create(&tcb, NULL, migrating_thread, (void *)instance)) { LOGERROR("[%s] failed to spawn a migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } set_corrid_pthread( get_corrid()!=NULL ? get_corrid()->correlation_id : NULL , tcb); if (pthread_detach(tcb)) { LOGERROR("[%s] failed to detach the migration thread\n", instance->instanceId); return (EUCA_THREAD_ERROR); } } else if (strcmp(action, "rollback") == 0) { if ((instance->migration_state == MIGRATION_READY) || (instance->migration_state == MIGRATION_PREPARING)) { LOGINFO("[%s] rolling back migration (%s > %s) on source\n", instance->instanceId, instance->migration_src, instance->migration_dst); sem_p(inst_sem); migration_rollback(instance); sem_v(inst_sem); } else { LOGINFO("[%s] ignoring request to roll back migration on source with instance in state %s(%s) -- duplicate rollback request?\n", instance->instanceId, instance->stateName, migration_state_names[instance->migration_state]); } } else { LOGERROR("[%s] action '%s' is not valid\n", instance->instanceId, action); return (EUCA_INVALID_ERROR); } } else if (!strcmp(pMeta->nodeName, destNodeName)) { // this is a migrate request to destination if (!strcmp(action, "commit")) { LOGERROR("[%s] action '%s' for migration (%s > %s) is not valid on destination node\n", instance_req->instanceId, action, SP(sourceNodeName), SP(destNodeName)); return (EUCA_UNSUPPORTED_ERROR); } else if (!strcmp(action, "rollback")) { LOGINFO("[%s] rolling back migration (%s > %s) on destination\n", instance_req->instanceId, SP(sourceNodeName), SP(destNodeName)); sem_p(inst_sem); { ncInstance *instance = find_instance(&global_instances, instance_req->instanceId); if (instance != NULL) { LOGDEBUG("[%s] marked for cleanup\n", instance->instanceId); change_state(instance, SHUTOFF); instance->migration_state = MIGRATION_CLEANING; save_instance_struct(instance); } } sem_v(inst_sem); return EUCA_OK; } else if (strcmp(action, "prepare") != 0) { LOGERROR("[%s] action '%s' is not valid or not implemented\n", instance_req->instanceId, action); return (EUCA_INVALID_ERROR); } // Everything from here on is specific to "prepare" on a destination. // allocate a new instance struct ncInstance *instance = clone_instance(instance_req); if (instance == NULL) { LOGERROR("[%s] could not allocate instance struct\n", instance_req->instanceId); goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_PREPARING; euca_strncpy(instance->migration_src, sourceNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_dst, destNodeName, HOSTNAME_SIZE); euca_strncpy(instance->migration_credentials, credentials, CREDENTIAL_SIZE); save_instance_struct(instance); sem_v(inst_sem); // Establish migration-credential keys. LOGINFO("[%s] migration destination preparing %s > %s [creds=%s]\n", instance->instanceId, SP(instance->migration_src), SP(instance->migration_dst), (instance->migration_credentials == NULL) ? "UNSET" : "present"); // First, call config-file modification script to authorize source node. LOGDEBUG("[%s] authorizing migration source node %s\n", instance->instanceId, instance->migration_src); if (authorize_migration_keys("-a", instance->migration_src, instance->migration_credentials, instance, TRUE) != EUCA_OK) { goto failed_dest; } // Then, generate keys and restart libvirtd. if (generate_migration_keys(instance->migration_dst, instance->migration_credentials, TRUE, instance) != EUCA_OK) { goto failed_dest; } int error; if (vbr_parse(&(instance->params), pMeta) != EUCA_OK) { goto failed_dest; } // set up networking char *brname = NULL; if ((error = vnetStartNetwork(nc->vnetconfig, instance->ncnet.vlan, NULL, NULL, NULL, &brname)) != EUCA_OK) { LOGERROR("[%s] start network failed for instance, terminating it\n", instance->instanceId); EUCA_FREE(brname); goto failed_dest; } euca_strncpy(instance->params.guestNicDeviceName, brname, sizeof(instance->params.guestNicDeviceName)); EUCA_FREE(brname); // TODO: move stuff in startup_thread() into a function? set_instance_params(instance); if ((error = create_instance_backing(instance, TRUE)) // create files that back the disks || (error = gen_instance_xml(instance)) // create euca-specific instance XML file || (error = gen_libvirt_instance_xml(instance))) { // transform euca-specific XML into libvirt XML LOGERROR("[%s] failed to prepare images for migrating instance (error=%d)\n", instance->instanceId, error); goto failed_dest; } // attach any volumes for (int v = 0; v < EUCA_MAX_VOLUMES; v++) { ncVolume *volume = &instance->volumes[v]; if (strcmp(volume->stateName, VOL_STATE_ATTACHED) && strcmp(volume->stateName, VOL_STATE_ATTACHING)) continue; // skip the entry unless attached or attaching LOGDEBUG("[%s] volumes [%d] = '%s'\n", instance->instanceId, v, volume->stateName); // TODO: factor what the following out of here and doAttachVolume() in handlers_default.c int have_remote_device = 0; char *xml = NULL; char *remoteDevStr = NULL; char scUrl[512]; char localDevReal[32], localDevTag[256], remoteDevReal[132]; char *tagBuf = localDevTag; ebs_volume_data *vol_data = NULL; ret = convert_dev_names(volume->localDev, localDevReal, tagBuf); if (ret) goto unroll; //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); get_service_url("storage", nc, scUrl); if (strlen(scUrl) == 0) { LOGERROR("[%s][%s] Failed to lookup enabled Storage Controller. Cannot attach volume %s\n", instance->instanceId, volume->volumeId, scUrl); have_remote_device = 0; goto unroll; } else { LOGTRACE("[%s][%s] Using SC URL: %s\n", instance->instanceId, volume->volumeId, scUrl); } //Do the ebs connect. LOGTRACE("[%s][%s] Connecting EBS volume to local host\n", instance->instanceId, volume->volumeId); int rc = connect_ebs_volume(scUrl, volume->attachmentToken, nc->config_use_ws_sec, nc->config_sc_policy_file, nc->ip, nc->iqn, &remoteDevStr, &vol_data); if (rc) { LOGERROR("Error connecting ebs volume %s\n", volume->attachmentToken); have_remote_device = 0; ret = EUCA_ERROR; goto unroll; } // update the volume struct with connection string obtained from SC euca_strncpy(volume->connectionString, vol_data->connect_string, sizeof(volume->connectionString)); if (!remoteDevStr || !strstr(remoteDevStr, "/dev")) { LOGERROR("[%s][%s] failed to connect to iscsi target\n", instance->instanceId, volume->volumeId); remoteDevReal[0] = '\0'; } else { LOGDEBUG("[%s][%s] attached iSCSI target of host device '%s'\n", instance->instanceId, volume->volumeId, remoteDevStr); snprintf(remoteDevReal, sizeof(remoteDevReal), "%s", remoteDevStr); have_remote_device = 1; } EUCA_FREE(remoteDevStr); // something went wrong above, abort if (!have_remote_device) { goto unroll; } // make sure there is a block device if (check_block(remoteDevReal)) { LOGERROR("[%s][%s] cannot verify that host device '%s' is available for hypervisor attach\n", instance->instanceId, volume->volumeId, remoteDevReal); goto unroll; } // generate XML for libvirt attachment request if (gen_volume_xml(volume->volumeId, instance, localDevReal, remoteDevReal) // creates vol-XXX.xml || gen_libvirt_volume_xml(volume->volumeId, instance)) { // creates vol-XXX-libvirt.xml via XSLT transform LOGERROR("[%s][%s] could not produce attach device xml\n", instance->instanceId, volume->volumeId); goto unroll; } // invoke hooks char path[EUCA_MAX_PATH]; char lpath[EUCA_MAX_PATH]; snprintf(path, sizeof(path), EUCALYPTUS_VOLUME_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX.xml snprintf(lpath, sizeof(lpath), EUCALYPTUS_VOLUME_LIBVIRT_XML_PATH_FORMAT, instance->instancePath, volume->volumeId); // vol-XXX-libvirt.xml if (call_hooks(NC_EVENT_PRE_ATTACH, lpath)) { LOGERROR("[%s][%s] cancelled volume attachment via hooks\n", instance->instanceId, volume->volumeId); goto unroll; } // read in libvirt XML, which may have been modified by the hook above if ((xml = file2str(lpath)) == NULL) { LOGERROR("[%s][%s] failed to read volume XML from %s\n", instance->instanceId, volume->volumeId, lpath); goto unroll; } continue; unroll: ret = EUCA_ERROR; // TODO: unroll all volume attachments goto failed_dest; } sem_p(inst_sem); instance->migration_state = MIGRATION_READY; instance->bootTime = time(NULL); // otherwise nc_state.booting_cleanup_threshold will kick in change_state(instance, BOOTING); // not STAGING, since in that mode we don't poll hypervisor for info LOGINFO("[%s] migration destination ready %s > %s\n", instance->instanceId, instance->migration_src, instance->migration_dst); save_instance_struct(instance); error = add_instance(&global_instances, instance); copy_instances(); sem_v(inst_sem); if (error) { if (error == EUCA_DUPLICATE_ERROR) { LOGINFO("[%s] instance struct already exists (from previous migration?), deleting and re-adding...\n", instance->instanceId); error = remove_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (remove) instance struct, failing...\n", instance->instanceId); goto failed_dest; } error = add_instance(&global_instances, instance); if (error) { LOGERROR("[%s] could not replace (add) instance struct, failing...\n", instance->instanceId); goto failed_dest; } } else { LOGERROR("[%s] could not add instance struct, failing...\n", instance->instanceId); goto failed_dest; } } continue; failed_dest: sem_p(inst_sem); // Just making sure... if (instance != NULL) { LOGERROR("[%s] setting instance to Teardown(cleaning) after destination failure to prepare for migration\n", instance->instanceId); // Set state to Teardown(cleaning) so source won't wait until timeout to roll back. instance->migration_state = MIGRATION_CLEANING; instance->terminationTime = time(NULL); change_state(instance, TEARDOWN); save_instance_struct(instance); add_instance(&global_instances, instance); // OK if this fails--that should mean it's already been added. copy_instances(); } // If no remaining incoming or pending migrations, deauthorize all clients. // TO-DO: Consolidate with similar sequence in handlers.c into a utility function? if (!incoming_migrations_in_progress) { int incoming_migrations_pending = 0; LOGINFO("[%s] no remaining active incoming migrations -- checking to see if there are any pending migrations\n", instance->instanceId); bunchOfInstances *head = NULL; for (head = global_instances; head; head = head->next) { if ((head->instance->migration_state == MIGRATION_PREPARING) || (head->instance->migration_state == MIGRATION_READY)) { LOGINFO("[%s] is pending migration, state='%s', deferring deauthorization of migration keys\n", head->instance->instanceId, migration_state_names[head->instance->migration_state]); incoming_migrations_pending++; } } // TO-DO: Add belt and suspenders? if (!incoming_migrations_pending) { LOGINFO("[%s] no remaining incoming or pending migrations -- deauthorizing all migration client keys\n", instance->instanceId); authorize_migration_keys("-D -r", NULL, NULL, NULL, FALSE); } } sem_v(inst_sem); // Set to generic EUCA_ERROR unless already set to a more-specific error. if (ret == EUCA_OK) { ret = EUCA_ERROR; } } else { LOGERROR("unexpected migration request (node %s is neither source nor destination)\n", pMeta->nodeName); ret = EUCA_ERROR; } } return ret; }