/*
 * Submits a work unit for local execution.
 *
 * The work unit must be in the DC_WU_READY state.  The configured
 * executable is hard-linked into the work unit's working directory under
 * wu->client_name, then a child process is forked which changes into the
 * working directory, appends its stdout/stderr to STDOUT_LABEL/STDERR_LABEL
 * and executes the client with wu->argv.
 *
 * On success the child's pid is stored in wu->pid, the state becomes
 * DC_WU_RUNNING and DC_OK is returned.  DC_ERR_BADPARAM is returned when the
 * state is wrong, the link cannot be created or fork() fails.
 */
int DC_submitWU(DC_Workunit *wu)
{
	pid_t pid;
	char *old_path, *new_path;
	int link_failed;

	if (wu->state != DC_WU_READY)
	{
		DC_log(LOG_ERR, "Only WUs in READY state can be submitted");
		return DC_ERR_BADPARAM;
	}

	/* copy the exec into the workdir */
	old_path = g_strdup_printf("%s", _DC_wu_cfg(wu, cfg_executable));
	new_path = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR,
		wu->client_name);
	link_failed = link(old_path, new_path);
	if (link_failed)
		DC_log(LOG_ERR, "Failed to link %s to %s: %s", old_path,
			new_path, strerror(errno));
	/* The paths are only needed for link(); release them on every path. */
	g_free(old_path);
	g_free(new_path);
	if (link_failed)
		return DC_ERR_BADPARAM;

	pid = fork();
	if (pid < 0)
	{
		DC_log(LOG_ERR, "Cannot fork!\nerrno=%d %s\n",
			errno, strerror(errno));
		return DC_ERR_BADPARAM;
	}

	if (pid == 0) /* client process */
	{
		/* change into working directory of the WU */
		if (chdir(wu->workdir))
		{
			DC_log(LOG_ERR, "Cannot cd into %s\nerrno=%d %s\n",
				wu->workdir, errno, strerror(errno));
			/* Never return into the caller from the child: that
			 * would leave a second copy of the master running. */
			exit(1);
		}

		/* hook up stdout and stderr to specially-named files */
		if (!freopen(STDOUT_LABEL, "a", stdout))
			DC_log(LOG_WARNING, "Cannot redirect stdout to %s: %s",
				STDOUT_LABEL, strerror(errno));
		if (!freopen(STDERR_LABEL, "a", stderr))
			DC_log(LOG_WARNING, "Cannot redirect stderr to %s: %s",
				STDERR_LABEL, strerror(errno));

		/* execute the client */
		DC_log(LOG_INFO, "Work unit : %s executes: %s", wu->name,
			wu->client_name);
		execv(wu->client_name, wu->argv);
		/* execv() only returns on failure */
		DC_log(LOG_ERR, "Cannot execute. Errno=%d %s\n",
			errno, strerror(errno));
		exit(1);
	}

	/* master process */
	wu->pid = pid;
	wu->state = DC_WU_RUNNING;
	return DC_OK;
}
/*
 * Sends a message to a running work unit.
 *
 * The message is created via _DC_create_message() in the master message box
 * (cfg_master_message_box) located under the work unit's working directory.
 * The work unit handle is not validated here.
 *
 * Returns whatever _DC_create_message() returns.
 */
int DC_sendWUMessage(DC_Workunit *wu, const char *message)
{
	GString *box_path;
	int result;

	DC_log(LOG_DEBUG, "DC_sendWUMessage(%p-\"%s\", %s)",
		wu, wu->name, message);

	/* <workdir>/<master message box> */
	box_path = g_string_new(wu->workdir);
	g_string_append(box_path, "/");
	g_string_append(box_path, _DC_wu_cfg(wu, cfg_master_message_box));

	result = _DC_create_message(box_path->str,
		(char *)_DCAPI_MSG_MESSAGE, message, NULL);

	g_string_free(box_path, TRUE);
	return result;
}
/*
 * Resumes computation of a previously suspended work unit.
 *
 * Drops a RESUME command into the work unit's management box, restarts the
 * Condor job and then polls once per second until at least one Condor event
 * has been recorded for the work unit, after which the state is set to
 * DC_WU_RUNNING.
 *
 * Returns DC_ERR_UNKNOWN_WU if the handle fails _DC_wu_check(),
 * DC_ERR_INTERNAL if the work unit is not suspended, otherwise the result of
 * _DC_start_condor_job() (0 on success).
 */
int DC_resumeWU(DC_Workunit *wu)
{
	GString *box_path;
	char *job_id;
	int status;

	if (!_DC_wu_check(wu))
		return DC_ERR_UNKNOWN_WU;

	DC_log(LOG_DEBUG, "DC_resumeWU(%p-\"%s\")", wu, wu->data.name);

	if (wu->data.state != DC_WU_SUSPENDED)
	{
		DC_log(LOG_NOTICE, "Can not resume a non-suspended wu");
		return DC_ERR_INTERNAL;
	}

	/* <workdir>/<management box> */
	box_path = g_string_new(wu->data.workdir);
	g_string_append(box_path, "/");
	g_string_append(box_path, _DC_wu_cfg(wu, cfg_management_box));
	_DC_create_message(box_path->str, _DCAPI_MSG_COMMAND,
		_DCAPI_CMD_RESUME, NULL);
	g_string_free(box_path, TRUE);

	status = _DC_start_condor_job(wu);
	if (status != 0)
		return status;

	/* Wait until the first Condor event of the new job shows up. */
	for (_DC_wu_update_condor_events(wu);
	     wu->condor_events->len == 0;
	     _DC_wu_update_condor_events(wu))
	{
		sleep(1);
	}

	DC_log(LOG_DEBUG, "DC_ResumeWU...");
	_DC_wu_set_state(wu, DC_WU_RUNNING);

	job_id = DC_getWUId(wu);
	DC_log(LOG_INFO, "Condor id of wu's job: %s", job_id);
	g_free(job_id);

	return status;
}
/*
 * Runs "condor_submit <submit file>" from inside the work unit's working
 * directory and restores the previous working directory afterwards.
 *
 * Returns the value of system() for the submit command.  If the working
 * directory cannot be entered the command is not run and -1 is returned,
 * so callers testing for 0 see a failure.
 */
static int _DC_start_condor_job(DC_Workunit *wu)
{
	int ret;
	GString *cmd;
	char *prev_dir, *run_dir;

	/* getcwd(NULL, 0) returns malloc()ed memory (or NULL on failure). */
	prev_dir = getcwd(NULL, 0);

	if (chdir(wu->data.workdir) != 0)
	{
		DC_log(LOG_ERR, "Cannot cd into %s: %s",
			wu->data.workdir, strerror(errno));
		free(prev_dir);
		return -1;
	}
	run_dir = getcwd(NULL, 0);

	cmd = g_string_new("condor_submit");
	g_string_append(cmd, " ");
	g_string_append(cmd, _DC_wu_cfg(wu, cfg_submit_file));

	/* Fall back to the configured path if the resolved one is
	 * unavailable, so that NULL is never passed to "%s". */
	DC_log(LOG_DEBUG, "Calling \"%s\" in %s...", cmd->str,
		run_dir ? run_dir : wu->data.workdir);
	ret = system(cmd->str);
	DC_log(LOG_DEBUG, "Returned %d", ret);

	if (prev_dir == NULL)
		DC_log(LOG_WARNING,
			"Previous working directory is unknown, staying in %s",
			wu->data.workdir);
	else if (chdir(prev_dir) != 0)
		DC_log(LOG_WARNING, "Cannot cd back into %s: %s",
			prev_dir, strerror(errno));

	/* malloc()-owned buffers are released with free(), not g_free() */
	free(prev_dir);
	free(run_dir);
	g_string_free(cmd, TRUE);
	return ret;
}
/*
 * Temporarily suspends the execution of a work unit.
 *
 * Drops a SUSPEND command into the work unit's management box
 * (cfg_management_box under the working directory).  The work unit's state
 * is not changed by this function.
 *
 * Returns DC_ERR_UNKNOWN_WU if the handle fails _DC_wu_check(),
 * DC_ERR_INTERNAL if the work unit is not running, DC_OK otherwise.
 */
int DC_suspendWU(DC_Workunit *wu)
{
	GString *box_path;

	if (!_DC_wu_check(wu))
		return DC_ERR_UNKNOWN_WU;

	DC_log(LOG_DEBUG, "DC_suspendWU(%p-\"%s\")", wu, wu->data.name);

	if (wu->data.state != DC_WU_RUNNING)
	{
		DC_log(LOG_NOTICE, "Can not suspend a non-running wu");
		return DC_ERR_INTERNAL;
	}

	/* <workdir>/<management box> */
	box_path = g_string_new(wu->data.workdir);
	g_string_append(box_path, "/");
	g_string_append(box_path, _DC_wu_cfg(wu, cfg_management_box));

	_DC_create_message(box_path->str, _DCAPI_MSG_COMMAND,
		_DCAPI_CMD_SUSPEND, NULL);

	g_string_free(box_path, TRUE);
	return DC_OK;
}
void DC_destroyWU(DC_Workunit *wu) { char *path; int leave= strtol(_DC_wu_cfg(wu, cfg_leave_files), 0, 0); if (!wu) return; if (_DC_wu_table) g_hash_table_remove(_DC_wu_table, wu->name); switch (wu->state) { case DC_WU_RUNNING: /* XXX Abort the work unit */ break; default: break; } while (wu->input_files && !leave) { DC_PhysicalFile *file = (DC_PhysicalFile *)wu->input_files->data; unlink(file->path); wu->input_files = g_list_delete_link(wu->input_files, wu->input_files); _DC_destroyPhysicalFile(file); } while (wu->output_files && !leave) { char *name = (char *)wu->output_files->data; char *file = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR, name); unlink(file); g_free(file); g_free(wu->output_files->data); wu->output_files = g_list_delete_link(wu->output_files, wu->output_files); } /* checkpoint file */ path = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR, CKPT_LABEL); if (!leave) unlink(path); g_free(path); /* standard output file */ path = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR, STDOUT_LABEL); if (!leave) unlink(path); g_free(path); /* standard error file */ path = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR, STDERR_LABEL); if (!leave) unlink(path); g_free(path); if (wu->client_name && !leave) { char *path = g_strdup_printf("%s%c%s", wu->workdir, G_DIR_SEPARATOR, wu->client_name); unlink(path); g_free(path); g_free(wu->client_name); /*g_free(wu->client_path);*/ } if (wu->workdir && !leave) { const char *name; GDir *dir; int ret; dir = g_dir_open(wu->workdir, 0, NULL); /* The work directory should not contain any extra files, but * just in case */ while (dir && (name = g_dir_read_name(dir))) { GString *str = g_string_new(wu->workdir); g_string_append_c(str, G_DIR_SEPARATOR); g_string_append(str, name); DC_log(LOG_INFO, "Removing unknown file %s", str->str); unlink(str->str); g_string_free(str, TRUE); } if (dir) g_dir_close(dir); ret = rmdir(wu->workdir); if (ret) DC_log(LOG_WARNING, "Failed to remove WU working " 
"directory %s: %s", wu->workdir, strerror(errno)); g_free(wu->workdir); } g_free(wu->uuid_str); g_strfreev(wu->argv); g_free(wu->tag); g_free(wu->name); g_free(wu); }