/*
 * enumeration_compare() -- order two enumerations by their textual form
 *
 * Both enumerations are rendered into temporary dstrings with
 * lWriteWhatToDString() and the two renderings are compared with strcmp().
 *
 * INPUTS
 *    const lEnumeration *what1 - left hand enumeration
 *    const lEnumeration *what2 - right hand enumeration
 *
 * RESULT
 *    int - <0, 0 or >0 with the same meaning as for strcmp()
 *
 * NOTES
 *    sge_dstring_get_string() may hand back NULL (presumably when nothing
 *    was written into the dstring); such a rendering is compared as the
 *    empty string instead of being passed to strcmp() directly.
 */
static int enumeration_compare(const lEnumeration *what1, const lEnumeration *what2)
{
   int ret;
   const char *text1;
   const char *text2;
   dstring str1 = DSTRING_INIT;
   dstring str2 = DSTRING_INIT;

   lWriteWhatToDString(what1, &str1);
   lWriteWhatToDString(what2, &str2);

   text1 = sge_dstring_get_string(&str1);
   text2 = sge_dstring_get_string(&str2);

   /* strcmp() on a NULL pointer is undefined behaviour */
   ret = strcmp(text1 != NULL ? text1 : "", text2 != NULL ? text2 : "");

   sge_dstring_free(&str1);
   sge_dstring_free(&str2);

   return ret;
}
/****** Interactive/qrsh/writeExitCode() *************************************** * * NAME * writeExitCode() -- write exit code of child process to file * * SYNOPSIS * static int writeExitCode(int myExitCode, int programExitCode) * * FUNCTION * If myExitCode != EXIT_SUCCESS, that means, if an error occured in * qrsh_starter, write this exit code to file, * else write the exit code of the child process (programExitCode). * The exit code is written to a file "qrsh_exit_code" in the * directory $TMPDIR. * * INPUTS * myExitCode - status of qrsh_starter * programExitCode - status of the child process * * RESULT * EXIT_SUCCESS, if all actions could be performed, * EXIT_FAILURE, if one of the following errors occured: * - the environment variable TMPDIR cannot be read * - the file $TMPDIR/qrsh_exit_code cannot be written * **************************************************************************** */ static int writeExitCode(int myExitCode, int programExitCode) { int exitCode; char exitCode_str[20]; char *tmpdir = NULL; char *taskid = NULL; int file; char fileName[SGE_PATH_MAX]; if(myExitCode != EXIT_SUCCESS) { exitCode = MAKEEXITSTATUS(myExitCode); } else { exitCode = programExitCode; } if((tmpdir = search_conf_val("qrsh_tmpdir")) == NULL) { qrsh_error(MSG_CONF_NOCONFVALUE_S, "qrsh_tmpdir"); return EXIT_FAILURE; } taskid = get_conf_val("pe_task_id"); if(taskid != NULL) { snprintf(fileName, SGE_PATH_MAX, "%s/qrsh_exit_code.%s", tmpdir, taskid); } else { snprintf(fileName, SGE_PATH_MAX, "%s/qrsh_exit_code", tmpdir); } if((file = SGE_OPEN3(fileName, O_WRONLY | O_APPEND | O_CREAT, 00744)) == -1) { dstring ds = DSTRING_INIT; qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, fileName, sge_strerror(errno, &ds)); sge_dstring_free(&ds); return EXIT_FAILURE; } snprintf(exitCode_str, 20, "%d", exitCode); if (write(file, exitCode_str, strlen(exitCode_str)) != strlen(exitCode_str)) { dstring ds = DSTRING_INIT; qrsh_error(MSG_FILE_CANNOT_WRITE_SS, fileName, sge_strerror(errno, &ds)); 
sge_dstring_free(&ds); } SGE_CLOSE(file); return EXIT_SUCCESS; }
void test_linux_plpa() { dstring error = DSTRING_INIT; char* topology = NULL; int length = 0; int s, c; struct utsname name; if (uname(&name) != -1) { printf("Your Linux kernel version is: %s\n", name.release); } if (!_has_core_binding(&error)) { printf("Your Linux kernel seems not to offer core binding capabilities for PLPA!\nReason: %s\n", sge_dstring_get_string(&error)); } if (!_has_topology_information()) { printf("No topology information could by retrieved by PLPA!\n"); } else { /* get amount of sockets */ printf("Amount of sockets:\t\t%d\n", get_amount_of_sockets()); /* get amount of cores */ printf("Amount of cores:\t\t%d\n", get_total_amount_of_cores()); /* get topology */ get_topology_linux(&topology, &length); printf("Topology:\t\t\t%s\n", topology); sge_free(&topology); printf("Mapping of logical socket and core numbers to internal\n"); /* for each socket,core pair get the internal processor number */ /* try multi-mapping */ for (s = 0; s < get_amount_of_sockets(); s++) { for (c = 0; c < get_amount_of_cores(s); c++) { int* proc_ids = NULL; int amount = 0; if (get_processor_ids_linux(s, c, &proc_ids, &amount)) { int i = 0; printf("Internal processor ids for socket %5d core %5d: ", s , c); for (i = 0; i < amount; i++) { printf(" %5d", proc_ids[i]); } printf("\n"); sge_free(&proc_ids); } else { printf("Couldn't get processor ids for socket %5d core %5d\n", s, c); } } } } sge_dstring_free(&error); return; }
/****** uti/monitor/sge_monitor_free() *****************************************
*  NAME
*     sge_monitor_free() -- release everything held by a monitoring structure
*
*  SYNOPSIS
*     void sge_monitor_free(monitoring_t *monitor)
*
*  FUNCTION
*     Frees the two output line buffers and the extension data, clears the
*     entry this monitor occupies in the Output[] table (if it has one) and
*     resets the remaining members of the structure. On the platforms listed
*     in the #if below an open mallinfo shared library handle is closed.
*
*  INPUTS
*     monitoring_t *monitor - monitoring structure to be torn down
*
*  NOTES
*     MT-NOTE: the Output[] entry is modified while holding its Output_Mutex,
*     MT-NOTE: the mallinfo handle is modified while holding global_mutex
*
*******************************************************************************/
void sge_monitor_free(monitoring_t *monitor)
{
   DENTER(GDI_LAYER, "sge_monitor_free");

   /* first output line: drop the string content, then the buffer pointer */
   sge_dstring_free(monitor->output_line1);
   sge_free(&(monitor->output_line1));

   /* second output line */
   sge_dstring_free(monitor->output_line2);
   sge_free(&(monitor->output_line2));

   sge_free(&(monitor->ext_data));

   /* a position of -1 means there is no Output[] entry to clear */
   if (monitor->pos != -1) {
      sge_mutex_lock("sge_monitor_init", SGE_FUNC, __LINE__, &(Output[monitor->pos].Output_Mutex));
      Output[monitor->pos].name = NULL;
      Output[monitor->pos].output = NULL;
      Output[monitor->pos].error_timeout = NO_ERROR;
      Output[monitor->pos].warning_timeout = NO_WARNING;
      sge_mutex_unlock("sge_monitor_init", SGE_FUNC, __LINE__, &(Output[monitor->pos].Output_Mutex));
   }

   /* put the structure back into its unused state */
   monitor->pos = -1;
   monitor->output = false;
   monitor->monitor_time = 0;
   monitor->work_line = NULL;
   monitor->thread_name = NULL;
   monitor->ext_type = NONE_EXT;
   monitor->ext_output = NULL;
   monitor->ext_data_size = 0;

#if defined(LINUX) || defined(AIX43) || defined(AIX51) || defined(IRIX) || defined(SOLARIS) || defined(HP11)
   sge_mutex_lock("sge_monitor_status", SGE_FUNC, __LINE__, &global_mutex);
   if (mallinfo_shlib_handle != NULL) {
      dlclose(mallinfo_shlib_handle);
      mallinfo_shlib_handle = NULL;
   }
   sge_mutex_unlock("sge_monitor_status", SGE_FUNC, __LINE__, &global_mutex);
#endif

   DEXIT;
}
/****** uti/stdlib/sge_setenv() ***********************************************
*  NAME
*     sge_setenv() -- Change or add an environment variable
*
*  SYNOPSIS
*     int sge_setenv(const char *name, const char *value)
*
*  FUNCTION
*     Builds the string "<name>=<value>" and hands it to sge_putenv().
*     Nothing is done when either argument is NULL.
*
*  INPUTS
*     const char *name  - variable name
*     const char *value - new value
*
*  RESULT
*     int - whatever sge_putenv() returned, or 0 if name or value is NULL
*           (documented as 1 - success, 0 - error)
*
*  SEE ALSO
*     uti/stdlib/sge_putenv()
*     uti/stdlib/sge_getenv()
*     uti/stdio/addenv()
*
*  NOTES
*     MT-NOTE: sge_setenv() is documented as MT safe; this relies on
*     MT-NOTE: sge_putenv(), which is not visible here
*******************************************************************************/
int sge_setenv(const char *name, const char *value)
{
   dstring assignment = DSTRING_INIT;
   int status;

   if (name == NULL || value == NULL) {
      return 0;
   }

   sge_dstring_sprintf(&assignment, "%s=%s", name, value);
   status = sge_putenv(sge_dstring_get_string(&assignment));
   sge_dstring_free(&assignment);

   return status;
}
/****** shepherd/shepconf/shepconf_has_notify_signal() ************************
*  NAME
*     shepconf_has_notify_signal() -- Do we have a notification signal
*
*  SYNOPSIS
*     int shepconf_has_notify_signal(const char *notify_name, int *signal)
*
*  FUNCTION
*     This function checks if the notification mechanism is enabled.
*     In this case the function will return 'true' and it will
*     store the default signal or the user defined signal for
*     the given "notify_name" in *signal.
*
*     The decision is taken from the configuration value
*     "<notify_name>_type":
*        0       - use the signal named by the value "<notify_name>"
*        1       - use the default signal (also used when the "_type"
*                  value does not exist)
*        other   - notification is disabled, *signal is set to 0
*
*  INPUTS
*     const char *notify_name - "notify_susp" or "notify_kill"
*     int *signal             - signal id (output)
*
*  RESULT
*     int - true (1) or false (0)
*******************************************************************************/
int shepconf_has_notify_signal(const char *notify_name, int *signal)
{
   const char *notify_array[] = {
      "notify_susp", "notify_kill", NULL
   };
   int signal_array[] = {
      SIGUSR1, SIGUSR2, 0
   };
   dstring param_name = DSTRING_INIT;
   char *conf_type = NULL;
   int conf_id;
   int ret = 0;

   /*
    * There are three possibilities:
    *    a) There is a user defined signal which should be used
    *    b) Default signal should be used
    *    c) Notification mechanism is disabled
    */
   sge_dstring_sprintf(&param_name, "%s%s", notify_name, "_type");
   conf_type = search_conf_val(sge_dstring_get_string(&param_name));
   sge_dstring_free(&param_name);

   if (conf_type != NULL) {
      conf_id = atol(conf_type);
   } else {
      conf_id = 1;   /* Default signal should be used */
   }

   if (conf_id == 0) {
      /* a) user defined signal; *signal stays untouched if it is not configured */
      char *conf_signal = search_conf_val(notify_name);

      if (conf_signal != NULL) {
         *signal = sge_sys_str2signal(conf_signal);
         ret = 1;
      }
   } else if (conf_id == 1) {
      /* b) look up the default signal belonging to notify_name */
      int i;

      for (i = 0; notify_array[i] != NULL; i++) {
         if (!strcmp(notify_array[i], notify_name)) {
            break;
         }
      }
      /*
       * NOTE(review): for a name that is not in notify_array the loop stops
       * at the terminator, so signal 0 is stored and 1 is still returned.
       */
      *signal = signal_array[i];
      ret = 1;
   } else {
      /* c) disabled */
      *signal = 0;
      ret = 0;
   }

   return ret;
}
/****** Interactive/qrsh/qrsh_error() ****************************************** * NAME * qrsh_error() -- propagate qrsh startup error to shepherd and qrsh client * * SYNOPSIS * static * void qrsh_error(const char *fmt, ...) * * FUNCTION * Writes the passed error message to a special error file in the jobs * temporary directory. * Separate error files are written for jobs and tasks (started by * qrsh -inherit). * * INPUTS * const char *fmt - format string * ... - arguments to be formatted using the format string * *******************************************************************************/ static void qrsh_error(const char *fmt, ...) { char *tmpdir = NULL; char *taskid = NULL; int file; char fileName[SGE_PATH_MAX]; va_list ap; char message[MAX_STRING_SIZE]; va_start(ap, fmt); if (fmt == NULL || *fmt == '\0') { return; } vsnprintf(message, MAX_STRING_SIZE, fmt, ap); va_end(ap); if ((tmpdir = search_conf_val("qrsh_tmpdir")) == NULL) { fprintf(stderr, "%s\n", message); fprintf(stderr, MSG_CONF_NOCONFVALUE_S, "qrsh_tmpdir"); fprintf(stderr, "\n"); return; } taskid = search_conf_val("qrsh_task_id"); if (taskid != NULL) { snprintf(fileName, SGE_PATH_MAX, "%s/qrsh_error.%s", tmpdir, taskid); } else { snprintf(fileName, SGE_PATH_MAX, "%s/qrsh_error", tmpdir); } if ((file = SGE_OPEN3(fileName, O_WRONLY | O_APPEND | O_CREAT, 00744)) == -1) { fprintf(stderr, "%s\n", message); fprintf(stderr, MSG_QRSH_STARTER_CANNOTOPENFILE_SS, fileName, strerror(errno)); fprintf(stderr, "\n"); return; } if (write(file, message, strlen(message)) != strlen(message)) { dstring ds = DSTRING_INIT; fprintf(stderr, MSG_FILE_CANNOT_WRITE_SS, fileName, sge_strerror(errno, &ds)); sge_dstring_free(&ds); } close(file); }
/****** uti/sge_tmpnam/sge_tmpnam() *******************************************
*  NAME
*     sge_tmpnam() -- Secure replacement for tmpnam()
*
*  SYNOPSIS
*     char* sge_tmpnam(char *aBuffer, dstring *error_message)
*
*  FUNCTION
*     Generate a string that is a unique valid filename within a given
*     directory. The corresponding file is created as soon as the filename
*     has been generated, thus avoiding any delay between filename generation
*     and actual file usage. The file will have read and write access for the
*     user only.
*
*     The 'aBuffer' argument points to an array of at least SGE_PATH_MAX length.
*     'aBuffer' will contain the generated filename upon successful completion.
*     In addition, 'aBuffer' will be returned. If the function fails, NULL will
*     be returned and a description is written to 'error_message'.
*
*     If the environment variable TMPDIR is defined, it's value will be used
*     as the path prefix for the file. If TMPDIR is not set or it does not
*     refer to a valid directory, the value of P_tmpdir will be used.
*     P_tmpdir shall be defined in <stdio.h>. If P_tmpdir is not defined or
*     it does not refer to a valid directory, /tmp will be used.
*     (The directory is chosen by elect_path(), the file is created by
*     spawn_file(); both are not part of this function.)
*
*     NOTE: Since the file already exists, the O_EXCL flag must not be used if
*     the returned filename is opened for usage within an application. It is,
*     however, the duty of the application calling this function to delete the
*     file denoted by the generated filename after it is no longer needed.
*
*  INPUTS
*     char *aBuffer          - Array to hold filename
*     dstring *error_message - receives the error text on failure
*
*  RESULT
*     char* - Points to 'aBuffer' if successful, NULL otherwise
*
*  NOTE
*     MT-NOTE: sge_tmpnam() is MT safe.
******************************************************************************/
char *sge_tmpnam(char *aBuffer, dstring *error_message)
{
   dstring s = DSTRING_INIT;
   const char *path;
   size_t path_len;

   DENTER(TOP_LAYER, "sge_tmpnam");

   if (aBuffer == NULL) {
      sge_dstring_sprintf(error_message, MSG_TMPNAM_GOT_NULL_PARAMETER);
      DEXIT;
      return NULL;
   }

   if (elect_path(&s) < 0) {
      sge_dstring_sprintf(error_message, MSG_TMPNAM_CANNOT_GET_TMP_PATH);
      sge_dstring_free(&s);
      DEXIT;
      return NULL;
   }

   /*
    * An empty path must not be indexed at [len - 1]; it is reported
    * like any other failure to determine the directory.
    */
   path = sge_dstring_get_string(&s);
   path_len = sge_dstring_strlen(&s);
   if (path == NULL || path_len == 0) {
      sge_dstring_sprintf(error_message, MSG_TMPNAM_CANNOT_GET_TMP_PATH);
      sge_dstring_free(&s);
      DEXIT;
      return NULL;
   }

   /* make sure the directory part ends with a slash */
   if (path[path_len - 1] != '/') {
      sge_dstring_append_char(&s, '/');
   }

   if (spawn_file(&s, error_message) < 0) {
      sge_dstring_free(&s);
      DEXIT;
      return NULL;
   }

   sge_strlcpy(aBuffer, sge_dstring_get_string(&s), SGE_PATH_MAX);
   sge_dstring_free(&s);

   DPRINTF(("sge_tmpnam: returning %s\n", aBuffer));
   DEXIT;
   return aBuffer;
}
/*
 * sge_signaler_initialize() -- set up the signal thread pool and start
 *                              the signaler thread
 *
 * The thread is named "<threadnames[SIGNALER_THREAD]>000", uses
 * sge_signaler_main() as its entry point and is created inside
 * Main_Control.signal_thread_pool.
 *
 * INPUTS
 *    sge_gdi_ctx_class_t *ctx - not used by this function
 */
void sge_signaler_initialize(sge_gdi_ctx_class_t *ctx)
{
   dstring name_buf = DSTRING_INIT;
   cl_thread_settings_t* created_thread = NULL;

   DENTER(TOP_LAYER, "sge_signaler_initialize");

   /* the pool has to exist before a thread can be created in it */
   cl_thread_list_setup(&(Main_Control.signal_thread_pool), "signal thread pool");

   sge_dstring_sprintf(&name_buf, "%s%03d", threadnames[SIGNALER_THREAD], 0);
   cl_thread_list_create_thread(Main_Control.signal_thread_pool,
                                &created_thread,
                                cl_com_get_log_list(),
                                sge_dstring_get_string(&name_buf),
                                0,
                                sge_signaler_main,
                                NULL,
                                NULL,
                                CL_TT_SIGNALER);
   sge_dstring_free(&name_buf);

   DRETURN_VOID;
}
/*
 * sge_event_master_initialize() -- set up the event master thread pool and
 *                                  start the event delivery thread
 *
 * The thread is named "<threadnames[DELIVERER_THREAD]>000", uses
 * sge_event_master_main() as its entry point and is created inside
 * Main_Control.event_master_thread_pool.
 *
 * INPUTS
 *    sge_gdi_ctx_class_t *ctx - not used by this function
 */
void sge_event_master_initialize(sge_gdi_ctx_class_t *ctx)
{
   dstring name_buf = DSTRING_INIT;
   cl_thread_settings_t* created_thread = NULL;

   DENTER(TOP_LAYER, "sge_event_master_initialize");

   DPRINTF(("event master functionality has been initialized\n"));

   /* the pool has to exist before a thread can be created in it */
   cl_thread_list_setup(&(Main_Control.event_master_thread_pool), "event master thread pool");

   sge_dstring_sprintf(&name_buf, "%s%03d", threadnames[DELIVERER_THREAD], 0);
   cl_thread_list_create_thread(Main_Control.event_master_thread_pool,
                                &created_thread,
                                cl_com_get_log_list(),
                                sge_dstring_get_string(&name_buf),
                                0,
                                sge_event_master_main,
                                NULL,
                                NULL,
                                CL_TT_DELIVERER);
   sge_dstring_free(&name_buf);

   DRETURN_VOID;
}
/*
 * showError() -- print all messages of an error handler to stdout
 *
 * Walks the iterator obtained from eh->iterator(), joins the messages
 * with a newline between two consecutive messages and prints the result
 * followed by one final newline.
 *
 * INPUTS
 *    sge_error_class_t *eh - error handler whose messages are printed
 */
void showError(sge_error_class_t *eh)
{
   sge_error_iterator_class_t *iter = NULL;
   dstring ds = DSTRING_INIT;
   const char *text;
   bool first = true;

   iter = eh->iterator(eh);

   while (iter && iter->next(iter)) {
      if (first) {
         /* no separator in front of the very first message */
         first = false;
      } else {
         sge_dstring_append(&ds, "\n");
      }
      sge_dstring_append(&ds, iter->get_message(iter));
   }

   /* without any message the dstring may have no string at all */
   text = sge_dstring_get_string(&ds);
   printf("%s\n", text != NULL ? text : "");

   sge_dstring_free(&ds);
}
/*
 * test_dstring_performance_dynamic() -- time repeated dstring creation
 *
 * Creates, fills ("<data>/<data>") and frees a dynamically growing dstring
 * 'max' times and prints the wall clock time that took.
 *
 * INPUTS
 *    int max          - number of iterations
 *    const char *data - string used twice per iteration to fill the dstring
 */
static void test_dstring_performance_dynamic(int max, const char *data)
{
   struct timeval started;
   struct timeval finished;
   double elapsed;
   int round = 0;

   gettimeofday(&started, NULL);
   while (round < max) {
      dstring scratch = DSTRING_INIT;

      sge_dstring_sprintf(&scratch, "%s/%s", data, data);
      sge_dstring_free(&scratch);
      round++;
   }
   gettimeofday(&finished, NULL);

   /* microsecond part first, then add it as a fraction to the seconds */
   elapsed = finished.tv_usec - started.tv_usec;
   elapsed = finished.tv_sec - started.tv_sec + (elapsed/1000000);

   printf("%d dstring creations took %.2fs\n", max, elapsed);
}
/****** sge/opt/opt_list_append_opts_from_default_files() *********************
*  NAME
*     opt_list_append_opts_from_default_files() -- parse default files
*
*  SYNOPSIS
*     void opt_list_append_opts_from_default_files(u_long32 prog_number,
*                                                  const char* cell_root,
*                                                  const char* user,
*                                                  lList **pcmdline,
*                                                  lList **answer_list,
*                                                  char **envp)
*
*  FUNCTION
*     Collects the paths of the three defaults files (cell wide, user home
*     directory, current working directory) and passes them on to
*     append_opts_from_default_files(), which parses them into an options
*     list. The answer list is emptied with lFreeList() before any work
*     is done.
*
*  INPUTS
*     u_long32 prog_number  - passed through to append_opts_from_default_files()
*     const char* cell_root - used to locate the cell wide defaults file
*     const char* user      - used to locate the defaults file in the home
*                             directory
*     lList **pcmdline      - pointer to SPA_Type list, if list is NULL, it is
*                             created if the files contain any options
*     lList **answer_list   - answer list, AN_Type or NULL if everything ok
*        possible errors:
*           STATUS_ENOSUCHUSER - could not retrieve passwd info on me.user_name
*           STATUS_EDISK       - home directory for user is missing or cwd
*                                cannot be read or file could not be opened
*                                (is just a warning)
*           STATUS_EEXIST      - (parse_script_file), (is just a warning)
*           STATUS_EUNKNOWN    - (parse_script_file), error opening or
*                                reading from existing file, (is just a warning)
*                                plus all other error stati returned by
*                                parse_script_file, see there
*     char **envp           - environment pointer
*
*  NOTES
*     MT-NOTE: opt_list_append_opts_from_default_files() is MT safe
*     NOTE(review): the strings stored in def_files are not freed here;
*     presumably append_opts_from_default_files() takes care of them.
*******************************************************************************/
void opt_list_append_opts_from_default_files(u_long32 prog_number, const char* cell_root, const char* user, lList **pcmdline, lList **answer_list, char **envp)
{
   char *def_files[3 + 1];
   dstring path_buf = DSTRING_INIT;
   const char *path;

   DENTER(TOP_LAYER, "opt_list_append_opts_from_default_files");

   lFreeList(answer_list);

   /* slot 0: the sge root defaults file */
   get_root_file_path(&path_buf, cell_root, SGE_COMMON_DEF_REQ_FILE);
   path = sge_dstring_get_string(&path_buf);
   def_files[0] = strdup(path);

   /* slot 1: the defaults file in the user's home directory */
   get_user_home_file_path(&path_buf, SGE_HOME_DEF_REQ_FILE, user, answer_list);
   path = sge_dstring_get_string(&path_buf);
   def_files[1] = strdup(path);

   /* slot 2: the defaults file in the current working directory */
   def_files[2] = get_cwd_defaults_file_path(answer_list);

   /* slot 3: end marker */
   def_files[3] = NULL;

   /*
    * now read all the defaults files, unaware of where they came from
    */
   append_opts_from_default_files(prog_number, pcmdline, answer_list, envp, def_files); /* MT-NOTE !!!! */

   sge_dstring_free(&path_buf);

   DRETURN_VOID;
}
/*
 * qmonAboutMsg() -- callback that shows the "About Qmon" dialog
 *
 * Displays the modeless information dialog "about_msg" with the user name,
 * the qualified host name, the product name/version string
 * ("<GE_LONGNAME> <GDI_VERSION>") and the default cell taken from ctx.
 *
 * INPUTS
 *    Widget w      - widget the dialog is displayed for
 *    XtPointer cld - client data, not used
 *    XtPointer cad - call data, not used
 */
void qmonAboutMsg(Widget w, XtPointer cld, XtPointer cad)
{
   char buffer[256];
   const char* username = ctx->get_username(ctx);
   const char* qualified_hostname = ctx->get_qualified_hostname(ctx);
   const char* default_cell = ctx->get_default_cell(ctx);

   DENTER(TOP_LAYER, "qmonAboutMsg");

   /* bounded formatting: the version text can never overrun the buffer */
   snprintf(buffer, sizeof(buffer), "%s %s", GE_LONGNAME, GDI_VERSION);

   XmtDisplayMessage(w, "about_msg", "Help", header, "About Qmon", NULL, None,
                     XmDIALOG_MODELESS, XmDIALOG_INFORMATION,
                     username, qualified_hostname, buffer, default_cell,
                     XmtLocalize(w, mailto, "mailto_msg"), SFLN_ELN);

   DEXIT;
}
/*
 * sge_make_tmpdir() -- create the temporary directory of a job task
 *
 * The directory name is "<QU_tmpdir>/<jobid>.<jataskid>.<QU_qname>" and is
 * written into 'tmpdir'. The directory is created with mode 0755 after
 * switching to the start user and is then handed over to uid/gid with
 * chown(). The function switches back to the admin user before it returns,
 * on success as well as on a chown() failure.
 *
 * INPUTS
 *    lListElem *qep    - queue element providing QU_tmpdir and QU_qname
 *    u_long32 jobid    - job id used in the directory name
 *    u_long32 jataskid - array task id used in the directory name
 *    uid_t uid         - new owner of the directory
 *    gid_t gid         - new group of the directory
 *    char *tmpdir      - buffer receiving the directory name; it is filled
 *                        with sprintf(), so the caller has to provide a
 *                        buffer that is large enough (the required size is
 *                        not visible here - presumably SGE_PATH_MAX)
 *
 * RESULT
 *    char* - 'tmpdir' on success,
 *            NULL if the queue has no QU_tmpdir or if chown() failed (the
 *            directory is removed again in the latter case)
 */
char *sge_make_tmpdir(lListElem *qep, u_long32 jobid, u_long32 jataskid, uid_t uid, gid_t gid, char *tmpdir)
{
   const char *t;

   DENTER(TOP_LAYER, "sge_make_tmpdir");

   t = lGetString(qep, QU_tmpdir);
   if (t == NULL) {
      DRETURN(NULL);
   }

   /* Note could have multiple instantiations of same job, */
   /* on same machine, under same queue */
   sprintf(tmpdir, "%s/"sge_u32"."sge_u32".%s", t, jobid, jataskid, lGetString(qep, QU_qname));

   DPRINTF(("making TMPDIR=%s\n", tmpdir));

   sge_switch2start_user();
   sge_mkdir(tmpdir, 0755, false, false);

   /*
    * chown is considered to be a security flaw, as an attacker might move the
    * directory between the mkdir and chown.
    * This is both nearly impossible here and would have no effect.
    * Make flawfinder ignore it
    */
   /* Flawfinder: ignore */
   if (chown(tmpdir, uid, gid) != 0) {
      dstring ds = DSTRING_INIT;

      ERROR((SGE_EVENT, MSG_FILE_NOCHOWN_SS, tmpdir, sge_strerror(errno, &ds)));
      sge_dstring_free(&ds);

      /* tmpdir is a directory: it has to be removed with rmdir(), not unlink() */
      rmdir(tmpdir);

      /* never leave this function while still running as the start user */
      sge_switch2admin_user();
      DRETURN(NULL);
   }

   sge_switch2admin_user();

   DRETURN(tmpdir);
}
/****** test_category/test_performance() ***************************************
*  NAME
*     test_performance() -- measures and prints the time needed for n
*                           category strings
*
*  SYNOPSIS
*     static double test_performance(lListElem *job_elem, int max,
*                                    lList* access_list,
*                                    const lList *project_list,
*                                    const lList *rqs_list)
*
*  FUNCTION
*     Builds the category string of the job 'max' times with
*     sge_build_job_category_dstring(), clearing the buffer after each run,
*     and prints the elapsed wall clock time.
*
*  INPUTS
*     lListElem *job_elem       - job object
*     int max                   - number of generated category strings
*     lList* access_list        - access list or NULL
*     const lList *project_list - passed through to the category builder
*     const lList *rqs_list     - passed through to the category builder
*
*  RESULT
*     double - time needed for the run in seconds
*
*  NOTES
*     MT-NOTE: test_performance() is MT safe
*
*******************************************************************************/
static double test_performance(lListElem *job_elem, int max, lList* access_list, const lList *project_list, const lList *rqs_list)
{
   struct timeval started;
   struct timeval finished;
   dstring category_buf = DSTRING_INIT;
   double elapsed;
   int round = 0;

   gettimeofday(&started, NULL);
   while (round < max) {
      sge_build_job_category_dstring(&category_buf, job_elem, access_list, project_list, NULL, rqs_list);
      sge_dstring_clear(&category_buf);
      round++;
   }
   gettimeofday(&finished, NULL);

   sge_dstring_free(&category_buf);

   /* microsecond part first, then add it as a fraction to the seconds */
   elapsed = finished.tv_usec - started.tv_usec;
   elapsed = finished.tv_sec - started.tv_sec + (elapsed/1000000);

   printf("tested %d category creations: new: %.2fs\n", max, elapsed);

   return elapsed;
}
/****** client_check_window_change() *******************************************
*  NAME
*     client_check_window_change() -- check if window size was changed and
*                                     submit changes to pty
*
*  SYNOPSIS
*     static void client_check_window_change(COMM_HANDLE *handle)
*
*  FUNCTION
*     Looks at the flag received_window_change_signal. If it is set, the
*     flag is cleared, the current window size of stdin is read with
*     ioctl(TIOCGWINSZ) and sent as a WINDOW_SIZE_CTRL_MSG
*     ("WS <rows> <cols> <xpixel> <ypixel>"). If the ioctl() fails, the
*     dummy size "WS 60 80 480 640" is sent instead.
*     The flag itself is set elsewhere (see window_change_handler()).
*
*  INPUTS
*     COMM_HANDLE *handle - pointer to the commlib handle
*
*  RESULT
*     void - no result
*
*  NOTES
*     MT-NOTE: client_check_window_change() is MT-safe (see comment in code)
*
*  SEE ALSO
*     window_change_handler()
*******************************************************************************/
static void client_check_window_change(COMM_HANDLE *handle)
{
   dstring err_msg = DSTRING_INIT;
   char buf[200];
   struct winsize ws;

   DENTER(TOP_LAYER, "client_check_window_change");

   if (received_window_change_signal) {
      /*
       * here we can have a race condition between the two working threads,
       * but it doesn't matter - in the worst case, the new window size gets
       * submitted two times.
       */
      received_window_change_signal = 0;

      /* prepare the message text, real size if available, dummy otherwise */
      if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) < 0) {
         DPRINTF(("client_check_windows_change: ioctl() failed! "
            "sending dummy WINDOW_SIZE_CTRL_MSG to fullfill protocol.\n"));
         sprintf(buf, "WS 60 80 480 640");
      } else {
         DPRINTF(("sendig WINDOW_SIZE_CTRL_MSG with new window size: "
                  "%d, %d, %d, %d to shepherd\n",
                  ws.ws_row, ws.ws_col, ws.ws_xpixel, ws.ws_ypixel));
         sprintf(buf, "WS %d %d %d %d",
                 ws.ws_row, ws.ws_col, ws.ws_xpixel, ws.ws_ypixel);
      }

      /* one send for both cases */
      comm_write_message(handle, g_hostname, COMM_CLIENT, 1,
                         (unsigned char*)buf, strlen(buf),
                         WINDOW_SIZE_CTRL_MSG, &err_msg);
   }

   sge_dstring_free(&err_msg);
   DEXIT;
}
/*
 * sge_bootstrap_state_setup() -- fill the bootstrap state from the
 *                                bootstrap file
 *
 * Asks sge_paths for the name of the bootstrap file, reads the entries
 * listed in the table below with sge_get_confval_array() and stores them
 * in 'thiz' through its setter functions. Boolean entries ("ignore_fqdn",
 * "job_spooling") and the thread counts are converted with
 * parse_ulong_val(); an empty "job_spooling" value is stored as true.
 *
 * INPUTS
 *    sge_bootstrap_state_class_t *thiz - object that receives the values
 *    sge_path_state_class_t *sge_paths - provides the bootstrap file name
 *    sge_error_class_t *eh             - receives the error description
 *
 * RESULT
 *    bool - true on success,
 *           false if sge_paths is NULL, the bootstrap file name cannot be
 *           determined or sge_get_confval_array() reports an error
 *
 * NOTES
 *    The table holds NUM_BOOTSTRAP entries, the first REQ_BOOTSTRAP of
 *    them are flagged 'true'. Both counts are passed to
 *    sge_get_confval_array() (presumably "number of entries" and "number of
 *    required entries" - confirm against that function).
 */
static bool sge_bootstrap_state_setup(sge_bootstrap_state_class_t *thiz, sge_path_state_class_t *sge_paths, sge_error_class_t *eh)
{
#define NUM_BOOTSTRAP 14
#define REQ_BOOTSTRAP 9
   dstring error_dstring = DSTRING_INIT;
   const char *bootstrap_file = NULL;
   bootstrap_entry_t name[NUM_BOOTSTRAP] = {
      {"admin_user", true},
      {"default_domain", true},
      {"ignore_fqdn", true},
      {"spooling_method", true},
      {"spooling_lib", true},
      {"spooling_params", true},
      {"binary_path", true},
      {"qmaster_spool_dir", true},
      {"security_mode", true},
      {"job_spooling", false},
      {"listener_threads", false},
      {"worker_threads", false},
      {"scheduler_threads", false},
      {"jvm_threads", false}
   };
   char value[NUM_BOOTSTRAP][1025];
   int i;

   DENTER(TOP_LAYER, "sge_bootstrap_state_setup");

   /* every value starts out as the empty string */
   for (i = 0; i < NUM_BOOTSTRAP; i++) {
      value[i][0] = '\0';
   }

   if (!sge_paths) {
      eh->error(eh, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, "sge_paths is NULL");
      DEXIT;
      return false;
   }

   /* get filepath of bootstrap file */
   bootstrap_file = sge_paths->get_bootstrap_file(sge_paths);
   if (bootstrap_file == NULL) {
      eh->error(eh, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, "%s", MSG_UTI_CANNOTRESOLVEBOOTSTRAPFILE);
      DEXIT;
      return false;
   }

   /* read bootstrapping information */
   if (sge_get_confval_array(bootstrap_file, NUM_BOOTSTRAP, REQ_BOOTSTRAP, name, value, &error_dstring)) {
      eh->error(eh, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, "%s", sge_dstring_get_string(&error_dstring));
      sge_dstring_free(&error_dstring);
      DEXIT;
      return false;
   }

   /* nothing below uses the error text; release whatever may have been written */
   sge_dstring_free(&error_dstring);

   /* store bootstrapping information */
   thiz->set_admin_user(thiz, value[0]);
   thiz->set_default_domain(thiz, value[1]);
   {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_BOO, value[2], NULL, 0);
      thiz->set_ignore_fqdn(thiz, uval ? true : false);
   }
   thiz->set_spooling_method(thiz, value[3]);
   thiz->set_spooling_lib(thiz, value[4]);
   thiz->set_spooling_params(thiz, value[5]);
   thiz->set_binary_path(thiz, value[6]);
   thiz->set_qmaster_spool_dir(thiz, value[7]);
   thiz->set_security_mode(thiz, value[8]);

   /* optional entry: job spooling stays enabled when nothing is configured */
   if (strcmp(value[9], "")) {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_BOO, value[9], NULL, 0);
      thiz->set_job_spooling(thiz, uval ? true : false);
   } else {
      thiz->set_job_spooling(thiz, true);
   }

   /* optional thread counts; uval stays 0 when parse_ulong_val() does not set it */
   {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_INT, value[10], NULL, 0);
      thiz->set_listener_thread_count(thiz, uval);
   }
   {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_INT, value[11], NULL, 0);
      thiz->set_worker_thread_count(thiz, uval);
   }
   {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_INT, value[12], NULL, 0);
      thiz->set_scheduler_thread_count(thiz, uval);
   }
   {
      u_long32 uval = 0;
      parse_ulong_val(NULL, &uval, TYPE_INT, value[13], NULL, 0);
      thiz->set_jvm_thread_count(thiz, uval);
   }

#if 0
   thiz->dprintf(thiz);
#endif

   DEXIT;
   return true;
}
/****** sge_binding_hlp/get_topology_linux() ***********************************
*  NAME
*     get_topology_linux() -- Creates the topology string for the current host.
*
*  SYNOPSIS
*     bool get_topology_linux(char** topology, int* length)
*
*  FUNCTION
*     Builds a string out of the letters 'S' (socket), 'C' (core) and
*     'T' (thread) from what PLPA reports: one 'S' per socket, followed by
*     one 'C' per core of that socket, each followed by one 'T' per thread
*     when get_processor_ids_linux() reports more than one processor id for
*     the core. On success the string is duplicated into *topology and has
*     to be freed by the caller.
*
*  INPUTS
*     char** topology - receives the topology string; left untouched when
*                       false is returned
*     int* length     - receives the number of letters plus one for the
*                       terminating '\0'; 0 when nothing was generated
*
*  RESULT
*     bool - true when the topology string could be generated (and memory
*            is allocated), otherwise false
*
*  NOTES
*     MT-NOTE: get_topology_linux() is MT safe
*
*******************************************************************************/
bool get_topology_linux(char** topology, int* length)
{
   dstring topo_text = DSTRING_INIT;
   int topo_supported = 0;
   int socket_total, highest_socket_id;
   int socket_idx;

   /* nothing generated so far */
   *length = 0;

   /* PLPA has to offer topology information at all */
   if (plpa_have_topology_information(&topo_supported) != 0 || topo_supported != 1) {
      return false;
   }

   if (plpa_get_socket_info(&socket_total, &highest_socket_id) != 0) {
      return false;
   }

   for (socket_idx = 0; socket_idx < socket_total; socket_idx++) {
      int internal_socket_id;
      int core_total, highest_core_id;
      int core_idx;
      int *proc_ids = NULL;
      int thread_total = 0;

      /* the socket letter is written even if its details cannot be read */
      sge_dstring_append_char(&topo_text, 'S');
      (*length)++;

      /* logical socket number -> internal socket id */
      if (plpa_get_socket_id(socket_idx, &internal_socket_id) != 0) {
         continue;
      }

      if (plpa_get_core_info(internal_socket_id, &core_total, &highest_core_id) != 0) {
         continue;
      }

      for (core_idx = 0; core_idx < core_total; core_idx++) {
         sge_dstring_append_char(&topo_text, 'C');
         (*length)++;

         /* threads are only listed for cores with more than one of them */
         if (get_processor_ids_linux(socket_idx, core_idx, &proc_ids, &thread_total)
             && thread_total > 1) {
            int thread_idx;

            for (thread_idx = 0; thread_idx < thread_total; thread_idx++) {
               sge_dstring_append_char(&topo_text, 'T');
               (*length)++;
            }
         }
         sge_free(&proc_ids);
      }
   }

   if ((*length) == 0) {
      sge_dstring_free(&topo_text);
      return false;
   }

   /* account for the terminating '\0' and hand out a copy */
   (*length)++;
   (*topology) = sge_strdup(NULL, sge_dstring_get_string(&topo_text));
   sge_dstring_free(&topo_text);

   return true;
}
/*
 * _spool_get_fields_to_spool() -- build the spooling field array for a
 *                                 descriptor
 *
 * Every attribute of 'descr' whose multitype matches instr->selection gets
 * one entry in the returned array. The array is terminated by an entry
 * whose 'nm' is NoName.
 *
 * If instr->copy_field_names is set, the attribute name is duplicated into
 * the entry; with instr->strip_field_prefix the object's name prefix
 * (e.g. "QU_") is skipped. For attributes of type lListT the function
 * recurses with instr->sub_instr; a self referencing structure (same
 * instruction and same descriptor) reuses the array under construction
 * as sub field array.
 *
 * INPUTS
 *    lList **answer_list      - receives error messages
 *    const lDescr *descr      - descriptor to analyze (not checked)
 *    const spool_instr *instr - spooling instruction (not checked)
 *
 * RESULT
 *    spooling_field * - the allocated array, or NULL on error (allocation
 *                       failure, attribute without usable name, sublist
 *                       without sub instruction or without known subtype)
 *
 * NOTES
 *    NOTE(review): a NULL result of the recursive call is stored as
 *    sub_fields without being treated as an error.
 */
static spooling_field *
_spool_get_fields_to_spool(lList **answer_list, const lDescr *descr,
                           const spool_instr *instr)
{
   spooling_field *fields;
   int i, j, size;
   int strip = 0;

   DENTER(TOP_LAYER, "_spool_get_fields_to_spool");

   /* we don't check descr and instr, as we know they are ok
    * (it's a static function)
    */

   /* count fields to spool */
   for (i = 0, size = 0; mt_get_type(descr[i].mt) != lEndT; i++) {
      if ((descr[i].mt & instr->selection) != 0) {
         size++;
      }
   }

   /* allocate memory: one entry per field plus the end marker */
   fields = malloc((size + 1) * sizeof(spooling_field));
   if (fields == NULL) {
      answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN,
                              ANSWER_QUALITY_ERROR,
                              MSG_UNABLETOALLOCATEBYTES_DS,
                              (size + 1) * sizeof(spooling_field), SGE_FUNC);
      DRETURN(NULL);
   }

   /*
    * initialize fields - including the end marker at index 'size', so that
    * the array is properly terminated whenever it is handed to
    * spool_free_spooling_fields() from one of the error paths below
    */
   for (i = 0; i <= size; i++) {
      fields[i].nm         = NoName;
      fields[i].width      = 0;
      fields[i].name       = NULL;
      fields[i].sub_fields = NULL;
      fields[i].clientdata = NULL;
      fields[i].read_func  = NULL;
      fields[i].write_func = NULL;
   }

   /* do we have to strip field prefixes, e.g. "QU_" from field names? */
   if (instr->copy_field_names && instr->strip_field_prefix) {
      dstring buffer = DSTRING_INIT;
      const char *prefix = object_get_name_prefix(descr, &buffer);
      strip = sge_strlen(prefix);
      sge_dstring_free(&buffer);
   }

   /* copy field info */
   for (i = 0, j = 0; mt_get_type(descr[i].mt) != lEndT; i++) {
      if ((descr[i].mt & instr->selection) != 0) {
         spooling_field *sub_fields = NULL;

         DPRINTF(("field "SFQ" will be spooled\n", lNm2Str(descr[i].nm)));

         fields[j].nm = descr[i].nm;

         if (instr->copy_field_names) {
            const char *name;
            name = lNm2Str(descr[i].nm);
            /* the name has to be longer than the prefix that gets stripped */
            if (name == NULL || strlen(name) <= (size_t)strip) {
               answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN,
                                       ANSWER_QUALITY_ERROR,
                                       MSG_NONAMEFORATTRIBUTE_D,
                                       descr[i].nm);
               fields = spool_free_spooling_fields(fields);
               DEXIT;
               return NULL;
            }
            fields[j].name = strdup(name + strip);
         }

         if (mt_get_type(descr[i].mt) == lListT) {
            const lDescr *sub_descr;

            if (instr->sub_instr == NULL) {
               answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN,
                                       ANSWER_QUALITY_ERROR,
                                       MSG_DONTKNOWHOWTOSPOOLSUBLIST_SS,
                                       lNm2Str(descr[i].nm), SGE_FUNC);
               fields = spool_free_spooling_fields(fields);
               DEXIT;
               return NULL;
            }

            sub_descr = object_get_subtype(descr[i].nm);
            if (sub_descr == NULL) {
               answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN,
                                       ANSWER_QUALITY_ERROR,
                                       MSG_UNKNOWNOBJECTTYPEFOR_SS,
                                       lNm2Str(descr[i].nm), SGE_FUNC);
               fields = spool_free_spooling_fields(fields);
               DEXIT;
               return NULL;
            }

            /* recursive spooling, e.g. sharetree */
            if (instr->sub_instr == instr && descr == sub_descr) {
               sub_fields = fields;
               DPRINTF(("recursive structure detected for field %s\n",
                        lNm2Str(descr[i].nm)));
            } else {
               sub_fields = _spool_get_fields_to_spool(answer_list, sub_descr,
                                                       instr->sub_instr);
            }
         }

         fields[j++].sub_fields = sub_fields;
      }
   }

   /* end of field array */
   fields[j].nm = NoName;

   DEXIT;
   return fields;
}
/*
 * check_all() -- exercise the dstring API on the given buffer
 *
 * Calls each sge_dstring_* function first with NULL arguments and then with
 * real data, and dumps the state of 'sb' through check_dstring() after the
 * interesting steps. The results are only printed, nothing is verified
 * programmatically.
 *
 * INPUTS
 *    sb - the dstring to run the checks on
 *
 * RESULT
 *    always true
 */
static bool check_all(dstring *sb)
{
   const char *chunk = "long string that requires multiple chunks ....... ";
   const char *sample = "test string";
   int rounds_left = 20;

   /* sge_dstring_append */
   printf("\nchecking sge_dstring_append\n");
   sge_dstring_append(NULL, NULL);
   sge_dstring_append(sb, NULL);
   check_dstring(sb);

   sge_dstring_append(sb, "blah");
   check_dstring(sb);

   sge_dstring_clear(sb);
   sge_dstring_append(sb, "too long string to fit into a static string buffer");
   check_dstring(sb);

   sge_dstring_clear(sb);
   sge_dstring_append(sb, chunk);
   check_dstring(sb);

   /* grow the buffer with twenty more copies of the chunk */
   while (rounds_left-- > 0) {
      sge_dstring_append(sb, chunk);
   }
   check_dstring(sb);

   /* sge_dstring_append_dstring */
   printf("\nchecking sge_dstring_append_dstring\n");
   sge_dstring_clear(sb);
   sge_dstring_append_dstring(NULL, NULL);
   {
      dstring other = DSTRING_INIT;

      sge_dstring_append(&other, "dstring");
      sge_dstring_append_dstring(NULL, &other);
      sge_dstring_append_dstring(sb, NULL);
      sge_dstring_append_dstring(sb, &other);
      check_dstring(sb);

      sge_dstring_free(&other);
   }

   /* sge_dstring_append_char */
   printf("\nchecking sge_dstring_append_char\n");
   sge_dstring_clear(sb);
   sge_dstring_append_char(NULL, 'a');
   sge_dstring_append_char(sb, '\0');
   check_dstring(sb);
   sge_dstring_append_char(sb, 'a');
   check_dstring(sb);
   sge_dstring_append_char(sb, 'b');
   check_dstring(sb);

   /* sge_dstring_sprintf */
   printf("\nchecking sge_dstring_sprintf\n");
   sge_dstring_sprintf(NULL, "test %s", "string");
   sge_dstring_sprintf(sb, NULL);
   sge_dstring_sprintf(sb, "test %s", "string");
   check_dstring(sb);

#if 0 /* does not build on irix */
   /* sge_dstring_vsprintf */
   printf("\nchecking sge_dstring_vsprintf\n");
   {
      const char *args[] = { "string", NULL };
      sge_dstring_clear(sb);
      sge_dstring_vsprintf(NULL, "test %s", args);
      sge_dstring_vsprintf(sb, NULL, args);
      sge_dstring_vsprintf(sb, "test %s", args);
      check_dstring(sb);
   }
#endif

   /* sge_dstring_sprintf_append */
   printf("\nchecking sge_dstring_sprintf_append\n");
   sge_dstring_clear(sb);
   sge_dstring_sprintf_append(NULL, "test %s", "string");
   sge_dstring_sprintf_append(sb, NULL);
   sge_dstring_sprintf_append(sb, "test %s", "string");
   sge_dstring_sprintf_append(sb, " appended test %s", "string");
   check_dstring(sb);

   /* sge_dstring_clear */
   printf("\nchecking sge_dstring_clear\n");
   sge_dstring_clear(NULL);
   sge_dstring_clear(sb);
   check_dstring(sb);

   /* sge_dstring_free */
   printf("\nchecking sge_dstring_free\n");
   sge_dstring_free(NULL);
   sge_dstring_free(sb);
   check_dstring(sb);

   /* sge_dstring_get_string */
   printf("\nchecking sge_dstring_get_string\n");
   sge_dstring_clear(sb);
   sge_dstring_append(sb, sample);
   {
      const char *text;

      text = sge_dstring_get_string(NULL);
      if (text == NULL) {
         text = "NULL";
      }
      printf("sge_dstring_get_string(NULL) = %s\n", text);

      text = sge_dstring_get_string(sb);
      if (text == NULL) {
         text = "NULL";
      }
      printf("sge_dstring_get_string(sb) = %s\n", text);
   }

   /* sge_dstring_copy_string */
   printf("\nchecking sge_dstring_copy_string\n");
   sge_dstring_copy_string(NULL, NULL);
   sge_dstring_copy_string(sb, NULL);
   sge_dstring_copy_string(NULL, "new test string");
   sge_dstring_copy_string(sb, "new test string");
   check_dstring(sb);

   /* sge_dstring_copy_dstring
    * check only NULL pointer behaviour, it just calls sge_dstring_copy_string
    */
   printf("\nchecking sge_dstring_copy_dstring\n");
   sge_dstring_copy_dstring(NULL, NULL);
   sge_dstring_copy_dstring(sb, NULL);
   check_dstring(sb);

   /* sge_dstring_strlen */
   printf("\nchecking sge_dstring_strlen\n");
   {
      int count;

      sge_dstring_copy_string(sb, sample);

      count = sge_dstring_strlen(NULL);
      printf("sge_dstring_strlen(NULL) = %d\n", count);

      count = sge_dstring_strlen(sb);
      printf("sge_dstring_strlen(sb) = %d\n", count);
   }

   /* sge_dstring_remaining */
   printf("\nchecking sge_dstring_remaining\n");
   {
      int left;

      sge_dstring_copy_string(sb, sample);

      left = sge_dstring_remaining(NULL);
      printf("sge_dstring_remaining(NULL) = %d\n", left);

      left = sge_dstring_remaining(sb);
      printf("sge_dstring_remaining(sb) = %d\n", left);
   }

   return true;
}
int main(int argc, char **argv) { int ret = 0; lList *pcmdline = NULL; lList *answer_list = NULL; sge_gdi_ctx_class_t *ctx = NULL; qrstat_env_t qrstat_env; DENTER_MAIN(TOP_LAYER, "qrsub"); /* Set up the program information name */ sge_setup_sig_handlers(QRSTAT); log_state_set_log_gui(1); if (sge_gdi2_setup(&ctx, QRSTAT, MAIN_THREAD, &answer_list) != AE_OK) { answer_list_output(&answer_list); goto error_exit; } qrstat_filter_init(&qrstat_env); qrstat_filter_set_ctx(&qrstat_env, ctx); /* * stage 1: commandline parsing */ { dstring file = DSTRING_INIT; const char *user = ctx->get_username(ctx); const char *cell_root = ctx->get_cell_root(ctx); /* arguments from SGE_ROOT/common/sge_qrstat file */ get_root_file_path(&file, cell_root, SGE_COMMON_DEF_QRSTAT_FILE); if (sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list) == true) { /* arguments from $HOME/.sge_qrstat file */ if (get_user_home_file_path(&file, SGE_HOME_DEF_QRSTAT_FILE, user, &answer_list)) { sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list); } } sge_dstring_free(&file); if (answer_list) { answer_list_output(&answer_list); lFreeList(&pcmdline); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); } } answer_list = cull_parse_cmdline(QRSTAT, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS); if (answer_list != NULL) { answer_list_output(&answer_list); lFreeList(&pcmdline); goto error_exit; } /* * stage 2: evalutate switches and modify qrstat_env */ if (!sge_parse_qrstat(ctx, &answer_list, &qrstat_env, &pcmdline)) { answer_list_output(&answer_list); lFreeList(&pcmdline); goto error_exit; } /* * stage 3: fetch data from master */ { answer_list = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_GET, &qrstat_env.ar_list, qrstat_env.where_AR_Type, qrstat_env.what_AR_Type, false); if (answer_list_has_error(&answer_list)) { answer_list_output(&answer_list); goto error_exit; } } /* * stage 4: create output in correct format */ { qrstat_report_handler_t *handler = NULL; if 
(qrstat_env.is_xml) { handler = qrstat_create_report_handler_xml(&qrstat_env, &answer_list); } else { handler = qrstat_create_report_handler_stdout(&qrstat_env, &answer_list); } if (!qrstat_print(&answer_list, handler, &qrstat_env)) { ret = 1; } if (qrstat_env.is_xml) { qrstat_destroy_report_handler_xml(&handler, &answer_list); } else { qrstat_destroy_report_handler_stdout(&handler, &answer_list); } } sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); DRETURN(ret); error_exit: sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); DRETURN(1); }
/****** qmaster/threads/sge_scheduler_main() ********************************** * NAME * sge_scheduler_main() -- main function of the scheduler thread * * SYNOPSIS * void * sge_scheduler_main(void *arg) * * FUNCTION * Main function of the scheduler thread, * * INPUTS * void *arg - pointer to the thread function (type cl_thread_settings_t*) * * RESULT * void * - always NULL * * NOTES * MT-NOTE: sge_scheduler_main() is MT safe * * MT-NOTE: this is a thread function. Do NOT use this function * MT-NOTE: in any other way! * * SEE ALSO * qmaster/threads/sge_scheduler_initialize() * qmaster/threads/sge_scheduler_cleanup_thread() * qmaster/threads/sge_scheduler_terminate() * qmaster/threads/sge_scheduler_main() *******************************************************************************/ void * sge_scheduler_main(void *arg) { time_t next_prof_output = 0; monitoring_t monitor; sge_gdi_ctx_class_t *ctx = NULL; sge_evc_class_t *evc = NULL; lList *alp = NULL; sge_where_what_t where_what; cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg; bool do_shutdown = false; bool do_endlessly = true; bool local_ret = true; DENTER(TOP_LAYER, "sge_scheduler_main"); memset(&where_what, 0, sizeof(where_what)); /* * startup */ if (local_ret) { /* initialize commlib thread */ cl_thread_func_startup(thread_config); /* initialize monitoring */ sge_monitor_init(&monitor, thread_config->thread_name, SCH_EXT, SCT_WARNING, SCT_ERROR); sge_qmaster_thread_init(&ctx, SCHEDD, SCHEDD_THREAD, true); /* register at profiling module */ set_thread_name(pthread_self(), "Scheduler Thread"); conf_update_thread_profiling("Scheduler Thread"); DPRINTF((SFN" started\n", thread_config->thread_name)); /* initialize schedd_runnlog logging */ schedd_set_schedd_log_file(ctx); } /* set profiling parameters */ prof_set_level_name(SGE_PROF_EVENTMASTER, NULL, NULL); prof_set_level_name(SGE_PROF_SPOOLING, NULL, NULL); prof_set_level_name(SGE_PROF_CUSTOM0, "scheduler", NULL); 
prof_set_level_name(SGE_PROF_CUSTOM1, "pending ticket calculation", NULL); prof_set_level_name(SGE_PROF_CUSTOM3, "job sorting", NULL); prof_set_level_name(SGE_PROF_CUSTOM4, "job dispatching", NULL); prof_set_level_name(SGE_PROF_CUSTOM5, "send orders", NULL); prof_set_level_name(SGE_PROF_CUSTOM6, "scheduler event loop", NULL); prof_set_level_name(SGE_PROF_CUSTOM7, "copy lists", NULL); prof_set_level_name(SGE_PROF_SCHEDLIB4, NULL, NULL); /* set-up needed for 'schedule' file */ serf_init(schedd_serf_record_func, schedd_serf_newline); schedd_set_serf_log_file(ctx); /* * prepare event client/mirror mechanism */ if (local_ret) { local_ret = sge_gdi2_evc_setup(&evc, ctx, EV_ID_SCHEDD, &alp, "scheduler"); DPRINTF(("prepared event client/mirror mechanism\n")); } /* * register as event mirror */ if (local_ret) { sge_mirror_initialize(evc, EV_ID_SCHEDD, "scheduler", false, &event_update_func, &sge_mod_event_client, &sge_add_event_client, &sge_remove_event_client, &sge_handle_event_ack); evc->ec_register(evc, false, NULL, &monitor); evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_RELEASED); DPRINTF(("registered at event mirror\n")); } /* * subscribe necessary data */ if (local_ret) { ensure_valid_what_and_where(&where_what); subscribe_scheduler(evc, &where_what); DPRINTF(("subscribed necessary data from event master\n")); } /* * schedulers main loop */ if (local_ret) { while (do_endlessly) { bool handled_events = false; lList *event_list = NULL; int execute = 0; double prof_copy = 0.0; double prof_total = 0.0; double prof_init = 0.0; double prof_free = 0.0; double prof_run = 0.0; lList *orders = NULL; if (sconf_get_profiling()) { prof_start(SGE_PROF_OTHER, NULL); prof_start(SGE_PROF_PACKING, NULL); prof_start(SGE_PROF_EVENTCLIENT, NULL); prof_start(SGE_PROF_MIRROR, NULL); prof_start(SGE_PROF_GDI, NULL); prof_start(SGE_PROF_HT_RESIZE, NULL); prof_start(SGE_PROF_CUSTOM0, NULL); prof_start(SGE_PROF_CUSTOM1, NULL); prof_start(SGE_PROF_CUSTOM3, NULL); prof_start(SGE_PROF_CUSTOM4, 
NULL); prof_start(SGE_PROF_CUSTOM5, NULL); prof_start(SGE_PROF_CUSTOM6, NULL); prof_start(SGE_PROF_CUSTOM7, NULL); prof_start(SGE_PROF_SCHEDLIB4, NULL); } else { prof_stop(SGE_PROF_OTHER, NULL); prof_stop(SGE_PROF_PACKING, NULL); prof_stop(SGE_PROF_EVENTCLIENT, NULL); prof_stop(SGE_PROF_MIRROR, NULL); prof_stop(SGE_PROF_GDI, NULL); prof_stop(SGE_PROF_HT_RESIZE, NULL); prof_stop(SGE_PROF_CUSTOM0, NULL); prof_stop(SGE_PROF_CUSTOM1, NULL); prof_stop(SGE_PROF_CUSTOM3, NULL); prof_stop(SGE_PROF_CUSTOM4, NULL); prof_stop(SGE_PROF_CUSTOM5, NULL); prof_stop(SGE_PROF_CUSTOM6, NULL); prof_stop(SGE_PROF_CUSTOM7, NULL); prof_stop(SGE_PROF_SCHEDLIB4, NULL); } /* * Wait for new events */ MONITOR_IDLE_TIME(sge_scheduler_wait_for_event(evc, &event_list), (&monitor), mconf_get_monitor_time(), mconf_is_monitor_message()); /* If we lost connection we have to register again */ if (evc->ec_need_new_registration(evc)) { lFreeList(&event_list); if (evc->ec_register(evc, false, NULL, &monitor) == true) { DPRINTF(("re-registered at event master!\n")); } } if (event_list != NULL) { /* check for shutdown */ do_shutdown = (lGetElemUlong(event_list, ET_type, sgeE_SHUTDOWN) != NULL) ? 
true : false; /* update mirror and free data */ if (do_shutdown == false && sge_mirror_process_event_list(evc, event_list) == SGE_EM_OK) { handled_events = true; DPRINTF(("events handled\n")); } else { DPRINTF(("events contain shutdown event - ignoring events\n")); } lFreeList(&event_list); } /* if we actually got events, start the scheduling run and further event processing */ if (handled_events == true) { lList *answer_list = NULL; scheduler_all_data_t copy; lList *master_cqueue_list = *(object_type_get_master_list(SGE_TYPE_CQUEUE)); lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB); lList *master_userset_list = *object_type_get_master_list(SGE_TYPE_USERSET); lList *master_project_list = *object_type_get_master_list(SGE_TYPE_PROJECT); lList *master_exechost_list= *object_type_get_master_list(SGE_TYPE_EXECHOST); lList *master_rqs_list= *object_type_get_master_list(SGE_TYPE_RQS); lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY); lList *master_ckpt_list = *object_type_get_master_list(SGE_TYPE_CKPT); lList *master_user_list = *object_type_get_master_list(SGE_TYPE_USER); lList *master_ar_list = *object_type_get_master_list(SGE_TYPE_AR); lList *master_pe_list = *object_type_get_master_list(SGE_TYPE_PE); lList *master_hgrp_list = *object_type_get_master_list(SGE_TYPE_HGROUP); lList *master_sharetree_list = *object_type_get_master_list(SGE_TYPE_SHARETREE); /* delay scheduling for test purposes, see issue GE-3306 */ if (SGE_TEST_DELAY_SCHEDULING > 0) { sleep(SGE_TEST_DELAY_SCHEDULING); } PROF_START_MEASUREMENT(SGE_PROF_CUSTOM6); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); if (__CONDITION(INFOPRINT)) { dstring ds; char buffer[128]; sge_dstring_init(&ds, buffer, sizeof(buffer)); DPRINTF(("================[SCHEDULING-EPOCH %s]==================\n", sge_at_time(0, &ds))); sge_dstring_free(&ds); } /* * If there were new events then * copy/filter data necessary for the scheduler run * and run the scheduler method */ memset(©, 0, 
sizeof(copy)); copy.dept_list = lSelect("", master_userset_list, where_what.where_dept, where_what.what_acldept); copy.acl_list = lSelect("", master_userset_list, where_what.where_acl, where_what.what_acldept); DPRINTF(("RAW CQ:%d, J:%d, H:%d, C:%d, A:%d, D:%d, P:%d, CKPT:%d," " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d\n", lGetNumberOfElem(master_cqueue_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(master_centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(master_project_list), lGetNumberOfElem(master_ckpt_list), lGetNumberOfElem(master_user_list), lGetNumberOfElem(master_project_list), lGetNumberOfElem(master_rqs_list), lGetNumberOfElem(master_ar_list), lGetNumberOfNodes(NULL, master_sharetree_list, STN_children), lGetNumberOfLeafs(NULL, master_sharetree_list, STN_children) )); sge_rebuild_job_category(master_job_list, master_userset_list, master_project_list, master_rqs_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_init = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); sge_before_dispatch(evc); /* prepare data for the scheduler itself */ copy.host_list = lCopyList("", master_exechost_list); /* * Within the scheduler we do only need QIs */ { lListElem *cqueue = NULL; lEnumeration *what_queue3 = NULL; for_each(cqueue, master_cqueue_list) { lList *qinstance_list = lGetList(cqueue, CQ_qinstances); lList *t; if (!qinstance_list) { continue; } /* all_queue_list contains all queue instances with state and full queue name only */ if (!what_queue3) { what_queue3 = lWhat("%T(%I%I)", lGetListDescr(qinstance_list), QU_full_name, QU_state); } t = lSelect("t", qinstance_list, NULL, what_queue3); if (t) { if (copy.all_queue_list == NULL) { copy.all_queue_list = lCreateList("all", lGetListDescr(t)); } lAppendList(copy.all_queue_list, t); lFreeList (&t); } t = lSelect("t", qinstance_list, where_what.where_queue, 
where_what.what_queue2); if (t) { if (copy.queue_list == NULL) { copy.queue_list = lCreateList("enabled", lGetListDescr(t)); } lAppendList(copy.queue_list, t); lFreeList (&t); } t = lSelect("t", qinstance_list, where_what.where_queue2, where_what.what_queue2); if (t) { if (copy.dis_queue_list == NULL) { copy.dis_queue_list = lCreateList("disabled", lGetListDescr(t)); } lAppendList(copy.dis_queue_list, t); lFreeList (&t); } } if (what_queue3) { lFreeWhat(&what_queue3); } } if (sconf_is_job_category_filtering()) { copy.job_list = sge_category_job_copy(copy.queue_list, &orders, evc->monitor_next_run); } else { copy.job_list = lCopyList("", master_job_list); } /* no need to copy these lists, they are read only used */ copy.centry_list = master_centry_list; copy.ckpt_list = master_ckpt_list; copy.hgrp_list = master_hgrp_list; /* these lists need to be copied because they are modified during scheduling run */ copy.share_tree = lCopyList("", master_sharetree_list); copy.pe_list = lCopyList("", master_pe_list); copy.user_list = lCopyList("", master_user_list); copy.project_list = lCopyList("", master_project_list); copy.rqs_list = lCopyList("", master_rqs_list); copy.ar_list = lCopyList("", master_ar_list); /* report number of reduced and raw (in brackets) lists */ DPRINTF(("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, P:%d, CKPT:%d," " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n", lGetNumberOfElem(copy.queue_list), lGetNumberOfElem(copy.all_queue_list), lGetNumberOfElem(copy.job_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(copy.host_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(copy.centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(copy.pe_list), lGetNumberOfElem(copy.ckpt_list), lGetNumberOfElem(copy.user_list), lGetNumberOfElem(copy.project_list), lGetNumberOfElem(copy.rqs_list), lGetNumberOfElem(copy.ar_list), lGetNumberOfNodes(NULL, copy.share_tree, STN_children), 
lGetNumberOfLeafs(NULL, copy.share_tree, STN_children) )); if (getenv("SGE_ND")) { printf("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, " "P:%d, CKPT:%d, US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n", lGetNumberOfElem(copy.queue_list), lGetNumberOfElem(copy.all_queue_list), lGetNumberOfElem(copy.job_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(copy.host_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(copy.centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(copy.pe_list), lGetNumberOfElem(copy.ckpt_list), lGetNumberOfElem(copy.user_list), lGetNumberOfElem(copy.project_list), lGetNumberOfElem(copy.rqs_list), lGetNumberOfElem(copy.ar_list), lGetNumberOfNodes(NULL, copy.share_tree, STN_children), lGetNumberOfLeafs(NULL, copy.share_tree, STN_children) ); } else { schedd_log("-------------START-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run); } PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); scheduler_method(evc, &answer_list, ©, &orders); answer_list_output(&answer_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_run = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); /* .. 
which gets deleted after using */ lFreeList(&(copy.host_list)); lFreeList(&(copy.queue_list)); lFreeList(&(copy.dis_queue_list)); lFreeList(&(copy.all_queue_list)); lFreeList(&(copy.job_list)); lFreeList(&(copy.acl_list)); lFreeList(&(copy.dept_list)); lFreeList(&(copy.pe_list)); lFreeList(&(copy.share_tree)); lFreeList(&(copy.user_list)); lFreeList(&(copy.project_list)); lFreeList(&(copy.rqs_list)); lFreeList(&(copy.ar_list)); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_free = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); /* * need to sync with event master thread * if schedd configuration changed then settings in evm can be adjusted */ if (sconf_is_new_config()) { /* set scheduler interval / event delivery interval */ u_long32 interval = sconf_get_schedule_interval(); if (evc->ec_get_edtime(evc) != interval) { evc->ec_set_edtime(evc, interval); } /* set job / ja_task event flushing */ set_job_flushing(evc); /* no need to ec_commit here - we do it when resetting the busy state */ /* now we handled the new schedd config - no need to do it twice */ sconf_reset_new_config(); } /* block till master handled all GDI orders */ sge_schedd_block_until_orders_processed(evc->get_gdi_ctx(evc), NULL); schedd_order_destroy(); /* * Stop profiling for "schedd run total" and the subcategories */ PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM6); prof_total = prof_get_measurement_wallclock(SGE_PROF_CUSTOM6, true, NULL); if (prof_is_active(SGE_PROF_CUSTOM6)) { PROFILING((SGE_EVENT, "PROF: schedd run took: %.3f s (init: %.3f s, copy: %.3f s, " "run:%.3f, free: %.3f s, jobs: %d, categories: %d/%d)", prof_total, prof_init, prof_copy, prof_run, prof_free, lGetNumberOfElem(*object_type_get_master_list(SGE_TYPE_JOB)), sge_category_count(), sge_cs_category_count() )); } if (getenv("SGE_ND") != NULL) { printf("--------------STOP-SCHEDULER-RUN-------------\n"); } else { schedd_log("--------------STOP-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run); } 
thread_output_profiling("scheduler thread profiling summary:\n", &next_prof_output); sge_monitor_output(&monitor); } /* reset the busy state */ evc->ec_set_busy(evc, 0); evc->ec_commit(evc, NULL); /* stop logging into schedd_runlog (enabled via -tsm) */ evc->monitor_next_run = false; /* * pthread cancelation point * * sge_scheduler_cleanup_thread() is the last function which should * be called so it is pushed first */ pthread_cleanup_push(sge_scheduler_cleanup_thread, (void *) &ctx); pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_monitor, (void *)&monitor); pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_event_client, (void *)evc); cl_thread_func_testcancel(thread_config); pthread_cleanup_pop(execute); pthread_cleanup_pop(execute); pthread_cleanup_pop(execute); DPRINTF(("passed cancelation point\n")); }
/****** qmaster/threads/sge_scheduler_initialize() ***************************
*  NAME
*     sge_scheduler_initialize() -- setup and start the scheduler thread
*
*  SYNOPSIS
*     void sge_scheduler_initialize(sge_gdi_ctx_class_t *ctx,
*                                   lList **answer_list)
*
*  FUNCTION
*     Starts the scheduler thread unless 'Master_Scheduler' says that it is
*     already running.
*
*     As long as 'Master_Scheduler.use_bootstrap' is set (first call during
*     qmaster start) the scheduler thread count reported by 'ctx' decides
*     whether the thread gets started; the flag is cleared afterwards.
*     Every later call (triggered from GDI) starts the thread
*     unconditionally if it is not running.
*
*     The created thread executes sge_scheduler_main().
*
*     If the environment variable SGE_TEST_DELAY_SCHEDULING is set, its
*     numeric value is stored in the variable of the same name.
*
*     'Master_Scheduler' is accessed while holding its mutex.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - context object
*     lList **answer_list      - receives the info/error message that
*                                describes what happened
*
*  RESULT
*     void - None
*
*  NOTES
*     MT-NOTE: sge_scheduler_initialize() is MT safe
*
*  SEE ALSO
*     qmaster/threads/sge_scheduler_initialize()
*     qmaster/threads/sge_scheduler_cleanup_thread()
*     qmaster/threads/sge_scheduler_terminate()
*     qmaster/threads/sge_scheduler_main()
*******************************************************************************/
void
sge_scheduler_initialize(sge_gdi_ctx_class_t *ctx, lList **answer_list)
{
   char *delay_env;

   DENTER(TOP_LAYER, "sge_scheduler_initialize");

   /* initialize debugging instrumentation */
   delay_env = getenv("SGE_TEST_DELAY_SCHEDULING");
   if (delay_env != NULL) {
      SGE_TEST_DELAY_SCHEDULING = atoi(delay_env);
   }

   sge_mutex_lock("master scheduler struct", SGE_FUNC, __LINE__, &(Master_Scheduler.mutex));

   if (Master_Scheduler.is_running != false) {
      /* nothing to do - report that the thread is already there */
      ERROR((SGE_EVENT, MSG_THREAD_XISRUNNING_S, threadnames[SCHEDD_THREAD]));
      answer_list_add(answer_list, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
   } else {
      bool launch = true;

      /*
       * The very first call honours the setting of the bootstrap file.
       * All later calls are manual requests through GDI, they always
       * start the thread.
       */
      if (Master_Scheduler.use_bootstrap == true) {
         launch = (ctx->get_scheduler_thread_count(ctx) > 0);
         Master_Scheduler.use_bootstrap = false;
      }

      if (!launch) {
         INFO((SGE_EVENT, MSG_THREAD_XSTARTDISABLED_S, threadnames[SCHEDD_THREAD]));
         answer_list_add(answer_list, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_INFO);
      } else {
         cl_thread_settings_t *new_thread = NULL;
         dstring name_buffer = DSTRING_INIT;

         /* initialize the thread pool */
         cl_thread_list_setup(&(Main_Control.scheduler_thread_pool), "thread pool");

         /* each scheduler instance gets a unique name: <basename><3 digit id> */
         sge_dstring_sprintf(&name_buffer, "%s%03d", threadnames[SCHEDD_THREAD],
                             Master_Scheduler.thread_id);

         /*
          * start the scheduler
          * NOTE(review): the result of the thread creation is not evaluated,
          * the scheduler is flagged as running in any case - confirm
          */
         cl_thread_list_create_thread(Main_Control.scheduler_thread_pool,
                                      &new_thread,
                                      cl_com_get_log_list(),
                                      sge_dstring_get_string(&name_buffer),
                                      Master_Scheduler.thread_id,
                                      sge_scheduler_main, NULL, NULL,
                                      CL_TT_SCHEDULER);
         sge_dstring_free(&name_buffer);

         /*
          * The next instance has to get a different name, therefore the id
          * is increased. Also remember that the scheduler is running now.
          */
         Master_Scheduler.thread_id++;
         Master_Scheduler.is_running = true;

         INFO((SGE_EVENT, MSG_THREAD_XHASSTARTED_S, threadnames[SCHEDD_THREAD]));
         answer_list_add(answer_list, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_INFO);
      }
   }

   sge_mutex_unlock("master scheduler struct", SGE_FUNC, __LINE__, &(Master_Scheduler.mutex));

   DRETURN_VOID;
}
int main(int argc, char **argv) { lList *opts_cmdline = NULL; lList *opts_defaults = NULL; lList *opts_scriptfile = NULL; lList *opts_all = NULL; lListElem *job = NULL; lList *alp = NULL; lListElem *ep; int exit_status = 0; int just_verify; int tmp_ret; int wait_for_job = 0, is_immediate = 0; dstring session_key_out = DSTRING_INIT; dstring diag = DSTRING_INIT; dstring jobid = DSTRING_INIT; u_long32 start, end, step; u_long32 num_tasks; int count, stat; char *jobid_string = NULL; bool has_terse; drmaa_attr_values_t *jobids = NULL; u_long32 prog_number = 0; u_long32 myuid = 0; const char *sge_root = NULL; const char *cell_root = NULL; const char *username = NULL; const char *qualified_hostname = NULL; const char *unqualified_hostname = NULL; const char *mastername = NULL; DENTER_MAIN(TOP_LAYER, "qsub"); prof_mt_init(); /* Set up the program information name */ sge_setup_sig_handlers(QSUB); DPRINTF(("Initializing JAPI\n")); if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); DEXIT; SGE_EXIT((void**)&ctx, 1); } prog_number = ctx->get_who(ctx); myuid = ctx->get_uid(ctx); sge_root = ctx->get_sge_root(ctx); cell_root = ctx->get_cell_root(ctx); username = ctx->get_username(ctx); qualified_hostname = ctx->get_qualified_hostname(ctx); unqualified_hostname = ctx->get_unqualified_hostname(ctx); mastername = ctx->get_master(ctx, false); /* * read switches from the various defaults files */ opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ); tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING); if (tmp_ret > 0) { DEXIT; SGE_EXIT((void**)&ctx, tmp_ret); } /* * append the commandline switches to the list */ opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp, argv + 1, environ); tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", 
MSG_QSUB_WARNING_S); if (tmp_ret > 0) { DEXIT; SGE_EXIT((void**)&ctx, tmp_ret); } /* * show usage if -help was in commandline */ if (opt_list_has_X(opts_cmdline, "-help")) { sge_usage(QSUB, stdout); DEXIT; SGE_EXIT((void**)&ctx, 0); } /* * We will only read commandline options from scripfile if the script * itself should not be handled as binary */ if (opt_list_is_X_true(opts_cmdline, "-b") || (!opt_list_has_X(opts_cmdline, "-b") && opt_list_is_X_true(opts_defaults, "-b"))) { DPRINTF(("Skipping options from script due to -b option\n")); } else { opt_list_append_opts_from_script(prog_number, &opts_scriptfile, &alp, opts_cmdline, environ); tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S, MSG_WARNING); if (tmp_ret > 0) { DEXIT; SGE_EXIT((void**)&ctx, tmp_ret); } } /* * Merge all commandline options and interprete them */ opt_list_merge_command_lines(&opts_all, &opts_defaults, &opts_scriptfile, &opts_cmdline); /* * Check if -terse is requested */ has_terse = opt_list_has_X(opts_all, "-terse"); /* If "-sync y" is set, wait for the job to end. */ /* Remove all -sync switches since cull_parse_job_parameter() * doesn't know what to do with them. */ while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) { if (lGetInt(ep, SPA_argval_lIntT) == TRUE) { wait_for_job = 1; } lRemoveElem(opts_all, &ep); } if (wait_for_job) { DPRINTF(("Wait for job end\n")); } alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, qualified_hostname, opts_all, &job); tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING); if (tmp_ret > 0) { DEXIT; SGE_EXIT((void**)&ctx, tmp_ret); } if (set_sec_cred(sge_root, mastername, job, &alp) != 0) { answer_list_output(&alp); DEXIT; SGE_EXIT((void**)&ctx, 1); } /* Check if job is immediate */ is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type)); DPRINTF(("Job is%s immediate\n", is_immediate ? 
"" : " not")); DPRINTF(("Everything ok\n")); if (lGetUlong(job, JB_verify)) { cull_show_job(job, 0, false); DEXIT; SGE_EXIT((void**)&ctx, 0); } if (is_immediate || wait_for_job) { pthread_t sigt; qsub_setup_sig_handlers(); if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, " error preparing signal handling thread"); fprintf(stderr, "\n"); exit_status = 1; goto Error; } if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { const char *msg = sge_dstring_get_string(&diag); fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, msg?msg:" error starting event client thread"); fprintf(stderr, "\n"); exit_status = 1; goto Error; } } job_get_submit_task_ids(job, &start, &end, &step); num_tasks = (end - start) / step + 1; if (num_tasks > 1) { int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag); if (error != DRMAA_ERRNO_SUCCESS) { /* No active session here means that japi_enable_job_wait() was * interrupted by the signal handler, in which case we just break out * quietly. */ if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) { fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); } /* BUGFIX: Issuezilla #1013 * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code * back into a GDI error code. This is the easy solution. The * correct solution would be to address issue #859, presumably by * having JAPI reuse the GDI error codes instead of the JAPI error * codes. 
*/ if (error == DRMAA_ERRNO_TRY_LATER) { exit_status = STATUS_NOTOK_DOAGAIN; } else { exit_status = 1; } goto Error; } DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid)); jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step); } else if (num_tasks == 1) { int error = japi_run_job(&jobid, &job, &diag); if (error != DRMAA_ERRNO_SUCCESS) { if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) { fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); } /* BUGFIX: Issuezilla #1013 * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code * back into a GDI error code. This is the easy solution. The * correct solution would be to address issue #859, presumably by * having JAPI reuse the GDI error codes instead of the DRMAA error * codes. */ if (error == DRMAA_ERRNO_TRY_LATER) { exit_status = STATUS_NOTOK_DOAGAIN; } else { exit_status = 1; } goto Error; } jobid_string = strdup(sge_dstring_get_string(&jobid)); DPRINTF(("job id is: %s\n", jobid_string)); sge_dstring_free(&jobid); } else { fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure"); fprintf(stderr, "\n"); exit_status = 1; goto Error; } /* only success message is printed to stdout */ just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY || lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY); DPRINTF(("Just verifying job\n")); if (!just_verify) { const char *output = sge_dstring_get_string(&diag); /* print the tersed output */ if (has_terse) { printf("%s", jobid_string); } else if (output != NULL) { printf("%s", output); } else { printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name)); } printf("\n"); } else { printf(MSG_JOB_VERIFYFOUNDQ); printf("\n"); } if ((wait_for_job || is_immediate) && !just_verify) { int event; if (is_immediate) { fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED); /* We only need to wait for the first task to be scheduled to be able * to 
say that the job is running. */ tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event, NULL, &diag); if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S, jobid_string); fprintf(stderr, "\n"); } /* A job finish event here means that the job was rejected. */ else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_FINISH)) { fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER); exit_status = 1; goto Error; } else { /* Since we told japi_wait to wait forever, we know that if it gets * a timeout, it's because it's been interrupted to exit, in which * case we don't complain. Same for no active session. */ if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) && (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); } exit_status = 1; goto Error; } } if (wait_for_job) { /* Rather than using japi_synchronize on ALL for bulk jobs, we use * japi_wait on ANY num_tasks times because with synchronize, we would * have to wait for all the tasks to finish before we know if any * finished. */ for (count = 0; count < num_tasks; count++) { /* Since there's only one running job in the session, we can just * wait for ANY. */ if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event, NULL, &diag)) != DRMAA_ERRNO_SUCCESS) { if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) && (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); } exit_status = 1; goto Error; } /* report how job finished */ /* If the job is an array job, use the first non-zero exit code as * the exit code for qsub. 
*/ if (exit_status == 0) { exit_status = report_exit_status(stat, sge_dstring_get_string(&jobid)); } /* If we've already found a non-zero exit code, just print the exit * info for the task. */ else { report_exit_status(stat, sge_dstring_get_string(&jobid)); } } } } Error: FREE(jobid_string); lFreeList(&alp); lFreeList(&opts_all); if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) { if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) { fprintf(stderr, "\n"); fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag)); fprintf(stderr, "\n"); } else { struct timespec ts; /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's * because the signal handler thread called japi_exit(). We know this * because if the call to japi_init() fails, we just exit directly. * If the call to japi_init() succeeds, then we have an active session, * so coming here because of an error would not result in the * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */ DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n")); sge_relative_timespec(15, &ts); sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex); while (!exited) { if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) { DPRINTF(("Exit has not finished after 15 seconds. Exiting.\n")); break; } } sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex); } } sge_prof_cleanup(); /* This is an exit() instead of an SGE_EXIT() because when the qmaster is * supended, SGE_EXIT() hangs. */ exit(exit_status); DEXIT; return exit_status; }
int main(int argc, char **argv) { lList *pcmdline = NULL; lList *alp = NULL; sge_gdi_ctx_class_t *ctx = NULL; lList *ar_lp = NULL; lListElem *ar = NULL; DENTER_MAIN(TOP_LAYER, "qrsub"); /* Set up the program information name */ sge_setup_sig_handlers(QRSUB); log_state_set_log_gui(1); if (sge_gdi2_setup(&ctx, QRSUB, MAIN_THREAD, &alp) != AE_OK) { answer_list_output(&alp); goto error_exit; } /* ** stage 1 of commandline parsing */ { dstring file = DSTRING_INIT; const char *user = ctx->get_username(ctx); const char *cell_root = ctx->get_cell_root(ctx); /* arguments from SGE_ROOT/common/sge_ar_request file */ get_root_file_path(&file, cell_root, SGE_COMMON_DEF_AR_REQ_FILE); if ((alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE)) == NULL) { /* arguments from $HOME/.sge_ar_request file */ if (get_user_home_file_path(&file, SGE_HOME_DEF_AR_REQ_FILE, user, &alp)) { lFreeList(&alp); alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE); } } sge_dstring_free(&file); if (alp) { answer_list_output(&alp); lFreeList(&pcmdline); goto error_exit; } } alp = cull_parse_cmdline(QRSUB, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS); if (answer_list_print_err_warn(&alp, NULL, "qrsub: ", MSG_WARNING) > 0) { lFreeList(&pcmdline); goto error_exit; } if (!pcmdline) { /* no command line option is present: print help to stderr */ sge_usage(QRSUB, stderr); fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT); goto error_exit; } /* ** stage 2 of command line parsing */ ar = lCreateElem(AR_Type); if (!sge_parse_qrsub(ctx, pcmdline, &alp, &ar)) { answer_list_output(&alp); lFreeList(&pcmdline); goto error_exit; } ar_lp = lCreateList(NULL, AR_Type); lAppendElem(ar_lp, ar); alp = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION, &ar_lp, NULL, NULL); lFreeList(&ar_lp); answer_list_on_error_print_or_exit(&alp, stdout); if 
(answer_list_has_error(&alp)) { sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); if (answer_list_has_status(&alp, STATUS_NOTOK_DOAGAIN)) { DRETURN(25); } else { DRETURN(1); } } sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); DRETURN(0); error_exit: sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); DRETURN(1); }
int main(int argc, char *argv[]) { lListElem *queue, *copy; const lDescr *descr; spooling_field *fields; dstring queue_str = DSTRING_INIT; dstring copy_str = DSTRING_INIT; lList *answer_list; int i; lInit(nmv); descr = QU_Type; copy = lCreateElem(descr); /* lWriteElemTo(queue, stdout); */ for(i = 0; mt_get_type(descr[i].mt) != lEndT; i++) { int nm; const char *name; const char *value, *reread_value; nm = descr[i].nm; name = lNm2Str(nm); value = object_append_field_to_dstring(queue, &answer_list, &queue_str, nm, '\0'); reread_value = NULL; if(value != NULL) { if(!object_parse_field_from_string(copy, &answer_list, nm, value)) { fprintf(stderr, "setting value for field %s failed\n", name); } else { reread_value = object_append_field_to_dstring(copy, &answer_list, ©_str, nm, '\0'); } } #if 1 printf("%s\t%s\t%s\n", name, value == NULL ? "<null>" : value, reread_value == NULL ? "<null>" : reread_value); #endif if(sge_strnullcmp(value, reread_value) != 0) { fprintf(stderr, "regression test for object_[gs]et_field_contents failed for attribute "SFQ": "SFQ" != "SFQ"\n", name, value != NULL ? value : "<null>", reread_value != NULL ? reread_value : "<null>"); } } fields = spool_get_fields_to_spool(&answer_list, QU_Type, &spool_config_instr); printf("\nthe following fields will be spooled:"); for(i = 0; fields[i].nm != NoName; i++) { printf(" %s", lNm2Str(fields[i].nm)); } printf("\n"); fields = spool_free_spooling_fields(fields); /* cleanup */ lFreeElem(&queue); lFreeElem(©); sge_dstring_free(&queue_str); sge_dstring_free(©_str); return EXIT_SUCCESS; }
/****** tty_to_commlib() ******************************************************* * NAME * tty_to_commlib() -- tty_to_commlib thread entry point and main loop * * SYNOPSIS * void* tty_to_commlib(void *t_conf) * * FUNCTION * Entry point and main loop of the tty_to_commlib thread. * Reads data from the tty and writes it to the commlib. * * INPUTS * void *t_conf - pointer to cl_thread_settings_t struct of the thread * * RESULT * void* - always NULL * * NOTES * MT-NOTE: tty_to_commlib is MT-safe ? * * SEE ALSO *******************************************************************************/ void* tty_to_commlib(void *t_conf) { char *pbuf; fd_set read_fds; struct timeval timeout; dstring err_msg = DSTRING_INIT; dstring dbuf = DSTRING_INIT; int do_exit = 0; int ret, nread = 0; DENTER(TOP_LAYER, "tty_to_commlib"); thread_func_startup(t_conf); /* * allocate working buffer */ pbuf = (char*)malloc(BUFSIZE); if (pbuf == NULL) { DPRINTF(("tty_to_commlib can't allocate working buffer: %s (%d)\n", strerror(errno), errno)); do_exit = 1; } while (do_exit == 0) { FD_ZERO(&read_fds); if (g_nostdin == 0) { /* wait for input on tty */ FD_SET(STDIN_FILENO, &read_fds); } timeout.tv_sec = 1; timeout.tv_usec = 0; if (received_signal == SIGCONT) { received_signal = 0; if (continue_handler (g_comm_handle, g_hostname) == 1) { do_exit = 1; continue; } if (g_raw_mode_state == 1) { /* restore raw-mode after SIGCONT */ if (terminal_enter_raw_mode () != 0) { DPRINTF(("tty_to_commlib: couldn't enter raw mode for pty\n")); do_exit = 1; continue; } } } DPRINTF(("tty_to_commlib: Waiting in select() for data\n")); ret = select(STDIN_FILENO+1, &read_fds, NULL, NULL, &timeout); thread_testcancel(t_conf); client_check_window_change(g_comm_handle); if (received_signal == SIGHUP || received_signal == SIGINT || received_signal == SIGQUIT || received_signal == SIGTERM) { /* If we receive one of these signals, we must terminate */ do_exit = 1; continue; } if (ret > 0) { if (g_nostdin == 1) { /* We should never 
get here if STDIN is closed */ DPRINTF(("tty_to_commlib: STDIN ready to read while it should be closed!!!\n")); } DPRINTF(("tty_to_commlib: trying to read() from stdin\n")); nread = read(STDIN_FILENO, pbuf, BUFSIZE-1); pbuf[nread] = '\0'; sge_dstring_append (&dbuf, pbuf); DPRINTF(("tty_to_commlib: nread = %d\n", nread)); if (nread < 0 && (errno == EINTR || errno == EAGAIN)) { DPRINTF(("tty_to_commlib: EINTR or EAGAIN\n")); /* do nothing */ } else if (nread <= 0) { do_exit = 1; } else { DPRINTF(("tty_to_commlib: writing to commlib: %d bytes\n", nread)); if (suspend_handler(g_comm_handle, g_hostname, g_is_rsh, g_suspend_remote, g_pid, &dbuf) == 1) { if (comm_write_message(g_comm_handle, g_hostname, COMM_CLIENT, 1, (unsigned char*)pbuf, (unsigned long)nread, STDIN_DATA_MSG, &err_msg) != nread) { DPRINTF(("tty_to_commlib: couldn't write all data\n")); } else { DPRINTF(("tty_to_commlib: data successfully written\n")); } } comm_flush_write_messages(g_comm_handle, &err_msg); } } else { /* * We got either a select timeout or a select error. In both cases, * it's a good chance to check if our client is still alive. */ DPRINTF(("tty_to_commlib: Checking if client is still alive\n")); if (comm_get_connection_count(g_comm_handle, &err_msg) == 0) { DPRINTF(("tty_to_commlib: Client is not alive! -> exiting.\n")); do_exit = 1; } else { DPRINTF(("tty_to_commlib: Client is still alive\n")); } } } /* while (do_exit == 0) */ /* clean up */ sge_dstring_free(&dbuf); sge_free(&pbuf); thread_func_cleanup(t_conf); sge_dstring_free(&err_msg); DPRINTF(("tty_to_commlib: exiting tty_to_commlib thread!\n")); DEXIT; return NULL; }
int main(int argc, char *argv[]) { qevent_options enabled_options; dstring errors = DSTRING_INIT; int i, gdi_setup; lList *alp = NULL; sge_gdi_ctx_class_t *ctx = NULL; sge_evc_class_t *evc = NULL; DENTER_MAIN(TOP_LAYER, "qevent"); /* sge_mt_init(); */ /* dump pid to file */ qevent_dump_pid_file(); /* parse command line */ enabled_options.error_message = &errors; qevent_set_option_struct(&enabled_options); qevent_parse_command_line(argc, argv, &enabled_options); /* check if help option is set */ if (enabled_options.help_option) { qevent_show_usage(); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } /* are there command line parsing errors ? */ if (sge_dstring_get_string(enabled_options.error_message)) { ERROR((SGE_EVENT, "%s", sge_dstring_get_string(enabled_options.error_message) )); qevent_show_usage(); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } log_state_set_log_gui(1); sge_setup_sig_handlers(QEVENT); /* setup event client */ gdi_setup = sge_gdi2_setup(&ctx, QEVENT, MAIN_THREAD, &alp); if (gdi_setup != AE_OK) { answer_list_output(&alp); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } /* TODO: how is the memory we allocate here released ???, SGE_EXIT doesn't */ if (false == sge_gdi2_evc_setup(&evc, ctx, EV_ID_ANY, &alp, NULL)) { answer_list_output(&alp); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } /* ok, start over ... 
*/ /* check for testsuite option */ if (enabled_options.testsuite_option) { /* only for testsuite */ qevent_testsuite_mode(evc); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } /* check for subscribe option */ if (enabled_options.subscribe_option) { /* only for testsuite */ qevent_subscribe_mode(evc); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } if (enabled_options.trigger_option_count > 0) { lCondition *where =NULL; lEnumeration *what = NULL; sge_mirror_initialize(evc, EV_ID_ANY, "sge_mirror -trigger", true, NULL, NULL, NULL, NULL, NULL); evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_ACK); /* put out information about -trigger option */ for (i=0;i<enabled_options.trigger_option_count;i++) { INFO((SGE_EVENT, "trigger script for %s events: %s\n", qevent_get_event_name((enabled_options.trigger_option_events)[i]), (enabled_options.trigger_option_scripts)[i])); switch((enabled_options.trigger_option_events)[i]) { case QEVENT_JB_END: /* build mask for the job structure to contain only the needed elements */ where = NULL; what = lWhat("%T(%I %I %I %I %I %I %I %I)", JB_Type, JB_job_number, JB_ja_tasks, JB_ja_structure, JB_ja_n_h_ids, JB_ja_u_h_ids, JB_ja_s_h_ids,JB_ja_o_h_ids, JB_ja_template); /* register for job events */ sge_mirror_subscribe(evc, SGE_TYPE_JOB, analyze_jatask_event, NULL, NULL, where, what); evc->ec_set_flush(evc, sgeE_JOB_DEL,true, 1); /* the mirror interface registers more events, than we need, thus we free the ones, we do not need */ /* evc->ec_unsubscribe(evc, sgeE_JOB_LIST); */ evc->ec_unsubscribe(evc, sgeE_JOB_MOD); evc->ec_unsubscribe(evc, sgeE_JOB_MOD_SCHED_PRIORITY); evc->ec_unsubscribe(evc, sgeE_JOB_USAGE); evc->ec_unsubscribe(evc, sgeE_JOB_FINAL_USAGE); /* evc->ec_unsubscribe(evc, sgeE_JOB_ADD); */ /* free the what and where mask */ lFreeWhere(&where); lFreeWhat(&what); break; case QEVENT_JB_TASK_END: /* build mask for the job structure to contain only the needed elements */ where = 
NULL; what = lWhat("%T(%I)", JAT_Type, JAT_status); /* register for JAT events */ sge_mirror_subscribe(evc, SGE_TYPE_JATASK, analyze_jatask_event, NULL, NULL, where, what); evc->ec_set_flush(evc, sgeE_JATASK_DEL,true, 1); /* the mirror interface registers more events, than we need, thus we free the ones, we do not need */ evc->ec_unsubscribe(evc, sgeE_JATASK_ADD); evc->ec_unsubscribe(evc, sgeE_JATASK_MOD); /* free the what and where mask */ lFreeWhere(&where); lFreeWhat(&what); break; } } while(!shut_me_down) { sge_mirror_error error = sge_mirror_process_events(evc); if (error == SGE_EM_TIMEOUT && !shut_me_down ) { sleep(10); continue; } } sge_mirror_shutdown(evc); sge_dstring_free(enabled_options.error_message); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 0); return 0; } ERROR((SGE_EVENT, "no option selected\n" )); qevent_show_usage(); sge_dstring_free(enabled_options.error_message); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); return 1; }
/****** commlib_to_tty() ******************************************************* * NAME * commlib_to_tty() -- commlib_to_tty thread entry point and main loop * * SYNOPSIS * void* commlib_to_tty(void *t_conf) * * FUNCTION * Entry point and main loop of the commlib_to_tty thread. * Reads data from the commlib and writes it to the tty. * * INPUTS * void *t_conf - pointer to cl_thread_settings_t struct of the thread * * RESULT * void* - always NULL * * NOTES * MT-NOTE: commlib_to_tty is MT-safe ? * * SEE ALSO *******************************************************************************/ void* commlib_to_tty(void *t_conf) { recv_message_t recv_mess; dstring err_msg = DSTRING_INIT; int ret = 0, do_exit = 0; DENTER(TOP_LAYER, "commlib_to_tty"); thread_func_startup(t_conf); while (do_exit == 0) { /* * wait blocking for a message from commlib */ recv_mess.cl_message = NULL; recv_mess.data = NULL; DPRINTF(("commlib_to_tty: recv_message()\n")); ret = comm_recv_message(g_comm_handle, CL_TRUE, &recv_mess, &err_msg); if (ret != COMM_RETVAL_OK) { /* check if we are still connected to anybody. */ /* if not - exit. */ DPRINTF(("commlib_to_tty: error receiving message: %s\n", sge_dstring_get_string(&err_msg))); if (comm_get_connection_count(g_comm_handle, &err_msg) == 0) { DPRINTF(("commlib_to_tty: no endpoint found\n")); do_exit = 1; continue; } } DPRINTF(("commlib_to_tty: received a message\n")); thread_testcancel(t_conf); client_check_window_change(g_comm_handle); if (received_signal == SIGHUP || received_signal == SIGINT || received_signal == SIGQUIT || received_signal == SIGTERM) { /* If we receive one of these signals, we must terminate */ DPRINTF(("commlib_to_tty: shutting down because of signal %d\n", received_signal)); do_exit = 1; continue; } DPRINTF(("'parsing' message\n")); /* * 'parse' message * A 1 byte prefix tells us what kind of message it is. * See sge_ijs_comm.h for message types. 
*/ if (recv_mess.cl_message != NULL) { char buf[100]; switch (recv_mess.type) { case STDOUT_DATA_MSG: /* copy recv_mess.data to buf to append '\0' */ memcpy(buf, recv_mess.data, MIN(99, recv_mess.cl_message->message_length - 1)); buf[MIN(99, recv_mess.cl_message->message_length - 1)] = 0; DPRINTF(("commlib_to_tty: received stdout message, writing to tty.\n")); DPRINTF(("commlib_to_tty: message is: %s\n", buf)); /* TODO: If it's not possible to write all data to the tty, retry blocking * until all data was written. The commlib must block then, too. */ if (sge_writenbytes(STDOUT_FILENO, recv_mess.data, (int)(recv_mess.cl_message->message_length-1)) != (int)(recv_mess.cl_message->message_length-1)) { DPRINTF(("commlib_to_tty: sge_writenbytes() error\n")); } break; case STDERR_DATA_MSG: DPRINTF(("commlib_to_tty: received stderr message, writing to tty.\n")); /* TODO: If it's not possible to write all data to the tty, retry blocking * until all data was written. The commlib must block then, too. */ if (sge_writenbytes(STDERR_FILENO, recv_mess.data, (int)(recv_mess.cl_message->message_length-1)) != (int)(recv_mess.cl_message->message_length-1)) { DPRINTF(("commlib_to_tty: sge_writenbytes() error\n")); } break; case WINDOW_SIZE_CTRL_MSG: /* control message */ /* we don't expect a control message */ DPRINTF(("commlib_to_tty: received window size message! " "This was unexpected!\n")); break; case REGISTER_CTRL_MSG: /* control message */ /* a client registered with us. With the next loop, the * cl_commlib_trigger function will send the WINDOW_SIZE_CTRL_MSG * (and perhaps some data messages), which is already in the * send_messages list of the connection, to the client. 
*/ DPRINTF(("commlib_to_tty: received register message!\n")); /* Send the settings in response */ sprintf(buf, "noshell = %d", g_noshell); ret = (int)comm_write_message(g_comm_handle, g_hostname, COMM_CLIENT, 1, (unsigned char*)buf, strlen(buf)+1, SETTINGS_CTRL_MSG, &err_msg); DPRINTF(("commlib_to_tty: sent SETTINGS_CTRL_MSG, ret = %d\n", ret)); break; case UNREGISTER_CTRL_MSG: /* control message */ /* the client wants to quit, as this is the last message the client * sends, we can be sure to have received all messages from the * client. We answer with a UNREGISTER_RESPONSE_CTRL_MSG so * the client knows that it can quit now. We can quit, also. */ DPRINTF(("commlib_to_tty: received unregister message!\n")); DPRINTF(("commlib_to_tty: writing UNREGISTER_RESPONSE_CTRL_MSG\n")); /* copy recv_mess.data to buf to append '\0' */ memcpy(buf, recv_mess.data, MIN(99, recv_mess.cl_message->message_length - 1)); buf[MIN(99, recv_mess.cl_message->message_length - 1)] = 0; /* the UNREGISTER_CTRL_MSG contains the exit status of the * qrsh_starter in case of qrsh <command> and the exit status * of the shell for qlogin/qrsh <no command>. * If the job was signalled, the exit code is 128+signal. */ sscanf(buf, "%d", &g_exit_status); comm_write_message(g_comm_handle, g_hostname, COMM_CLIENT, 1, (unsigned char*)" ", 1, UNREGISTER_RESPONSE_CTRL_MSG, &err_msg); DPRINTF(("commlib_to_tty: received exit_status from shepherd: %d\n", g_exit_status)); comm_flush_write_messages(g_comm_handle, &err_msg); do_exit = 1; #if 0 cl_log_list_set_log_level(cl_com_get_log_list(), CL_LOG_OFF); cl_com_set_error_func(NULL); #endif break; } } comm_free_message(&recv_mess, &err_msg); } thread_func_cleanup(t_conf); DPRINTF(("commlib_to_tty: exiting commlib_to_tty thread!\n")); sge_dstring_free(&err_msg); DEXIT; return NULL; }