int main(int argc, char **argv) { lList *pcmdline = NULL; lList *alp = NULL; sge_gdi_ctx_class_t *ctx = NULL; lList *ar_lp = NULL; lListElem *ar = NULL; DENTER_MAIN(TOP_LAYER, "qrsub"); /* Set up the program information name */ sge_setup_sig_handlers(QRSUB); log_state_set_log_gui(1); if (sge_gdi2_setup(&ctx, QRSUB, MAIN_THREAD, &alp) != AE_OK) { answer_list_output(&alp); goto error_exit; } /* ** stage 1 of commandline parsing */ { dstring file = DSTRING_INIT; const char *user = ctx->get_username(ctx); const char *cell_root = ctx->get_cell_root(ctx); /* arguments from SGE_ROOT/common/sge_ar_request file */ get_root_file_path(&file, cell_root, SGE_COMMON_DEF_AR_REQ_FILE); if ((alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE)) == NULL) { /* arguments from $HOME/.sge_ar_request file */ if (get_user_home_file_path(&file, SGE_HOME_DEF_AR_REQ_FILE, user, &alp)) { lFreeList(&alp); alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE); } } sge_dstring_free(&file); if (alp) { answer_list_output(&alp); lFreeList(&pcmdline); goto error_exit; } } alp = cull_parse_cmdline(QRSUB, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS); if (answer_list_print_err_warn(&alp, NULL, "qrsub: ", MSG_WARNING) > 0) { lFreeList(&pcmdline); goto error_exit; } if (!pcmdline) { /* no command line option is present: print help to stderr */ sge_usage(QRSUB, stderr); fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT); goto error_exit; } /* ** stage 2 of command line parsing */ ar = lCreateElem(AR_Type); if (!sge_parse_qrsub(ctx, pcmdline, &alp, &ar)) { answer_list_output(&alp); lFreeList(&pcmdline); goto error_exit; } ar_lp = lCreateList(NULL, AR_Type); lAppendElem(ar_lp, ar); alp = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION, &ar_lp, NULL, NULL); lFreeList(&ar_lp); answer_list_on_error_print_or_exit(&alp, stdout); if 
(answer_list_has_error(&alp)) { sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); if (answer_list_has_status(&alp, STATUS_NOTOK_DOAGAIN)) { DRETURN(25); } else { DRETURN(1); } } sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); DRETURN(0); error_exit: sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); DRETURN(1); }
int main(int argc, char *argv[]) { qevent_options enabled_options; dstring errors = DSTRING_INIT; int i, gdi_setup; lList *alp = NULL; sge_gdi_ctx_class_t *ctx = NULL; sge_evc_class_t *evc = NULL; DENTER_MAIN(TOP_LAYER, "qevent"); /* sge_mt_init(); */ /* dump pid to file */ qevent_dump_pid_file(); /* parse command line */ enabled_options.error_message = &errors; qevent_set_option_struct(&enabled_options); qevent_parse_command_line(argc, argv, &enabled_options); /* check if help option is set */ if (enabled_options.help_option) { qevent_show_usage(); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } /* are there command line parsing errors ? */ if (sge_dstring_get_string(enabled_options.error_message)) { ERROR((SGE_EVENT, "%s", sge_dstring_get_string(enabled_options.error_message) )); qevent_show_usage(); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } log_state_set_log_gui(1); sge_setup_sig_handlers(QEVENT); /* setup event client */ gdi_setup = sge_gdi2_setup(&ctx, QEVENT, MAIN_THREAD, &alp); if (gdi_setup != AE_OK) { answer_list_output(&alp); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } /* TODO: how is the memory we allocate here released ???, SGE_EXIT doesn't */ if (false == sge_gdi2_evc_setup(&evc, ctx, EV_ID_ANY, &alp, NULL)) { answer_list_output(&alp); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 1); } /* ok, start over ... 
*/ /* check for testsuite option */ if (enabled_options.testsuite_option) { /* only for testsuite */ qevent_testsuite_mode(evc); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } /* check for subscribe option */ if (enabled_options.subscribe_option) { /* only for testsuite */ qevent_subscribe_mode(evc); sge_dstring_free(enabled_options.error_message); SGE_EXIT((void**)&ctx, 0); } if (enabled_options.trigger_option_count > 0) { lCondition *where =NULL; lEnumeration *what = NULL; sge_mirror_initialize(evc, EV_ID_ANY, "sge_mirror -trigger", true, NULL, NULL, NULL, NULL, NULL); evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_ACK); /* put out information about -trigger option */ for (i=0;i<enabled_options.trigger_option_count;i++) { INFO((SGE_EVENT, "trigger script for %s events: %s\n", qevent_get_event_name((enabled_options.trigger_option_events)[i]), (enabled_options.trigger_option_scripts)[i])); switch((enabled_options.trigger_option_events)[i]) { case QEVENT_JB_END: /* build mask for the job structure to contain only the needed elements */ where = NULL; what = lWhat("%T(%I %I %I %I %I %I %I %I)", JB_Type, JB_job_number, JB_ja_tasks, JB_ja_structure, JB_ja_n_h_ids, JB_ja_u_h_ids, JB_ja_s_h_ids,JB_ja_o_h_ids, JB_ja_template); /* register for job events */ sge_mirror_subscribe(evc, SGE_TYPE_JOB, analyze_jatask_event, NULL, NULL, where, what); evc->ec_set_flush(evc, sgeE_JOB_DEL,true, 1); /* the mirror interface registers more events, than we need, thus we free the ones, we do not need */ /* evc->ec_unsubscribe(evc, sgeE_JOB_LIST); */ evc->ec_unsubscribe(evc, sgeE_JOB_MOD); evc->ec_unsubscribe(evc, sgeE_JOB_MOD_SCHED_PRIORITY); evc->ec_unsubscribe(evc, sgeE_JOB_USAGE); evc->ec_unsubscribe(evc, sgeE_JOB_FINAL_USAGE); /* evc->ec_unsubscribe(evc, sgeE_JOB_ADD); */ /* free the what and where mask */ lFreeWhere(&where); lFreeWhat(&what); break; case QEVENT_JB_TASK_END: /* build mask for the job structure to contain only the needed elements */ where = 
NULL; what = lWhat("%T(%I)", JAT_Type, JAT_status); /* register for JAT events */ sge_mirror_subscribe(evc, SGE_TYPE_JATASK, analyze_jatask_event, NULL, NULL, where, what); evc->ec_set_flush(evc, sgeE_JATASK_DEL,true, 1); /* the mirror interface registers more events, than we need, thus we free the ones, we do not need */ evc->ec_unsubscribe(evc, sgeE_JATASK_ADD); evc->ec_unsubscribe(evc, sgeE_JATASK_MOD); /* free the what and where mask */ lFreeWhere(&where); lFreeWhat(&what); break; } } while(!shut_me_down) { sge_mirror_error error = sge_mirror_process_events(evc); if (error == SGE_EM_TIMEOUT && !shut_me_down ) { sleep(10); continue; } } sge_mirror_shutdown(evc); sge_dstring_free(enabled_options.error_message); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 0); return 0; } ERROR((SGE_EVENT, "no option selected\n" )); qevent_show_usage(); sge_dstring_free(enabled_options.error_message); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); return 1; }
int main(int argc, char **argv) { int ret = 0; lList *pcmdline = NULL; lList *answer_list = NULL; sge_gdi_ctx_class_t *ctx = NULL; qrstat_env_t qrstat_env; DENTER_MAIN(TOP_LAYER, "qrsub"); /* Set up the program information name */ sge_setup_sig_handlers(QRSTAT); log_state_set_log_gui(1); if (sge_gdi2_setup(&ctx, QRSTAT, MAIN_THREAD, &answer_list) != AE_OK) { answer_list_output(&answer_list); goto error_exit; } qrstat_filter_init(&qrstat_env); qrstat_filter_set_ctx(&qrstat_env, ctx); /* * stage 1: commandline parsing */ { dstring file = DSTRING_INIT; const char *user = ctx->get_username(ctx); const char *cell_root = ctx->get_cell_root(ctx); /* arguments from SGE_ROOT/common/sge_qrstat file */ get_root_file_path(&file, cell_root, SGE_COMMON_DEF_QRSTAT_FILE); if (sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list) == true) { /* arguments from $HOME/.sge_qrstat file */ if (get_user_home_file_path(&file, SGE_HOME_DEF_QRSTAT_FILE, user, &answer_list)) { sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list); } } sge_dstring_free(&file); if (answer_list) { answer_list_output(&answer_list); lFreeList(&pcmdline); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); } } answer_list = cull_parse_cmdline(QRSTAT, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS); if (answer_list != NULL) { answer_list_output(&answer_list); lFreeList(&pcmdline); goto error_exit; } /* * stage 2: evalutate switches and modify qrstat_env */ if (!sge_parse_qrstat(ctx, &answer_list, &qrstat_env, &pcmdline)) { answer_list_output(&answer_list); lFreeList(&pcmdline); goto error_exit; } /* * stage 3: fetch data from master */ { answer_list = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_GET, &qrstat_env.ar_list, qrstat_env.where_AR_Type, qrstat_env.what_AR_Type, false); if (answer_list_has_error(&answer_list)) { answer_list_output(&answer_list); goto error_exit; } } /* * stage 4: create output in correct format */ { qrstat_report_handler_t *handler = NULL; if 
(qrstat_env.is_xml) { handler = qrstat_create_report_handler_xml(&qrstat_env, &answer_list); } else { handler = qrstat_create_report_handler_stdout(&qrstat_env, &answer_list); } if (!qrstat_print(&answer_list, handler, &qrstat_env)) { ret = 1; } if (qrstat_env.is_xml) { qrstat_destroy_report_handler_xml(&handler, &answer_list); } else { qrstat_destroy_report_handler_stdout(&handler, &answer_list); } } sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); DRETURN(ret); error_exit: sge_gdi2_shutdown((void**)&ctx); sge_prof_cleanup(); SGE_EXIT((void**)&ctx, 1); DRETURN(1); }
/*
 * qsub entry point.
 *
 * Initializes JAPI, gathers switches from the defaults files, the command
 * line and (unless the job is handled as binary via -b) the script file,
 * merges them into one option list and builds the job from it. The job is
 * then submitted through JAPI, either as a single job or as a bulk job
 * depending on the number of tasks. For immediate jobs and for "-sync y"
 * the function additionally waits for job start and/or job end.
 *
 * 'ctx', 'exit_mutex', 'exit_cv' and 'exited' are used here but declared
 * outside of this function (not visible in this block).
 *
 * The process is left through SGE_EXIT() on early failures and through
 * exit(exit_status) after the Error label; exit_status is 0 on success,
 * STATUS_NOTOK_DOAGAIN when JAPI reported DRMAA_ERRNO_TRY_LATER, the value
 * returned by report_exit_status() when waiting for job end, and 1 for the
 * remaining failures.
 */
int main(int argc, char **argv)
{
   lList *opts_cmdline = NULL;
   lList *opts_defaults = NULL;
   lList *opts_scriptfile = NULL;
   lList *opts_all = NULL;
   lListElem *job = NULL;
   lList *alp = NULL;
   lListElem *ep;
   int exit_status = 0;
   int just_verify;
   int tmp_ret;
   int wait_for_job = 0, is_immediate = 0;
   dstring session_key_out = DSTRING_INIT;
   dstring diag = DSTRING_INIT;
   dstring jobid = DSTRING_INIT;
   u_long32 start, end, step;
   u_long32 num_tasks;
   int count, stat;
   char *jobid_string = NULL;
   bool has_terse;
   drmaa_attr_values_t *jobids = NULL;

   u_long32 prog_number = 0;
   u_long32 myuid = 0;
   const char *sge_root = NULL;
   const char *cell_root = NULL;
   const char *username = NULL;
   const char *qualified_hostname = NULL;
   const char *unqualified_hostname = NULL;
   const char *mastername = NULL;

   DENTER_MAIN(TOP_LAYER, "qsub");

   prof_mt_init();

   /* Set up the program information name */
   sge_setup_sig_handlers(QSUB);

   DPRINTF(("Initializing JAPI\n"));

   if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag) != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "\n");
      fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, sge_dstring_get_string(&diag));
      fprintf(stderr, "\n");
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* cache the context values needed below */
   prog_number = ctx->get_who(ctx);
   myuid = ctx->get_uid(ctx);
   sge_root = ctx->get_sge_root(ctx);
   cell_root = ctx->get_cell_root(ctx);
   username = ctx->get_username(ctx);
   qualified_hostname = ctx->get_qualified_hostname(ctx);
   unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   mastername = ctx->get_master(ctx, false);

   /*
    * read switches from the various defaults files
    */
   opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * append the commandline switches to the list
    */
   opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp, argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * show usage if -help was in commandline
    */
   if (opt_list_has_X(opts_cmdline, "-help")) {
      sge_usage(QSUB, stdout);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /*
    * We will only read commandline options from the script file if the script
    * itself should not be handled as binary. A "-b" on the command line
    * takes precedence over a "-b" from the defaults files.
    */
   if (opt_list_is_X_true(opts_cmdline, "-b") ||
       (!opt_list_has_X(opts_cmdline, "-b") && opt_list_is_X_true(opts_defaults, "-b"))) {
      DPRINTF(("Skipping options from script due to -b option\n"));
   } else {
      opt_list_append_opts_from_script(prog_number, &opts_scriptfile, &alp, opts_cmdline, environ);
      tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S, MSG_WARNING);
      if (tmp_ret > 0) {
         DEXIT;
         SGE_EXIT((void**)&ctx, tmp_ret);
      }
   }

   /*
    * Merge all commandline options and interpret them
    */
   opt_list_merge_command_lines(&opts_all, &opts_defaults, &opts_scriptfile, &opts_cmdline);

   /*
    * Check if -terse is requested
    */
   has_terse = opt_list_has_X(opts_all, "-terse");

   /* If "-sync y" is set, wait for the job to end. */
   /* Remove all -sync switches since cull_parse_job_parameter()
    * doesn't know what to do with them. */
   while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
      if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
         wait_for_job = 1;
      }

      lRemoveElem(opts_all, &ep);
   }

   if (wait_for_job) {
      DPRINTF(("Wait for job end\n"));
   }

   /* build the job from the merged option list */
   alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, qualified_hostname, opts_all, &job);

   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
      answer_list_output(&alp);
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* Check if job is immediate */
   is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
   DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

   DPRINTF(("Everything ok\n"));

   /* JB_verify set: only show the job and leave, nothing is submitted */
   if (lGetUlong(job, JB_verify)) {
      cull_show_job(job, 0, false);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /* waiting for the job requires the signal thread and JAPI job-wait support */
   if (is_immediate || wait_for_job) {
      pthread_t sigt;

      qsub_setup_sig_handlers();

      if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, " error preparing signal handling thread");
         fprintf(stderr, "\n");

         exit_status = 1;
         goto Error;
      }

      /* only a communication failure is treated as fatal here */
      if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                                       DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
         const char *msg = sge_dstring_get_string(&diag);
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S, msg?msg:" error starting event client thread");
         fprintf(stderr, "\n");

         exit_status = 1;
         goto Error;
      }
   }

   /* the task id range decides between single job and bulk submission */
   job_get_submit_task_ids(job, &start, &end, &step);
   num_tasks = (end - start) / step + 1;

   if (num_tasks > 1) {
      int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag);
      if (error != DRMAA_ERRNO_SUCCESS) {
         /* No active session here means that japi_enable_job_wait() was
          * interrupted by the signal handler, in which case we just break out
          * quietly. */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }

         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the JAPI error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         } else {
            exit_status = 1;
         }
         goto Error;
      }

      DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid));

      jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
   } else if (num_tasks == 1) {
      int error = japi_run_job(&jobid, &job, &diag);

      if (error != DRMAA_ERRNO_SUCCESS) {
         /* as above: a missing session is not reported */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }

         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the DRMAA error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         } else {
            exit_status = 1;
         }
         goto Error;
      }

      /* keep a private copy of the id; it is released after the Error label */
      jobid_string = strdup(sge_dstring_get_string(&jobid));
      DPRINTF(("job id is: %s\n", jobid_string));

      sge_dstring_free(&jobid);
   } else {
      fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
      fprintf(stderr, "\n");

      exit_status = 1;
      goto Error;
   }

   /* only success message is printed to stdout */

   just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY ||
                  lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
   /* NOTE(review): this trace is emitted regardless of the value of just_verify */
   DPRINTF(("Just verifying job\n"));

   if (!just_verify) {
      const char *output = sge_dstring_get_string(&diag);

      /* print the tersed output */
      if (has_terse) {
         printf("%s", jobid_string);
      } else if (output != NULL) {
         printf("%s", output);
      } else {
         printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
      }
      printf("\n");
   } else {
      printf(MSG_JOB_VERIFYFOUNDQ);
      printf("\n");
   }

   if ((wait_for_job || is_immediate) && !just_verify) {
      int event;

      if (is_immediate) {
         fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

         /* We only need to wait for the first task to be scheduled to be able
          * to say that the job is running. */
         tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                             DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                             NULL, &diag);

         if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S, jobid_string);
            fprintf(stderr, "\n");
         }
         /* A job finish event here means that the job was rejected. */
         else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_FINISH)) {
            fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);

            exit_status = 1;
            goto Error;
         } else {
            /* Since we told japi_wait to wait forever, we know that if it gets
             * a timeout, it's because it's been interrupted to exit, in which
             * case we don't complain.  Same for no active session. */
            if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
               fprintf(stderr, "\n");
               fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
               fprintf(stderr, "\n");
            }

            exit_status = 1;
            goto Error;
         }
      }

      if (wait_for_job) {
         /* Rather than using japi_synchronize on ALL for bulk jobs, we use
          * japi_wait on ANY num_tasks times because with synchronize, we would
          * have to wait for all the tasks to finish before we know if any
          * finished. */
         for (count = 0; count < num_tasks; count++) {
            /* Since there's only one running job in the session, we can just
             * wait for ANY. */
            if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                                     DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                                     NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
               if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                   (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                  fprintf(stderr, "\n");
                  fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                  fprintf(stderr, "\n");
               }

               exit_status = 1;
               goto Error;
            }

            /* report how job finished */
            /* If the job is an array job, use the first non-zero exit code as
             * the exit code for qsub. */
            if (exit_status == 0) {
               exit_status = report_exit_status(stat, sge_dstring_get_string(&jobid));
            }
            /* If we've already found a non-zero exit code, just print the exit
             * info for the task. */
            else {
               report_exit_status(stat, sge_dstring_get_string(&jobid));
            }
         }
      }
   }

Error:
   /* common tail for success and failure; exit_status carries the result */
   FREE(jobid_string);
   lFreeList(&alp);
   lFreeList(&opts_all);

   if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
      if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
         fprintf(stderr, "\n");
      } else {
         struct timespec ts;
         /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
          * because the signal handler thread called japi_exit().  We know this
          * because if the call to japi_init() fails, we just exit directly.
          * If the call to japi_init() succeeds, then we have an active session,
          * so coming here because of an error would not result in the
          * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
         DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));

         sge_relative_timespec(15, &ts);

         /* wait on the condition variable until 'exited' is set or the timeout expires */
         sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);

         while (!exited) {
            if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
               DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
               break;
            }
         }

         sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
      }
   }

   sge_prof_cleanup();

   /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
    * suspended, SGE_EXIT() hangs. */
   exit(exit_status);
   /* not reached: exit() above does not return */
   DEXIT;
   return exit_status;
}
/****** qmaster/sge_qmaster_main/main() **************************************** * NAME * main() -- qmaster entry point * * SYNOPSIS * int main(int argc, char* argv[]) * * FUNCTION * Qmaster entry point. * * NOTE: The main thread must block all signals before any additional thread * is created. Failure to do so will ruin signal handling! * * INPUTS * int argc - number of commandline arguments * char* argv[] - commandline arguments * * RESULT * 0 - success * * NOTES * We check whether 'SGE_ROOT' is set before we daemonize. Once qmaster is * a daemon, we are no longer connected to a terminal and hence can not * output an error message to stdout or stderr. * * We need to inovke 'prepare_enroll()' *before* the user id is switched via * 'become_admin_user()'. This is because qmaster must be able to bind a so * called reserved port (requires root privileges) if configured to do so. * *******************************************************************************/ int main(int argc, char* argv[]) { int max_enroll_tries; int ret_val; int file_descriptor_settings_result = 0; bool has_daemonized = false; sge_gdi_ctx_class_t *ctx = NULL; u_long32 start_time = sge_get_gmt(); monitoring_t monitor; DENTER_MAIN(TOP_LAYER, "qmaster"); sge_monitor_init(&monitor, "MAIN", NONE_EXT, MT_WARNING, MT_ERROR); prof_mt_init(); sge_get_root_dir(true, NULL, 0, true); #ifdef __SGE_COMPILE_WITH_GETTEXT__ sge_init_language_func((gettext_func_type)gettext, (setlocale_func_type)setlocale, (bindtextdomain_func_type)bindtextdomain, (textdomain_func_type)textdomain); sge_init_language(NULL,NULL); #endif /* * qmaster doesn't support any commandline anymore, * but we should show version string and -help option */ if (argc != 1) { sigset_t sig_set; sigfillset(&sig_set); pthread_sigmask(SIG_SETMASK, &sig_set, NULL); sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, true); sge_process_qmaster_cmdline(argv); SGE_EXIT((void**)&ctx, 1); } /* * daemonize qmaster * set file descriptor limits * and initialize 
libraries to be used in multi threaded environment * also take care that finished child processed of this process become * zombie jobs */ has_daemonized = sge_daemonize_qmaster(); file_descriptor_settings_result = set_file_descriptor_limit(); #if !defined(INTERIX) && !defined(CYGWIN) init_sig_action_and_mask(); #endif /* init qmaster threads without becomming admin user */ sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, false); ctx->set_daemonized(ctx, has_daemonized); /* this must be done as root user to be able to bind ports < 1024 */ max_enroll_tries = 30; while (cl_com_get_handle(prognames[QMASTER],1) == NULL) { ctx->prepare_enroll(ctx); max_enroll_tries--; if (max_enroll_tries <= 0) { /* exit after 30 seconds */ CRITICAL((SGE_EVENT, MSG_QMASTER_COMMUNICATION_ERRORS )); SGE_EXIT((void**)&ctx, 1); } if (cl_com_get_handle(prognames[QMASTER],1) == NULL) { /* sleep when prepare_enroll() failed */ sleep(1); } } /* * now the commlib up and running. Set qmaster application status function * (commlib callback function for qping status information response * messages (SIRM)) */ ret_val = cl_com_set_status_func(sge_qmaster_application_status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val))); } /* * now we become admin user change into the correct root directory set the * the target for logging messages */ sge_become_admin_user(ctx->get_admin_user(ctx)); sge_chdir_exit(ctx->get_qmaster_spool_dir(ctx), 1); log_state_set_log_file(ERR_FILE); ctx->set_exit_func(ctx, sge_exit_func); #if defined(SOLARIS) /* Init shared SMF libs if necessary */ if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) { SGE_EXIT((void**)&ctx, 1); } #endif /* * We do increment the heartbeat manually here. This is the 'startup heartbeat'. * The first time the hearbeat will be incremented through the heartbeat event * handler is after about HEARTBEAT_INTERVAL seconds. The hardbeat event handler * is setup during the initialisazion of the timer thread. 
*/ inc_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, HEARTBEAT_INTERVAL, NULL); /* * Event master module has to be initialized already here because * sge_setup_qmaster() might already access it although event delivery * thread is not running. * * Corresponding shutdown is done in sge_event_master_terminate(); * * EB: In my opinion the init function should called in * sge_event_master_initialize(). Is it possible to move that call? */ sge_event_master_init(); sge_setup_qmaster(ctx, argv); #ifndef USE_POLL if (file_descriptor_settings_result == 1) { WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_LARGER_THAN_LIMIT_U, sge_u32c(FD_SETSIZE))); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE1_U, sge_u32c(FD_SETSIZE - 20))); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE2)); WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE3)); } #endif /* * Setup all threads and initialize corresponding modules. * Order is important! */ sge_signaler_initialize(ctx); sge_event_master_initialize(ctx); sge_timer_initialize(ctx, &monitor); sge_worker_initialize(ctx); #if 0 sge_test_initialize(ctx); #endif sge_listener_initialize(ctx); sge_scheduler_initialize(ctx, NULL); #ifndef NO_JNI sge_jvm_initialize(ctx, NULL); #endif INFO((SGE_EVENT, "qmaster startup took "sge_u32" seconds", sge_get_gmt() - start_time)); /* * Block till signal from signal thread arrives us */ sge_thread_wait_for_signal(); /* * Shutdown all threads and shutdown corresponding modules. * Order is important! */ #ifndef NO_JNI sge_jvm_terminate(ctx, NULL); #endif sge_scheduler_terminate(ctx, NULL); sge_listener_terminate(); #if 0 sge_test_terminate(ctx); #endif sge_worker_terminate(ctx); sge_timer_terminate(); sge_event_master_terminate(); sge_signaler_terminate(); /* * Remaining shutdown operations */ sge_clean_lists(); sge_monitor_free(&monitor); sge_shutdown((void**)&ctx, sge_qmaster_get_exit_state()); sge_prof_cleanup(); DEXIT; return 0; } /* main() */
/*
 * Entry point shared by qalter, qresub, qhold and qrls.
 *
 * The basename of argv[0] selects the personality (me_who). The command
 * line is parsed into a request list that is sent to the master as a
 * SGE_GDI_MOD request, or as SGE_GDI_COPY for qresub, optionally extended
 * by SGE_GDI_ALL_JOBS / SGE_GDI_ALL_USERS. Each answer text is printed and
 * the status of the first answer that is not STATUS_OK is kept in 'ret'.
 *
 * 'verify' is used here but declared outside of this function.
 *
 * NOTE(review): the function is truncated in this view; what happens with
 * 'ret' after the answer loop and the closing brace are not visible.
 */
int main(int argc, char **argv)
{
   int ret = STATUS_OK;
   lList *alp = NULL;
   lList *request_list = NULL;
   lList *cmdline = NULL;
   lListElem *aep;
   int all_jobs = 0;
   int all_users = 0;
   u_long32 gdi_cmd = SGE_GDI_MOD;
   int tmp_ret;
   int me_who;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "qalter");

   prof_mt_init();

   /*
   ** get command name: qalter, qresub, qhold or qrls
   ** (anything else is handled as qalter)
   */
   if (!strcmp(sge_basename(argv[0], '/'), "qresub")) {
      DPRINTF(("QRESUB\n"));
      me_who = QRESUB;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qhold")) {
      DPRINTF(("QHOLD\n"));
      me_who = QHOLD;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qrls")) {
      DPRINTF(("QRLS\n"));
      me_who = QRLS;
   } else {
      DPRINTF(("QALTER\n"));
      me_who = QALTER;
   }

   log_state_set_log_gui(1);
   sge_setup_sig_handlers(me_who);

   if (sge_gdi2_setup(&ctx, me_who, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
   ** begin to work
   */
   opt_list_append_opts_from_qalter_cmdline(me_who, &cmdline, &alp, argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, MSG_QALTER, MSG_QALTER, MSG_QALTERWARNING);

   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /* handling the case that no command line parameter was specified */
   if ((me_who == QHOLD || me_who == QRLS) && lGetNumberOfElem(cmdline) == 1) {
      /* -h option is set implicitly for QHOLD and QRLS */
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if ((me_who == QRESUB || me_who == QALTER) && lGetNumberOfElem(cmdline) == 0) {
      /* qresub and qalter have nothing set */
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if (opt_list_has_X(cmdline, "-help")) {
      /* -help was specified */
      sge_usage(me_who, stdout);
      SGE_EXIT((void**)&ctx, 0);
   }

   alp = qalter_parse_job_parameter(me_who, cmdline, &request_list, &all_jobs, &all_users);

   DPRINTF(("all_jobs = %d, all_user = %d\n", all_jobs, all_users));

   if (request_list && verify) {
      /*
       * got a request list containing one element for each job to be
       * modified, carrying the job id. All elements contain the same
       * fields, so show_job() may be used with the first job in the list.
       * The job ids in the request list get printed before show_job().
       */
      cull_show_job(lFirst(request_list), FLG_QALTER, false);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
   }

   /* answers of the parameter parsing are only evaluated after the verify case */
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /* map the personality to the GDI command */
   if ((me_who == QALTER) || (me_who == QHOLD) || (me_who == QRLS) ) {
      DPRINTF(("QALTER\n"));
      gdi_cmd = SGE_GDI_MOD;
   } else if (me_who == QRESUB){
      DPRINTF(("QRESUB\n"));
      gdi_cmd = SGE_GDI_COPY;
   } else {
      printf("unknown binary name.\n");
      SGE_EXIT((void**)&ctx, 1);
   }

   if (all_jobs)
      gdi_cmd |= SGE_GDI_ALL_JOBS;
   if (all_users)
      gdi_cmd |= SGE_GDI_ALL_USERS;

   alp = ctx->gdi(ctx, SGE_JB_LIST, gdi_cmd, &request_list, NULL, NULL);

   /* print every answer text; keep the first status that is not STATUS_OK */
   for_each (aep, alp) {
      printf("%s\n", lGetString(aep, AN_text));
      if (ret == STATUS_OK) {
         ret = lGetUlong(aep, AN_status);
      }
   }
/*-------------------------------------------------------------------------*/ int main(int argc, char **argv) { int ret; int my_pid; int ret_val; int printed_points = 0; int max_enroll_tries; static char tmp_err_file_name[SGE_PATH_MAX]; time_t next_prof_output = 0; int execd_exit_state = 0; lList **master_job_list = NULL; sge_gdi_ctx_class_t *ctx = NULL; lList *alp = NULL; DENTER_MAIN(TOP_LAYER, "execd"); #if defined(LINUX) gen_procList (); #endif prof_mt_init(); set_thread_name(pthread_self(),"Execd Thread"); prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); #ifdef __SGE_COMPILE_WITH_GETTEXT__ /* init language output for gettext() , it will use the right language */ sge_init_language_func((gettext_func_type) gettext, (setlocale_func_type) setlocale, (bindtextdomain_func_type) bindtextdomain, (textdomain_func_type) textdomain); sge_init_language(NULL,NULL); #endif /* __SGE_COMPILE_WITH_GETTEXT__ */ /* This needs a better solution */ umask(022); /* Initialize path for temporary logging until we chdir to spool */ my_pid = getpid(); sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid)); log_state_set_log_file(tmp_err_file_name); /* exit func for SGE_EXIT() */ sge_sig_handler_in_main_loop = 0; sge_setup_sig_handlers(EXECD); if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) { answer_list_output(&alp); SGE_EXIT((void**)&ctx, 1); } ctx->set_exit_func(ctx, execd_exit_func); #if defined(SOLARIS) /* Init shared SMF libs if necessary */ if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) { SGE_EXIT((void**)&ctx, 1); } #endif /* prepare daemonize */ if (!getenv("SGE_ND")) { sge_daemonize_prepare(ctx); } if ((ret=sge_occupy_first_three())>=0) { CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret)); SGE_EXIT((void**)&ctx, 1); } lInit(nmv); /* unset XAUTHORITY if set */ if (getenv("XAUTHORITY") != NULL) { sge_unsetenv("XAUTHORITY"); } parse_cmdline_execd(argv); /* 
exit if we can't get communication handle (bind port) */ max_enroll_tries = 30; while (cl_com_get_handle(prognames[EXECD],1) == NULL) { ctx->prepare_enroll(ctx); max_enroll_tries--; if (max_enroll_tries <= 0 || shut_me_down) { /* exit after 30 seconds */ if (printed_points != 0) { printf("\n"); } CRITICAL((SGE_EVENT, MSG_COM_ERROR)); SGE_EXIT((void**)&ctx, 1); } if (cl_com_get_handle(prognames[EXECD],1) == NULL) { /* sleep when prepare_enroll() failed */ sleep(1); if (max_enroll_tries < 27) { printf("."); printed_points++; fflush(stdout); } } } if (printed_points != 0) { printf("\n"); } /* * now the commlib up and running. Set execd application status function * ( commlib callback function for qping status information response * messages (SIRM) ) */ ret_val = cl_com_set_status_func(sge_execd_application_status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val)) ); } /* test connection */ { cl_com_SIRM_t* status = NULL; ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx), (char *)ctx->get_master(ctx, true), (char*)prognames[QMASTER], 1, &status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val))); ERROR((SGE_EVENT, MSG_CONF_NOCONFBG)); } cl_com_free_sirm_message(&status); } /* finalize daeamonize */ if (!getenv("SGE_ND")) { sge_daemonize_finalize(ctx); } /* daemonizes if qmaster is unreachable */ sge_setup_sge_execd(ctx, tmp_err_file_name); /* are we using qidle or not */ sge_ls_qidle(mconf_get_use_qidle()); sge_ls_gnu_ls(1); DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle())); /* test load sensor (internal or external) */ { lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx)); lFreeList(&report_list); } /* here we have to wait for qmaster registration */ while (sge_execd_register_at_qmaster(ctx, false) != 0) { if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) { /* This is no error */ DPRINTF(("***** got SGE_COM_ACCESS_DENIED from 
qmaster *****\n")); } if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) { execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE; break; } if (shut_me_down != 0) { break; } sleep(30); } /* * Terminate on SIGTERM or hard communication error */ if (execd_exit_state != 0 || shut_me_down != 0) { sge_shutdown((void**)&ctx, execd_exit_state); DRETURN(execd_exit_state); } /* * We write pid file when we are connected to qmaster. Otherwise an old * execd might overwrite our pidfile. */ sge_write_pid(EXECD_PID_FILE); /* * At this point we are sure we are the only sge_execd and we are connected * to the current qmaster. First we have to report any reaped children * that might exist. */ starting_up(); /* * Log a warning message if execd hasn't been started by a superuser */ if (!sge_is_start_user_superuser()) { WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT)); } #ifdef COMPILE_DC if (ptf_init()) { CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF)); SGE_EXIT((void**)&ctx, 1); } INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF)); #endif master_job_list = object_type_get_master_list(SGE_TYPE_JOB); *master_job_list = lCreateList("Master_Job_List", JB_Type); job_list_read_from_disk(master_job_list, "Master_Job_List", 0, SPOOL_WITHIN_EXECD, job_initialize_job); /* clean up jobs hanging around (look in active_dir) */ clean_up_old_jobs(ctx, 1); execd_trash_load_report(); sge_set_flush_lr_flag(true); sge_sig_handler_in_main_loop = 1; if (thread_prof_active_by_id(pthread_self())) { prof_start(SGE_PROF_CUSTOM1, NULL); prof_start(SGE_PROF_CUSTOM2, NULL); prof_start(SGE_PROF_GDI_REQUEST, NULL); } else { prof_stop(SGE_PROF_CUSTOM1, NULL); prof_stop(SGE_PROF_CUSTOM2, NULL); prof_stop(SGE_PROF_GDI_REQUEST, NULL); } PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1); /* Start dispatching */ execd_exit_state = sge_execd_process_messages(ctx); /* * This code is only reached when dispatcher terminates and execd goes down. 
*/ /* log if we received SIGPIPE signal */ if (sge_sig_handler_sigpipe_received) { sge_sig_handler_sigpipe_received = 0; INFO((SGE_EVENT, "SIGPIPE received\n")); } #if defined(LINUX) free_procList(); #endif lFreeList(master_job_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1); if (prof_is_active(SGE_PROF_ALL)) { time_t now = (time_t)sge_get_gmt(); if (now > next_prof_output) { prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n"); prof_reset(SGE_PROF_ALL,NULL); next_prof_output = now + 60; } } sge_prof_cleanup(); sge_shutdown((void**)&ctx, execd_exit_state); DRETURN(execd_exit_state); }