/****** qmaster/setup_qmaster/sge_setup_job_resend() *************************** * NAME * sge_setup_job_resend() -- Setup job resend events. * * SYNOPSIS * void sge_setup_job_resend(void) * * FUNCTION * Register a job resend event for each job or array task which does have a * 'JTRANSFERING' status. * * INPUTS * void - none * * RESULT * void - none * * NOTES * MT-NOTE: sge_setup_job_resend() is not MT safe * *******************************************************************************/ void sge_setup_job_resend(void) { lListElem *job = NULL; object_description *object_base = object_type_get_object_description(); DENTER(TOP_LAYER, "sge_setup_job_resend"); job = lFirst(*object_base[SGE_TYPE_JOB].list); while (NULL != job) { lListElem *task; u_long32 job_num; job_num = lGetUlong(job, JB_job_number); task = lFirst(lGetList(job, JB_ja_tasks)); while (NULL != task) { if (lGetUlong(task, JAT_status) == JTRANSFERING) { lListElem *granted_queue, *qinstance, *host; const char *qname; u_long32 task_num, when; te_event_t ev; task_num = lGetUlong(task, JAT_task_number); granted_queue = lFirst(lGetList(task, JAT_granted_destin_identifier_list)); qname = lGetString(granted_queue, JG_qname); qinstance = cqueue_list_locate_qinstance(*object_base[SGE_TYPE_CQUEUE].list, qname); host = host_list_locate(*object_base[SGE_TYPE_EXECHOST].list, lGetHost(qinstance, QU_qhostname)); when = lGetUlong(task, JAT_start_time); when += MAX(load_report_interval(host), MAX_JOB_DELIVER_TIME); ev = te_new_event((time_t)when, TYPE_JOB_RESEND_EVENT, ONE_TIME_EVENT, job_num, task_num, "job-resend_event"); te_add_event(ev); te_free_event(&ev); DPRINTF(("Did add job resend for "sge_u32"/"sge_u32" at %d\n", job_num, task_num, when)); } task = lNext(task); } job = lNext(job); } DEXIT; return; } /* sge_setup_job_resend() */
/****** qmaster/sge_qmaster_main/sge_start_heartbeat() ***************************** * NAME * sge_start_heartbeat() -- Start qmaster heartbeat. * * SYNOPSIS * static void sge_start_heartbeat(void) * * FUNCTION * Add heartbeat event and register according event handler. * * INPUTS * void - none * * RESULT * void - none * * NOTES * MT-NOTE: sge_start_heartbeat() is MT safe * *******************************************************************************/ void heartbeat_initialize(void) { te_event_t ev = NULL; DENTER(TOP_LAYER, "heartbeat_initialize"); te_register_event_handler(increment_heartbeat, TYPE_HEARTBEAT_EVENT); ev = te_new_event(HEARTBEAT_INTERVAL, TYPE_HEARTBEAT_EVENT, RECURRING_EVENT, 0, 0, "heartbeat-event"); te_add_event(ev); te_free_event(&ev); /* this is for testsuite shadowd test */ if (getenv("SGE_TEST_HEARTBEAT_TIMEOUT") != NULL) { int test_timeout = atoi(getenv("SGE_TEST_HEARTBEAT_TIMEOUT")); set_inc_qmaster_heartbeat_test_mode(test_timeout); DPRINTF(("heartbeat timeout test enabled (timeout="sge_U32CFormat")\n", sge_u32c(test_timeout))); } DEXIT; return; } /* sge_start_heartbeat(void) */
/****** qmaster/sge_mod_configuration() **************************************** * NAME * sge_mod_configuration() -- modify cluster configuration * * SYNOPSIS * int sge_mod_configuration(lListElem *aConf, lList **anAnswer, char *aUser, * char *aHost) * * FUNCTION * Modify cluster configuration. 'confp' is a pointer to a 'CONF_Type' list * element and does contain the modified configuration entry. Adding a new * configuration entry is also viewed as a modification. * * INPUTS * lListElem *aConf - CONF_Type element containing the modified conf * lList **anAnswer - answer list * char *aUser - target user * char *aHost - target host * * RESULT * int - 0 success * -1 error * * NOTES * MT-NOTE: sge_mod_configuration() is MT safe * *******************************************************************************/ int sge_mod_configuration(sge_gdi_ctx_class_t *ctx, lListElem *aConf, lList **anAnswer, char *aUser, char *aHost) { lListElem *old_conf; const char *tmp_name = NULL; char unique_name[CL_MAXHOSTLEN]; int ret = -1; const char *cell_root = ctx->get_cell_root(ctx); const char *qualified_hostname = ctx->get_qualified_hostname(ctx); u_long32 progid = ctx->get_who(ctx); DENTER(TOP_LAYER, "sge_mod_configuration"); if (!aConf || !aUser || !aHost) { CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC)); answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } if ((tmp_name = lGetHost(aConf, CONF_name)) == NULL) { CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(CONF_name), SGE_FUNC)); answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } if ((ret = sge_resolve_hostname(tmp_name, unique_name, EH_name, sizeof(unique_name))) != CL_RETVAL_OK) { DPRINTF(("%s: error %s resolving host %s\n", SGE_FUNC, cl_get_error_text(ret), tmp_name)); ERROR((SGE_EVENT, MSG_SGETEXT_CANTRESOLVEHOST_S, tmp_name)); answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } if ((ret = check_config(anAnswer, aConf))) { DRETURN(ret); } if ((old_conf = sge_get_configuration_for_host(unique_name)) != NULL) { int ret = -1; ret = do_mod_config(ctx, unique_name, old_conf, aConf, anAnswer); lFreeElem(&old_conf); if (ret == 0) { INFO((SGE_EVENT, MSG_SGETEXT_MODIFIEDINLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF)); answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO); } else { DRETURN(STATUS_EUNKNOWN); } } else { do_add_config(ctx, unique_name, aConf, anAnswer); INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF)); answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO); } if (strcmp(SGE_GLOBAL_NAME, unique_name) == 0) { sge_add_event(0, sgeE_GLOBAL_CONFIG, 0, 0, NULL, NULL, NULL, NULL); } /* ** is the configuration change relevant for the qmaster itsself? ** if so, initialise conf struct anew */ if (strcmp(unique_name, SGE_GLOBAL_NAME) == 0 || sge_hostcmp(unique_name, qualified_hostname) == 0) { lListElem *local = NULL; lListElem *global = NULL; lList *answer_list = NULL; char* qmaster_params = NULL; int accounting_flush_time = mconf_get_accounting_flush_time(); if ((local = sge_get_configuration_for_host(qualified_hostname)) == NULL) { WARNING((SGE_EVENT, MSG_CONF_NOLOCAL_S, qualified_hostname)); } if ((global = sge_get_configuration_for_host(SGE_GLOBAL_NAME)) == NULL) { ERROR((SGE_EVENT, SFNMAX, MSG_CONF_NOGLOBAL)); } if (merge_configuration(&answer_list, progid, cell_root, global, local, NULL) != 0) { ERROR((SGE_EVENT, MSG_CONF_CANTMERGECONFIGURATIONFORHOST_S, qualified_hostname)); } answer_list_output(&answer_list); /* Restart the accounting flush event if needed. */ if ((accounting_flush_time == 0) && (mconf_get_accounting_flush_time() != 0)) { te_event_t ev = te_new_event(time(NULL), TYPE_ACCOUNTING_TRIGGER, ONE_TIME_EVENT, 1, 0, NULL); te_add_event(ev); te_free_event(&ev); } lFreeElem(&local); lFreeElem(&global); sge_show_conf(); /* 'max_unheard' may have changed */ cl_commlib_set_connection_param(cl_com_get_handle("qmaster", 1), HEARD_FROM_TIMEOUT, mconf_get_max_unheard()); /* fetching qmaster_params and begin to parse */ qmaster_params = mconf_get_qmaster_params(); /* updating the commlib paramterlist and gdi_timeout with new or changed parameters */ cl_com_update_parameter_list(qmaster_params); sge_free(&qmaster_params); } /* invalidate configuration cache */ mconf_set_new_config(true); DRETURN(STATUS_OK); }