/****** sgeobj/suser/suser_unregister_job() *********************************** * NAME * suser_unregister_job() -- unregister a job * * SYNOPSIS * void suser_unregister_job(const lListElem *job) * * FUNCTION * Decrease the jobcounter for the job owner of "job". * * INPUTS * const lListElem *job - JB_Type element * * RESULT * void - NONE * * SEE ALSO * sgeobj/suser/SU_Type * sgeobj/suser/Master_SUser_List ******************************************************************************/ void suser_unregister_job(const lListElem *job) { const char *submit_user = NULL; lListElem *suser = NULL; DENTER(TOP_LAYER, "suser_unregister_job"); submit_user = lGetString(job, JB_owner); suser = suser_list_find(*object_type_get_master_list(SGE_TYPE_SUSER), submit_user); if (suser != NULL) { suser_decrease_job_counter(suser); } DEXIT; }
/****** Eventmirror/pe_task/pe_task_update_master_list_usage() *****************
*  NAME
*     pe_task_update_master_list_usage() -- update a parallel tasks usage
*
*  SYNOPSIS
*     sge_callback_result
*     pe_task_update_master_list_usage(lList *job_list, lListElem *event)
*
*  FUNCTION
*     Updates the scaled usage (PET_scaled_usage) of a parallel task with
*     the list carried in the ET_new_version field of "event".
*     The task is addressed by the event keys: job id (ET_intkey),
*     array task id (ET_intkey2) and pe task id (ET_strkey).
*
*  INPUTS
*     lList *job_list  - not consulted by this function; the job is
*                        looked up in the master job list
*     lListElem *event - event object containing the new usage list
*
*  RESULT
*     sge_callback_result - SGE_EMA_OK after the usage has been updated,
*                           SGE_EMA_FAILURE if the job, the array task or
*                           the parallel task cannot be found (an error
*                           is logged in that case)
*
*  SEE ALSO
*     Eventmirror/job/job_update_master_list_usage()
*     Eventmirror/ja_task/ja_task_update_master_list_usage()
*******************************************************************************/
sge_callback_result
pe_task_update_master_list_usage(lList *job_list, lListElem *event)
{
   lListElem *job_elem, *array_task, *parallel_task;
   const char *pe_task_key;
   u_long32 job_number, array_task_number;
   lList *swap = NULL;

   DENTER(TOP_LAYER, "pe_task_update_master_list_usage");

   /* the three keys that address the parallel task */
   job_number = lGetUlong(event, ET_intkey);
   array_task_number = lGetUlong(event, ET_intkey2);
   pe_task_key = lGetString(event, ET_strkey);

   job_elem = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), job_number);
   if (job_elem == NULL) {
      dstring id_text = DSTRING_INIT;

      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJOBFORUPDATEIN_SS,
             job_get_id_string(job_number, 0, NULL, &id_text), SGE_FUNC));
      sge_dstring_free(&id_text);
      DRETURN(SGE_EMA_FAILURE);
   }

   array_task = job_search_task(job_elem, NULL, array_task_number);
   if (array_task == NULL) {
      dstring id_text = DSTRING_INIT;

      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJATASKFORUPDATEIN_SS,
             job_get_id_string(job_number, array_task_number, NULL, &id_text), SGE_FUNC));
      sge_dstring_free(&id_text);
      DRETURN(SGE_EMA_FAILURE);
   }

   parallel_task = ja_task_search_pe_task(array_task, pe_task_key);
   if (parallel_task == NULL) {
      dstring id_text = DSTRING_INIT;

      ERROR((SGE_EVENT, MSG_JOB_CANTFINDPETASKFORUPDATEIN_SS,
             job_get_id_string(job_number, array_task_number, pe_task_key, &id_text), SGE_FUNC));
      sge_dstring_free(&id_text);
      DRETURN(SGE_EMA_FAILURE);
   }

   /*
    * Three exchanges through "swap": take the list out of the event,
    * trade it against the task's usage list, hand the result back to
    * the event.
    * NOTE(review): this presumes lXchgList() swaps the field with *swap,
    * which would leave the task's former usage list in the event -
    * confirm against the CULL documentation.
    */
   lXchgList(event, ET_new_version, &swap);
   lXchgList(parallel_task, PET_scaled_usage, &swap);
   lXchgList(event, ET_new_version, &swap);

   DRETURN(SGE_EMA_OK);
}
/*
 * cqueue_verify_subordinate_list() -- check the subordinate list of a cqueue
 *
 * Walks the list stored in ASOLIST_value of "attr_elem". An entry is
 * rejected when its SO_name equals the name (CQ_name) of "cqueue" itself
 * or when no cluster queue of that name is found in the master cqueue
 * list. Every rejected entry is logged and added to "answer_list" with
 * STATUS_EUNKNOWN; the walk continues so that all problems are reported.
 *
 * Returns true if no entry was rejected, or if "cqueue" or "attr_elem"
 * is NULL; false otherwise.
 */
bool
cqueue_verify_subordinate_list(lListElem *cqueue, lList **answer_list, lListElem *attr_elem)
{
   bool all_valid = true;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_subordinate_list");

   if (cqueue != NULL && attr_elem != NULL) {
      const lList *known_cqueues = *(object_type_get_master_list(SGE_TYPE_CQUEUE));
      const char *own_name = lGetString(cqueue, CQ_name);
      lList *subordinates = lGetList(attr_elem, ASOLIST_value);
      lListElem *entry;

      for_each(entry, subordinates) {
         const char *sub_name = lGetString(entry, SO_name);

         if (strcmp(own_name, sub_name) == 0) {
            /* a cqueue must not be subordinated to itself */
            ERROR((SGE_EVENT, MSG_CQUEUE_SUBITSELF_S, own_name));
            answer_list_add(answer_list, SGE_EVENT,
                            STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            all_valid = false;
         } else if (cqueue_list_locate(known_cqueues, sub_name) == NULL) {
            /* the referenced cqueue does not exist */
            ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNSUB_SS, sub_name, own_name));
            answer_list_add(answer_list, SGE_EVENT,
                            STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            all_valid = false;
         }
      }
   }

   DRETURN(all_valid);
}
/****** sgeobj/suser/suser_get_job_count() ************************************ * NAME * suser_job_count() - number of jobs for a given user * * SYNOPSIS * void suser_job_count(const lListElem *job) * * FUNCTION * number of jobs for a given user * * INPUTS * const lListElem *job - JB_Type element * * RESULT * number of jobs in the system ******************************************************************************/ int suser_job_count(const lListElem *job) { const char *submit_user = NULL; lListElem *suser = NULL; int ret = 0; DENTER(TOP_LAYER, "suser_job_job"); submit_user = lGetString(job, JB_owner); suser = suser_list_find(*object_type_get_master_list(SGE_TYPE_SUSER), submit_user); if (suser != NULL) { ret = suser_get_job_counter(suser); } DEXIT; return ret; }
/****** sgeobj/userset/userset_list_validate_acl_list() ***********************
*  NAME
*     userset_list_validate_acl_list() -- validate an acl list
*
*  SYNOPSIS
*     int
*     userset_list_validate_acl_list(lList *acl_list, lList **alpp)
*
*  FUNCTION
*     Checks that the US_name of every entry of an acl list (e.g. user
*     list of a pe) names an element of the master userset list. The
*     check stops at the first unknown name, which is logged and added
*     to the answer list.
*
*  INPUTS
*     lList *acl_list - the acl list to check
*     lList **alpp    - answer list pointer
*
*  RESULT
*     int - STATUS_OK, if everything is OK
*           STATUS_EUNKNOWN, if an entry is not a known userset
*******************************************************************************/
int userset_list_validate_acl_list(lList *acl_list, lList **alpp)
{
   lListElem *entry;

   DENTER(TOP_LAYER, "userset_list_validate_acl_list");

   for_each (entry, acl_list) {
      const char *set_name = lGetString(entry, US_name);

      if (lGetElemStr(*object_type_get_master_list(SGE_TYPE_USERSET),
                      US_name, set_name) != NULL) {
         continue;
      }

      ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNUSERSET_S,
             set_name != NULL ? set_name : "<NULL>"));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   DRETURN(STATUS_OK);
}
/****** sgeobj/suser/suser_check_new_job() ************************************
*  NAME
*     suser_check_new_job() -- checks, if a job can be registered
*
*  SYNOPSIS
*     int suser_check_new_job(const lListElem *job, u_long32 max_u_jobs)
*
*  FUNCTION
*     Compares the job counter of the owner (JB_owner) of "job" with
*     "max_u_jobs". The limit counts as exceeded when "max_u_jobs" is
*     not 0 and the counter has already reached it; a "max_u_jobs" of 0
*     never reports the limit as exceeded. The owner's entry is obtained
*     through suser_list_add() on the master submit user list; if that
*     call yields no entry, the limit is reported as exceeded.
*
*  INPUTS
*     const lListElem *job - JB_Type element
*     u_long32 max_u_jobs  - maximum number of allowed jobs per user
*
*  RESULT
*     int - 1 => limit would be exceeded
*           0 => otherwise
******************************************************************************/
int suser_check_new_job(const lListElem *job, u_long32 max_u_jobs)
{
   lListElem *owner_entry = NULL;
   const char *owner = NULL;
   int limit_hit = 1;

   DENTER(TOP_LAYER, "suser_check_new_job");

   owner = lGetString(job, JB_owner);
   owner_entry = suser_list_add(object_type_get_master_list(SGE_TYPE_SUSER), NULL, owner);

   if (owner_entry != NULL) {
      /* the counter is only read when a limit is actually configured */
      if (max_u_jobs != 0 && max_u_jobs <= suser_get_job_counter(owner_entry)) {
         limit_hit = 1;
      } else {
         limit_hit = 0;
      }
   }

   DRETURN(limit_hit);
}
/****** sge_userset/userset_list_validate_access() *****************************
*  NAME
*     userset_list_validate_access() -- all user set names in list must exist
*
*  SYNOPSIS
*     int userset_list_validate_access(lList *acl_list, int nm, lList **alpp)
*
*  FUNCTION
*     All the user set names in the acl_list must be defined in the qmaster
*     user set lists. A user set is differentiated from a user name by a
*     leading @ sign: only entries whose field "nm" is accepted by
*     is_hgroup_name() are checked, and the first character is skipped
*     before the name is searched in the master userset list. The first
*     unknown user set is logged and added to the answer list.
*
*  INPUTS
*     lList *acl_list - the acl list to check
*     int nm          - field name
*     lList **alpp    - answer list pointer
*
*  RESULT
*     int - STATUS_OK if no error, STATUS_EUNKNOWN otherwise
*
*  NOTES
*     MT-NOTE: userset_list_validate_access() is not MT safe
*
*******************************************************************************/ int userset_list_validate_access(lList *acl_list, int nm, lList **alpp) { lListElem *usp; char *user; DENTER(TOP_LAYER, "userset_list_validate_access"); for_each (usp, acl_list) { user = (char *) lGetString(usp, nm); if (is_hgroup_name(user) == true){ user++; /* jump over the @ sign */ if (!lGetElemStr(*object_type_get_master_list(SGE_TYPE_USERSET), US_name, user)) { ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNUSERSET_S, user ? user : "******")); answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } } }
/*
 * cqueue_verify_pe_list() -- check the pe list attribute of a cqueue
 *
 * Takes the list stored in ASTRLIST_value of "attr_elem" and asks
 * pe_list_do_all_exist() whether all of its entries exist in the master
 * parallel environment list; "answer_list" is passed on to that call.
 *
 * Returns false only if pe_list_do_all_exist() reports a failure.
 * Returns true in all other cases, including a NULL "cqueue", a NULL
 * "attr_elem" or an attribute without a list.
 */
bool
cqueue_verify_pe_list(lListElem *cqueue, lList **answer_list, lListElem *attr_elem)
{
   lList *requested_pes;
   const lList *known_pes;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_pe_list");

   if (cqueue == NULL || attr_elem == NULL) {
      DRETURN(true);
   }

   requested_pes = lGetList(attr_elem, ASTRLIST_value);
   if (requested_pes == NULL) {
      DRETURN(true);
   }

   known_pes = *(object_type_get_master_list(SGE_TYPE_PE));
   if (!pe_list_do_all_exist(known_pes, answer_list, requested_pes, true)) {
      DRETURN(false);
   }

   DRETURN(true);
}
/****** sgeobj/suser/suser_register_new_job() *********************************
*  NAME
*     suser_register_new_job() -- try to register a new job
*
*  SYNOPSIS
*     int suser_register_new_job(const lListElem *job,
*                                u_long32 max_u_jobs,
*                                int force_registration)
*
*  FUNCTION
*     Unless "force_registration" is set, suser_check_new_job() decides
*     whether a new "job" would exceed the maximum number of allowed
*     jobs per user ("max_u_jobs") for the job owner (JB_owner). If the
*     limit would be exceeded the function returns that result without
*     touching any counter. Otherwise the job counter of the job owner
*     is increased and 0 is returned.
*     With "force_registration" the limit check is skipped and the
*     counter is always increased (e.g. when reading jobs from the
*     spool area).
*
*  INPUTS
*     const lListElem *job   - JB_Type element
*     u_long32 max_u_jobs    - maximum number of allowed jobs per user
*     int force_registration - force job registration
*
*  RESULT
*     int - 1 => limit would be exceeded
*           0 => otherwise
*
*  SEE ALSO
*     sgeobj/suser/SU_Type
*     sgeobj/suser/Master_SUser_List
*     qmaster/job/job_list_register_new_job()
******************************************************************************/
int suser_register_new_job(const lListElem *job, u_long32 max_u_jobs,
                           int force_registration)
{
   lListElem *owner_entry = NULL;
   const char *owner = NULL;
   int limit_hit = 0;

   DENTER(TOP_LAYER, "suser_register_new_job");

   if (force_registration == 0) {
      limit_hit = suser_check_new_job(job, max_u_jobs);
   }

   /* limit reached: report it and leave the counter alone */
   if (limit_hit != 0) {
      DRETURN(limit_hit);
   }

   owner = lGetString(job, JB_owner);
   owner_entry = suser_list_add(object_type_get_master_list(SGE_TYPE_SUSER), NULL, owner);
   suser_increase_job_counter(owner_entry);

   DRETURN(limit_hit);
}
/*
 * cqueue_verify_calendar() -- check the calendar attribute of a cqueue
 *
 * Reads the calendar name from ASTR_value of "attr_elem". A missing
 * name and the name "none" (compared case-insensitively) are accepted
 * without a lookup. Any other name has to be found in the master
 * calendar list; if it is not, a message is formatted into SGE_EVENT
 * and added to "answer_list" with STATUS_EUNKNOWN.
 *
 * Returns false if the calendar is unknown, true otherwise (including
 * a NULL "cqueue" or "attr_elem").
 */
bool
cqueue_verify_calendar(lListElem *cqueue, lList **answer_list, lListElem *attr_elem)
{
   const char *calendar_name;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_calendar");

   if (cqueue == NULL || attr_elem == NULL) {
      DRETURN(true);
   }

   calendar_name = lGetString(attr_elem, ASTR_value);
   if (calendar_name == NULL || strcasecmp("none", calendar_name) == 0) {
      DRETURN(true);
   }

   if (calendar_list_locate(*object_type_get_master_list(SGE_TYPE_CALENDAR),
                            calendar_name) != NULL) {
      DRETURN(true);
   }

   /*
    * NOTE(review): unbounded sprintf into SGE_EVENT with a name taken from
    * the request - confirm the buffer is large enough or switch to a
    * bounded variant.
    */
   sprintf(SGE_EVENT, MSG_CQUEUE_UNKNOWNCALENDAR_S, calendar_name);
   answer_list_add(answer_list, SGE_EVENT,
                   STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);

   DRETURN(false);
}
static bool add_job(int job_id) { bool write_ok; lListElem *job; lList *answer_list = NULL; lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB); const char *key; dstring key_dstring; char key_buffer[100]; sge_dstring_init(&key_dstring, key_buffer, sizeof(key_buffer)); job = lAddElemUlong(&master_job_list, JB_job_number, job_id, JB_Type); key = job_get_key(job_id, 0, NULL, &key_dstring); #if LOCAL_TRANSACTION spool_transaction(&answer_list, spool_get_default_context(), STC_begin); answer_list_output(&answer_list); #endif write_ok = spool_write_object(&answer_list, spool_get_default_context(), job, key, SGE_TYPE_JOB, false); answer_list_output(&answer_list); if (delay > 0) { sge_usleep(delay * 1000); } #if LOCAL_TRANSACTION spool_transaction(&answer_list, spool_get_default_context(), write_ok ? STC_commit : STC_rollback); answer_list_output(&answer_list); #endif return write_ok; }
/*
 * check_config() -- validate the entries of a configuration
 *
 * Walks the CONF_entries list of "conf" and checks each name/value pair
 * (CF_name / CF_value). Entries without a name or without a value are
 * rejected. For the attribute names handled below the value is checked
 * against the rules of that attribute (log level, jsv url, shell start
 * mode, paths, user sets, projects, ...). Attribute names without a
 * matching branch are accepted as they are.
 *
 * alpp - answer list receiving the error messages
 * conf - configuration element; CONF_name is used in messages
 *
 * Returns STATUS_EEXIST as soon as one entry is rejected.
 */
static int check_config(lList **alpp, lListElem *conf)
{
   lListElem *ep;
   const char *name, *value;
   const char *conf_name;

   DENTER(TOP_LAYER, "check_config");

   conf_name = lGetHost(conf, CONF_name);

   for_each(ep, lGetList(conf, CONF_entries)) {
      name = lGetString(ep, CF_name);
      value = lGetString(ep, CF_value);

      if (name == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_NAMEISNULLINCONFIGURATIONLISTOFX_S, conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }
      if (value == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_VALUEISNULLFORATTRXINCONFIGURATIONLISTOFY_SS, name, conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }

      if (!strcmp(name, "loglevel")) {
         u_long32 tmp_uval;

         if (sge_parse_loglevel_val(&tmp_uval, value) != 1) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORLOGLEVEL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "jsv_url") == 0) {
         if (strcasecmp("none", value) != 0) {
            dstring input = DSTRING_INIT;
            dstring type = DSTRING_INIT;
            dstring user = DSTRING_INIT;
            dstring path = DSTRING_INIT;
            bool lret = true;

            sge_dstring_append(&input, value);
            lret = jsv_url_parse(&input, alpp, &type, &user, &path, false);
            sge_dstring_free(&input);
            sge_dstring_free(&type);
            sge_dstring_free(&user);
            sge_dstring_free(&path);
            if (!lret) {
               /* answer is written by jsv_url_parse */
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (!strcmp(name, "shell_start_mode")) {
         if ((strcasecmp("unix_behavior", value) != 0) &&
             (strcasecmp("posix_compliant", value) != 0) &&
             (strcasecmp("script_from_stdin", value) != 0) ) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELLSTARTMODE_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "shell")) {
         /* verify the configured path, not the attribute name "shell" */
         if (!path_verify(value, alpp, "shell", true)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "load_report_time")) {
         /* do not allow infinity entry for load_report_time */
         if (strcasecmp(value, "infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "max_unheard")) {
         /* do not allow infinity entry */
         if (strcasecmp(value,"infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "admin_user")) {
         struct passwd pw_struct;
         char *buffer;
         int size;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         /* "none" is accepted without a passwd lookup */
         if (strcasecmp(value, "none") && !sge_getpwnam_r(value, &pw_struct, buffer, size)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXASADMINUSER_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            sge_free(&buffer);
            DRETURN(STATUS_EEXIST);
         }
         sge_free(&buffer);
      } else if (!strcmp(name, "user_lists")||!strcmp(name, "xuser_lists")) {
         lList *tmp = NULL;
         int ok;

         /* parse just for .. */
         if (lString2ListNone(value, &tmp, US_Type, US_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking userset names */
         ok = (userset_list_validate_acl_list(tmp, alpp) == STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "projects") || !strcmp(name, "xprojects")) {
         lList *tmp = NULL;
         int ok=1;

         /* parse just for .. */
         if (lString2ListNone(value, &tmp, PR_Type, PR_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking project names */
         ok = (verify_project_list(alpp, tmp, *object_type_get_master_list(SGE_TYPE_PROJECT),
                                   name, "configuration", conf_name)==STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "prolog") || !strcmp(name, "epilog") || !strcmp(name, "mailer")) {
         if (strcasecmp(value, "none")) {
            const char *t, *script = value;

            /* skip user name */
            if ((t = strpbrk(script, "@ ")) && *t == '@')
               script = &t[1];

            /* force use of absolute paths if string <> none */
            if (script[0] != '/' ) {
               ERROR((SGE_EVENT, MSG_CONF_THEPATHGIVENFORXMUSTSTARTWITHANY_S, name));
               answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }

            /* ensure that variables are valid */
            if (replace_params(script, NULL, 0, prolog_epilog_variables)) {
               ERROR((SGE_EVENT, MSG_CONF_PARAMETERXINCONFIGURATION_SS, name, err_msg));
               answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (!strcmp(name, "auto_user_oticket") || !strcmp(name, "auto_user_fshare")) {
         u_long32 uval = 0;

         if (!extended_parse_ulong_val(NULL, &uval, TYPE_INT, value, NULL, 0, 0, true)) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, value ? value : "(NULL)"));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      }

      /*
       * check paths, see also CR 6506580.
       * The following must be none or a valid absolute path:
       * - load_sensor
       * - set_token_cmd
       * - pag_cmd
       * - shepherd_cmd
       *
       * The following must be a valid absolute path:
       * - mailer
       * - xterm
       * - *_daemon, may also be "builtin"
       */
      else if (strcmp(name, "set_token_cmd") == 0 ||
               strcmp(name, "pag_cmd") == 0 ||
               strcmp(name, "shepherd_cmd") == 0) {
         if (strcasecmp(value, "none") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (strcmp(name, "mailer") == 0 ||
                 strcmp(name, "xterm") == 0) {
         /*
          * NOTE(review): "mailer" is already matched by the prolog/epilog
          * branch above, so only "xterm" can reach this branch - confirm
          * which of the two checks is intended for mailer.
          */
         if (!path_verify(value, alpp, name, true)) {
            answer_list_log(alpp, false, false);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "qlogin_daemon") == 0 ||
                 strcmp(name, "rlogin_daemon") == 0 ||
                 strcmp(name, "rsh_daemon") == 0) {
         if (strcasecmp(value, "builtin") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      }

      /* load_sensor is a comma separated list of scripts */
      else if (strcmp(name, "load_sensor") == 0 && strcasecmp(value, "none") != 0) {
         struct saved_vars_s *context = NULL;
         const char *path = sge_strtok_r(value, ",", &context);

         do {
            if (!path_verify(path, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               sge_free_saved_vars(context);
               DRETURN(STATUS_EEXIST);
            }
         } while ((path = sge_strtok_r(NULL, ",", &context)) != NULL);
         sge_free_saved_vars(context);
      }
   }
/****** sge_manop_qmaster/sge_del_manop() ************************************** * NAME * sge_del_manop() -- delete manager or operator * * SYNOPSIS * int * sge_del_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, * char *ruser, char *rhost, u_long32 target) * * FUNCTION * Deletes a manager or an operator from the corresponding master list. * * INPUTS * sge_gdi_ctx_class_t *ctx - gdi context * lListElem *ep - the manager/operator to delete * lList **alpp - answer list to return messages * char *ruser - user having triggered the action * char *rhost - host from which the action has been triggered * u_long32 target - SGE_UM_LIST or SGE_UO_LIST * * RESULT * int - STATUS_OK or STATUS_* error code * * NOTES * MT-NOTE: sge_del_manop() is MT safe - if we hold the global lock. *******************************************************************************/ int sge_del_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, char *ruser, char *rhost, u_long32 target) { lListElem *found; int pos; const char *manop_name; const char *object_name; lList **lpp = NULL; int key = NoName; ev_event eve = sgeE_EVENTSIZE; DENTER(TOP_LAYER, "sge_del_manop"); if (ep == NULL || ruser == NULL || rhost == NULL) { CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC)); answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } switch (target) { case SGE_UM_LIST: lpp = object_type_get_master_list(SGE_TYPE_MANAGER); object_name = MSG_OBJ_MANAGER; key = UM_name; eve = sgeE_MANAGER_DEL; break; case SGE_UO_LIST: lpp = object_type_get_master_list(SGE_TYPE_OPERATOR); object_name = MSG_OBJ_OPERATOR; key = UO_name; eve = sgeE_OPERATOR_DEL; break; default : DPRINTF(("unknown target passed to %s\n", SGE_FUNC)); DRETURN(STATUS_EUNKNOWN); } /* ep is no manop element, if ep has no UM_name/UO_name */ if ((pos = lGetPosViaElem(ep, key, SGE_NO_ABORT)) < 0) { CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(key), SGE_FUNC)); 
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } manop_name = lGetPosString(ep, pos); if (manop_name == NULL) { CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC)); answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EUNKNOWN); } /* prevent removing of root from man/op-list */ if (strcmp(manop_name, "root") == 0) { ERROR((SGE_EVENT, MSG_SGETEXT_MAY_NOT_REMOVE_USER_FROM_LIST_SS, "root", object_name)); answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EEXIST); } /* prevent removing the admin user from man/op-list */ if (strcmp(manop_name, ctx->get_admin_user(ctx)) == 0) { ERROR((SGE_EVENT, MSG_SGETEXT_MAY_NOT_REMOVE_USER_FROM_LIST_SS, ctx->get_admin_user(ctx), object_name)); answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EEXIST); } found = lGetElemStr(*lpp, key, manop_name); if (!found) { ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, object_name, manop_name)); answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR); DRETURN(STATUS_EEXIST); } lDechainElem(*lpp, found); /* update on file */ if (!sge_event_spool(ctx, alpp, 0, eve, 0, 0, manop_name, NULL, NULL, NULL, NULL, NULL, true, true)) { ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, object_name, manop_name)); answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR); /* chain in again */ lAppendElem(*lpp, found); DRETURN(STATUS_EDISK); } lFreeElem(&found); INFO((SGE_EVENT, MSG_SGETEXT_REMOVEDFROMLIST_SSSS, ruser, rhost, manop_name, object_name)); answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO); DRETURN(STATUS_OK); }
/****** spool/utilities/spool_default_validate_func() **************** * NAME * spool_default_validate_func() -- validate objects * * SYNOPSIS * bool * spool_default_validate_func(lList **answer_list, * const lListElem *type, * const lListElem *rule, * const lListElem *object, * const char *key, * const sge_object_type object_type) * * FUNCTION * Verifies an object. * * INPUTS * lList **answer_list - to return error messages * const lListElem *type - object type description * const lListElem *rule - rule to use * const lListElem *object - object to validate * const sge_object_type object_type - object type * * RESULT * bool - true on success, else false * * NOTES * This function should not be called directly, it is called by the * spooling framework. * * SEE ALSO *******************************************************************************/ bool spool_default_validate_func(lList **answer_list, const lListElem *type, const lListElem *rule, lListElem *object, const sge_object_type object_type) { bool ret = true; DENTER(TOP_LAYER, "spool_default_validate_func"); switch(object_type) { case SGE_TYPE_ADMINHOST: case SGE_TYPE_EXECHOST: case SGE_TYPE_SUBMITHOST: { int cl_ret; int key_nm = object_type_get_key_nm(object_type); char *old_name = strdup(lGetHost(object, key_nm)); /* try hostname resolving */ if (strcmp(old_name, SGE_GLOBAL_NAME) != 0) { cl_ret = sge_resolve_host(object, key_nm); /* if hostname resolving failed: create error */ if (cl_ret != CL_RETVAL_OK) { if (cl_ret != CL_RETVAL_GETHOSTNAME_ERROR) { answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, MSG_SPOOL_CANTRESOLVEHOSTNAME_SS, old_name, cl_get_error_text(ret)); ret = false; } else { answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN, ANSWER_QUALITY_WARNING, MSG_SPOOL_CANTRESOLVEHOSTNAME_SS, old_name, cl_get_error_text(ret)); } } else { /* if hostname resolving changed hostname: spool */ const char *new_name; new_name = lGetHost(object, key_nm); if (strcmp(old_name, new_name) 
!= 0) { spooling_write_func write_func = (spooling_write_func)lGetRef(rule, SPR_write_func); spooling_delete_func delete_func = (spooling_delete_func)lGetRef(rule, SPR_delete_func); write_func(answer_list, type, rule, object, new_name, object_type); delete_func(answer_list, type, rule, old_name, object_type); } } } sge_free(&old_name); if (object_type == SGE_TYPE_EXECHOST && ret) { lListElem *load_value; lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY); /* all spooled load values are static, therefore we tag them here */ for_each(load_value, lGetList(object, EH_load_list)) { lSetBool(load_value, HL_static, true); } /* necessary to init double values of consumable configuration */ centry_list_fill_request(lGetList(object, EH_consumable_config_list), NULL, master_centry_list, true, false, true); /* necessary to setup actual list of exechost */ debit_host_consumable(NULL, object, master_centry_list, 0, true, NULL); if (ensure_attrib_available(NULL, object, EH_consumable_config_list)) { ret = false; } } }
static lListElem * qinstance_create(sge_gdi_ctx_class_t *ctx, const lListElem *cqueue, lList **answer_list, const char *hostname, bool *is_ambiguous, monitoring_t *monitor) { dstring buffer = DSTRING_INIT; const char *cqueue_name = lGetString(cqueue, CQ_name); lList *centry_list = *(object_type_get_master_list(SGE_TYPE_CENTRY)); lListElem *ret = NULL; int index; DENTER(TOP_LAYER, "qinstance_create"); ret = lCreateElem(QU_Type); /* * Pre-initialize some fields: hostname, full_name */ lSetHost(ret, QU_qhostname, hostname); lSetString(ret, QU_qname, cqueue_name); sge_dstring_sprintf(&buffer, "%s@%s", cqueue_name, hostname); lSetString(ret, QU_full_name, sge_dstring_get_string(&buffer)); sge_dstring_free(&buffer); /* * Initialize configuration attributes from CQ */ *is_ambiguous = false; index = 0; while (cqueue_attribute_array[index].cqueue_attr != NoName) { bool tmp_is_ambiguous = false; bool tmp_has_changed_conf_attr = false; bool tmp_has_changed_state_attr = false; const char *matching_host_or_group = NULL; const char *matching_group = NULL; qinstance_modify_attribute(ctx, ret, answer_list, cqueue, cqueue_attribute_array[index].qinstance_attr, cqueue_attribute_array[index].cqueue_attr, cqueue_attribute_array[index].href_attr, cqueue_attribute_array[index].value_attr, cqueue_attribute_array[index].primary_key_attr, &matching_host_or_group, &matching_group, &tmp_is_ambiguous, &tmp_has_changed_conf_attr, &tmp_has_changed_state_attr, true, NULL, monitor); *is_ambiguous |= tmp_is_ambiguous; index++; } qinstance_set_conf_slots_used(ret); qinstance_debit_consumable(ret, NULL, centry_list, 0, true); /* * Change qinstance state */ sge_qmaster_qinstance_state_set_ambiguous(ret, *is_ambiguous); if (*is_ambiguous) { DPRINTF(("Qinstance "SFN"@"SFN" has ambiguous configuration\n", cqueue_name, hostname)); } else { DPRINTF(("Qinstance "SFN"@"SFN" has non-ambiguous configuration\n", cqueue_name, hostname)); } /* * For new qinstances we have to set some internal fields which * will 
be spooled later on: * - state (modification according to initial state) * - qversion */ sge_qmaster_qinstance_state_set_unknown(ret, true); qinstance_check_unknown_state(ret, *object_type_get_master_list(SGE_TYPE_EXECHOST)); sge_qmaster_qinstance_set_initial_state(ret); qinstance_initialize_sos_attr(ctx, ret, monitor); qinstance_increase_qversion(ret); DRETURN(ret); }
/****** sge_manop_qmaster/sge_add_manop() **************************************
*  NAME
*     sge_add_manop() -- add manager or operator
*
*  SYNOPSIS
*     int
*     sge_add_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp,
*                   char *ruser, char *rhost, u_long32 target)
*
*  FUNCTION
*     Adds the manager or operator named in "ep" to the matching master
*     list and hands the new entry to sge_event_spool(). If that call
*     fails the entry is removed from the list again. A name that is
*     already contained in the list is rejected.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - gdi context
*     lListElem *ep            - the manager/operator to add
*     lList **alpp             - answer list to return messages
*     char *ruser              - user having triggered the action
*     char *rhost              - host from which the action has been triggered
*     u_long32 target          - may be SGE_UM_LIST or SGE_UO_LIST
*
*  RESULT
*     int - STATUS_OK       the entry has been added
*           STATUS_EUNKNOWN NULL argument, unknown target, or "ep" carries
*                           no usable name field
*           STATUS_EEXIST   an entry with that name already exists
*           STATUS_EDISK    sge_event_spool() failed
*******************************************************************************/
int sge_add_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp,
                  char *ruser, char *rhost, u_long32 target)
{
   const char *manop_name;
   const char *object_name;
   lList **master_list = NULL;
   lListElem *new_entry;
   int name_pos;
   int name_field;
   lDescr *entry_type = NULL;
   ev_event add_event = sgeE_EVENTSIZE;

   DENTER(TOP_LAYER, "sge_add_manop");

   if (ep == NULL || ruser == NULL || rhost == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   /* select list, key field, descriptor and event according to the target */
   if (target == SGE_UM_LIST) {
      master_list = object_type_get_master_list(SGE_TYPE_MANAGER);
      object_name = MSG_OBJ_MANAGER;
      name_field = UM_name;
      entry_type = UM_Type;
      add_event = sgeE_MANAGER_ADD;
   } else if (target == SGE_UO_LIST) {
      master_list = object_type_get_master_list(SGE_TYPE_OPERATOR);
      object_name = MSG_OBJ_OPERATOR;
      name_field = UO_name;
      entry_type = UO_Type;
      add_event = sgeE_OPERATOR_ADD;
   } else {
      DPRINTF(("unknown target passed to %s\n", SGE_FUNC));
      DRETURN(STATUS_EUNKNOWN);
   }

   /* ep is no acl element, if ep has no UM_name/UO_name */
   name_pos = lGetPosViaElem(ep, name_field, SGE_NO_ABORT);
   if (name_pos < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
                lNm2Str(name_field), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   manop_name = lGetPosString(ep, name_pos);
   if (manop_name == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   if (lGetElemStr(*master_list, name_field, manop_name) != NULL) {
      ERROR((SGE_EVENT, MSG_SGETEXT_ALREADYEXISTS_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EEXIST);
   }

   /* update in internal lists */
   new_entry = lAddElemStr(master_list, name_field, manop_name, entry_type);

   /* update on file */
   if (!sge_event_spool(ctx, alpp, 0, add_event, 0, 0, manop_name, NULL, NULL,
                        new_entry, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);

      /* remove element from list */
      lRemoveElem(*master_list, &new_entry);

      DRETURN(STATUS_EDISK);
   }

   INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS,
         ruser, rhost, manop_name, object_name));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);

   DRETURN(STATUS_OK);
}
/****** sge_c_report() *******************************************************
*  NAME
*     sge_c_report() -- process the reports sent by an execution daemon
*
*  SYNOPSIS
*     void sge_c_report(sge_gdi_ctx_class_t *ctx, char *rhost, char *commproc,
*                       int id, lList *report_list, monitoring_t *monitor)
*
*  FUNCTION
*     Validates an incoming report list and dispatches every contained report
*     according to its REP_type. The list is dropped (with a log message)
*     when
*        - it is empty,
*        - the sender's commproc name is not prognames[EXECD],
*        - verify_request_version() rejects the REP_version of the first
*          report,
*        - no execution host named "rhost" is found in the master exechost
*          list, or
*        - the sequence number of the first report is lower than the one
*          stored in the host object (EH_report_seqno) by at most 9000.
*     Otherwise the host's EH_report_seqno is updated and each report is
*     handled as a load, configuration, processors or job report.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - context handed on to the called helpers
*     char *rhost              - name of the sending host; used to locate the
*                                exec host object
*     char *commproc           - commproc name of the sender
*     int id                   - handed on to verify_request_version()
*     lList *report_list       - list of report elements (REP_version,
*                                REP_seqno, REP_type and REP_list are read)
*     monitoring_t *monitor    - monitoring structure updated by the
*                                MONITOR_E* macros
*
*  RESULT
*     void - nothing
*
*  NOTES
*     MT-NOTE: sge_c_report() is MT safe
*     NOTE(review): the text of this function ends right after the switch
*     statement; the remainder (end of the report loop and whatever consumes
*     "pb" and "send_tag_new_conf") is not visible here.
*
******************************************************************************/
void sge_c_report(sge_gdi_ctx_class_t *ctx, char *rhost, char *commproc, int id, lList *report_list, monitoring_t *monitor)
{
   lListElem *hep = NULL;
   u_long32 rep_type;
   lListElem *report;
   int ret = 0;
   u_long32 this_seqno, last_seqno;
   u_long32 rversion;
   sge_pack_buffer pb;
   bool is_pb_used = false;
   bool send_tag_new_conf = false;

   DENTER(TOP_LAYER, "sge_c_report");

   /* nothing to do for an empty list - only log who sent it (if known) */
   if (lGetNumberOfElem(report_list) == 0) {
      DPRINTF(("received empty report\n"));
      if (rhost != NULL) {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, rhost));
      } else {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, "unknown"));
      }
      DRETURN_VOID;
   }

   /* accept reports only from execd's */
   if (strcmp(prognames[EXECD], commproc)) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNCOMMPROC_S, commproc));
      DRETURN_VOID;
   }

   /* do not process load reports from old execution daemons */
   rversion = lGetUlong(lFirst(report_list), REP_version);
   if (verify_request_version(NULL, rversion, rhost, commproc, id)) {
      DRETURN_VOID;
   }

   /* the sequence number is taken from the first report of the list */
   this_seqno = lGetUlong(lFirst(report_list), REP_seqno);

   /* need exec host for all types of reports */
   if (!(hep = host_list_locate(*object_type_get_master_list(SGE_TYPE_EXECHOST), rhost))) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNEXECHOST_S, rhost));
      DRETURN_VOID;
   }

   /*
    * Prevent old reports from being processed.
    * Frequent logging of outdated reports can be an indication of too high
    * message traffic arriving at qmaster.
    *
    * A report counts as old when its sequence number is lower than the
    * stored one by at most 9000, unless the stored number is above 9990
    * and the new one below 10 (NOTE(review): presumably the sequence
    * counter wrapping around - confirm against the execd side).
    */
   last_seqno = lGetUlong(hep, EH_report_seqno);
   if ((this_seqno < last_seqno && (last_seqno - this_seqno) <= 9000) && !(last_seqno > 9990 && this_seqno < 10)) {
      /* this must be an old report, log and then ignore it */
      INFO((SGE_EVENT, MSG_QMASTER_RECEIVED_OLD_LOAD_REPORT_UUS, sge_u32c(this_seqno), sge_u32c(last_seqno), rhost));
      DRETURN_VOID;
   }

   /* remember the sequence number of the report that is processed now */
   lSetUlong(hep, EH_report_seqno, this_seqno);

   /* RU: */
   /* tag all reschedule_unknown list entries we hope to hear about in that job report */
   update_reschedule_unknown_list(ctx, hep);

   /*
   ** process the reports one after the other
   ** usually there will be a load report
   ** and a configuration version report
   */
   for_each(report, report_list) {
      rep_type = lGetUlong(report, REP_type);
      switch (rep_type) {
         case NUM_REP_REPORT_LOAD:
         case NUM_REP_FULL_REPORT_LOAD:
            MONITOR_ELOAD(monitor);
            /*
             * Now handle execds load reports.
             * If EH_lt_heard_from of the host is still 0 and this is not a
             * full load report, the load values are not applied;
             * host_notify_about_full_load_report() is called instead.
             */
            if (lGetUlong(hep, EH_lt_heard_from) == 0 && rep_type != NUM_REP_FULL_REPORT_LOAD) {
               host_notify_about_full_load_report(ctx, hep);
            } else {
               /* the pack buffer is initialized only once, on first use */
               if (!is_pb_used) {
                  is_pb_used = true;
                  init_packbuffer(&pb, 1024, 0);
               }
               sge_update_load_values(ctx, rhost, lGetList(report, REP_list));

               /*
                * With simulated execds enabled: every exec host whose
                * consumable config list has a "load_report_host" entry
                * naming rhost receives the same load values.
                */
               if (mconf_get_simulate_execds()) {
                  lList *master_exechost_list = *object_type_get_master_list(SGE_TYPE_EXECHOST);
                  lListElem *shep;
                  lListElem *simhostElem=NULL;

                  for_each(shep, master_exechost_list) {
                     simhostElem = lGetSubStr(shep, CE_name, "load_report_host", EH_consumable_config_list);
                     if (simhostElem != NULL) {
                        const char *real_host = lGetString(simhostElem, CE_stringval);
                        if (real_host != NULL && sge_hostcmp(real_host, rhost) == 0) {
                           const char* sim_host = lGetHost(shep, EH_name);
                           lListElem *clp = NULL;

                           DPRINTF(("Copy load values of %s to simulated host %s\n", rhost, sim_host));
                           /*
                            * rewrite the host name of every load value in
                            * the report itself, except for values that
                            * belong to SGE_GLOBAL_NAME
                            */
                           for_each(clp, lGetList(report, REP_list)) {
                              if (strcmp(lGetHost(clp, LR_host), SGE_GLOBAL_NAME) != 0) {
                                 lSetHost(clp, LR_host, sim_host);
                              }
                           }
                           sge_update_load_values(ctx, sim_host, lGetList(report, REP_list));
                        }
                     }
                  }
               }

               /* add an acknowledge for this load report to the pack buffer */
               pack_ack(&pb, ACK_LOAD_REPORT, this_seqno, 0, NULL);
            }
            break;
         case NUM_REP_REPORT_CONF:
            MONITOR_ECONF(monitor);
            /* only remember that the host's configuration is outdated */
            if (sge_compare_configuration(hep, lGetList(report, REP_list)) != 0) {
               DPRINTF(("%s: configuration on host %s is not up to date\n", SGE_FUNC, rhost));
               send_tag_new_conf = true;
            }
            break;
         case NUM_REP_REPORT_PROCESSORS:
            /*
            ** save number of processors
            */
            MONITOR_EPROC(monitor);
            ret = update_license_data(ctx, hep, lGetList(report, REP_list));
            if (ret) {
               ERROR((SGE_EVENT, MSG_LICENCE_ERRORXUPDATINGLICENSEDATA_I, ret));
            }
            break;
         case NUM_REP_REPORT_JOB:
            MONITOR_EJOB(monitor);
            /* the pack buffer is initialized only once, on first use */
            if (!is_pb_used) {
               is_pb_used = true;
               init_packbuffer(&pb, 1024, 0);
            }
            process_job_report(ctx, report, hep, rhost, commproc, &pb, monitor);
            break;
         default:
            /* unknown report types are only traced, not treated as error */
            DPRINTF(("received invalid report type %ld\n", (long) rep_type));
      }
/****** qmaster/ckpt/sge_del_ckpt() *******************************************
*
*  NAME
*     sge_del_ckpt -- delete ckpt object in Master_Ckpt_List
*
*  SYNOPSIS
*     int sge_del_ckpt(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp,
*                      char *ruser, char *rhost);
*
*  FUNCTION
*     This function will be called from the framework which will
*     add/modify/delete generic gdi objects.
*     The purpose of this function is to delete ckpt objects.
*
*     The object is looked up by the CK_name of "ep" in the master
*     checkpoint list. It is removed only if it is not referenced by the
*     master job list or the master cluster queue list and if the deletion
*     could be spooled.
*
*  INPUTS
*     ctx   - context handed on to sge_event_spool()
*     ep    - element which should be deleted (only CK_name is read)
*     alpp  - reference to an answer list.
*     ruser - username of person who invoked this gdi request
*     rhost - hostname of the host where someone initiated a gdi call
*
*  RESULT
*     [alpp] - error and info messages will be added to this list
*     STATUS_OK       - success
*     STATUS_EUNKNOWN - "ep", "ruser" or "rhost" is NULL, "ep" has no
*                       CK_name field or value, or the object is still
*                       referenced
*     STATUS_EEXIST   - there is no ckpt object with the given name
*     STATUS_EDISK    - the deletion could not be spooled
******************************************************************************/
int sge_del_ckpt(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, char *ruser, char *rhost)
{
   lListElem *found;
   int pos;
   const char *ckpt_name;
   lList **lpp = object_type_get_master_list(SGE_TYPE_CKPT);

   DENTER(TOP_LAYER, "sge_del_ckpt");

   if ( !ep || !ruser || !rhost ) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   /* ep is no ckpt element, if ep has no CK_name */
   if ((pos = lGetPosViaElem(ep, CK_name, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(CK_name), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   ckpt_name = lGetPosString(ep, pos);
   if (!ckpt_name) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   found = ckpt_list_locate(*lpp, ckpt_name);
   if (!found) {
      ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, MSG_OBJ_CKPT, ckpt_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EEXIST;
   }

   /*
    * Try to find references in other objects.
    * The reason is collected in a local answer list so that only its first
    * message, prefixed with "denied: ", ends up in the caller's list.
    */
   {
      lList *local_answer_list = NULL;

      if (ckpt_is_referenced(found, &local_answer_list,
                             *(object_type_get_master_list(SGE_TYPE_JOB)),
                             *(object_type_get_master_list(SGE_TYPE_CQUEUE)))) {
         lListElem *answer = lFirst(local_answer_list);

         ERROR((SGE_EVENT, "denied: %s", lGetString(answer, AN_text)));
         answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
         lFreeList(&local_answer_list);
         DEXIT;
         return STATUS_EUNKNOWN;
      }
   }

   /*
    * Remove ckpt file 1st: the in-memory object is only removed after the
    * deletion has been spooled successfully.
    */
   if (!sge_event_spool(ctx, alpp, 0, sgeE_CKPT_DEL, 0, 0, ckpt_name, NULL, NULL,
                        NULL, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, MSG_OBJ_CKPT, ckpt_name));
      /* report the same status in the answer list that is returned */
      answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EDISK;
   }

   /* now we can remove the element */
   lRemoveElem(*lpp, &found);

   INFO((SGE_EVENT, MSG_SGETEXT_REMOVEDFROMLIST_SSSS, ruser, rhost, ckpt_name, MSG_OBJ_CKPT));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DEXIT;
   return STATUS_OK;
}
/****** qmaster/threads/sge_scheduler_main() ********************************** * NAME * sge_scheduler_main() -- main function of the scheduler thread * * SYNOPSIS * void * sge_scheduler_main(void *arg) * * FUNCTION * Main function of the scheduler thread, * * INPUTS * void *arg - pointer to the thread function (type cl_thread_settings_t*) * * RESULT * void * - always NULL * * NOTES * MT-NOTE: sge_scheduler_main() is MT safe * * MT-NOTE: this is a thread function. Do NOT use this function * MT-NOTE: in any other way! * * SEE ALSO * qmaster/threads/sge_scheduler_initialize() * qmaster/threads/sge_scheduler_cleanup_thread() * qmaster/threads/sge_scheduler_terminate() * qmaster/threads/sge_scheduler_main() *******************************************************************************/ void * sge_scheduler_main(void *arg) { time_t next_prof_output = 0; monitoring_t monitor; sge_gdi_ctx_class_t *ctx = NULL; sge_evc_class_t *evc = NULL; lList *alp = NULL; sge_where_what_t where_what; cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg; bool do_shutdown = false; bool do_endlessly = true; bool local_ret = true; DENTER(TOP_LAYER, "sge_scheduler_main"); memset(&where_what, 0, sizeof(where_what)); /* * startup */ if (local_ret) { /* initialize commlib thread */ cl_thread_func_startup(thread_config); /* initialize monitoring */ sge_monitor_init(&monitor, thread_config->thread_name, SCH_EXT, SCT_WARNING, SCT_ERROR); sge_qmaster_thread_init(&ctx, SCHEDD, SCHEDD_THREAD, true); /* register at profiling module */ set_thread_name(pthread_self(), "Scheduler Thread"); conf_update_thread_profiling("Scheduler Thread"); DPRINTF((SFN" started\n", thread_config->thread_name)); /* initialize schedd_runnlog logging */ schedd_set_schedd_log_file(ctx); } /* set profiling parameters */ prof_set_level_name(SGE_PROF_EVENTMASTER, NULL, NULL); prof_set_level_name(SGE_PROF_SPOOLING, NULL, NULL); prof_set_level_name(SGE_PROF_CUSTOM0, "scheduler", NULL); 
prof_set_level_name(SGE_PROF_CUSTOM1, "pending ticket calculation", NULL); prof_set_level_name(SGE_PROF_CUSTOM3, "job sorting", NULL); prof_set_level_name(SGE_PROF_CUSTOM4, "job dispatching", NULL); prof_set_level_name(SGE_PROF_CUSTOM5, "send orders", NULL); prof_set_level_name(SGE_PROF_CUSTOM6, "scheduler event loop", NULL); prof_set_level_name(SGE_PROF_CUSTOM7, "copy lists", NULL); prof_set_level_name(SGE_PROF_SCHEDLIB4, NULL, NULL); /* set-up needed for 'schedule' file */ serf_init(schedd_serf_record_func, schedd_serf_newline); schedd_set_serf_log_file(ctx); /* * prepare event client/mirror mechanism */ if (local_ret) { local_ret = sge_gdi2_evc_setup(&evc, ctx, EV_ID_SCHEDD, &alp, "scheduler"); DPRINTF(("prepared event client/mirror mechanism\n")); } /* * register as event mirror */ if (local_ret) { sge_mirror_initialize(evc, EV_ID_SCHEDD, "scheduler", false, &event_update_func, &sge_mod_event_client, &sge_add_event_client, &sge_remove_event_client, &sge_handle_event_ack); evc->ec_register(evc, false, NULL, &monitor); evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_RELEASED); DPRINTF(("registered at event mirror\n")); } /* * subscribe necessary data */ if (local_ret) { ensure_valid_what_and_where(&where_what); subscribe_scheduler(evc, &where_what); DPRINTF(("subscribed necessary data from event master\n")); } /* * schedulers main loop */ if (local_ret) { while (do_endlessly) { bool handled_events = false; lList *event_list = NULL; int execute = 0; double prof_copy = 0.0; double prof_total = 0.0; double prof_init = 0.0; double prof_free = 0.0; double prof_run = 0.0; lList *orders = NULL; if (sconf_get_profiling()) { prof_start(SGE_PROF_OTHER, NULL); prof_start(SGE_PROF_PACKING, NULL); prof_start(SGE_PROF_EVENTCLIENT, NULL); prof_start(SGE_PROF_MIRROR, NULL); prof_start(SGE_PROF_GDI, NULL); prof_start(SGE_PROF_HT_RESIZE, NULL); prof_start(SGE_PROF_CUSTOM0, NULL); prof_start(SGE_PROF_CUSTOM1, NULL); prof_start(SGE_PROF_CUSTOM3, NULL); prof_start(SGE_PROF_CUSTOM4, 
NULL); prof_start(SGE_PROF_CUSTOM5, NULL); prof_start(SGE_PROF_CUSTOM6, NULL); prof_start(SGE_PROF_CUSTOM7, NULL); prof_start(SGE_PROF_SCHEDLIB4, NULL); } else { prof_stop(SGE_PROF_OTHER, NULL); prof_stop(SGE_PROF_PACKING, NULL); prof_stop(SGE_PROF_EVENTCLIENT, NULL); prof_stop(SGE_PROF_MIRROR, NULL); prof_stop(SGE_PROF_GDI, NULL); prof_stop(SGE_PROF_HT_RESIZE, NULL); prof_stop(SGE_PROF_CUSTOM0, NULL); prof_stop(SGE_PROF_CUSTOM1, NULL); prof_stop(SGE_PROF_CUSTOM3, NULL); prof_stop(SGE_PROF_CUSTOM4, NULL); prof_stop(SGE_PROF_CUSTOM5, NULL); prof_stop(SGE_PROF_CUSTOM6, NULL); prof_stop(SGE_PROF_CUSTOM7, NULL); prof_stop(SGE_PROF_SCHEDLIB4, NULL); } /* * Wait for new events */ MONITOR_IDLE_TIME(sge_scheduler_wait_for_event(evc, &event_list), (&monitor), mconf_get_monitor_time(), mconf_is_monitor_message()); /* If we lost connection we have to register again */ if (evc->ec_need_new_registration(evc)) { lFreeList(&event_list); if (evc->ec_register(evc, false, NULL, &monitor) == true) { DPRINTF(("re-registered at event master!\n")); } } if (event_list != NULL) { /* check for shutdown */ do_shutdown = (lGetElemUlong(event_list, ET_type, sgeE_SHUTDOWN) != NULL) ? 
true : false; /* update mirror and free data */ if (do_shutdown == false && sge_mirror_process_event_list(evc, event_list) == SGE_EM_OK) { handled_events = true; DPRINTF(("events handled\n")); } else { DPRINTF(("events contain shutdown event - ignoring events\n")); } lFreeList(&event_list); } /* if we actually got events, start the scheduling run and further event processing */ if (handled_events == true) { lList *answer_list = NULL; scheduler_all_data_t copy; lList *master_cqueue_list = *(object_type_get_master_list(SGE_TYPE_CQUEUE)); lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB); lList *master_userset_list = *object_type_get_master_list(SGE_TYPE_USERSET); lList *master_project_list = *object_type_get_master_list(SGE_TYPE_PROJECT); lList *master_exechost_list= *object_type_get_master_list(SGE_TYPE_EXECHOST); lList *master_rqs_list= *object_type_get_master_list(SGE_TYPE_RQS); lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY); lList *master_ckpt_list = *object_type_get_master_list(SGE_TYPE_CKPT); lList *master_user_list = *object_type_get_master_list(SGE_TYPE_USER); lList *master_ar_list = *object_type_get_master_list(SGE_TYPE_AR); lList *master_pe_list = *object_type_get_master_list(SGE_TYPE_PE); lList *master_hgrp_list = *object_type_get_master_list(SGE_TYPE_HGROUP); lList *master_sharetree_list = *object_type_get_master_list(SGE_TYPE_SHARETREE); /* delay scheduling for test purposes, see issue GE-3306 */ if (SGE_TEST_DELAY_SCHEDULING > 0) { sleep(SGE_TEST_DELAY_SCHEDULING); } PROF_START_MEASUREMENT(SGE_PROF_CUSTOM6); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); if (__CONDITION(INFOPRINT)) { dstring ds; char buffer[128]; sge_dstring_init(&ds, buffer, sizeof(buffer)); DPRINTF(("================[SCHEDULING-EPOCH %s]==================\n", sge_at_time(0, &ds))); sge_dstring_free(&ds); } /* * If there were new events then * copy/filter data necessary for the scheduler run * and run the scheduler method */ memset(©, 0, 
sizeof(copy)); copy.dept_list = lSelect("", master_userset_list, where_what.where_dept, where_what.what_acldept); copy.acl_list = lSelect("", master_userset_list, where_what.where_acl, where_what.what_acldept); DPRINTF(("RAW CQ:%d, J:%d, H:%d, C:%d, A:%d, D:%d, P:%d, CKPT:%d," " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d\n", lGetNumberOfElem(master_cqueue_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(master_centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(master_project_list), lGetNumberOfElem(master_ckpt_list), lGetNumberOfElem(master_user_list), lGetNumberOfElem(master_project_list), lGetNumberOfElem(master_rqs_list), lGetNumberOfElem(master_ar_list), lGetNumberOfNodes(NULL, master_sharetree_list, STN_children), lGetNumberOfLeafs(NULL, master_sharetree_list, STN_children) )); sge_rebuild_job_category(master_job_list, master_userset_list, master_project_list, master_rqs_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_init = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); sge_before_dispatch(evc); /* prepare data for the scheduler itself */ copy.host_list = lCopyList("", master_exechost_list); /* * Within the scheduler we do only need QIs */ { lListElem *cqueue = NULL; lEnumeration *what_queue3 = NULL; for_each(cqueue, master_cqueue_list) { lList *qinstance_list = lGetList(cqueue, CQ_qinstances); lList *t; if (!qinstance_list) { continue; } /* all_queue_list contains all queue instances with state and full queue name only */ if (!what_queue3) { what_queue3 = lWhat("%T(%I%I)", lGetListDescr(qinstance_list), QU_full_name, QU_state); } t = lSelect("t", qinstance_list, NULL, what_queue3); if (t) { if (copy.all_queue_list == NULL) { copy.all_queue_list = lCreateList("all", lGetListDescr(t)); } lAppendList(copy.all_queue_list, t); lFreeList (&t); } t = lSelect("t", qinstance_list, where_what.where_queue, 
where_what.what_queue2); if (t) { if (copy.queue_list == NULL) { copy.queue_list = lCreateList("enabled", lGetListDescr(t)); } lAppendList(copy.queue_list, t); lFreeList (&t); } t = lSelect("t", qinstance_list, where_what.where_queue2, where_what.what_queue2); if (t) { if (copy.dis_queue_list == NULL) { copy.dis_queue_list = lCreateList("disabled", lGetListDescr(t)); } lAppendList(copy.dis_queue_list, t); lFreeList (&t); } } if (what_queue3) { lFreeWhat(&what_queue3); } } if (sconf_is_job_category_filtering()) { copy.job_list = sge_category_job_copy(copy.queue_list, &orders, evc->monitor_next_run); } else { copy.job_list = lCopyList("", master_job_list); } /* no need to copy these lists, they are read only used */ copy.centry_list = master_centry_list; copy.ckpt_list = master_ckpt_list; copy.hgrp_list = master_hgrp_list; /* these lists need to be copied because they are modified during scheduling run */ copy.share_tree = lCopyList("", master_sharetree_list); copy.pe_list = lCopyList("", master_pe_list); copy.user_list = lCopyList("", master_user_list); copy.project_list = lCopyList("", master_project_list); copy.rqs_list = lCopyList("", master_rqs_list); copy.ar_list = lCopyList("", master_ar_list); /* report number of reduced and raw (in brackets) lists */ DPRINTF(("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, P:%d, CKPT:%d," " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n", lGetNumberOfElem(copy.queue_list), lGetNumberOfElem(copy.all_queue_list), lGetNumberOfElem(copy.job_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(copy.host_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(copy.centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(copy.pe_list), lGetNumberOfElem(copy.ckpt_list), lGetNumberOfElem(copy.user_list), lGetNumberOfElem(copy.project_list), lGetNumberOfElem(copy.rqs_list), lGetNumberOfElem(copy.ar_list), lGetNumberOfNodes(NULL, copy.share_tree, STN_children), 
lGetNumberOfLeafs(NULL, copy.share_tree, STN_children) )); if (getenv("SGE_ND")) { printf("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, " "P:%d, CKPT:%d, US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n", lGetNumberOfElem(copy.queue_list), lGetNumberOfElem(copy.all_queue_list), lGetNumberOfElem(copy.job_list), lGetNumberOfElem(master_job_list), lGetNumberOfElem(copy.host_list), lGetNumberOfElem(master_exechost_list), lGetNumberOfElem(copy.centry_list), lGetNumberOfElem(copy.acl_list), lGetNumberOfElem(copy.dept_list), lGetNumberOfElem(copy.pe_list), lGetNumberOfElem(copy.ckpt_list), lGetNumberOfElem(copy.user_list), lGetNumberOfElem(copy.project_list), lGetNumberOfElem(copy.rqs_list), lGetNumberOfElem(copy.ar_list), lGetNumberOfNodes(NULL, copy.share_tree, STN_children), lGetNumberOfLeafs(NULL, copy.share_tree, STN_children) ); } else { schedd_log("-------------START-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run); } PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); scheduler_method(evc, &answer_list, ©, &orders); answer_list_output(&answer_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_run = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7); /* .. 
which gets deleted after using */ lFreeList(&(copy.host_list)); lFreeList(&(copy.queue_list)); lFreeList(&(copy.dis_queue_list)); lFreeList(&(copy.all_queue_list)); lFreeList(&(copy.job_list)); lFreeList(&(copy.acl_list)); lFreeList(&(copy.dept_list)); lFreeList(&(copy.pe_list)); lFreeList(&(copy.share_tree)); lFreeList(&(copy.user_list)); lFreeList(&(copy.project_list)); lFreeList(&(copy.rqs_list)); lFreeList(&(copy.ar_list)); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7); prof_free = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL); /* * need to sync with event master thread * if schedd configuration changed then settings in evm can be adjusted */ if (sconf_is_new_config()) { /* set scheduler interval / event delivery interval */ u_long32 interval = sconf_get_schedule_interval(); if (evc->ec_get_edtime(evc) != interval) { evc->ec_set_edtime(evc, interval); } /* set job / ja_task event flushing */ set_job_flushing(evc); /* no need to ec_commit here - we do it when resetting the busy state */ /* now we handled the new schedd config - no need to do it twice */ sconf_reset_new_config(); } /* block till master handled all GDI orders */ sge_schedd_block_until_orders_processed(evc->get_gdi_ctx(evc), NULL); schedd_order_destroy(); /* * Stop profiling for "schedd run total" and the subcategories */ PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM6); prof_total = prof_get_measurement_wallclock(SGE_PROF_CUSTOM6, true, NULL); if (prof_is_active(SGE_PROF_CUSTOM6)) { PROFILING((SGE_EVENT, "PROF: schedd run took: %.3f s (init: %.3f s, copy: %.3f s, " "run:%.3f, free: %.3f s, jobs: %d, categories: %d/%d)", prof_total, prof_init, prof_copy, prof_run, prof_free, lGetNumberOfElem(*object_type_get_master_list(SGE_TYPE_JOB)), sge_category_count(), sge_cs_category_count() )); } if (getenv("SGE_ND") != NULL) { printf("--------------STOP-SCHEDULER-RUN-------------\n"); } else { schedd_log("--------------STOP-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run); } 
thread_output_profiling("scheduler thread profiling summary:\n", &next_prof_output); sge_monitor_output(&monitor); } /* reset the busy state */ evc->ec_set_busy(evc, 0); evc->ec_commit(evc, NULL); /* stop logging into schedd_runlog (enabled via -tsm) */ evc->monitor_next_run = false; /* * pthread cancelation point * * sge_scheduler_cleanup_thread() is the last function which should * be called so it is pushed first */ pthread_cleanup_push(sge_scheduler_cleanup_thread, (void *) &ctx); pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_monitor, (void *)&monitor); pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_event_client, (void *)evc); cl_thread_func_testcancel(thread_config); pthread_cleanup_pop(execute); pthread_cleanup_pop(execute); pthread_cleanup_pop(execute); DPRINTF(("passed cancelation point\n")); }
/*-------------------------------------------------------------------------*/ int main(int argc, char **argv) { int ret; int my_pid; int ret_val; int printed_points = 0; int max_enroll_tries; static char tmp_err_file_name[SGE_PATH_MAX]; time_t next_prof_output = 0; int execd_exit_state = 0; lList **master_job_list = NULL; sge_gdi_ctx_class_t *ctx = NULL; lList *alp = NULL; DENTER_MAIN(TOP_LAYER, "execd"); #if defined(LINUX) gen_procList (); #endif prof_mt_init(); set_thread_name(pthread_self(),"Execd Thread"); prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); #ifdef __SGE_COMPILE_WITH_GETTEXT__ /* init language output for gettext() , it will use the right language */ sge_init_language_func((gettext_func_type) gettext, (setlocale_func_type) setlocale, (bindtextdomain_func_type) bindtextdomain, (textdomain_func_type) textdomain); sge_init_language(NULL,NULL); #endif /* __SGE_COMPILE_WITH_GETTEXT__ */ /* This needs a better solution */ umask(022); /* Initialize path for temporary logging until we chdir to spool */ my_pid = getpid(); sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid)); log_state_set_log_file(tmp_err_file_name); /* exit func for SGE_EXIT() */ sge_sig_handler_in_main_loop = 0; sge_setup_sig_handlers(EXECD); if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) { answer_list_output(&alp); SGE_EXIT((void**)&ctx, 1); } ctx->set_exit_func(ctx, execd_exit_func); #if defined(SOLARIS) /* Init shared SMF libs if necessary */ if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) { SGE_EXIT((void**)&ctx, 1); } #endif /* prepare daemonize */ if (!getenv("SGE_ND")) { sge_daemonize_prepare(ctx); } if ((ret=sge_occupy_first_three())>=0) { CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret)); SGE_EXIT((void**)&ctx, 1); } lInit(nmv); /* unset XAUTHORITY if set */ if (getenv("XAUTHORITY") != NULL) { sge_unsetenv("XAUTHORITY"); } parse_cmdline_execd(argv); /* 
exit if we can't get communication handle (bind port) */ max_enroll_tries = 30; while (cl_com_get_handle(prognames[EXECD],1) == NULL) { ctx->prepare_enroll(ctx); max_enroll_tries--; if (max_enroll_tries <= 0 || shut_me_down) { /* exit after 30 seconds */ if (printed_points != 0) { printf("\n"); } CRITICAL((SGE_EVENT, MSG_COM_ERROR)); SGE_EXIT((void**)&ctx, 1); } if (cl_com_get_handle(prognames[EXECD],1) == NULL) { /* sleep when prepare_enroll() failed */ sleep(1); if (max_enroll_tries < 27) { printf("."); printed_points++; fflush(stdout); } } } if (printed_points != 0) { printf("\n"); } /* * now the commlib up and running. Set execd application status function * ( commlib callback function for qping status information response * messages (SIRM) ) */ ret_val = cl_com_set_status_func(sge_execd_application_status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val)) ); } /* test connection */ { cl_com_SIRM_t* status = NULL; ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx), (char *)ctx->get_master(ctx, true), (char*)prognames[QMASTER], 1, &status); if (ret_val != CL_RETVAL_OK) { ERROR((SGE_EVENT, cl_get_error_text(ret_val))); ERROR((SGE_EVENT, MSG_CONF_NOCONFBG)); } cl_com_free_sirm_message(&status); } /* finalize daeamonize */ if (!getenv("SGE_ND")) { sge_daemonize_finalize(ctx); } /* daemonizes if qmaster is unreachable */ sge_setup_sge_execd(ctx, tmp_err_file_name); /* are we using qidle or not */ sge_ls_qidle(mconf_get_use_qidle()); sge_ls_gnu_ls(1); DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle())); /* test load sensor (internal or external) */ { lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx)); lFreeList(&report_list); } /* here we have to wait for qmaster registration */ while (sge_execd_register_at_qmaster(ctx, false) != 0) { if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) { /* This is no error */ DPRINTF(("***** got SGE_COM_ACCESS_DENIED from 
qmaster *****\n")); } if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) { execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE; break; } if (shut_me_down != 0) { break; } sleep(30); } /* * Terminate on SIGTERM or hard communication error */ if (execd_exit_state != 0 || shut_me_down != 0) { sge_shutdown((void**)&ctx, execd_exit_state); DRETURN(execd_exit_state); } /* * We write pid file when we are connected to qmaster. Otherwise an old * execd might overwrite our pidfile. */ sge_write_pid(EXECD_PID_FILE); /* * At this point we are sure we are the only sge_execd and we are connected * to the current qmaster. First we have to report any reaped children * that might exist. */ starting_up(); /* * Log a warning message if execd hasn't been started by a superuser */ if (!sge_is_start_user_superuser()) { WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT)); } #ifdef COMPILE_DC if (ptf_init()) { CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF)); SGE_EXIT((void**)&ctx, 1); } INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF)); #endif master_job_list = object_type_get_master_list(SGE_TYPE_JOB); *master_job_list = lCreateList("Master_Job_List", JB_Type); job_list_read_from_disk(master_job_list, "Master_Job_List", 0, SPOOL_WITHIN_EXECD, job_initialize_job); /* clean up jobs hanging around (look in active_dir) */ clean_up_old_jobs(ctx, 1); execd_trash_load_report(); sge_set_flush_lr_flag(true); sge_sig_handler_in_main_loop = 1; if (thread_prof_active_by_id(pthread_self())) { prof_start(SGE_PROF_CUSTOM1, NULL); prof_start(SGE_PROF_CUSTOM2, NULL); prof_start(SGE_PROF_GDI_REQUEST, NULL); } else { prof_stop(SGE_PROF_CUSTOM1, NULL); prof_stop(SGE_PROF_CUSTOM2, NULL); prof_stop(SGE_PROF_GDI_REQUEST, NULL); } PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1); /* Start dispatching */ execd_exit_state = sge_execd_process_messages(ctx); /* * This code is only reached when dispatcher terminates and execd goes down. 
*/ /* log if we received SIGPIPE signal */ if (sge_sig_handler_sigpipe_received) { sge_sig_handler_sigpipe_received = 0; INFO((SGE_EVENT, "SIGPIPE received\n")); } #if defined(LINUX) free_procList(); #endif lFreeList(master_job_list); PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1); if (prof_is_active(SGE_PROF_ALL)) { time_t now = (time_t)sge_get_gmt(); if (now > next_prof_output) { prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n"); prof_reset(SGE_PROF_ALL,NULL); next_prof_output = now + 60; } } sge_prof_cleanup(); sge_shutdown((void**)&ctx, execd_exit_state); DRETURN(execd_exit_state); }