Example #1
0
/****** sgeobj/suser/suser_unregister_job() ***********************************
*  NAME
*     suser_unregister_job() -- unregister a job 
*
*  SYNOPSIS
*     void suser_unregister_job(const lListElem *job) 
*
*  FUNCTION
*     Decrease the jobcounter for the job owner of "job".
*
*  INPUTS
*     const lListElem *job - JB_Type element 
*
*  RESULT
*     void - NONE
*
*  SEE ALSO
*     sgeobj/suser/SU_Type
*     sgeobj/suser/Master_SUser_List  
******************************************************************************/
void suser_unregister_job(const lListElem *job)
{
   const char *submit_user = NULL;
   lListElem *suser = NULL;    

   DENTER(TOP_LAYER, "suser_unregister_job");
   submit_user = lGetString(job, JB_owner);  
   suser = suser_list_find(*object_type_get_master_list(SGE_TYPE_SUSER), submit_user);
   if (suser != NULL) {
      suser_decrease_job_counter(suser);
   }
   DEXIT;
}
Example #2
0
/****** Eventmirror/pe_task/pe_task_update_master_list_usage() *****************
*  NAME
*     pe_task_update_master_list_usage() -- update a parallel tasks usage
*
*  SYNOPSIS
*     bool 
*     pe_task_update_master_list_usage(lList *job_list, lListElem *event) 
*
*  FUNCTION
*     Updates the scaled usage of a parallel task.
*
*  INPUTS
*     lListElem *job_list - the master job list
*     lListElem *event    - event object containing the new usage list
*
*  RESULT
*     bool - true, if the operation succeeds, else false
*
*  SEE ALSO
*     Eventmirror/job/job_update_master_list_usage()
*     Eventmirror/ja_task/ja_task_update_master_list_usage()
*******************************************************************************/
sge_callback_result
pe_task_update_master_list_usage(lList *job_list, lListElem *event)
{
   lList *tmp = NULL;
   u_long32 job_id, ja_task_id;
   const char *pe_task_id;
   lListElem *job, *ja_task, *pe_task;

   DENTER(TOP_LAYER, "pe_task_update_master_list_usage");

   job_id = lGetUlong(event, ET_intkey);
   ja_task_id = lGetUlong(event, ET_intkey2);
   pe_task_id = lGetString(event, ET_strkey);
   
   job = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), job_id);
   if (job == NULL) {
      dstring id_dstring = DSTRING_INIT;
      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJOBFORUPDATEIN_SS, 
             job_get_id_string(job_id, 0, NULL, &id_dstring), SGE_FUNC));
      sge_dstring_free(&id_dstring);
      DEXIT;
      return SGE_EMA_FAILURE;
   }
   
   ja_task = job_search_task(job, NULL, ja_task_id);
   if (ja_task == NULL) {
      dstring id_dstring = DSTRING_INIT;
      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJATASKFORUPDATEIN_SS, 
             job_get_id_string(job_id, ja_task_id, NULL, &id_dstring), SGE_FUNC));
      sge_dstring_free(&id_dstring);
      DEXIT;
      return SGE_EMA_FAILURE;
   }

   pe_task = ja_task_search_pe_task(ja_task, pe_task_id);
   if (pe_task == NULL) {
      dstring id_dstring = DSTRING_INIT;
      ERROR((SGE_EVENT, MSG_JOB_CANTFINDPETASKFORUPDATEIN_SS, 
             job_get_id_string(job_id, ja_task_id, pe_task_id, &id_dstring), SGE_FUNC));
      sge_dstring_free(&id_dstring);
      DEXIT;
      return SGE_EMA_FAILURE;
   }

   lXchgList(event, ET_new_version, &tmp);
   lXchgList(pe_task, PET_scaled_usage, &tmp);
   lXchgList(event, ET_new_version, &tmp);
   
   DEXIT;
   return SGE_EMA_OK;
}
bool
cqueue_verify_subordinate_list(lListElem *cqueue, lList **answer_list,
                               lListElem *attr_elem)
{
   bool ret = true;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_subordinate_list");
   if (cqueue != NULL && attr_elem != NULL) {
      const lList *master_list = 
                           *(object_type_get_master_list(SGE_TYPE_CQUEUE));
      const char *cqueue_name = lGetString(cqueue, CQ_name);
      lList *so_list = lGetList(attr_elem, ASOLIST_value);
      lListElem *so;

      for_each(so, so_list) {
         const char *so_name = lGetString(so, SO_name);
 
         /*
          * Check for recursions to ourself
          */
         if (strcmp(cqueue_name, so_name) != 0) {
            const lListElem *cqueue = NULL;

            /*
             * Check if cqueue exists
             */
            cqueue = cqueue_list_locate(master_list, so_name);
            if (cqueue != NULL) {
               /*
                * Success
                */
               ;
            } else {
               ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNSUB_SS,
                      so_name, cqueue_name));
               answer_list_add(answer_list, SGE_EVENT,
                               STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               ret = false;
            }
         } else {
            ERROR((SGE_EVENT, MSG_CQUEUE_SUBITSELF_S, cqueue_name));
            answer_list_add(answer_list, SGE_EVENT,
                            STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            ret = false;
         }
      }
   }
   DEXIT;
   return ret;
}
Example #4
0
/****** sgeobj/suser/suser_get_job_count() ************************************
*  NAME
*     suser_job_count() - number of jobs for a given user
*
*  SYNOPSIS
*     void suser_job_count(const lListElem *job) 
*
*  FUNCTION
*     number of jobs for a given user
*
*  INPUTS
*     const lListElem *job - JB_Type element 
*
*  RESULT
*     number of jobs in the system
******************************************************************************/
int suser_job_count(const lListElem *job)
{
   const char *submit_user = NULL;
   lListElem *suser = NULL;
   int ret = 0;    

   DENTER(TOP_LAYER, "suser_job_job");
   submit_user = lGetString(job, JB_owner);  
   suser = suser_list_find(*object_type_get_master_list(SGE_TYPE_SUSER), submit_user);
   if (suser != NULL) {
      ret = suser_get_job_counter(suser);
   }
   DEXIT;
   return ret;
}
Example #5
0
/****** sgeobj/userset/userset_list_validate_acl_list() ***********************
*  NAME
*     userset_list_validate_acl_list() -- validate an acl list 
*
*  SYNOPSIS
*     int 
*     userset_list_validate_acl_list(lList *acl_list, lList **alpp)
*
*  FUNCTION
*     Checks if all entries of an acl list (e.g. user list of a pe) 
*     are contained in the master userset list.
*
*  INPUTS
*     lList *acl_list       - the acl list to check
*     lList **alpp          - answer list pointer
*
*  RESULT
*     int - STATUS_OK, if everything is OK
*******************************************************************************/
int 
userset_list_validate_acl_list(lList *acl_list, lList **alpp)
{
   lListElem *usp;

   DENTER(TOP_LAYER, "userset_list_validate_acl_list");

   for_each (usp, acl_list) {
      if (!lGetElemStr(*object_type_get_master_list(SGE_TYPE_USERSET), US_name, lGetString(usp, US_name))) {
         ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNUSERSET_S, 
                lGetString(usp, US_name) ? lGetString(usp, US_name) : "<NULL>"));
         answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EUNKNOWN);
      }
   }

   DRETURN(STATUS_OK);
}
Example #6
0
/****** sgeobj/suser/suser_check_new_job() ************************************
*  NAME
*     suser_check_new_job() -- checks, if a job can be registered
*
*  SYNOPSIS
*     int suser_check_new_job(const lListElem *job, u_long32 max_u_jobs, 
*                                int force_registration) 
*
*  FUNCTION
*     This function checks whether a new "job" would exceed the maxium
*     number of allowed jobs per user ("max_u_jobs"). JB_owner of "job" 
*     is the username which will be used by this function to compare
*     the current number of registered jobs with "max_u_jobs". If the
*     limit would be exceeded than the function will return 1 otherwise 0.
*
*  INPUTS
*     const lListElem *job   - JB_Type element 
*     u_long32 max_u_jobs    - maximum number of allowed jobs per user 
*     int force_registration - force job registration 
*
*  RESULT
*     int - 1 => limit would be exceeded
*           0 => otherwise
******************************************************************************/
int suser_check_new_job(const lListElem *job, u_long32 max_u_jobs)
{
   const char *submit_user = NULL;
   lListElem *suser = NULL;
   int ret = 1;

   DENTER(TOP_LAYER, "suser_check_new_job");
   submit_user = lGetString(job, JB_owner);
   suser = suser_list_add(object_type_get_master_list(SGE_TYPE_SUSER), NULL, submit_user);
   if (suser != NULL) {
      if(max_u_jobs == 0 ||  
         max_u_jobs > suser_get_job_counter(suser))
         ret = 0;
      else
         ret = 1;
   }      
   DRETURN(ret);
}
Example #7
0
/****** sge_userset/userset_list_validate_access() *****************************
*  NAME
*     userset_list_validate_access() -- all user sets names in list must exist  
*
*  SYNOPSIS
*     int userset_list_validate_access(lList *acl_list, int nm, lList **alpp) 
*
*  FUNCTION
*     All the user set names in the acl_list must be defined in the qmaster
*     user set lists. The user set is diferentiated from user names by @ sign
*
*  INPUTS
*     lList *acl_list - the acl list to check
*     int nm          - field name
*     lList **alpp    - answer list pointer
*
*  RESULT
*     int - STATUS_OK if no error,  STATUS_EUNKNOWN otherwise
*
*  NOTES
*     MT-NOTE: userset_list_validate_access() is not MT safe 
*
*******************************************************************************/
int userset_list_validate_access(lList *acl_list, int nm, lList **alpp)
{
   lListElem *usp;
   char *user;

   DENTER(TOP_LAYER, "userset_list_validate_access");

   for_each (usp, acl_list) {
      user = (char *) lGetString(usp, nm);
      if (is_hgroup_name(user) == true){
         user++;  /* jump ower the @ sign */
         if (!lGetElemStr(*object_type_get_master_list(SGE_TYPE_USERSET), US_name, user)) {
            ERROR((SGE_EVENT, MSG_CQUEUE_UNKNOWNUSERSET_S, user ? user : "******"));
            answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EUNKNOWN);
         }
      }
   }
bool
cqueue_verify_pe_list(lListElem *cqueue, lList **answer_list,
                      lListElem *attr_elem)
{
   bool ret = true;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_pe_list");
   if (cqueue != NULL && attr_elem != NULL) {
      lList *pe_list = lGetList(attr_elem, ASTRLIST_value);

      if (pe_list != NULL) {
         const lList *master_list = *(object_type_get_master_list(SGE_TYPE_PE));
         if (!pe_list_do_all_exist(master_list, answer_list, pe_list, true)) {
            ret = false;
         }
      }
   }
   DEXIT;
   return ret;
}
Example #9
0
/****** sgeobj/suser/suser_register_new_job() *********************************
*  NAME
*     suser_register_new_job() -- try to register a new job 
*
*  SYNOPSIS
*     int suser_register_new_job(const lListElem *job, 
*                                u_long32 max_u_jobs, 
*                                int force_registration) 
*
*  FUNCTION
*     This function checks whether a new "job" would exceed the maxium
*     number of allowed jobs per user ("max_u_jobs"). JB_owner of "job" 
*     is the username which will be used by this function to compare
*     the current number of registered jobs with "max_u_jobs". If the
*     limit would be exceeded than the function will return 1 otherwise
*     it will increase the jobcounter of the job owner and return 0.
*     In some situation it may be necessary to force the incrementation
*     of the jobcounter (reading jobs from spool area). This may be done
*     with "force_registration".
*
*  INPUTS
*     const lListElem *job   - JB_Type element 
*     u_long32 max_u_jobs    - maximum number of allowed jobs per user 
*     int force_registration - force job registration 
*
*  RESULT
*     int - 1 => limit would be exceeded
*           0 => otherwise
*
*  SEE ALSO
*     sgeobj/suser/SU_Type
*     sgeobj/suser/Master_SUser_List  
*     qmaster/job/job_list_register_new_job()
******************************************************************************/
int suser_register_new_job(const lListElem *job, u_long32 max_u_jobs,
                           int force_registration)
{
   const char *submit_user = NULL;
   lListElem *suser = NULL;
   int ret = 0;

   DENTER(TOP_LAYER, "suser_register_new_job");

   if(!force_registration){
      ret = suser_check_new_job(job, max_u_jobs);
   }
   if( ret == 0){    
      submit_user = lGetString(job, JB_owner);
      suser = suser_list_add(object_type_get_master_list(SGE_TYPE_SUSER), NULL, submit_user);
      suser_increase_job_counter(suser);
   }

   DEXIT;
   return ret;
}
bool
cqueue_verify_calendar(lListElem *cqueue, lList **answer_list,
                       lListElem *attr_elem)
{
   bool ret = true;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_calendar");
   if (cqueue != NULL && attr_elem != NULL) {
      const char *name = lGetString(attr_elem, ASTR_value);

      if (name != NULL && strcasecmp("none", name)) {
         lListElem *calendar = calendar_list_locate(*object_type_get_master_list(SGE_TYPE_CALENDAR), name);
         if (calendar == NULL) {
            sprintf(SGE_EVENT, MSG_CQUEUE_UNKNOWNCALENDAR_S, name);
            answer_list_add(answer_list, SGE_EVENT,
                            STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            ret = false;
         }
      }
   }
   DEXIT;
   return ret;
}
static bool add_job(int job_id)
{
   bool write_ok;
   lListElem *job;
   lList *answer_list = NULL;
   lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB);

   const char *key;
   dstring key_dstring;
   char key_buffer[100];

   sge_dstring_init(&key_dstring, key_buffer, sizeof(key_buffer));

   job = lAddElemUlong(&master_job_list, JB_job_number, job_id, JB_Type);
   key = job_get_key(job_id, 0, NULL, &key_dstring);
#if LOCAL_TRANSACTION
   spool_transaction(&answer_list, spool_get_default_context(),
                     STC_begin); 
   answer_list_output(&answer_list);
#endif
   write_ok = spool_write_object(&answer_list, spool_get_default_context(),
                                job, key, SGE_TYPE_JOB, false);
   answer_list_output(&answer_list);

   if (delay > 0) {
      sge_usleep(delay * 1000);
   }

#if LOCAL_TRANSACTION
   spool_transaction(&answer_list, spool_get_default_context(),
                     write_ok ? STC_commit : STC_rollback); 
   answer_list_output(&answer_list);
#endif

   return write_ok;
}
static int check_config(lList **alpp, lListElem *conf)
{
   lListElem *ep;
   const char *name, *value;
   const char *conf_name;
 
   DENTER(TOP_LAYER, "check_config");
 
   conf_name = lGetHost(conf, CONF_name);
 
   for_each(ep, lGetList(conf, CONF_entries)) {
      name = lGetString(ep, CF_name);
      value = lGetString(ep, CF_value);
 
      if (name == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_NAMEISNULLINCONFIGURATIONLISTOFX_S,
               conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }
      if (value == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_VALUEISNULLFORATTRXINCONFIGURATIONLISTOFY_SS,
                name, conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }

      if (!strcmp(name, "loglevel")) {
         u_long32 tmp_uval;
         if (sge_parse_loglevel_val(&tmp_uval, value) != 1) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORLOGLEVEL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "jsv_url") == 0) {
         if (strcasecmp("none", value) != 0) {
            dstring input = DSTRING_INIT;
            dstring type = DSTRING_INIT;
            dstring user = DSTRING_INIT;
            dstring path = DSTRING_INIT;
            bool lret = true;

            sge_dstring_append(&input, value);
            lret = jsv_url_parse(&input, alpp, &type, &user, &path, false); 
            sge_dstring_free(&input);
            sge_dstring_free(&type);
            sge_dstring_free(&user);
            sge_dstring_free(&path);
            if (!lret) {
               /* answer is written by jsv_url_parse */
               DRETURN(STATUS_EEXIST);
            }
         } 
      } else if (!strcmp(name, "shell_start_mode")) {
         if ((strcasecmp("unix_behavior", value) != 0) && 
             (strcasecmp("posix_compliant", value) != 0) &&
             (strcasecmp("script_from_stdin", value) != 0) ) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELLSTARTMODE_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "shell")) {
         if (!path_verify(name, alpp, "shell", true)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "load_report_time")) {
         /* do not allow infinity entry for load_report_time */
         if (strcasecmp(value, "infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "max_unheard")) {
         /* do not allow infinity entry */
         if (strcasecmp(value,"infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "admin_user")) {
         struct passwd pw_struct;
         char *buffer;
         int size;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         if (strcasecmp(value, "none") && !sge_getpwnam_r(value, &pw_struct, buffer, size)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXASADMINUSER_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            sge_free(&buffer);
            DRETURN(STATUS_EEXIST);
         }
         sge_free(&buffer);
      } else if (!strcmp(name, "user_lists")||!strcmp(name, "xuser_lists")) {
         lList *tmp = NULL;
         int ok;

         /* parse just for .. */ 
         if (lString2ListNone(value, &tmp, US_Type, US_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking userset names */
         ok = (userset_list_validate_acl_list(tmp, alpp) == STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "projects") || !strcmp(name, "xprojects")) {
         lList *tmp = NULL;
         int ok=1;

         /* parse just for .. */ 
         if (lString2ListNone(value, &tmp, PR_Type, PR_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking project names */
         ok = (verify_project_list(alpp, tmp, *object_type_get_master_list(SGE_TYPE_PROJECT),
                    name, "configuration", conf_name)==STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "prolog") || !strcmp(name, "epilog")
                 || !strcmp(name, "mailer")) {
         if (strcasecmp(value, "none")) {
            const char *t, *script = value;

            /* skip user name */
            if ((t = strpbrk(script, "@ ")) && *t == '@')
               script = &t[1];

            /* force use of absolute paths if string <> none */
            if (script[0] != '/' ) {
               ERROR((SGE_EVENT, MSG_CONF_THEPATHGIVENFORXMUSTSTARTWITHANY_S, name));
               answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }
   
            /* ensure that variables are valid */
            if (replace_params(script, NULL, 0, prolog_epilog_variables)) {
               ERROR((SGE_EVENT, MSG_CONF_PARAMETERXINCONFIGURATION_SS, name, err_msg));
               answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (!strcmp(name, "auto_user_oticket") || !strcmp(name, "auto_user_fshare")) {
         u_long32 uval = 0;
         if (!extended_parse_ulong_val(NULL, &uval, TYPE_INT, value, NULL, 0, 0, true)) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, value ? value : "(NULL)"));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      }

      /* 
       * check paths, see also CR 6506580.
       * The following must be none or a valid absolute path:
       * - load_sensor
       * - set_token_cmd
       * - pag_cmd
       * - shepherd_cmd
       *
       * The following must be a valid absolute path:
       * - mailer
       * - xterm
       * - *_daemon, may also be "builtin"
       */
      else if (strcmp(name, "set_token_cmd") == 0 ||
          strcmp(name, "pag_cmd") == 0 ||
          strcmp(name, "shepherd_cmd") == 0) {
         if (strcasecmp(value, "none") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (strcmp(name, "mailer") == 0 ||
          strcmp(name, "xterm") == 0) {
         if (!path_verify(value, alpp, name, true)) {
            answer_list_log(alpp, false, false);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "qlogin_daemon") == 0 ||
          strcmp(name, "rlogin_daemon") == 0 ||
          strcmp(name, "rsh_daemon") == 0) {
         if (strcasecmp(value, "builtin") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      }

      /* load_sensor is a comma separated list of scripts */
      else if (strcmp(name, "load_sensor") == 0 && strcasecmp(value, "none") != 0) {
         struct saved_vars_s *context = NULL;
         const char *path = sge_strtok_r(value, ",", &context);
         do {
            if (!path_verify(path, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               sge_free_saved_vars(context);
               DRETURN(STATUS_EEXIST);
            }
         } while ((path = sge_strtok_r(NULL, ",", &context)) != NULL);
         sge_free_saved_vars(context);
      }
   }
Example #13
0
/****** sge_manop_qmaster/sge_del_manop() **************************************
*  NAME
*     sge_del_manop() -- delete manager or operator
*
*  SYNOPSIS
*     int 
*     sge_del_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, 
*                   char *ruser, char *rhost, u_long32 target) 
*
*  FUNCTION
*     Deletes a manager or an operator from the corresponding master list.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - gdi context
*     lListElem *ep            - the manager/operator to delete
*     lList **alpp             - answer list to return messages
*     char *ruser              - user having triggered the action
*     char *rhost              - host from which the action has been triggered
*     u_long32 target          - SGE_UM_LIST or SGE_UO_LIST
*
*  RESULT
*     int - STATUS_OK or STATUS_* error code
*
*  NOTES
*     MT-NOTE: sge_del_manop() is MT safe - if we hold the global lock.
*******************************************************************************/
int sge_del_manop(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, char *ruser, char *rhost, u_long32 target)
{
   lListElem *found;
   int pos;
   const char *manop_name;
   const char *object_name;
   lList **lpp = NULL;
   int key = NoName;
   ev_event eve = sgeE_EVENTSIZE;

   DENTER(TOP_LAYER, "sge_del_manop");

   if (ep == NULL || ruser == NULL || rhost == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   switch (target) {
      case SGE_UM_LIST:
         lpp = object_type_get_master_list(SGE_TYPE_MANAGER);
         object_name = MSG_OBJ_MANAGER;
         key = UM_name;
         eve = sgeE_MANAGER_DEL;
         break;
      case SGE_UO_LIST:
         lpp = object_type_get_master_list(SGE_TYPE_OPERATOR);
         object_name = MSG_OBJ_OPERATOR;
         key = UO_name;
         eve = sgeE_OPERATOR_DEL;
         break;
      default :
         DPRINTF(("unknown target passed to %s\n", SGE_FUNC));
         DRETURN(STATUS_EUNKNOWN);
   }

   /* ep is no manop element, if ep has no UM_name/UO_name */
   if ((pos = lGetPosViaElem(ep, key, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(key), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   manop_name = lGetPosString(ep, pos);
   if (manop_name == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   /* prevent removing of root from man/op-list */
   if (strcmp(manop_name, "root") == 0) {
      ERROR((SGE_EVENT, MSG_SGETEXT_MAY_NOT_REMOVE_USER_FROM_LIST_SS, "root", object_name));  
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EEXIST);
   }

   /* prevent removing the admin user from man/op-list */
   if (strcmp(manop_name, ctx->get_admin_user(ctx)) == 0) {
      ERROR((SGE_EVENT, MSG_SGETEXT_MAY_NOT_REMOVE_USER_FROM_LIST_SS,
             ctx->get_admin_user(ctx), object_name));  
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EEXIST);
   }

   found = lGetElemStr(*lpp, key, manop_name);
   if (!found) {
      ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EEXIST);
   }
   
   lDechainElem(*lpp, found);

   /* update on file */
   if (!sge_event_spool(ctx, alpp, 0, eve,
                        0, 0, manop_name, NULL, NULL,
                        NULL, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
   
      /* chain in again */
      lAppendElem(*lpp, found);

      DRETURN(STATUS_EDISK);
   }
   lFreeElem(&found);

   INFO((SGE_EVENT, MSG_SGETEXT_REMOVEDFROMLIST_SSSS, ruser, rhost, manop_name, object_name));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DRETURN(STATUS_OK);
}
Example #14
0
/****** spool/utilities/spool_default_validate_func() ****************
*  NAME
*     spool_default_validate_func() -- validate objects
*
*  SYNOPSIS
*     bool
*     spool_default_validate_func(lList **answer_list, 
*                               const lListElem *type, 
*                               const lListElem *rule, 
*                               const lListElem *object, 
*                               const char *key, 
*                               const sge_object_type object_type) 
*
*  FUNCTION
*     Verifies an object.
*
*  INPUTS
*     lList **answer_list - to return error messages
*     const lListElem *type           - object type description
*     const lListElem *rule           - rule to use
*     const lListElem *object         - object to validate
*     const sge_object_type object_type - object type
*
*  RESULT
*     bool - true on success, else false
*
*  NOTES
*     This function should not be called directly, it is called by the
*     spooling framework.
*
*  SEE ALSO
*******************************************************************************/
bool spool_default_validate_func(lList **answer_list, 
                          const lListElem *type, 
                          const lListElem *rule,
                          lListElem *object,
                          const sge_object_type object_type)
{
   bool ret = true;

   DENTER(TOP_LAYER, "spool_default_validate_func");

   switch(object_type) {
      case SGE_TYPE_ADMINHOST:
      case SGE_TYPE_EXECHOST:
      case SGE_TYPE_SUBMITHOST:
         {
            int cl_ret;
            int key_nm = object_type_get_key_nm(object_type);
            char *old_name = strdup(lGetHost(object, key_nm));

            /* try hostname resolving */
            if (strcmp(old_name, SGE_GLOBAL_NAME) != 0) {
               cl_ret = sge_resolve_host(object, key_nm);

               /* if hostname resolving failed: create error */
               if (cl_ret != CL_RETVAL_OK) {
                  if (cl_ret != CL_RETVAL_GETHOSTNAME_ERROR) {
                     answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN, 
                                             ANSWER_QUALITY_ERROR, 
                                             MSG_SPOOL_CANTRESOLVEHOSTNAME_SS, 
                                             old_name, cl_get_error_text(ret)); 
                     ret = false;
                  } else {
                     answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN, 
                                             ANSWER_QUALITY_WARNING, 
                                             MSG_SPOOL_CANTRESOLVEHOSTNAME_SS, 
                                             old_name, cl_get_error_text(ret));
                  }
               } else {
                  /* if hostname resolving changed hostname: spool */
                  const char *new_name;
                  new_name = lGetHost(object, key_nm);
                  if (strcmp(old_name, new_name) != 0) {
                     spooling_write_func write_func = 
                             (spooling_write_func)lGetRef(rule, SPR_write_func);
                     spooling_delete_func delete_func = 
                             (spooling_delete_func)lGetRef(rule, SPR_delete_func);
                     write_func(answer_list, type, rule, object, new_name, 
                                object_type);
                     delete_func(answer_list, type, rule, old_name, object_type);
                  }
               }
            }

            sge_free(&old_name);

            if (object_type == SGE_TYPE_EXECHOST && ret) {
               lListElem *load_value;
               lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY);

               /* all spooled load values are static, therefore we tag them here */
               for_each(load_value, lGetList(object, EH_load_list)) {
                  lSetBool(load_value, HL_static, true);
               }

               /* necessary to init double values of consumable configuration */
               centry_list_fill_request(lGetList(object, EH_consumable_config_list), 
                     NULL, master_centry_list, true, false, true);
               /* necessary to setup actual list of exechost */
               debit_host_consumable(NULL, object, master_centry_list, 0, true, NULL);

               if (ensure_attrib_available(NULL, object, 
                                           EH_consumable_config_list)) {
                  ret = false;
               }
            }
         }
static lListElem * 
qinstance_create(sge_gdi_ctx_class_t *ctx,
                 const lListElem *cqueue, lList **answer_list,
                 const char *hostname, bool *is_ambiguous, monitoring_t *monitor) 
{
   dstring buffer = DSTRING_INIT;
   const char *cqueue_name = lGetString(cqueue, CQ_name);
   lList *centry_list = *(object_type_get_master_list(SGE_TYPE_CENTRY));
   lListElem *ret = NULL;
   int index;

   DENTER(TOP_LAYER, "qinstance_create");
   
   ret = lCreateElem(QU_Type);

   /*
    * Pre-initialize some fields: hostname, full_name
    */
   lSetHost(ret, QU_qhostname, hostname);
   lSetString(ret, QU_qname, cqueue_name);
   sge_dstring_sprintf(&buffer, "%s@%s", cqueue_name, hostname);
   lSetString(ret, QU_full_name, sge_dstring_get_string(&buffer));
   sge_dstring_free(&buffer);

   /*
    * Initialize configuration attributes from CQ
    */
   *is_ambiguous = false;
   index = 0;
   while (cqueue_attribute_array[index].cqueue_attr != NoName) {
      bool tmp_is_ambiguous = false;
      bool tmp_has_changed_conf_attr = false;
      bool tmp_has_changed_state_attr = false;
      const char *matching_host_or_group = NULL;
      const char *matching_group = NULL;

      qinstance_modify_attribute(ctx,
                       ret, answer_list, cqueue, 
                       cqueue_attribute_array[index].qinstance_attr,
                       cqueue_attribute_array[index].cqueue_attr, 
                       cqueue_attribute_array[index].href_attr,
                       cqueue_attribute_array[index].value_attr,
                       cqueue_attribute_array[index].primary_key_attr,
                       &matching_host_or_group,
                       &matching_group,
                       &tmp_is_ambiguous, 
                       &tmp_has_changed_conf_attr,
                       &tmp_has_changed_state_attr,
                       true, NULL, monitor);

      *is_ambiguous |= tmp_is_ambiguous;

      index++;
   }

   qinstance_set_conf_slots_used(ret);
   qinstance_debit_consumable(ret, NULL, centry_list, 0, true);

   /*
    * Change qinstance state
    */
   sge_qmaster_qinstance_state_set_ambiguous(ret, *is_ambiguous);
   if (*is_ambiguous) {
      DPRINTF(("Qinstance "SFN"@"SFN" has ambiguous configuration\n",
               cqueue_name, hostname));
   } else {
      DPRINTF(("Qinstance "SFN"@"SFN" has non-ambiguous configuration\n",
               cqueue_name, hostname));
   }

   /*
    * For new qinstances we have to set some internal fields which
    * will be spooled later on:
    *    - state (modification according to initial state)
    *    - qversion
    */
   sge_qmaster_qinstance_state_set_unknown(ret, true);
   qinstance_check_unknown_state(ret, *object_type_get_master_list(SGE_TYPE_EXECHOST));
   sge_qmaster_qinstance_set_initial_state(ret);
   qinstance_initialize_sos_attr(ctx, ret, monitor);

   qinstance_increase_qversion(ret);

   DRETURN(ret);
}
Example #16
0
/* ------------------------------------------------------------

   sge_add_manop() - adds an manop list to the global manager/operator
                     list

   if the invoking process is the qmaster 
   the added manop list is spooled in the MANAGER_FILE/OPERATOR_FILE

*/
int sge_add_manop(
sge_gdi_ctx_class_t *ctx,
lListElem *ep,
lList **alpp,
char *ruser,
char *rhost,
u_long32 target  /* may be SGE_UM_LIST or SGE_UO_LIST */
) {
   const char *manop_name;
   const char *object_name;
   lList **lpp = NULL;
   lListElem *added;
   int pos;
   int key;
   lDescr *descr = NULL;
   ev_event eve = sgeE_EVENTSIZE; 

   DENTER(TOP_LAYER, "sge_add_manop");

   if ( !ep || !ruser || !rhost ) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   switch (target) {
   case SGE_UM_LIST:
      lpp = object_type_get_master_list(SGE_TYPE_MANAGER);
      object_name = MSG_OBJ_MANAGER;
      key = UM_name;
      descr = UM_Type;
      eve = sgeE_MANAGER_ADD;
      break;
   case SGE_UO_LIST:
      lpp = object_type_get_master_list(SGE_TYPE_OPERATOR);
      object_name = MSG_OBJ_OPERATOR;
      key = UO_name;
      descr = UO_Type;
      eve = sgeE_OPERATOR_ADD;
      break;
   default :
      DPRINTF(("unknown target passed to %s\n", SGE_FUNC));
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   /* ep is no acl element, if ep has no UM_name/UO_name */
   if ((pos = lGetPosViaElem(ep, key, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
            lNm2Str(key), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   manop_name = lGetPosString(ep, pos);
   if (!manop_name) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   if (lGetElemStr(*lpp, key, manop_name)) {
      ERROR((SGE_EVENT, MSG_SGETEXT_ALREADYEXISTS_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EEXIST;
   }

   /* update in interal lists */
   added = lAddElemStr(lpp, key, manop_name, descr);

   /* update on file */
   if(!sge_event_spool(ctx, alpp, 0, eve, 
                       0, 0, manop_name, NULL, NULL,
                       added, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
   
      /* remove element from list */
      lRemoveElem(*lpp, &added);

      DEXIT;
      return STATUS_EDISK;
   }

   INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS,
            ruser, rhost, manop_name, object_name));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DEXIT;
   return STATUS_OK;
}
/****** sge_c_report() *******************************************************
*  NAME
*     sge_c_report() -- process execd load report
*
*  SYNOPSIS
*     void sge_c_report(char *rhost, char *commproc, int id, lList *report_list)
*
*  FUNCTION
*
*  INPUTS
*     char *rhost
*     char *commproc
*     int id
*     lList *report_list
*
*  RESULT
*     void - nothing
*
*  NOTES
*     MT-NOTE: sge_c_report() is MT safe
*
******************************************************************************/
void sge_c_report(sge_gdi_ctx_class_t *ctx, char *rhost, char *commproc, int id, lList *report_list, monitoring_t *monitor)
{
   lListElem *hep = NULL;
   u_long32 rep_type;
   lListElem *report;
   int ret = 0;
   u_long32 this_seqno, last_seqno;
   u_long32 rversion;
   sge_pack_buffer pb;   
   bool is_pb_used = false;
   bool send_tag_new_conf = false;

   DENTER(TOP_LAYER, "sge_c_report");

   if (lGetNumberOfElem(report_list) == 0) {
      DPRINTF(("received empty report\n"));
      if (rhost != NULL) {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, rhost));
      } else {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, "unknown"));
      }
      DRETURN_VOID;
   }

   /* accept reports only from execd's */
   if (strcmp(prognames[EXECD], commproc)) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNCOMMPROC_S, commproc));
      DRETURN_VOID;
   }

   /* do not process load reports from old execution daemons */
   rversion = lGetUlong(lFirst(report_list), REP_version);
   if (verify_request_version(NULL, rversion, rhost, commproc, id)) {
      DRETURN_VOID;
   }
   
   this_seqno = lGetUlong(lFirst(report_list), REP_seqno);

   /* need exec host for all types of reports */
   if (!(hep = host_list_locate(*object_type_get_master_list(SGE_TYPE_EXECHOST), rhost))) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNEXECHOST_S, rhost));
      DRETURN_VOID;
   }

   /* prevent old reports being proceeded 
      frequent loggings of outdated reports can be an indication 
      of too high message traffic arriving at qmaster */ 
   last_seqno = lGetUlong(hep, EH_report_seqno);

   if ((this_seqno < last_seqno && (last_seqno - this_seqno) <= 9000) &&
      !(last_seqno > 9990 && this_seqno < 10)) {
      /* this must be an old report, log and then ignore it */
      INFO((SGE_EVENT, MSG_QMASTER_RECEIVED_OLD_LOAD_REPORT_UUS, 
               sge_u32c(this_seqno), sge_u32c(last_seqno), rhost));
      DRETURN_VOID;
   }
   
   lSetUlong(hep, EH_report_seqno, this_seqno);

   /* RU: */
   /* tag all reschedule_unknown list entries we hope to 
      hear about in that job report */
   update_reschedule_unknown_list(ctx, hep);

   /*
   ** process the reports one after the other
   ** usually there will be a load report
   ** and a configuration version report
   */
   for_each(report, report_list) {
      rep_type = lGetUlong(report, REP_type);

      switch (rep_type) {
      case NUM_REP_REPORT_LOAD:
      case NUM_REP_FULL_REPORT_LOAD:
         MONITOR_ELOAD(monitor); 
         /* Now handle execds load reports */
         if (lGetUlong(hep, EH_lt_heard_from) == 0 && rep_type != NUM_REP_FULL_REPORT_LOAD) {
            host_notify_about_full_load_report(ctx, hep);
         } else {
            if (!is_pb_used) {
               is_pb_used = true;
               init_packbuffer(&pb, 1024, 0);
            }
            sge_update_load_values(ctx, rhost, lGetList(report, REP_list));

            if (mconf_get_simulate_execds()) {
               lList *master_exechost_list = *object_type_get_master_list(SGE_TYPE_EXECHOST);
               lListElem *shep;
               lListElem *simhostElem=NULL; 

               for_each(shep, master_exechost_list) {
                  simhostElem = lGetSubStr(shep, CE_name, "load_report_host", EH_consumable_config_list);
                  if (simhostElem != NULL) {
                     const char *real_host = lGetString(simhostElem, CE_stringval);
                     if (real_host != NULL && sge_hostcmp(real_host, rhost) == 0) {
                        const char* sim_host = lGetHost(shep, EH_name);
                        lListElem *clp = NULL;

                        DPRINTF(("Copy load values of %s to simulated host %s\n",
                                rhost, sim_host));

                        for_each(clp, lGetList(report, REP_list)) {
                           if (strcmp(lGetHost(clp, LR_host), SGE_GLOBAL_NAME) != 0) {
                              lSetHost(clp, LR_host, sim_host);
                           }
                        }
                        sge_update_load_values(ctx, sim_host, lGetList(report, REP_list));
                     }
                  }
               }
            }

            pack_ack(&pb, ACK_LOAD_REPORT, this_seqno, 0, NULL);
         }
         break;
      case NUM_REP_REPORT_CONF: 
         MONITOR_ECONF(monitor); 
         if (sge_compare_configuration(hep, lGetList(report, REP_list)) != 0) {
            DPRINTF(("%s: configuration on host %s is not up to date\n", SGE_FUNC, rhost));
            send_tag_new_conf = true;
         }
         break;
         
      case NUM_REP_REPORT_PROCESSORS:
         /*
         ** save number of processors
         */
         MONITOR_EPROC(monitor);
         ret = update_license_data(ctx, hep, lGetList(report, REP_list)); 
         if (ret) {
            ERROR((SGE_EVENT, MSG_LICENCE_ERRORXUPDATINGLICENSEDATA_I, ret));
         }
         break;

      case NUM_REP_REPORT_JOB:
         MONITOR_EJOB(monitor);
         if (!is_pb_used) {
            is_pb_used = true;
            init_packbuffer(&pb, 1024, 0);
         }
         process_job_report(ctx, report, hep, rhost, commproc, &pb, monitor);
         break;

      default:   
         DPRINTF(("received invalid report type %ld\n", (long) rep_type));
      }
Example #18
0
/****** qmaster/ckpt/sge_del_ckpt() *******************************************
*
*  NAME
*     sge_del_ckpt -- delete ckpt object in Master_Ckpt_List 
*
*  SYNOPSIS
*     int sge_del_ckpt(lListElem *ep, lList **alpp, char *ruser, char *rhost);
* 
*  FUNCTION
*     This function will be called from the framework which will
*     add/modify/delete generic gdi objects.
*     The purpose of this function is it to delete ckpt objects. 
*
*
*  INPUTS
*     ep          - element which should be deleted
*     alpp        - reference to an answer list.
*     ruser       - username of person who invoked this gdi request
*     rhost       - hostname of the host where someone initiated an gdi call
*
*  RESULT
*     [alpp] - error messages will be added to this list
*     0 - success
*     STATUS_EUNKNOWN - an error occured
******************************************************************************/ 
int sge_del_ckpt(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, char *ruser, char *rhost) 
{
   lListElem *found;
   int pos;
   const char *ckpt_name;
   lList **lpp = object_type_get_master_list(SGE_TYPE_CKPT);

   DENTER(TOP_LAYER, "sge_del_ckpt");

   if ( !ep || !ruser || !rhost ) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   /* ep is no ckpt element, if ep has no CK_name */
   if ((pos = lGetPosViaElem(ep, CK_name, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
            lNm2Str(CK_name), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   ckpt_name = lGetPosString(ep, pos);
   if (!ckpt_name) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }                    
   found = ckpt_list_locate(*lpp, ckpt_name);

   if (!found) {
      ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, MSG_OBJ_CKPT, ckpt_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EEXIST;
   }

   /* 
    * Try to find references in other objects
    */
   {
      lList *local_answer_list = NULL;

      if (ckpt_is_referenced(found, &local_answer_list, *(object_type_get_master_list(SGE_TYPE_JOB)),
                             *(object_type_get_master_list(SGE_TYPE_CQUEUE)))) {
         lListElem *answer = lFirst(local_answer_list);

         ERROR((SGE_EVENT, "denied: %s", lGetString(answer, AN_text)));
         answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN,
                         ANSWER_QUALITY_ERROR);
         lFreeList(&local_answer_list);
         DEXIT;
         return STATUS_EUNKNOWN;
      }
   }

   /* remove ckpt file 1st */
   if (!sge_event_spool(ctx, alpp, 0, sgeE_CKPT_DEL, 0, 0, ckpt_name, NULL, NULL,
                        NULL, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, MSG_OBJ_CKPT, ckpt_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EDISK;
   }

   /* now we can remove the element */
   lRemoveElem(*lpp, &found);

   INFO((SGE_EVENT, MSG_SGETEXT_REMOVEDFROMLIST_SSSS,
            ruser, rhost, ckpt_name, MSG_OBJ_CKPT));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DEXIT;
   return STATUS_OK;
}                     
/****** qmaster/threads/sge_scheduler_main() **********************************
*  NAME
*     sge_scheduler_main() -- main function of the scheduler thread 
*
*  SYNOPSIS
*     void * sge_scheduler_main(void *arg) 
*
*  FUNCTION
*     Main function of the scheduler thread, 
*
*  INPUTS
*     void *arg - pointer to the thread function (type cl_thread_settings_t*) 
*
*  RESULT
*     void * - always NULL 
*
*  NOTES
*     MT-NOTE: sge_scheduler_main() is MT safe 
*
*     MT-NOTE: this is a thread function. Do NOT use this function
*     MT-NOTE: in any other way!
*
*  SEE ALSO
*     qmaster/threads/sge_scheduler_initialize() 
*     qmaster/threads/sge_scheduler_cleanup_thread() 
*     qmaster/threads/sge_scheduler_terminate() 
*     qmaster/threads/sge_scheduler_main() 
*******************************************************************************/
void *
sge_scheduler_main(void *arg)
{
   time_t next_prof_output = 0;
   monitoring_t monitor;
   sge_gdi_ctx_class_t *ctx = NULL;
   sge_evc_class_t *evc = NULL;
   lList *alp = NULL;
   sge_where_what_t where_what;
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   bool do_shutdown = false;
   bool do_endlessly = true;
   bool local_ret = true;

   DENTER(TOP_LAYER, "sge_scheduler_main");

   memset(&where_what, 0, sizeof(where_what));

   /*
    * startup
    */
   if (local_ret) {
      /* initialize commlib thread */
      cl_thread_func_startup(thread_config);

      /* initialize monitoring */
      sge_monitor_init(&monitor, thread_config->thread_name, SCH_EXT, SCT_WARNING, SCT_ERROR);
      sge_qmaster_thread_init(&ctx, SCHEDD, SCHEDD_THREAD, true);

      /* register at profiling module */
      set_thread_name(pthread_self(), "Scheduler Thread");
      conf_update_thread_profiling("Scheduler Thread");
      DPRINTF((SFN" started\n", thread_config->thread_name));

      /* initialize schedd_runnlog logging */
      schedd_set_schedd_log_file(ctx);
   }

   /* set profiling parameters */
   prof_set_level_name(SGE_PROF_EVENTMASTER, NULL, NULL);
   prof_set_level_name(SGE_PROF_SPOOLING, NULL, NULL);
   prof_set_level_name(SGE_PROF_CUSTOM0, "scheduler", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM1, "pending ticket calculation", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM3, "job sorting", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM4, "job dispatching", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM5, "send orders", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM6, "scheduler event loop", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM7, "copy lists", NULL);
   prof_set_level_name(SGE_PROF_SCHEDLIB4, NULL, NULL);

   /* set-up needed for 'schedule' file */
   serf_init(schedd_serf_record_func, schedd_serf_newline);
   schedd_set_serf_log_file(ctx);

   /*
    * prepare event client/mirror mechanism
    */
   if (local_ret) {
      local_ret = sge_gdi2_evc_setup(&evc, ctx, EV_ID_SCHEDD, &alp, "scheduler");
      DPRINTF(("prepared event client/mirror mechanism\n"));
   }

   /*
    * register as event mirror
    */
   if (local_ret) {
      sge_mirror_initialize(evc, EV_ID_SCHEDD, "scheduler",
                            false, &event_update_func, &sge_mod_event_client,
                            &sge_add_event_client, &sge_remove_event_client,
                            &sge_handle_event_ack);
      evc->ec_register(evc, false, NULL, &monitor);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_RELEASED);
      DPRINTF(("registered at event mirror\n"));
   }

   /*
    * subscribe necessary data
    */
   if (local_ret) {
      ensure_valid_what_and_where(&where_what);
      subscribe_scheduler(evc, &where_what);
      DPRINTF(("subscribed necessary data from event master\n"));
   }

   /* 
    * schedulers main loop
    */
   if (local_ret) {
      while (do_endlessly) {
         bool handled_events = false;
         lList *event_list = NULL;
         int execute = 0;
         double prof_copy = 0.0;
         double prof_total = 0.0;
         double prof_init = 0.0;
         double prof_free = 0.0;
         double prof_run = 0.0;
         lList *orders = NULL;

         if (sconf_get_profiling()) {
            prof_start(SGE_PROF_OTHER, NULL);
            prof_start(SGE_PROF_PACKING, NULL);
            prof_start(SGE_PROF_EVENTCLIENT, NULL);
            prof_start(SGE_PROF_MIRROR, NULL);
            prof_start(SGE_PROF_GDI, NULL);
            prof_start(SGE_PROF_HT_RESIZE, NULL);
            prof_start(SGE_PROF_CUSTOM0, NULL);
            prof_start(SGE_PROF_CUSTOM1, NULL);
            prof_start(SGE_PROF_CUSTOM3, NULL);
            prof_start(SGE_PROF_CUSTOM4, NULL);
            prof_start(SGE_PROF_CUSTOM5, NULL);
            prof_start(SGE_PROF_CUSTOM6, NULL);
            prof_start(SGE_PROF_CUSTOM7, NULL);
            prof_start(SGE_PROF_SCHEDLIB4, NULL);
         } else {
            prof_stop(SGE_PROF_OTHER, NULL);
            prof_stop(SGE_PROF_PACKING, NULL);
            prof_stop(SGE_PROF_EVENTCLIENT, NULL);
            prof_stop(SGE_PROF_MIRROR, NULL);
            prof_stop(SGE_PROF_GDI, NULL);
            prof_stop(SGE_PROF_HT_RESIZE, NULL);
            prof_stop(SGE_PROF_CUSTOM0, NULL);
            prof_stop(SGE_PROF_CUSTOM1, NULL);
            prof_stop(SGE_PROF_CUSTOM3, NULL);
            prof_stop(SGE_PROF_CUSTOM4, NULL);
            prof_stop(SGE_PROF_CUSTOM5, NULL);
            prof_stop(SGE_PROF_CUSTOM6, NULL);
            prof_stop(SGE_PROF_CUSTOM7, NULL);
            prof_stop(SGE_PROF_SCHEDLIB4, NULL);
         }

         /*
          * Wait for new events
          */
         MONITOR_IDLE_TIME(sge_scheduler_wait_for_event(evc, &event_list), (&monitor), mconf_get_monitor_time(), 
                           mconf_is_monitor_message());

         /* If we lost connection we have to register again */
         if (evc->ec_need_new_registration(evc)) {
            lFreeList(&event_list);
            if (evc->ec_register(evc, false, NULL, &monitor) == true) {
               DPRINTF(("re-registered at event master!\n"));
            }
         }

         if (event_list != NULL) {
            /* check for shutdown */
            do_shutdown = (lGetElemUlong(event_list, ET_type, sgeE_SHUTDOWN) != NULL) ? true : false;

            /* update mirror and free data */
            if (do_shutdown == false && sge_mirror_process_event_list(evc, event_list) == SGE_EM_OK) {
               handled_events = true;
               DPRINTF(("events handled\n"));
            } else {
               DPRINTF(("events contain shutdown event - ignoring events\n"));
            }
            lFreeList(&event_list);
         }
 
         /* if we actually got events, start the scheduling run and further event processing */
         if (handled_events == true) {
            lList *answer_list = NULL;
            scheduler_all_data_t copy;
            lList *master_cqueue_list = *(object_type_get_master_list(SGE_TYPE_CQUEUE));
            lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB);
            lList *master_userset_list = *object_type_get_master_list(SGE_TYPE_USERSET);
            lList *master_project_list = *object_type_get_master_list(SGE_TYPE_PROJECT);
            lList *master_exechost_list= *object_type_get_master_list(SGE_TYPE_EXECHOST);
            lList *master_rqs_list= *object_type_get_master_list(SGE_TYPE_RQS);
            lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY);
            lList *master_ckpt_list = *object_type_get_master_list(SGE_TYPE_CKPT);
            lList *master_user_list = *object_type_get_master_list(SGE_TYPE_USER);
            lList *master_ar_list = *object_type_get_master_list(SGE_TYPE_AR);
            lList *master_pe_list = *object_type_get_master_list(SGE_TYPE_PE);
            lList *master_hgrp_list = *object_type_get_master_list(SGE_TYPE_HGROUP);
            lList *master_sharetree_list = *object_type_get_master_list(SGE_TYPE_SHARETREE);

            /* delay scheduling for test purposes, see issue GE-3306 */
            if (SGE_TEST_DELAY_SCHEDULING > 0) {
               sleep(SGE_TEST_DELAY_SCHEDULING);
            }

            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM6);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            if (__CONDITION(INFOPRINT)) {
               dstring ds;
               char buffer[128];

               sge_dstring_init(&ds, buffer, sizeof(buffer));
               DPRINTF(("================[SCHEDULING-EPOCH %s]==================\n",
                        sge_at_time(0, &ds)));
               sge_dstring_free(&ds);
            }

            /*
             * If there were new events then
             * copy/filter data necessary for the scheduler run
             * and run the scheduler method
             */
            memset(&copy, 0, sizeof(copy));

            copy.dept_list = lSelect("", master_userset_list, where_what.where_dept, where_what.what_acldept);
            copy.acl_list = lSelect("", master_userset_list, where_what.where_acl, where_what.what_acldept);

            DPRINTF(("RAW CQ:%d, J:%d, H:%d, C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d\n",
               lGetNumberOfElem(master_cqueue_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(master_centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_ckpt_list),
               lGetNumberOfElem(master_user_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_rqs_list),
               lGetNumberOfElem(master_ar_list),
               lGetNumberOfNodes(NULL, master_sharetree_list, STN_children),
               lGetNumberOfLeafs(NULL, master_sharetree_list, STN_children)
            ));

            sge_rebuild_job_category(master_job_list, master_userset_list,
                                        master_project_list, master_rqs_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_init = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            sge_before_dispatch(evc);

            /* prepare data for the scheduler itself */
            copy.host_list = lCopyList("", master_exechost_list);

            /*
             * Within the scheduler we do only need QIs
             */
            {
               lListElem *cqueue = NULL;
               lEnumeration *what_queue3 = NULL;

               for_each(cqueue, master_cqueue_list) {
                  lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
                  lList *t;

                  if (!qinstance_list) {
                     continue;
                  }

                  /* all_queue_list contains all queue instances with state and full queue name only */
                  if (!what_queue3) {
                     what_queue3 = lWhat("%T(%I%I)", lGetListDescr(qinstance_list), QU_full_name, QU_state);
                  }
                  t = lSelect("t", qinstance_list, NULL, what_queue3);
                  if (t) {
                     if (copy.all_queue_list == NULL) {
                        copy.all_queue_list = lCreateList("all", lGetListDescr(t));
                     }
                     lAppendList(copy.all_queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue, where_what.what_queue2);
                  if (t) {
                     if (copy.queue_list == NULL) {
                        copy.queue_list = lCreateList("enabled", lGetListDescr(t));
                     }
                     lAppendList(copy.queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue2, where_what.what_queue2);
                  if (t) {
                     if (copy.dis_queue_list == NULL) {
                        copy.dis_queue_list = lCreateList("disabled", lGetListDescr(t));
                     }
                     lAppendList(copy.dis_queue_list, t);
                     lFreeList (&t);
                  }
               }
               if (what_queue3) {
                  lFreeWhat(&what_queue3);
               }
            }

            if (sconf_is_job_category_filtering()) {
               copy.job_list = sge_category_job_copy(copy.queue_list, &orders, evc->monitor_next_run);
            } else {
               copy.job_list = lCopyList("", master_job_list);
            }

            /* no need to copy these lists, they are read only used */
            copy.centry_list = master_centry_list;
            copy.ckpt_list = master_ckpt_list;
            copy.hgrp_list = master_hgrp_list;

            /* these lists need to be copied because they are modified during scheduling run */
            copy.share_tree = lCopyList("", master_sharetree_list);
            copy.pe_list = lCopyList("", master_pe_list);
            copy.user_list = lCopyList("", master_user_list);
            copy.project_list = lCopyList("", master_project_list);
            copy.rqs_list = lCopyList("", master_rqs_list);
            copy.ar_list = lCopyList("", master_ar_list);

            /* report number of reduced and raw (in brackets) lists */
            DPRINTF(("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
               lGetNumberOfElem(copy.queue_list),
               lGetNumberOfElem(copy.all_queue_list),
               lGetNumberOfElem(copy.job_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(copy.host_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(copy.centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(copy.pe_list),
               lGetNumberOfElem(copy.ckpt_list),
               lGetNumberOfElem(copy.user_list),
               lGetNumberOfElem(copy.project_list),
               lGetNumberOfElem(copy.rqs_list),
               lGetNumberOfElem(copy.ar_list),
               lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
               lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
            ));

            if (getenv("SGE_ND")) {
               printf("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, "
                  "P:%d, CKPT:%d, US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
                  lGetNumberOfElem(copy.queue_list),
                  lGetNumberOfElem(copy.all_queue_list),
                  lGetNumberOfElem(copy.job_list),
                  lGetNumberOfElem(master_job_list),
                  lGetNumberOfElem(copy.host_list),
                  lGetNumberOfElem(master_exechost_list),
                  lGetNumberOfElem(copy.centry_list),
                  lGetNumberOfElem(copy.acl_list),
                  lGetNumberOfElem(copy.dept_list),
                  lGetNumberOfElem(copy.pe_list),
                  lGetNumberOfElem(copy.ckpt_list),
                  lGetNumberOfElem(copy.user_list),
                  lGetNumberOfElem(copy.project_list),
                  lGetNumberOfElem(copy.rqs_list),
                  lGetNumberOfElem(copy.ar_list),
                  lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
                  lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
                 );
            } else {
               schedd_log("-------------START-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            scheduler_method(evc, &answer_list, &copy, &orders);
            answer_list_output(&answer_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_run = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            /* .. which gets deleted after using */
            lFreeList(&(copy.host_list));
            lFreeList(&(copy.queue_list));
            lFreeList(&(copy.dis_queue_list));
            lFreeList(&(copy.all_queue_list));
            lFreeList(&(copy.job_list));
            lFreeList(&(copy.acl_list));
            lFreeList(&(copy.dept_list));
            lFreeList(&(copy.pe_list));
            lFreeList(&(copy.share_tree));
            lFreeList(&(copy.user_list));
            lFreeList(&(copy.project_list));
            lFreeList(&(copy.rqs_list));
            lFreeList(&(copy.ar_list));

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_free = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);

            /* 
             * need to sync with event master thread
             * if schedd configuration changed then settings in evm can be adjusted
             */
            if (sconf_is_new_config()) {
               /* set scheduler interval / event delivery interval */
               u_long32 interval = sconf_get_schedule_interval();
               if (evc->ec_get_edtime(evc) != interval) {
                  evc->ec_set_edtime(evc, interval);
               }

               /* set job / ja_task event flushing */
               set_job_flushing(evc);

               /* no need to ec_commit here - we do it when resetting the busy state */

               /* now we handled the new schedd config - no need to do it twice */
               sconf_reset_new_config();
            }

            /* block till master handled all GDI orders */
            sge_schedd_block_until_orders_processed(evc->get_gdi_ctx(evc), NULL);
            schedd_order_destroy();

            /*
             * Stop profiling for "schedd run total" and the subcategories
             */
            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM6);
            prof_total = prof_get_measurement_wallclock(SGE_PROF_CUSTOM6, true, NULL);

            if (prof_is_active(SGE_PROF_CUSTOM6)) {
               PROFILING((SGE_EVENT, "PROF: schedd run took: %.3f s (init: %.3f s, copy: %.3f s, "
                          "run:%.3f, free: %.3f s, jobs: %d, categories: %d/%d)",
                           prof_total, prof_init, prof_copy, prof_run, prof_free,
                           lGetNumberOfElem(*object_type_get_master_list(SGE_TYPE_JOB)), sge_category_count(),
                           sge_cs_category_count() ));
            }
            if (getenv("SGE_ND") != NULL) {
               printf("--------------STOP-SCHEDULER-RUN-------------\n");
            } else {
               schedd_log("--------------STOP-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            thread_output_profiling("scheduler thread profiling summary:\n", &next_prof_output);

            sge_monitor_output(&monitor);
         }

         /* reset the busy state */
         evc->ec_set_busy(evc, 0);
         evc->ec_commit(evc, NULL);

         /* stop logging into schedd_runlog (enabled via -tsm) */
         evc->monitor_next_run = false;

         /*
          * pthread cancelation point
          *
          * sge_scheduler_cleanup_thread() is the last function which should
          * be called so it is pushed first
          */
         pthread_cleanup_push(sge_scheduler_cleanup_thread, (void *) &ctx);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_monitor,
                              (void *)&monitor);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_event_client,
                              (void *)evc);
         cl_thread_func_testcancel(thread_config);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         DPRINTF(("passed cancelation point\n"));
      }
Example #20
0
/*-------------------------------------------------------------------------*/
int main(int argc, char **argv)
{
   int ret;
   int my_pid;
   int ret_val;
   int printed_points = 0;
   int max_enroll_tries;
   static char tmp_err_file_name[SGE_PATH_MAX];
   time_t next_prof_output = 0;
   int execd_exit_state = 0;
   lList **master_job_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *alp = NULL;

   DENTER_MAIN(TOP_LAYER, "execd");

#if defined(LINUX)
   gen_procList ();
#endif

   prof_mt_init();

   set_thread_name(pthread_self(),"Execd Thread");

   prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); 
   prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); 

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* This needs a better solution */
   umask(022);
      
   /* Initialize path for temporary logging until we chdir to spool */
   my_pid = getpid();
   sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid));
   log_state_set_log_file(tmp_err_file_name);

   /* exit func for SGE_EXIT() */
   sge_sig_handler_in_main_loop = 0;
   sge_setup_sig_handlers(EXECD);

   if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }
   ctx->set_exit_func(ctx, execd_exit_func);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /* prepare daemonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_prepare(ctx);
   }

   if ((ret=sge_occupy_first_three())>=0) {
      CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret));
      SGE_EXIT((void**)&ctx, 1);
   }

   lInit(nmv);

   /* unset XAUTHORITY if set */
   if (getenv("XAUTHORITY") != NULL) {
      sge_unsetenv("XAUTHORITY");
   }

   parse_cmdline_execd(argv);   
   
   /* exit if we can't get communication handle (bind port) */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[EXECD],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;

      if (max_enroll_tries <= 0 || shut_me_down) {
         /* exit after 30 seconds */
         if (printed_points != 0) {
            printf("\n");
         }
         CRITICAL((SGE_EVENT, MSG_COM_ERROR));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[EXECD],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
        if (max_enroll_tries < 27) {
           printf(".");
           printed_points++;
           fflush(stdout);
        }
      }
   }

   if (printed_points != 0) {
      printf("\n");
   }

   /*
    * now the commlib up and running. Set execd application status function 
    * ( commlib callback function for qping status information response 
    *   messages (SIRM) )
    */
   ret_val = cl_com_set_status_func(sge_execd_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)) );
   }

   /* test connection */
   {
      cl_com_SIRM_t* status = NULL;
      ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx),
                                               (char *)ctx->get_master(ctx, true),
                                               (char*)prognames[QMASTER], 1, &status);
      if (ret_val != CL_RETVAL_OK) {
         ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
         ERROR((SGE_EVENT, MSG_CONF_NOCONFBG));
      }
      cl_com_free_sirm_message(&status);
   }
   
   /* finalize daeamonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_finalize(ctx);
   }

   /* daemonizes if qmaster is unreachable */   
   sge_setup_sge_execd(ctx, tmp_err_file_name);

   /* are we using qidle or not */
   sge_ls_qidle(mconf_get_use_qidle());
   sge_ls_gnu_ls(1);
   
   DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle()));

   /* test load sensor (internal or external) */
   {
      lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx));
      lFreeList(&report_list);
   }
   
   /* here we have to wait for qmaster registration */
   while (sge_execd_register_at_qmaster(ctx, false) != 0) {
      if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) {
         /* This is no error */
         DPRINTF(("*****  got SGE_COM_ACCESS_DENIED from qmaster  *****\n"));
      }
      if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) {
         execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE;
         break;
      }
      if (shut_me_down != 0) {
         break;
      }
      sleep(30);
   }

   /* 
    * Terminate on SIGTERM or hard communication error
    */
   if (execd_exit_state != 0 || shut_me_down != 0) {
      sge_shutdown((void**)&ctx, execd_exit_state);
      DRETURN(execd_exit_state);
   }

   /*
    * We write pid file when we are connected to qmaster. Otherwise an old
    * execd might overwrite our pidfile.
    */
   sge_write_pid(EXECD_PID_FILE);

   /*
    * At this point we are sure we are the only sge_execd and we are connected
    * to the current qmaster. First we have to report any reaped children
    * that might exist.
    */
   starting_up();

   /*
    * Log a warning message if execd hasn't been started by a superuser
    */
   if (!sge_is_start_user_superuser()) {
      WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT));
   }   

#ifdef COMPILE_DC
   if (ptf_init()) {
      CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF));
      SGE_EXIT((void**)&ctx, 1);
   }
   INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF));
#endif

   master_job_list = object_type_get_master_list(SGE_TYPE_JOB);
   *master_job_list = lCreateList("Master_Job_List", JB_Type);
   job_list_read_from_disk(master_job_list, "Master_Job_List",
                           0, SPOOL_WITHIN_EXECD, 
                          job_initialize_job);
   

   /* clean up jobs hanging around (look in active_dir) */
   clean_up_old_jobs(ctx, 1);
   execd_trash_load_report();
   sge_set_flush_lr_flag(true);

   sge_sig_handler_in_main_loop = 1;

   if (thread_prof_active_by_id(pthread_self())) {
      prof_start(SGE_PROF_CUSTOM1, NULL);
      prof_start(SGE_PROF_CUSTOM2, NULL);
      prof_start(SGE_PROF_GDI_REQUEST, NULL);
   } else {
      prof_stop(SGE_PROF_CUSTOM1, NULL);
      prof_stop(SGE_PROF_CUSTOM2, NULL);
      prof_stop(SGE_PROF_GDI_REQUEST, NULL);
   }

   PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1);

   /* Start dispatching */
   execd_exit_state = sge_execd_process_messages(ctx);


   /*
    * This code is only reached when dispatcher terminates and execd goes down.
    */

   /* log if we received SIGPIPE signal */
   if (sge_sig_handler_sigpipe_received) {
       sge_sig_handler_sigpipe_received = 0;
       INFO((SGE_EVENT, "SIGPIPE received\n"));
   }

#if defined(LINUX)
   free_procList();
#endif
   lFreeList(master_job_list);

   PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1);
   if (prof_is_active(SGE_PROF_ALL)) {
     time_t now = (time_t)sge_get_gmt();

      if (now > next_prof_output) {
         prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n");
         prof_reset(SGE_PROF_ALL,NULL);
         next_prof_output = now + 60;
      }
   }
   sge_prof_cleanup();

   sge_shutdown((void**)&ctx, execd_exit_state);
   DRETURN(execd_exit_state);
}