예제 #1
0
/****** qmaster/setup_qmaster/sge_setup_job_resend() ***************************
*  NAME
*     sge_setup_job_resend() -- Setup job resend events.
*
*  SYNOPSIS
*     void sge_setup_job_resend(void)
*
*  FUNCTION
*     Register a job resend event for each job or array task which does have a
*     'JTRANSFERING' status.
*
*  INPUTS
*     void - none
*
*  RESULT
*     void - none
*
*  NOTES
*     MT-NOTE: sge_setup_job_resend() is not MT safe
*
*******************************************************************************/
void sge_setup_job_resend(void)
{
    lListElem *job = NULL;
    object_description *object_base = object_type_get_object_description();

    DENTER(TOP_LAYER, "sge_setup_job_resend");

    job = lFirst(*object_base[SGE_TYPE_JOB].list);

    while (NULL != job)
    {
        lListElem *task;
        u_long32 job_num;

        job_num = lGetUlong(job, JB_job_number);

        task = lFirst(lGetList(job, JB_ja_tasks));

        while (NULL != task)
        {
            if (lGetUlong(task, JAT_status) == JTRANSFERING)
            {
                lListElem *granted_queue, *qinstance, *host;
                const char *qname;
                u_long32 task_num, when;
                te_event_t ev;

                task_num = lGetUlong(task, JAT_task_number);

                granted_queue = lFirst(lGetList(task, JAT_granted_destin_identifier_list));

                qname = lGetString(granted_queue, JG_qname);

                qinstance = cqueue_list_locate_qinstance(*object_base[SGE_TYPE_CQUEUE].list, qname);

                host = host_list_locate(*object_base[SGE_TYPE_EXECHOST].list, lGetHost(qinstance, QU_qhostname));

                when = lGetUlong(task, JAT_start_time);

                when += MAX(load_report_interval(host), MAX_JOB_DELIVER_TIME);

                ev = te_new_event((time_t)when, TYPE_JOB_RESEND_EVENT, ONE_TIME_EVENT, job_num, task_num, "job-resend_event");
                te_add_event(ev);
                te_free_event(&ev);

                DPRINTF(("Did add job resend for "sge_u32"/"sge_u32" at %d\n", job_num, task_num, when));
            }

            task = lNext(task);
        }

        job = lNext(job);
    }

    DEXIT;
    return;
} /* sge_setup_job_resend() */
예제 #2
0
/****** qmaster/sge_qmaster_main/sge_start_heartbeat() *****************************
*  NAME
*     sge_start_heartbeat() -- Start qmaster heartbeat. 
*
*  SYNOPSIS
*     static void sge_start_heartbeat(void) 
*
*  FUNCTION
*     Add heartbeat event and register according event handler. 
*
*  INPUTS
*     void - none 
*
*  RESULT
*     void - none 
*
*  NOTES
*     MT-NOTE: sge_start_heartbeat() is MT safe 
*
*******************************************************************************/
void heartbeat_initialize(void)
{
   te_event_t ev     = NULL;

   DENTER(TOP_LAYER, "heartbeat_initialize");

   te_register_event_handler(increment_heartbeat, TYPE_HEARTBEAT_EVENT);
   ev = te_new_event(HEARTBEAT_INTERVAL, TYPE_HEARTBEAT_EVENT, RECURRING_EVENT, 
                     0, 0, "heartbeat-event");
   te_add_event(ev);
   te_free_event(&ev);

   /* this is for testsuite shadowd test */
   if (getenv("SGE_TEST_HEARTBEAT_TIMEOUT") != NULL) {
      int test_timeout = atoi(getenv("SGE_TEST_HEARTBEAT_TIMEOUT"));
      set_inc_qmaster_heartbeat_test_mode(test_timeout);
      DPRINTF(("heartbeat timeout test enabled (timeout="sge_U32CFormat")\n", sge_u32c(test_timeout)));
   }

   DEXIT;
   return;
} /* sge_start_heartbeat(void) */
/****** qmaster/sge_mod_configuration() ****************************************
*  NAME
*     sge_mod_configuration() -- modify cluster configuration
*
*  SYNOPSIS
*     int sge_mod_configuration(lListElem *aConf, lList **anAnswer, char *aUser,
*                               char *aHost)
*
*  FUNCTION
*     Modify cluster configuration. 'confp' is a pointer to a 'CONF_Type' list
*     element and does contain the modified configuration entry. Adding a new
*     configuration entry is also viewed as a modification.
*
*  INPUTS
*     lListElem *aConf  - CONF_Type element containing the modified conf
*     lList **anAnswer  - answer list
*     char *aUser       - target user
*     char *aHost       - target host
*
*  RESULT
*     int - 0 success
*          -1 error
*
*  NOTES
*     MT-NOTE: sge_mod_configuration() is MT safe 
*
*******************************************************************************/
int sge_mod_configuration(sge_gdi_ctx_class_t *ctx, lListElem *aConf, lList **anAnswer, char *aUser, char *aHost)
{
   lListElem *old_conf;
   const char *tmp_name = NULL;
   char unique_name[CL_MAXHOSTLEN];
   int ret = -1;
   const char *cell_root = ctx->get_cell_root(ctx);
   const char *qualified_hostname = ctx->get_qualified_hostname(ctx);
   u_long32 progid = ctx->get_who(ctx);

   DENTER(TOP_LAYER, "sge_mod_configuration");

   if (!aConf || !aUser || !aHost) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   if ((tmp_name = lGetHost(aConf, CONF_name)) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(CONF_name), SGE_FUNC));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   if ((ret = sge_resolve_hostname(tmp_name, unique_name, EH_name, sizeof(unique_name)))
       != CL_RETVAL_OK) {
      DPRINTF(("%s: error %s resolving host %s\n", SGE_FUNC, cl_get_error_text(ret), tmp_name));
      ERROR((SGE_EVENT, MSG_SGETEXT_CANTRESOLVEHOST_S, tmp_name));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }
   
   if ((ret = check_config(anAnswer, aConf))) {
      DRETURN(ret); 
   }

   if ((old_conf = sge_get_configuration_for_host(unique_name)) != NULL) {
      int ret = -1;
      
      ret = do_mod_config(ctx, unique_name, old_conf, aConf, anAnswer);
      
      lFreeElem(&old_conf);
      
      if (ret == 0) {    
         INFO((SGE_EVENT, MSG_SGETEXT_MODIFIEDINLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF));
         answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
      } else {
         DRETURN(STATUS_EUNKNOWN);
      }
   } else {
      do_add_config(ctx, unique_name, aConf, anAnswer);
            
      INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF));            
      answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   }
   
   if (strcmp(SGE_GLOBAL_NAME, unique_name) == 0) {
      sge_add_event(0, sgeE_GLOBAL_CONFIG, 0, 0, NULL, NULL, NULL, NULL);
   }

   /*
   ** is the configuration change relevant for the qmaster itsself?
   ** if so, initialise conf struct anew
   */
   if (strcmp(unique_name, SGE_GLOBAL_NAME) == 0 || sge_hostcmp(unique_name, qualified_hostname) == 0) {
      lListElem *local = NULL;
      lListElem *global = NULL;
      lList *answer_list = NULL;
      char* qmaster_params = NULL;
      int accounting_flush_time = mconf_get_accounting_flush_time();

      if ((local = sge_get_configuration_for_host(qualified_hostname)) == NULL) {
         WARNING((SGE_EVENT, MSG_CONF_NOLOCAL_S, qualified_hostname));
      }
      
      if ((global = sge_get_configuration_for_host(SGE_GLOBAL_NAME)) == NULL) {
         ERROR((SGE_EVENT, SFNMAX, MSG_CONF_NOGLOBAL));
      }
            
      if (merge_configuration(&answer_list, progid, cell_root, global, local, NULL) != 0) {
         ERROR((SGE_EVENT, MSG_CONF_CANTMERGECONFIGURATIONFORHOST_S, qualified_hostname));
      }
      answer_list_output(&answer_list);

      /* Restart the accounting flush event if needed. */
      if ((accounting_flush_time == 0) &&
          (mconf_get_accounting_flush_time() != 0)) {
         te_event_t ev = te_new_event(time(NULL), TYPE_ACCOUNTING_TRIGGER, ONE_TIME_EVENT, 1, 0, NULL);
         te_add_event(ev);
         te_free_event(&ev);
      }
      
      lFreeElem(&local);
      lFreeElem(&global);
      
      sge_show_conf();

      /* 'max_unheard' may have changed */
      cl_commlib_set_connection_param(cl_com_get_handle("qmaster", 1), HEARD_FROM_TIMEOUT, mconf_get_max_unheard());

      /* fetching qmaster_params and begin to parse */
      qmaster_params = mconf_get_qmaster_params();

      /* updating the commlib paramterlist and gdi_timeout with new or changed parameters */
      cl_com_update_parameter_list(qmaster_params);

      sge_free(&qmaster_params);
   }
    
   /* invalidate configuration cache */
   mconf_set_new_config(true);
   
   DRETURN(STATUS_OK);
}