static int do_qeti_test(lListElem *cr, u_long32 *qeti_expected_result)
{
   lList *cr_list;
   sge_qeti_t *iter;
   u_long32 pe_time;
   int ret = 0;
   int i = 0;

   /* sge_qeti_allocate() */
   cr_list = lCreateList("", RUE_Type);
   lAppendElem(cr_list, cr);
   iter = sge_qeti_allocate2(cr_list);

   /* sge_qeti_first() */
   for (pe_time = sge_qeti_first(iter), i=0; pe_time; pe_time = sge_qeti_next(iter), i++) {
      if (qeti_expected_result == NULL) {
         printf("failed: qeti returned "sge_U32CFormat", expected no iteration\n", sge_u32c(pe_time));
         ret++;
      } else if (qeti_expected_result[i] != pe_time) {
         printf("failed: qeti returned "sge_U32CFormat", expected "sge_U32CFormat"\n", sge_u32c(pe_time), sge_u32c(qeti_expected_result[i]));
         ret++;
      } else {
         printf("success: QETI returned "sge_U32CFormat"\n", sge_u32c(pe_time));
      }
   }

   lDechainElem(cr_list, cr);
   sge_qeti_release(&iter);
   lFreeList(&cr_list);

   return ret;
}
bool
sge_gdi_packet_debug_print(sge_gdi_packet_class_t * packet)
{
   bool ret = true;

   DENTER(TOP_LAYER, "sge_gdi_packet_debug_print");

   if (packet != NULL) {
      sge_gdi_task_class_t *task;

      DPRINTF(("packet->id = " sge_U32CFormat "\n", sge_u32c(packet->id)));
      DPRINTF(("packet->host = " SFQ "\n", packet->host ? packet->host : "<null>"));
      DPRINTF(("packet->commproc = " SFQ "\n", packet->commproc ? packet->commproc : "<null>"));
      DPRINTF(("packet->auth_info = " SFQ "\n", packet->auth_info ?  packet->auth_info : "<null>"));
      DPRINTF(("packet->version = " sge_U32CFormat "\n",
               sge_u32c(packet->version)));
      DPRINTF(("packet->first_task = %p\n", packet->first_task));
      DPRINTF(("packet->last_task = %p\n", packet->last_task));

      task = packet->first_task;
      while (task != NULL) {
         sge_gdi_task_debug_print(task);
         task = task->next;
      }
   } else {
      DPRINTF(("packet is NULL\n"));;
   }
   DRETURN(ret);
}
/* MT-NOTE: schedd_serf_record_func() is not MT safe */
static void
schedd_serf_record_func(u_long32 job_id, u_long32 ja_taskid, const char *state,
                        u_long32 start_time, u_long32 end_time,
                        char level_char, const char *object_name,
                        const char *name, double utilization)
{
   FILE *fp;

   DENTER(TOP_LAYER, "schedd_serf_record_func");

   if (!(fp = fopen(schedule_log_path, "a"))) {
      DRETURN_VOID;
   }

   /* a new record */
   fprintf(fp, sge_U32CFormat":"sge_U32CFormat":%s:"sge_U32CFormat":"
           sge_U32CFormat":%c:%s:%s:%f\n", sge_u32c(job_id),
           sge_u32c(ja_taskid), state, sge_u32c(start_time),
           sge_u32c(end_time - start_time),
           level_char, object_name, name, utilization);
   FCLOSE(fp);

   DRETURN_VOID;
FCLOSE_ERROR:
   DPRINTF((MSG_FILE_ERRORCLOSEINGXY_SS, schedule_log_path, strerror(errno)));
   DRETURN_VOID;
}
/****** sge_binding/binding_print_to_string() **********************************
*  NAME
*     binding_print_to_string() -- Prints the content of a binding list to a string
*
*  SYNOPSIS
*     bool binding_print_to_string(const lListElem *this_elem, dstring *string)
*
*  FUNCTION
*     Prints the binding type and binding strategy of a binding list element 
*     into a string.
*
*  INPUTS
*     const lListElem* this_elem - Binding list element
*
*  OUTPUTS
*     const dstring *string      - Output string which must be initialized.
*
*  RESULT
*     bool - true in all cases
*
*  NOTES
*     MT-NOTE: is_starting_point() is MT safe
*
*******************************************************************************/
bool
binding_print_to_string(const lListElem *this_elem, dstring *string) {
   bool ret = true;

   DENTER(BINDING_LAYER, "binding_print_to_string");
   if (this_elem != NULL && string != NULL) {
      const char *const strategy = lGetString(this_elem, BN_strategy);
      binding_type_t type = (binding_type_t)lGetUlong(this_elem, BN_type);

      switch (type) {
         case BINDING_TYPE_SET:
            sge_dstring_append(string, "set ");
            break;
         case BINDING_TYPE_PE:
            sge_dstring_append(string, "pe ");
            break;
         case BINDING_TYPE_ENV:
            sge_dstring_append(string, "env ");
            break;
         case BINDING_TYPE_NONE:
            sge_dstring_append(string, "NONE");
      }

      if (strcmp(strategy, "linear_automatic") == 0) {
         u_long32 n = sge_u32c(lGetUlong(this_elem, BN_parameter_n));
         if (BIND_INFINITY == n)
            sge_dstring_sprintf_append(string, "linear:slots");
         else
            sge_dstring_sprintf_append(string, "linear:"sge_U32CFormat, n);
      } else if (strcmp(strategy, "linear") == 0) {
        sge_dstring_sprintf_append(string, "%s:"sge_U32CFormat":"sge_U32CFormat","sge_U32CFormat,
               "linear", sge_u32c(lGetUlong(this_elem, BN_parameter_n)),
               sge_u32c(lGetUlong(this_elem, BN_parameter_socket_offset)),
               sge_u32c(lGetUlong(this_elem, BN_parameter_core_offset)));
      } else if (strcmp(strategy, "striding_automatic") == 0) {
         sge_dstring_sprintf_append(string, "%s:"sge_U32CFormat":"sge_U32CFormat,
            "striding", sge_u32c(lGetUlong(this_elem, BN_parameter_n)),
            sge_u32c(lGetUlong(this_elem, BN_parameter_striding_step_size)));
      } else if (strcmp(strategy, "striding") == 0) {
         sge_dstring_sprintf_append(string, "%s:"sge_U32CFormat":"sge_U32CFormat":"sge_U32CFormat","sge_U32CFormat,
            "striding", sge_u32c(lGetUlong(this_elem, BN_parameter_n)),
            sge_u32c(lGetUlong(this_elem, BN_parameter_striding_step_size)),
            sge_u32c(lGetUlong(this_elem, BN_parameter_socket_offset)),
            sge_u32c(lGetUlong(this_elem, BN_parameter_core_offset)));
      } else if (strcmp(strategy, "explicit") == 0) {
         sge_dstring_sprintf_append(string, "%s", lGetString(this_elem, BN_parameter_explicit));
      }
   }
   DRETURN(ret);
}
void 
sge_worker_initialize(sge_gdi_ctx_class_t *ctx)
{
   const u_long32 max_initial_worker_threads = ctx->get_worker_thread_count(ctx);
   cl_thread_settings_t* dummy_thread_p = NULL;
   int i;   

   DENTER(TOP_LAYER, "sge_worker_initialize");

   /*
    * TODO: EB: corresponding destroy function is missing during shutdown
    */
   sge_tq_create(&Master_Task_Queue);

   sge_init_job_number();
   sge_init_ar_id();
   DPRINTF(("job/ar counter have been initialized\n"));

   reporting_initialize(NULL);
   DPRINTF(("accounting and reporting module has been initialized\n"));

   INFO((SGE_EVENT, MSG_QMASTER_THREADCOUNT_US, 
         sge_u32c(max_initial_worker_threads), threadnames[WORKER_THREAD]));
   cl_thread_list_setup(&(Main_Control.worker_thread_pool), "thread pool");
   for (i = 0; i < max_initial_worker_threads; i++) {
      dstring thread_name = DSTRING_INIT;

      sge_dstring_sprintf(&thread_name, "%s%03d", threadnames[WORKER_THREAD], i);
      cl_thread_list_create_thread(Main_Control.worker_thread_pool, &dummy_thread_p,
                                   cl_com_get_log_list(), sge_dstring_get_string(&thread_name), i, 
                                   sge_worker_main, NULL, NULL, CL_TT_WORKER);
      sge_dstring_free(&thread_name);
   }
   DRETURN_VOID;
}
static bool
sge_pack_gdi_info(u_long32 command)
{
   bool ret = true;

   DENTER(GDI_LAYER, "sge_pack_gdi_info");
   switch (command) {
   case SGE_GDI_GET:
      DPRINTF(("packing SGE_GDI_GET request\n"));
      break;
   case SGE_GDI_ADD:
   case SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION:
   case SGE_GDI_ADD | SGE_GDI_SET_ALL:
   case SGE_GDI_ADD | SGE_GDI_EXECD_RESTART:
      DPRINTF(("packing SGE_GDI_ADD request\n"));
      break;
   case SGE_GDI_DEL:
   case SGE_GDI_DEL | SGE_GDI_ALL_JOBS:
   case SGE_GDI_DEL | SGE_GDI_ALL_USERS:
   case SGE_GDI_DEL | SGE_GDI_ALL_JOBS | SGE_GDI_ALL_USERS:
      DPRINTF(("packing SGE_GDI_DEL request\n"));
      break;
   case SGE_GDI_MOD:
   case SGE_GDI_MOD | SGE_GDI_ALL_JOBS:
   case SGE_GDI_MOD | SGE_GDI_ALL_USERS:
   case SGE_GDI_MOD | SGE_GDI_ALL_JOBS | SGE_GDI_ALL_USERS:
   case SGE_GDI_MOD | SGE_GDI_APPEND:
   case SGE_GDI_MOD | SGE_GDI_REMOVE:
   case SGE_GDI_MOD | SGE_GDI_CHANGE:
   case SGE_GDI_MOD | SGE_GDI_SET_ALL:
      DPRINTF(("packing SGE_GDI_MOD request\n"));
      break;
   case SGE_GDI_TRIGGER:
      DPRINTF(("packing SGE_GDI_TRIGGER request\n"));
      break;
   case SGE_GDI_PERMCHECK:
      DPRINTF(("packing SGE_GDI_PERMCHECK request\n"));
      break;
   case SGE_GDI_SPECIAL:
      DPRINTF(("packing special things\n"));
      break;
   case SGE_GDI_COPY:
      DPRINTF(("request denied\n"));
      break;
   case SGE_GDI_REPLACE:
   case SGE_GDI_REPLACE | SGE_GDI_SET_ALL:
      DPRINTF(("packing SGE_GDI_REPLACE request\n"));
      break;
   default:
      ERROR((SGE_EVENT, MSG_GDI_ERROR_INVALIDVALUEXFORARTOOP_D,
             sge_u32c(command)));
      ret = false;
   }

   DRETURN(ret);
}
/****** sge_cqueue_verify/cqueue_verify_job_slots() ****************************
*  NAME
*     cqueue_verify_job_slots() -- verify the queue slots attribute
*
*  SYNOPSIS
*     bool 
*     cqueue_verify_job_slots(lListElem *cqueue, lList **answer_list, 
*                             lListElem *attr_elem)
*
*  FUNCTION
*     Verifies if the slots attribute of a queue is in the expected range
*     (0 .. MAX_SEQNUM). MAX_SEQNUM is 9999999.
*
*  INPUTS
*     lListElem *cqueue    - The queue to verify.
*     lList **answer_list  - answer list to report errors
*     lListElem *attr_elem - the attribute to verify
*
*  RESULT
*     bool - true on success, false on error
*
*  NOTES
*     MT-NOTE: cqueue_verify_job_slots() is MT safe 
*******************************************************************************/
bool 
cqueue_verify_job_slots(lListElem *cqueue, lList **answer_list, lListElem *attr_elem)
{
   bool ret = true;

   DENTER(CQUEUE_VERIFY_LAYER, "cqueue_verify_job_slots");
   if (cqueue != NULL && attr_elem != NULL) {
      u_long32 slots = lGetUlong(attr_elem, AULNG_value);

      if (slots > MAX_SEQNUM) {
         answer_list_add_sprintf(answer_list, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR,
                                 MSG_ATTR_INVALID_ULONGVALUE_USUU, sge_u32c(slots), "slots",
                                 sge_u32c(0), sge_u32c(MAX_SEQNUM));
         ret = false;
      }
   }

   DEXIT;
   return ret;
}
static int do_utilization_test(lListElem *cr, test_array_t *ta)
{
   int ret = 0;
   double uti;
   int i;

   for (i = 0; ta[i].start_time != 0; i++) {
      uti = utilization_max(cr, ta[i].start_time, ta[i].duration, false);
      if (uti != ta[i].uti) {
         printf("failed: utilization(cr, "sge_U32CFormat", "sge_U32CFormat") returned %f, expected %f\n",
                sge_u32c(ta[i].start_time), sge_u32c(ta[i].duration), uti, ta[i].uti);
         ret++;
      } else {
         printf("success: utilization(cr, "sge_U32CFormat", "sge_U32CFormat") returned %f\n",
                sge_u32c(ta[i].start_time), sge_u32c(ta[i].duration), uti);
      }
   }

   return ret;
}
bool 
sge_gdi_task_debug_print(sge_gdi_task_class_t * task) 
{
    bool ret = true;
    
    DENTER(TOP_LAYER, "sge_gdi_task_debug_print");
    if (task != NULL) {
        DPRINTF(("task->id = " sge_U32CFormat "\n", sge_u32c(task->id)));
        DPRINTF(("task->command = " sge_U32CFormat "\n",
                sge_u32c(task->command)));
        DPRINTF(("task->target = " sge_U32CFormat "\n", sge_u32c(task->target)));
        DPRINTF(("task->data_list = %p\n", task->data_list));
        DPRINTF(("task->answer_list = %p\n", task->answer_list));
        DPRINTF(("task->condition = %p\n", task->condition));
        DPRINTF(("task->enumeration = %p\n", task->enumeration));
        DPRINTF(("task->next = %p\n", task->next));
    } else {
        DPRINTF(("task is NULL\n"));
    }
    DRETURN(ret);
}
Beispiel #10
0
/****** uti/monitor/ext_gdi_output() *******************************************
*  NAME
*     ext_gdi_output() -- generates a string from the GDI extension
*
*  SYNOPSIS
*     static void ext_gdi_output(char *message, int size, void 
*     *monitoring_extension, double time) 
*
*  FUNCTION
*     generates a string from the extension and returns it.
*
*  INPUTS
*     char *message              - initilized string buffer
*     int size                   - buffer size
*     void *monitoring_extension - the extension structure
*     double time                - length of the mesurement interval
*
*  NOTES
*     MT-NOTE: ext_gdi_output() is MT safe 
*******************************************************************************/
static void ext_gdi_output(dstring *message, void *monitoring_extension, double time)
{
   m_gdi_t *gdi_ext = (m_gdi_t*) monitoring_extension;

   sge_dstring_sprintf_append(message, MSG_UTI_MONITOR_GDIEXT_FFFFFFFFFFFFI,
            gdi_ext->eload_count/time, gdi_ext->ejob_count/time, 
            gdi_ext->econf_count/time, gdi_ext->eproc_count/time,
            gdi_ext->eack_count/time, 
            gdi_ext->gdi_add_count/time, gdi_ext->gdi_get_count/time,
            gdi_ext->gdi_mod_count/time, gdi_ext->gdi_del_count/time,
            gdi_ext->gdi_cp_count/time, gdi_ext->gdi_trig_count/time, 
            gdi_ext->gdi_perm_count/time,
            sge_u32c(gdi_ext->queue_length));
}
Beispiel #11
0
static int spawn_file(dstring *aBuffer, dstring *error_message) {
   int my_errno;
   char* mktemp_return = NULL;
   char tmp_file_string[256];
   char tmp_string[SGE_PATH_MAX];

   /*
    * generate template filename for mktemp()
    */   
   snprintf(tmp_file_string, 256, "pid-%u-XXXXXX", (unsigned int)getpid()); 

   /*
    * check final length of path
    */
   if (sge_dstring_strlen(aBuffer) + strlen(tmp_file_string) >= SGE_PATH_MAX) {
      sge_dstring_append(aBuffer, tmp_file_string);
      sge_dstring_sprintf(error_message, MSG_TMPNAM_SGE_MAX_PATH_LENGTH_US,
                          sge_u32c(SGE_PATH_MAX), sge_dstring_get_string(aBuffer));
      return -1;
   }

   /*
    * now build full path string for mktemp()
    */
   snprintf(tmp_string, SGE_PATH_MAX, "%s%s", sge_dstring_get_string(aBuffer), tmp_file_string);

   /*
    * generate temp file by call to mktemp()
    */
   errno = 0;
   mktemp_return = mktemp(tmp_string);
   my_errno = errno;
   if (mktemp_return[0] == '\0') {
      sge_dstring_sprintf(error_message, MSG_TMPNAM_GOT_SYSTEM_ERROR_SS, 
                                strerror(my_errno), sge_dstring_get_string(aBuffer));
      return -1;
   }

   /*
    * finally copy the resulting path to aBuffer
    */
   sge_dstring_sprintf(aBuffer, tmp_string);
   return 0;
}
Beispiel #12
0
/*-----------------------------------------------------------------
 * compare_qmaster_names
 * see if old qmaster name and current qmaster name are still the same
 *-----------------------------------------------------------------*/
static int compare_qmaster_names(
const char *act_qmaster_file,
const char *oldqmaster 
) {
 char newqmaster[SGE_PATH_MAX];
 int ret;
 
 DENTER(TOP_LAYER, "compare_qmaster_names");
 
 if (get_qm_name(newqmaster, act_qmaster_file, NULL)) {
    WARNING((SGE_EVENT, MSG_SHADOWD_CANTREADACTQMASTERFILEX_S, act_qmaster_file)); 
    DRETURN(-1);
 }
 
 ret = sge_hostcmp(newqmaster, oldqmaster);
 
 DPRINTF(("strcmp() of old and new qmaster returns: "sge_U32CFormat"\n", sge_u32c(ret)));
 
 DRETURN(ret);
} 
/****** qmaster/sge_qmaster_main/sge_start_heartbeat() *****************************
*  NAME
*     sge_start_heartbeat() -- Start qmaster heartbeat. 
*
*  SYNOPSIS
*     static void sge_start_heartbeat(void) 
*
*  FUNCTION
*     Add heartbeat event and register according event handler. 
*
*  INPUTS
*     void - none 
*
*  RESULT
*     void - none 
*
*  NOTES
*     MT-NOTE: sge_start_heartbeat() is MT safe 
*
*******************************************************************************/
void heartbeat_initialize(void)
{
   te_event_t ev     = NULL;

   DENTER(TOP_LAYER, "heartbeat_initialize");

   te_register_event_handler(increment_heartbeat, TYPE_HEARTBEAT_EVENT);
   ev = te_new_event(HEARTBEAT_INTERVAL, TYPE_HEARTBEAT_EVENT, RECURRING_EVENT, 
                     0, 0, "heartbeat-event");
   te_add_event(ev);
   te_free_event(&ev);

   /* this is for testsuite shadowd test */
   if (getenv("SGE_TEST_HEARTBEAT_TIMEOUT") != NULL) {
      int test_timeout = atoi(getenv("SGE_TEST_HEARTBEAT_TIMEOUT"));
      set_inc_qmaster_heartbeat_test_mode(test_timeout);
      DPRINTF(("heartbeat timeout test enabled (timeout="sge_U32CFormat")\n", sge_u32c(test_timeout)));
   }

   DEXIT;
   return;
} /* sge_start_heartbeat(void) */
/****** sge_gdi_packet/sge_gdi_packet_verify_version() ************************
*  NAME
*     sge_gdi_packet_verify_version() -- verify packet version
*
*  SYNOPSIS
*     bool sge_gdi_packet_verify_version(sge_gdi_packet_class_t *packet,
*                                        lList **alpp)
*
*  FUNCTION
*     This function is the replacement for the function
*     verify_request_version() which was part of the source code
*     before the packet structure was introduced.
*
*     It compares the version information of the provided "packet"
*     with the compiledin version number GRM_GDI_VERSION.
*
*     If both versions are not the same then it tries to find
*     if the client which provided us with this packet structure
*     has a higer version number or the binary executing
*     this function. In both cases the answer_list will
*     be filled with an appropriate message.
*
*  INPUTS
*     sge_gdi_packet_class_t *packet - packet
*     lList **alpp                   - answer list
*
*  RESULT
*     bool - error state
*        true  - same version
*        false - differnet version numbers
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_verify_version() is not MT safe
******************************************************************************/
bool
sge_gdi_packet_verify_version(sge_gdi_packet_class_t * packet, lList **alpp)
{
   bool ret = true;
   u_long32 version = packet->version;

   DENTER(TOP_LAYER, "sge_gdi_packet_verify_version");

   if (version != GRM_GDI_VERSION)
   {
      char *client_version = NULL;
      dstring ds;
      char buffer[256];
      const vdict_t *vp;
      const vdict_t *vdict = GRM_GDI_VERSION_ARRAY;

      sge_dstring_init(&ds, buffer, sizeof(buffer));

      for (vp = &vdict[0]; vp->version; vp++) {
         if (version == vp->version) {
            client_version = vp->release;
         }
      }

      if (client_version) {
         WARNING((SGE_EVENT, MSG_GDI_WRONG_GDI_SSISS, packet->host,
                  packet->commproc, (int)(packet->id), client_version,
                  feature_get_product_name(FS_VERSION, &ds)));
      } else {
         WARNING((SGE_EVENT, MSG_GDI_WRONG_GDI_SSIUS, packet->host,
                  packet->commproc, (int)(packet->id), sge_u32c(version),
                  feature_get_product_name(FS_VERSION, &ds)));
      }
      answer_list_add(alpp, SGE_EVENT, STATUS_EVERSION, ANSWER_QUALITY_ERROR);
      ret = false;
   }
   DRETURN(ret);
}
Beispiel #15
0
static sge_callback_result
print_jatask_event(sge_evc_class_t *evc, object_description *object_base, sge_object_type type, 
                   sge_event_action action, lListElem *event, void *clientdata)
{
   char buffer[1024];
   dstring buffer_wrapper;

   DENTER(TOP_LAYER, "print_jatask_event");

   sge_dstring_init(&buffer_wrapper, buffer, sizeof(buffer));
   
   DPRINTF(("%s\n", event_text(event, &buffer_wrapper)));
   if (lGetPosViaElem(event, ET_type, SGE_NO_ABORT) >= 0) {
      u_long32 type = lGetUlong(event, ET_type);
      u_long32 timestamp = lGetUlong(event, ET_timestamp);
      
      if (type == sgeE_JATASK_MOD) { 
         lList *jat = lGetList(event,ET_new_version);
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         lListElem *ep = lFirst(jat);
         u_long job_status = lGetUlong(ep, JAT_status);
         int task_running = (job_status==JRUNNING || job_status==JTRANSFERING);

         if (task_running) {
            fprintf(stdout,"JOB_START (%ld.%ld:ECL_TIME="sge_U32CFormat")\n", job_id ,task_id,sge_u32c(timestamp));
            fflush(stdout);  
            Global_jobs_running++;
         }
      }
   
      if (type == sgeE_JOB_FINAL_USAGE) { 
         /* lList *jat = lGetList(event,ET_new_version); */
         u_long job_id = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         /* lWriteElemTo(event, stdout); */
         fprintf(stdout,"JOB_FINISH (%ld.%ld:ECL_TIME="sge_U32CFormat")\n", job_id, task_id,sge_u32c(timestamp));
         Global_jobs_running--;
         fflush(stdout);  
      }
      if (type == sgeE_JOB_ADD) { 
         lList *jat = lGetList(event,ET_new_version);
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         lListElem *ep = lFirst(jat);
         const char* job_project = lGetString(ep, JB_project);
         if (job_project == NULL) {
            job_project = "NONE";
         }
         fprintf(stdout,"JOB_ADD (%ld.%ld:ECL_TIME="sge_U32CFormat":project=%s)\n", job_id, task_id, sge_u32c(timestamp),job_project);
         Global_jobs_registered++;
         fflush(stdout);  
      }
      if (type == sgeE_JOB_DEL) { 
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         fprintf(stdout,"JOB_DEL (%ld.%ld:ECL_TIME="sge_U32CFormat")\n", job_id, task_id,sge_u32c(timestamp));
         Global_jobs_registered--;
         fflush(stdout);  
      }

   }
   /* create a callback error to test error handling */
   if(type == SGE_TYPE_GLOBAL_CONFIG) {
      DEXIT;
      return SGE_EMA_FAILURE;
   }
   
   DEXIT;
   return SGE_EMA_OK;
}
Beispiel #16
0
/*----------------------------------------------------------------------------*/
int 
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
/*    const char *cp; */
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

#if 1

static int check_interval = CHECK_INTERVAL;
static int get_active_interval = GET_ACTIVE_INTERVAL;
static int delay_time = DELAY_TIME;
static int sge_test_heartbeat = 0;

char binpath[SGE_PATH_MAX];
char oldqmaster[SGE_PATH_MAX];

char shadow_err_file[SGE_PATH_MAX];
char qmaster_out_file[SGE_PATH_MAX];

#endif

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   /* initialize recovery control variables */
   {
      char *s;
      int val;
      if ((s=getenv("SGE_CHECK_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         check_interval = val;
      if ((s=getenv("SGE_GET_ACTIVE_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         get_active_interval = val;
      if ((s=getenv("SGE_DELAY_TIME")) &&
          sscanf(s, "%d", &val) == 1)
         delay_time = val;
      if ((s=getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) &&
          sscanf(s, "%d", &val) == 1)
         sge_test_heartbeat = val;
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   sprintf(shadow_err_file, "messages_shadowd.%s", ctx->get_unqualified_hostname(ctx));
   sprintf(qmaster_out_file, "messages_qmaster.%s", ctx->get_unqualified_hostname(ctx));
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();


      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have to heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];

                        strcpy(qmaster_name, SGE_PREFIX);
                        strcat(qmaster_name, prognames[QMASTER]); 
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        out = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        err = out;
                        if (out == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error
                            */
                           out = 1;
                           err = 2;
                        } 

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }
                        close(out);
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and hearbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}
/****** qmaster/sge_qmaster_main/main() ****************************************
*  NAME
*     main() -- qmaster entry point 
*
*  SYNOPSIS
*     int main(int argc, char* argv[]) 
*
*  FUNCTION
*     Qmaster entry point.
*
*     NOTE: The main thread must block all signals before any additional thread
*     is created. Failure to do so will ruin signal handling!
*
*  INPUTS
*     int argc     - number of commandline arguments 
*     char* argv[] - commandline arguments 
*
*  RESULT
*     0 - success 
*
*  NOTES
*     We check whether 'SGE_ROOT' is set before we daemonize. Once qmaster is
*     a daemon, we are no longer connected to a terminal and hence can not
*     output an error message to stdout or stderr.
*
*     We need to inovke 'prepare_enroll()' *before* the user id is switched via
*     'become_admin_user()'. This is because qmaster must be able to bind a so
*     called reserved port (requires root privileges) if configured to do so.
*
*******************************************************************************/
int main(int argc, char* argv[])
{
   int max_enroll_tries;
   int ret_val;
   int file_descriptor_settings_result = 0;
   bool has_daemonized = false;
   sge_gdi_ctx_class_t *ctx = NULL;
   u_long32 start_time = sge_get_gmt();
   monitoring_t monitor;

   DENTER_MAIN(TOP_LAYER, "qmaster");

   sge_monitor_init(&monitor, "MAIN", NONE_EXT, MT_WARNING, MT_ERROR);
   prof_mt_init();

   sge_get_root_dir(true, NULL, 0, true);
   
#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   sge_init_language_func((gettext_func_type)gettext, (setlocale_func_type)setlocale, (bindtextdomain_func_type)bindtextdomain, (textdomain_func_type)textdomain);
   sge_init_language(NULL,NULL);   
#endif 

   /* 
    * qmaster doesn't support any commandline anymore,
    * but we should show version string and -help option 
    */
   if (argc != 1) {
      sigset_t sig_set;
      sigfillset(&sig_set);
      pthread_sigmask(SIG_SETMASK, &sig_set, NULL);
      sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, true);
      sge_process_qmaster_cmdline(argv);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
    * daemonize qmaster
    * set file descriptor limits
    * and initialize libraries to be used in multi threaded environment
    * also take care that finished child processed of this process become
    * zombie jobs
    */
   has_daemonized = sge_daemonize_qmaster();
   file_descriptor_settings_result = set_file_descriptor_limit();
#if !defined(INTERIX) && !defined(CYGWIN)
   init_sig_action_and_mask();
#endif

   /* init qmaster threads without becomming admin user */
   sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, false);

   ctx->set_daemonized(ctx, has_daemonized);

   /* this must be done as root user to be able to bind ports < 1024 */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;
      if (max_enroll_tries <= 0) {
         /* exit after 30 seconds */
         CRITICAL((SGE_EVENT, MSG_QMASTER_COMMUNICATION_ERRORS ));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
      }
   }

   /*
    * now the commlib up and running. Set qmaster application status function 
    * (commlib callback function for qping status information response 
    *  messages (SIRM))
    */
   ret_val = cl_com_set_status_func(sge_qmaster_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
   }

   /* 
    * now we become admin user change into the correct root directory set the
    * the target for logging messages
    */
   sge_become_admin_user(ctx->get_admin_user(ctx));
   sge_chdir_exit(ctx->get_qmaster_spool_dir(ctx), 1);
   log_state_set_log_file(ERR_FILE);
   ctx->set_exit_func(ctx, sge_exit_func);

#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /*
    * We do increment the heartbeat manually here. This is the 'startup heartbeat'. 
    * The first time the hearbeat will be incremented through the heartbeat event 
    * handler is after about HEARTBEAT_INTERVAL seconds. The hardbeat event handler
    * is setup during the initialisazion of the timer thread.
    */
   inc_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, HEARTBEAT_INTERVAL, NULL);
     
   /*
    * Event master module has to be initialized already here because
    * sge_setup_qmaster() might already access it although event delivery
    * thread is not running.
    *
    * Corresponding shutdown is done in sge_event_master_terminate();
    *
    * EB: In my opinion the init function should called in
    * sge_event_master_initialize(). Is it possible to move that call?
    */ 
   sge_event_master_init();

   sge_setup_qmaster(ctx, argv);

#ifndef USE_POLL
   if (file_descriptor_settings_result == 1) {
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_LARGER_THAN_LIMIT_U, sge_u32c(FD_SETSIZE)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE1_U, sge_u32c(FD_SETSIZE - 20)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE2));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE3));
   }
#endif

   /*
    * Setup all threads and initialize corresponding modules. 
    * Order is important!
    */
   sge_signaler_initialize(ctx);
   sge_event_master_initialize(ctx);
   sge_timer_initialize(ctx, &monitor);
   sge_worker_initialize(ctx);
#if 0
   sge_test_initialize(ctx);
#endif
   sge_listener_initialize(ctx);
   sge_scheduler_initialize(ctx, NULL);
#ifndef NO_JNI
   sge_jvm_initialize(ctx, NULL);
#endif

   INFO((SGE_EVENT, "qmaster startup took "sge_u32" seconds", sge_get_gmt() - start_time));

   /*
    * Block till signal from signal thread arrives us
    */
   sge_thread_wait_for_signal();

   /* 
    * Shutdown all threads and shutdown corresponding modules.
    * Order is important!
    */
#ifndef NO_JNI
   sge_jvm_terminate(ctx, NULL);
#endif
   sge_scheduler_terminate(ctx, NULL);
   sge_listener_terminate();
#if 0
   sge_test_terminate(ctx);
#endif
   sge_worker_terminate(ctx);
   sge_timer_terminate();
   sge_event_master_terminate();
   sge_signaler_terminate();

   /*
    * Remaining shutdown operations
    */
   sge_clean_lists();
   sge_monitor_free(&monitor);

   sge_shutdown((void**)&ctx, sge_qmaster_get_exit_state());
   sge_prof_cleanup();

   DEXIT;
   return 0;
} /* main() */
Beispiel #18
0
/************************************************************************
 Master routine for job exit

 We need a rusage struct filled.
 In normal cases this is done by the execd, sending this structure
 to notify master about job finish.

 In case of an error noticed by the master which needs the job to be 
 removed we can fill this structure by hand. We need:

 rusage->job_number
 rusage->qname to clean up the queue (if we didn't find it we nevertheless
               clean up the job

 for functions regarding rusage see sge_rusage.c
 ************************************************************************/
void sge_job_exit(sge_gdi_ctx_class_t *ctx, lListElem *jr, lListElem *jep, lListElem *jatep, monitoring_t *monitor) 
{
   lListElem *queueep = NULL;
   const char *err_str = NULL;
   const char *qname = NULL;  
   const char *hostname = MSG_OBJ_UNKNOWNHOST;
   u_long32 jobid, jataskid;
   lListElem *hep = NULL;
   u_long32 timestamp;
   object_description *object_base = object_type_get_object_description();

   u_long32 failed, general_failure;
   lList *saved_gdil;

   DENTER(TOP_LAYER, "sge_job_exit");

   /* JG: TODO: we'd prefer some more precise timestamp, e.g. from jr */
   timestamp = sge_get_gmt();
                     
   qname = lGetString(jr, JR_queue_name);
   if (qname == NULL) {
      qname = (char *)MSG_OBJ_UNKNOWNQ;
   }

   err_str = lGetString(jr, JR_err_str);
   if (err_str == NULL) {
      err_str = MSG_UNKNOWNREASON;
   }

   jobid = lGetUlong(jr, JR_job_number);
   jataskid = lGetUlong(jr, JR_ja_task_number);
   failed = lGetUlong(jr, JR_failed);
   general_failure = lGetUlong(jr, JR_general_failure);

   cancel_job_resend(jobid, jataskid);

   /* This only has a meaning for Hibernator jobs. The job pid must
    * be saved accross restarts, since jobs get their old pid
    */
   lSetUlong(jatep, JAT_pvm_ckpt_pid, lGetUlong(jr, JR_job_pid));

   DPRINTF(("reaping job "sge_u32"."sge_u32" in queue >%s< job_pid %d\n", 
      jobid, jataskid, qname, (int) lGetUlong(jatep, JAT_pvm_ckpt_pid)));

   queueep = cqueue_list_locate_qinstance(*object_base[SGE_TYPE_CQUEUE].list, qname);
   if (queueep == NULL) {
      ERROR((SGE_EVENT, MSG_JOB_WRITEJFINISH_S, qname));
   }

   sge_job_remove_enforce_limit_trigger(jobid, jataskid);

   /* retrieve hostname for later use */
   if (queueep != NULL) {
      hostname = lGetHost(queueep, QU_qhostname);
   }

   if (failed) {        /* a problem occured */
      WARNING((SGE_EVENT, MSG_JOB_FAILEDONHOST_UUSSSS, sge_u32c(jobid), 
               sge_u32c(jataskid), 
               hostname,
               general_failure ? MSG_GENERAL : "",
               get_sstate_description(failed), err_str));
   } else {
      INFO((SGE_EVENT, MSG_JOB_JFINISH_UUS,  sge_u32c(jobid), sge_u32c(jataskid), hostname));
   }

   /*-------------------------------------------------*/

   /* test if this job is in state JRUNNING or JTRANSFERING */
   if (lGetUlong(jatep, JAT_status) != JRUNNING && 
       lGetUlong(jatep, JAT_status) != JTRANSFERING) {
      ERROR((SGE_EVENT, MSG_JOB_JEXITNOTRUN_UU, sge_u32c(lGetUlong(jep, JB_job_number)), sge_u32c(jataskid)));
      DRETURN_VOID;
   }

   saved_gdil = lCopyList("cpy", lGetList(jatep, JAT_granted_destin_identifier_list));

   /*
    * case 1: job being trashed because 
    *    --> failed starting interactive job
    *    --> job was deleted
    *    --> a failed batch job that explicitely shall not enter error state
    */
   if (((lGetUlong(jatep, JAT_state) & JDELETED) == JDELETED) ||
         (failed && !lGetString(jep, JB_exec_file)) ||
         (failed && general_failure==GFSTATE_JOB && JOB_TYPE_IS_NO_ERROR(lGetUlong(jep, JB_type)))) {
      reporting_create_acct_record(ctx, NULL, jr, jep, jatep, false);
      /* JG: TODO: we need more information in the log message */
      reporting_create_job_log(NULL, timestamp, JL_DELETED, MSG_EXECD, hostname, jr, jep, jatep, NULL, MSG_LOG_JREMOVED);

      sge_commit_job(ctx, jep, jatep, jr, COMMIT_ST_FINISHED_FAILED_EE, COMMIT_DEFAULT | COMMIT_NEVER_RAN, monitor);

      if (lGetUlong(jep, JB_ar) != 0 && (lGetUlong(jatep, JAT_state) & JDELETED) == JDELETED) {
         /* get AR and remove it if no other jobs are debited */
         lList *master_ar_list = *object_base[SGE_TYPE_AR].list;
         lListElem *ar = ar_list_locate(master_ar_list, lGetUlong(jep, JB_ar));

         if (ar != NULL && lGetUlong(ar, AR_state) == AR_DELETED) {
            lListElem *ar_queue;
            u_long32 ar_id = lGetUlong(ar, AR_id);

            for_each(ar_queue, lGetList(ar, AR_reserved_queues)) {
               if (qinstance_slots_used(ar_queue) != 0) {
                  break;
               }
            }
            if (ar_queue == NULL) {
               /* no jobs registered in advance reservation */
               dstring buffer = DSTRING_INIT;

               sge_dstring_sprintf(&buffer, sge_U32CFormat,
                                   sge_u32c(ar_id));

               ar_do_reservation(ar, false);

               reporting_create_ar_log_record(NULL, ar, ARL_DELETED, 
                                              "AR deleted",
                                              timestamp);
               reporting_create_ar_acct_records(NULL, ar, timestamp); 

               lRemoveElem(master_ar_list, &ar);

               sge_event_spool(ctx, NULL, 0, sgeE_AR_DEL, 
                      ar_id, 0, sge_dstring_get_string(&buffer), NULL, NULL,
                      NULL, NULL, NULL, true, true);
               sge_dstring_free(&buffer);
            }
         }
Beispiel #19
0
/****** sge_utility/verify_str_key() *******************************************
*  NAME
*     verify_str_key() -- Generic function for verifying object names
*
*  SYNOPSIS
*     an_status_t verify_str_key(lList **alpp, const char *str, size_t 
*     str_length, const char *name, int table) 
*
*  FUNCTION
*     Verifies object names. The follwing verification tables are 
*     used
*
*        QSUB_TABLE    job account strings 
*           (see qsub(1) -A for characters not allowed)
*        QSUB_TABLE    job name
*           (see qsub(1) -N for characters not allowed)
*        KEY_TABLE     parallel environemnt names
*           (see sge_pe(5))
*        KEY_TABLE     calendar names 
*           (see calendar_conf(5))
*        KEY_TABLE     cluster queue names 
*           (see queue_conf(5))
*        KEY_TABLE     project names
*           (see project(5))
*        KEY_TABLE     userset names
*           (see access_list(5))
*        KEY_TABLE     department names
*           (see access_list(5))
*        KEY_TABLE     checkpoint interface name
*           (see checkpoint(5))
*        KEY_TABLE     user name
*           (see user(5))
*        KEY_TABLE     hostgroup names
*           (see hostgroup(5))
*
*        KEY_TABLE     event client name (internal purposes only)
*        KEY_TABLE     JAPI session key (internal purposes only)
*
*           Note, there is test_sge_utility
*
*  INPUTS
*     lList **alpp      - answer list
*     const char *str   - string to be verified
*     size_t str_length - length of the string to be verified
*     const char *name  - verbal description of the object
*     int table         - verification table to be used
*
*  RESULT
*     an_status_t - STATUS_OK upon success
*
*  NOTES
*     MT-NOTE: verify_str_key() is MT safe 
* 
*  SEE ALSO
*     There is a module test (test_sge_utility) for verify_str_key().
*******************************************************************************/
an_status_t verify_str_key(
   lList **alpp, const char *str, size_t str_length, const char *name, int table) 
{
   static const char *begin_strings[2][3];
   static const char *mid_strings[2][20];

   static char begin_chars[2][3] =
      { { '.', '#', 0 },
        { 0, 0, 0 } };

   static const char mid_characters[2][20] =
      { { '\n', '\t', '\r', ' ', '/', ':', '\'', '\"', '\\', '[', ']', '{', '}', '|', '(', ')', '@', '%', ',', 0},  /* KEY_TABLE  */
        { '\n', '\t', '\r', '/', ':', '@', '\\', '*', '?', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };                      /* QSUB_TABLE */
   static const char* keyword[] = { "NONE", "ALL", "TEMPLATE", NULL };
   static const char* keyword_strings[4];

   static int initialized = 0;
   char forbidden_char;
   const char* forbidden_string;
   int i;

   table = table -1;
   if (!initialized) {
      begin_strings[0][0] = MSG_GDI_KEYSTR_DOT;
      begin_strings[0][1] = MSG_GDI_KEYSTR_HASH;
      begin_strings[0][2] = NULL;
      begin_strings[1][0] = NULL;
      begin_strings[1][1] = NULL;
      begin_strings[1][2] = NULL;

      mid_strings[0][0] = MSG_GDI_KEYSTR_RETURN;
      mid_strings[0][1] = MSG_GDI_KEYSTR_TABULATOR;
      mid_strings[0][2] = MSG_GDI_KEYSTR_CARRIAGERET;
      mid_strings[0][3] = MSG_GDI_KEYSTR_SPACE;
      mid_strings[0][4] = MSG_GDI_KEYSTR_SLASH;
      mid_strings[0][5] = MSG_GDI_KEYSTR_COLON;
      mid_strings[0][6] = MSG_GDI_KEYSTR_QUOTE;
      mid_strings[0][7] = MSG_GDI_KEYSTR_DBLQUOTE;
      mid_strings[0][8] = MSG_GDI_KEYSTR_BACKSLASH;
      mid_strings[0][9] = MSG_GDI_KEYSTR_BRACKETS;
      mid_strings[0][10] = MSG_GDI_KEYSTR_BRACKETS;
      mid_strings[0][11] = MSG_GDI_KEYSTR_BRACES;
      mid_strings[0][12] = MSG_GDI_KEYSTR_BRACES;
      mid_strings[0][13] = MSG_GDI_KEYSTR_PIPE;
      mid_strings[0][14] = MSG_GDI_KEYSTR_PARENTHESIS;
      mid_strings[0][15] = MSG_GDI_KEYSTR_PARENTHESIS;
      mid_strings[0][16] = MSG_GDI_KEYSTR_AT;
      mid_strings[0][17] = MSG_GDI_KEYSTR_PERCENT;
      mid_strings[0][18] = MSG_GDI_KEYSTR_COMMA;
      mid_strings[0][19] = NULL;
      mid_strings[1][0] = MSG_GDI_KEYSTR_RETURN;
      mid_strings[1][1] = MSG_GDI_KEYSTR_TABULATOR;
      mid_strings[1][2] = MSG_GDI_KEYSTR_CARRIAGERET;
      mid_strings[1][3] = MSG_GDI_KEYSTR_SLASH;
      mid_strings[1][4] = MSG_GDI_KEYSTR_COLON;
      mid_strings[1][5] = MSG_GDI_KEYSTR_AT;
      mid_strings[1][6] = MSG_GDI_KEYSTR_BACKSLASH;
      mid_strings[1][7] = MSG_GDI_KEYSTR_ASTERISK;
      mid_strings[1][8] = MSG_GDI_KEYSTR_QUESTIONMARK;
      mid_strings[1][9] = NULL;

      keyword_strings[0] = MSG_GDI_KEYSTR_KEYWORD;
      keyword_strings[1] = MSG_GDI_KEYSTR_KEYWORD;
      keyword_strings[2] = MSG_GDI_KEYSTR_KEYWORD;
      keyword_strings[3] = NULL;

      initialized = 1;
   }

   if (str == NULL) {
      SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_NULL_S, name));
      answer_list_add(alpp, SGE_EVENT, STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
      return STATUS_EUNKNOWN;
   }

   /* check string length first, if too long -> error */
   if (strlen(str) > str_length) {
      SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_LENGTH_U, sge_u32c(str_length)));
      answer_list_add(alpp, SGE_EVENT, STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
      return STATUS_EUNKNOWN;
   }

   /* check first character */
   i = -1;
   while ((forbidden_char = begin_chars[table][++i])) {
      if (str[0] == forbidden_char) {
         if (isprint((int) forbidden_char)) {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_FIRSTCHAR_SC,
                           begin_strings[table][i], begin_chars[table][i]));
         } else {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_FIRSTCHAR_S, 
                           begin_strings[table][i]));
         }
         answer_list_add(alpp, SGE_EVENT, STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
         return STATUS_EUNKNOWN;
      }
   }

   /* check all characters in str */
   i = -1;
   while ((forbidden_char = mid_characters[table][++i])) {
      if (strchr(str, forbidden_char)) {
         if (isprint((int) forbidden_char)) {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_MIDCHAR_SC,
                           mid_strings[table][i], mid_characters[table][i]));
         } else {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_MIDCHAR_S, 
                           mid_strings[table][i]));
         }
         answer_list_add(alpp, SGE_EVENT, STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
         return STATUS_EUNKNOWN;
      }
   }

   /* reject invalid keywords */
   i = -1;
   while ((forbidden_string = keyword[++i])) {
      if (!strcasecmp(str, forbidden_string)) {
         SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_KEYSTR_KEYWORD_SS, 
                        keyword_strings[i],
            forbidden_string));
         answer_list_add(alpp, SGE_EVENT, STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
         return STATUS_EUNKNOWN;
      }
   }

   return STATUS_OK;
}
/****** sge_c_report() *******************************************************
*  NAME
*     sge_c_report() -- process execd load report
*
*  SYNOPSIS
*     void sge_c_report(char *rhost, char *commproc, int id, lList *report_list)
*
*  FUNCTION
*
*  INPUTS
*     char *rhost
*     char *commproc
*     int id
*     lList *report_list
*
*  RESULT
*     void - nothing
*
*  NOTES
*     MT-NOTE: sge_c_report() is MT safe
*
******************************************************************************/
void sge_c_report(sge_gdi_ctx_class_t *ctx, char *rhost, char *commproc, int id, lList *report_list, monitoring_t *monitor)
{
   lListElem *hep = NULL;
   u_long32 rep_type;
   lListElem *report;
   int ret = 0;
   u_long32 this_seqno, last_seqno;
   u_long32 rversion;
   sge_pack_buffer pb;   
   bool is_pb_used = false;
   bool send_tag_new_conf = false;

   DENTER(TOP_LAYER, "sge_c_report");

   if (lGetNumberOfElem(report_list) == 0) {
      DPRINTF(("received empty report\n"));
      if (rhost != NULL) {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, rhost));
      } else {
         WARNING((SGE_EVENT, MSG_QMASTER_RECEIVED_EMPTY_LOAD_REPORT_S, "unknown"));
      }
      DRETURN_VOID;
   }

   /* accept reports only from execd's */
   if (strcmp(prognames[EXECD], commproc)) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNCOMMPROC_S, commproc));
      DRETURN_VOID;
   }

   /* do not process load reports from old execution daemons */
   rversion = lGetUlong(lFirst(report_list), REP_version);
   if (verify_request_version(NULL, rversion, rhost, commproc, id)) {
      DRETURN_VOID;
   }
   
   this_seqno = lGetUlong(lFirst(report_list), REP_seqno);

   /* need exec host for all types of reports */
   if (!(hep = host_list_locate(*object_type_get_master_list(SGE_TYPE_EXECHOST), rhost))) {
      ERROR((SGE_EVENT, MSG_GOTSTATUSREPORTOFUNKNOWNEXECHOST_S, rhost));
      DRETURN_VOID;
   }

   /* prevent old reports being proceeded 
      frequent loggings of outdated reports can be an indication 
      of too high message traffic arriving at qmaster */ 
   last_seqno = lGetUlong(hep, EH_report_seqno);

   if ((this_seqno < last_seqno && (last_seqno - this_seqno) <= 9000) &&
      !(last_seqno > 9990 && this_seqno < 10)) {
      /* this must be an old report, log and then ignore it */
      INFO((SGE_EVENT, MSG_QMASTER_RECEIVED_OLD_LOAD_REPORT_UUS, 
               sge_u32c(this_seqno), sge_u32c(last_seqno), rhost));
      DRETURN_VOID;
   }
   
   lSetUlong(hep, EH_report_seqno, this_seqno);

   /* RU: */
   /* tag all reschedule_unknown list entries we hope to 
      hear about in that job report */
   update_reschedule_unknown_list(ctx, hep);

   /*
   ** process the reports one after the other
   ** usually there will be a load report
   ** and a configuration version report
   */
   for_each(report, report_list) {
      rep_type = lGetUlong(report, REP_type);

      switch (rep_type) {
      case NUM_REP_REPORT_LOAD:
      case NUM_REP_FULL_REPORT_LOAD:
         MONITOR_ELOAD(monitor); 
         /* Now handle execds load reports */
         if (lGetUlong(hep, EH_lt_heard_from) == 0 && rep_type != NUM_REP_FULL_REPORT_LOAD) {
            host_notify_about_full_load_report(ctx, hep);
         } else {
            if (!is_pb_used) {
               is_pb_used = true;
               init_packbuffer(&pb, 1024, 0);
            }
            sge_update_load_values(ctx, rhost, lGetList(report, REP_list));

            if (mconf_get_simulate_execds()) {
               lList *master_exechost_list = *object_type_get_master_list(SGE_TYPE_EXECHOST);
               lListElem *shep;
               lListElem *simhostElem=NULL; 

               for_each(shep, master_exechost_list) {
                  simhostElem = lGetSubStr(shep, CE_name, "load_report_host", EH_consumable_config_list);
                  if (simhostElem != NULL) {
                     const char *real_host = lGetString(simhostElem, CE_stringval);
                     if (real_host != NULL && sge_hostcmp(real_host, rhost) == 0) {
                        const char* sim_host = lGetHost(shep, EH_name);
                        lListElem *clp = NULL;

                        DPRINTF(("Copy load values of %s to simulated host %s\n",
                                rhost, sim_host));

                        for_each(clp, lGetList(report, REP_list)) {
                           if (strcmp(lGetHost(clp, LR_host), SGE_GLOBAL_NAME) != 0) {
                              lSetHost(clp, LR_host, sim_host);
                           }
                        }
                        sge_update_load_values(ctx, sim_host, lGetList(report, REP_list));
                     }
                  }
               }
            }

            pack_ack(&pb, ACK_LOAD_REPORT, this_seqno, 0, NULL);
         }
         break;
      case NUM_REP_REPORT_CONF: 
         MONITOR_ECONF(monitor); 
         if (sge_compare_configuration(hep, lGetList(report, REP_list)) != 0) {
            DPRINTF(("%s: configuration on host %s is not up to date\n", SGE_FUNC, rhost));
            send_tag_new_conf = true;
         }
         break;
         
      case NUM_REP_REPORT_PROCESSORS:
         /*
         ** save number of processors
         */
         MONITOR_EPROC(monitor);
         ret = update_license_data(ctx, hep, lGetList(report, REP_list)); 
         if (ret) {
            ERROR((SGE_EVENT, MSG_LICENCE_ERRORXUPDATINGLICENSEDATA_I, ret));
         }
         break;

      case NUM_REP_REPORT_JOB:
         MONITOR_EJOB(monitor);
         if (!is_pb_used) {
            is_pb_used = true;
            init_packbuffer(&pb, 1024, 0);
         }
         process_job_report(ctx, report, hep, rhost, commproc, &pb, monitor);
         break;

      default:   
         DPRINTF(("received invalid report type %ld\n", (long) rep_type));
      }
/********************************************************
 ensure 
 - all nodes have a unique path in share tree
 - a project is not referenced more than once in share tree
 - a user appears only once in a project sub-tree
 - a user appears only once outside of a project sub-tree
 - a user does not appear as a non-leaf node
 - all leaf nodes in a project sub-tree reference a known
   user object or the reserved name "default"
 - there are no sub-projects within a project sub-tree
 - all leaf nodes not in a project sub-tree reference a known
   user or project object
 - all user leaf nodes in a project sub-tree have access
   to the project

 ********************************************************/
int check_sharetree(
lList **alpp,
lListElem *node,
lList *user_list,
lList *project_list,
lListElem *project,
lList **found  /* tmp list that contains one entry for each found u/p */
) {
   lList *children;
   lListElem *child, *remaining;
   lList *save_found = NULL;
   const char *name = lGetString(node, STN_name);
   lListElem *pep;

   DENTER(TOP_LAYER, "check_sharetree");

   /* Check for dangling or circular references. */
   if (name == NULL) {
      ERROR((SGE_EVENT, MSG_STREE_NOVALIDNODEREF_U,
             sge_u32c(lGetUlong(node, STN_id))));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return -1;
   }
   
   if ((children=lGetList(node, STN_children))) {

      /* not a leaf node */

      /* check if this is a project node */
      if ((pep=prj_list_locate(project_list, name))) {

         /* check for sub-projects (not allowed) */
         if (project) {
            ERROR((SGE_EVENT, MSG_STREE_PRJINPTJSUBTREE_SS, name, lGetString(project, PR_name)));
            answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            DEXIT;
            return -1;
         }

         /* check for projects appearing more than once */
         if (lGetElemStr(*found, STN_name, name)) {
            ERROR((SGE_EVENT, MSG_STREE_PRJTWICE_S, name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            DEXIT;
            return -1;
         }

         /* register that this project was found */
         lAddElemStr(found, STN_name, name, STN_Type);

         /* set up for project sub-tree recursion */
         project = pep;
         save_found = *found;
         *found = NULL;

         /* check for user appearing as non-leaf node */
      } else if (user_list_locate(user_list, name)) {
            ERROR((SGE_EVENT, MSG_STREE_USERNONLEAF_S, name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
            DEXIT;
            return -1;
      }

      for_each(child, children) {
         /* ensure pathes are identically inside share tree */ 
         for (remaining=lNext(child); remaining; remaining=lNext(remaining)) {
            const char *cn = lGetString(child, STN_name);
            const char *rn = lGetString(remaining, STN_name);
            if (cn == NULL || rn == NULL) {
               ERROR((SGE_EVENT, MSG_GDI_KEYSTR_NULL_S, "STN_name"));
               answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               /* restore old found list */
               if (save_found) {
                  lFreeList(found);
                  *found = save_found;
               }
               DEXIT;
               return -1;
            }
            if (!strcmp(cn, rn)) {
               ERROR((SGE_EVENT, MSG_SGETEXT_FOUND_UP_TWICE_SS, 
                  cn, lGetString(node, STN_name)));
               answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               /* restore old found list */
               if (save_found) {
                  lFreeList(found);
                  *found = save_found;
               }
               DEXIT;
               return -1;
            }  
         }

         if (check_sharetree(alpp, child, user_list, project_list, project,
               found)) {
            /* restore old found list */
            if (save_found) {
               lFreeList(found);
               *found = save_found;
            }
            DEXIT;
            return -1;
         }
      }

      /* restore old found list */
      if (save_found) {
         lFreeList(found);
         *found = save_found;
      }
   } else {
      /* a leaf node */

      /* check if this is a project node */
      if (prj_list_locate(project_list, name)) {
int sge_get_execd_port(void) {
   char* port = NULL;
   int int_port = -1;

   struct timeval now;
   static long next_timeout = 0;
   static int cached_port = -1;

   DENTER(TOP_LAYER, "sge_get_execd_port");

   sge_mutex_lock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
   
   /* check for reresolve timeout */
   gettimeofday(&now,NULL);

   if ( next_timeout > 0 ) {
      DPRINTF(("reresolve port timeout in "sge_U32CFormat"\n", sge_u32c( next_timeout - now.tv_sec)));
   }
   if ( cached_port >= 0 && next_timeout > now.tv_sec ) {
      int_port = cached_port;
      DPRINTF(("returning cached port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
      return int_port;
   }

   port = getenv("SGE_EXECD_PORT");   
   if (port != NULL) {
      int_port = atoi(port);
   }

   /* get port from services file */
   if (int_port <= 0) {
      char buffer[2048];
      struct servent se_result;
      struct servent* se_help = NULL;

      se_help = sge_getservbyname_r(&se_result, "sge_execd", buffer, sizeof(buffer));
      if (se_help != NULL) {
         int_port = ntohs(se_help->s_port);
      }
   }

   if (int_port <= 0 ) {
      ERROR((SGE_EVENT, MSG_UTI_CANT_GET_ENV_OR_PORT_SS, "SGE_EXECD_PORT" , "sge_execd"));
      if ( cached_port > 0 ) {
         WARNING((SGE_EVENT, MSG_UTI_USING_CACHED_PORT_SU, "sge_execd", sge_u32c(cached_port) ));
         int_port = cached_port; 
      } else {
         sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
         SGE_EXIT(NULL, 1);
      }
   } else {
      DPRINTF(("returning port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      /* set new timeout time */
      gettimeofday(&now,NULL);
      next_timeout = now.tv_sec + SGE_PORT_CACHE_TIMEOUT;

      /* set new port value */
      cached_port = int_port;
   } 

   sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);

   DEXIT;
   return int_port;

}
Beispiel #23
0
static void qevent_parse_command_line(int argc, char **argv, qevent_options *option_struct) {

   
   DENTER(TOP_LAYER, "qevent_parse_command_line");

   option_struct->help_option = 0;
   option_struct->testsuite_option = 0;
   option_struct->subscribe_option = 0;
   option_struct->trigger_option_count =0;

   while (*(++argv)) {
      if (!strcmp("-h", *argv) || !strcmp("-help", *argv)) {
         option_struct->help_option = 1;
         continue;
      }
      if (!strcmp("-ts", *argv) || !strcmp("-testsuite", *argv)) {
         option_struct->testsuite_option = 1;
         continue;
      }
      if (!strcmp("-sm", *argv) || !strcmp("-subscribe", *argv)) {
         option_struct->subscribe_option = 1;
         continue;
      }
      if (!strcmp("-trigger", *argv)) {
         int ok = 0;
         if (option_struct->trigger_option_count >= MAX_TRIGGER_SCRIPTS ) {
            sge_dstring_sprintf(option_struct->error_message,
                                "option \"-trigger\": only "sge_U32CFormat" trigger arguments supported\n",
                                sge_u32c(MAX_TRIGGER_SCRIPTS) );
            break; 
         }

         ++argv;
         if (*argv) {
            /* get EVENT argument */
            if (strcmp(qevent_get_event_name(QEVENT_JB_END),*argv) == 0) {
               ok = 1;
               (option_struct->trigger_option_events)[option_struct->trigger_option_count] = QEVENT_JB_END;
            } 
            if (strcmp(qevent_get_event_name(QEVENT_JB_TASK_END),*argv) == 0) {
               ok = 1;
               (option_struct->trigger_option_events)[option_struct->trigger_option_count] = QEVENT_JB_TASK_END;
            } 

            if (!ok) {
               sge_dstring_append(option_struct->error_message,"option \"-trigger\": undefined EVENT type\n");
               break; 
            }
         } else {
            sge_dstring_append(option_struct->error_message,"option \"-trigger\": found no EVENT argument\n");
            break;
         }
         ++argv;
         if (*argv) {
            /* get SCRIPT argument */

            /* check for SCRIPT file */
            if (!sge_is_file(*argv)) {
               sge_dstring_sprintf(option_struct->error_message,
                                   "option \"-trigger\": SCRIPT file %s not found\n",
                                   (*argv));
               break;
            }

            /* is file executable ? */
            if (!sge_is_executable(*argv)) {  
               sge_dstring_sprintf(option_struct->error_message,
                                   "option \"-trigger\": SCRIPT file %s not executable\n",
                                   (*argv));
               break;

            } 
 
            (option_struct->trigger_option_scripts)[option_struct->trigger_option_count] = *argv;
            (option_struct->trigger_option_count)++;
         } else {
            sge_dstring_append(option_struct->error_message,"option \"-trigger\": found no SCRIPT argument\n");
            break;
         }
         continue;
      }


      /* unkown option */
      if ( *argv[0] == '-' ) {  
         sge_dstring_append(option_struct->error_message,"unkown option: ");
         sge_dstring_append(option_struct->error_message,*argv);
         sge_dstring_append(option_struct->error_message,"\n");
      } else {
         sge_dstring_append(option_struct->error_message,"unkown argument: ");
         sge_dstring_append(option_struct->error_message,*argv);
         sge_dstring_append(option_struct->error_message,"\n");
      }
   } 
   DEXIT;
}
Beispiel #24
0
void procfs_kill_addgrpid(gid_t add_grp_id, int sig, tShepherd_trace shepherd_trace)
{
   char procnam[128];
   int i;
   int groups=0;
   u_long32 max_groups;
   gid_t *list;
#if defined(SOLARIS) || defined(ALPHA)
   int fd;
   prcred_t proc_cred;
#elif defined(LINUX)
   FILE *fp;
   char buffer[1024];
   uid_t uids[4] = {0,0,0,0};
   gid_t gids[4] = {0,0,0,0};
#endif

   DENTER(TOP_LAYER, "procfs_kill_addgrpid");

   /* quick return in case of invalid add. group id */
   if (add_grp_id == 0) {
      DEXIT;
      return;
   }

   max_groups = sge_sysconf(SGE_SYSCONF_NGROUPS_MAX);
   if (max_groups <= 0)
      if (shepherd_trace) {
         shepherd_trace(MSG_SGE_NGROUPS_MAXOSRECONFIGURATIONNECESSARY);
      }
/*
 * INSURE detects a WRITE_OVERFLOW when getgroups was invoked (LINUX).
 * Is this a bug in the kernel or in INSURE?
 */
#if defined(LINUX)
   list = (gid_t*) malloc(2*max_groups*sizeof(gid_t));
#else
   list = (gid_t*) malloc(max_groups*sizeof(gid_t));
#endif
   if (list == NULL)
      if (shepherd_trace) {
         shepherd_trace(MSG_SGE_PROCFSKILLADDGRPIDMALLOCFAILED);
      }

   pt_open();

   /* find next valid entry in procfs  */
   while ((dent = readdir(cwd))) {
      if (!dent->d_name)
         continue;
      if (!dent->d_name[0])
         continue;

      if (!strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "."))
         continue;

      if (atoi(dent->d_name) == 0)
         continue;

#if defined(SOLARIS) || defined(ALPHA)
      sprintf(procnam, "%s/%s", PROC_DIR, dent->d_name);
      if ((fd = open(procnam, O_RDONLY, 0)) == -1) {
         DPRINTF(("open(%s) failed: %s\n", procnam, strerror(errno)));
         continue;
      }
#elif defined(LINUX)
      if (!strcmp(dent->d_name, "self"))
         continue;

      sprintf(procnam, PROC_DIR "/%s/status", dent->d_name);
      if (!(fp = fopen(procnam, "r")))
         continue;
#endif

#if defined(SOLARIS) || defined(ALPHA)
      /* get number of groups */
      if (ioctl(fd, PIOCCRED, &proc_cred) == -1) {
         close(fd);
         continue;
      }

      /* get list of supplementary groups */
      groups = proc_cred.pr_ngroups;
      if (ioctl(fd, PIOCGROUPS, list) == -1) {
         close(fd);
         continue;
      }

#elif defined(LINUX)
      /* get number of groups and current uids, gids
       * uids[0], gids[0] => UID and GID
       * uids[1], gids[1] => EUID and EGID
       * uids[2], gids[2] => SUID and SGID
       * uids[3], gids[3] => FSUID and FSGID
       */
      groups = 0;
      while (fgets(buffer, sizeof(buffer), fp)) {
         char *label = NULL;
         char *token = NULL;

         label = strtok(buffer, " \t\n");
         if (label) {
            if (!strcmp("Groups:", label)) {
               while ((token = strtok((char*) NULL, " \t\n"))) {
                  list[groups]=(gid_t) atol(token);
                  groups++;
               }
            } else if (!strcmp("Uid:", label)) {
               int i = 0;

               while ((i < 4) && (token = strtok((char*) NULL, " \t\n"))) {
                  uids[i]=(uid_t) atol(token);
                  i++;
               }
            } else if (!strcmp("Gid:", label)) {
               int i = 0;

               while ((i < 4) && (token = strtok((char*) NULL, " \t\n"))) {
                  gids[i]=(gid_t) atol(token);
                  i++;
               }
            }
         }
      }
#endif

#if defined(SOLARIS) || defined(ALPHA)
      close(fd);
#elif defined(LINUX)
      FCLOSE(fp);
FCLOSE_ERROR:
#endif

      /* send each process a signal which belongs to add_grg_id */
      for (i = 0; i < groups; i++) {
         if (list[i] == add_grp_id) {
            pid_t pid;
            pid = (pid_t) atol(dent->d_name);

#if defined(LINUX)
            /* if UID, GID, EUID and EGID == 0
             *  don't kill the process!!! - it could be the rpc.nfs-deamon
             */
            if (!(uids[0] == 0 && gids[0] == 0 &&
                  uids[1] == 0 && gids[1] == 0)) {
#elif defined(SOLARIS) || defined(ALPHA)
            if (!(proc_cred.pr_ruid == 0 && proc_cred.pr_rgid == 0 &&
                  proc_cred.pr_euid == 0 && proc_cred.pr_egid == 0)) {
#endif

               if (shepherd_trace) {
                  char err_str[256];

                  sprintf(err_str, MSG_SGE_KILLINGPIDXY_UI , sge_u32c(pid), groups);
                  shepherd_trace(err_str);
               }

               kill(pid, sig);

            } else {
               if (shepherd_trace) {
                  char err_str[256];

                  sprintf(err_str, MSG_SGE_DONOTKILLROOTPROCESSXY_UI ,
                     sge_u32c(atol(dent->d_name)), groups);
                  shepherd_trace(err_str);
               }
            }

            break;
         }
      }
   }
   pt_close();
   sge_free(&list);
   DEXIT;
}

int pt_open(void)
{
   cwd = opendir(PROC_DIR);
   return !cwd;
}
/****** sge_hostname/sge_get_qmaster_port() ************************************
*  NAME
*     sge_get_qmaster_port() -- get qmaster port
*
*  SYNOPSIS
*     int sge_get_qmaster_port(bool *from_services) 
*
*  FUNCTION
*     This function returns the TCP/IP port of the qmaster daemon. It returns
*     a cached value from a previous run. The cached value will be refreshed
*     every 10 Minutes. The port may come from environment variable
*     SGE_QMASTER_PORT or the services files entry "sge_qmaster".
*
*  INPUTS
*     bool *from_services - Pointer to a boolean which is set to true
*                           when the port value comes from the services file.
*
*  RESULT
*     int - port of qmaster
*
*******************************************************************************/
#define SGE_PORT_CACHE_TIMEOUT 60*10   /* 10 Min. */
int sge_get_qmaster_port(bool *from_services) {
   char* port = NULL;
   int int_port = -1;

   struct timeval now;
   static long next_timeout = 0;
   static int cached_port = -1;
   static bool is_port_from_services_file = false;

   DENTER(GDI_LAYER, "sge_get_qmaster_port");

   sge_mutex_lock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);

   /* check for reresolve timeout */
   gettimeofday(&now,NULL);

   if (next_timeout > 0 ) {
      DPRINTF(("reresolve port timeout in "sge_U32CFormat"\n", sge_u32c( next_timeout - now.tv_sec)));
   }

   /* get port from cache when next_timeout for re-resolving is not reached */
   if (cached_port >= 0 && next_timeout > now.tv_sec) {
      int_port = cached_port;
      if (from_services != NULL) {
         *from_services = is_port_from_services_file;
      }
      DPRINTF(("returning cached port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
      DEXIT;
      return int_port;
   }

   /* get port from environment variable SGE_QMASTER_PORT */
   port = getenv("SGE_QMASTER_PORT");   
   if (port != NULL) {
      int_port = atoi(port);
      is_port_from_services_file = false;
   }

   /* get port from services file */
   if (int_port <= 0) {
      char buffer[2048];
      struct servent se_result;
      struct servent* se_help = NULL;

      se_help = sge_getservbyname_r(&se_result, "sge_qmaster", buffer, sizeof(buffer));
      if (se_help != NULL) {
         int_port = ntohs(se_help->s_port);
         if (int_port > 0) {
            /* we found a port in the services */
            is_port_from_services_file = true;
            if (from_services != NULL) {
               *from_services = is_port_from_services_file;
            }
         }
      }
   }

   if (int_port <= 0 ) {
      ERROR((SGE_EVENT, MSG_UTI_CANT_GET_ENV_OR_PORT_SS, "SGE_QMASTER_PORT", "sge_qmaster"));
      if ( cached_port > 0 ) {
         WARNING((SGE_EVENT, MSG_UTI_USING_CACHED_PORT_SU, "sge_qmaster", sge_u32c(cached_port) ));
         int_port = cached_port; 
      } else {
         sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
         SGE_EXIT(NULL, 1);
      }
   } else {
      DPRINTF(("returning port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      /* set new timeout time */
      gettimeofday(&now,NULL);
      next_timeout = now.tv_sec + SGE_PORT_CACHE_TIMEOUT;

      /* set new port value */
      cached_port = int_port;
   }

   sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
   
   DEXIT;
   return int_port;
}
Beispiel #26
0
static void qevent_start_trigger_script(int qevent_event, const char* script_file, lListElem *event ) {
   u_long jobid, taskid;
   const char* event_name;
   int pid;
   char buffer[MAX_STRING_SIZE];
   char buffer2[MAX_STRING_SIZE];

   DENTER(TOP_LAYER, "qevent_start_trigger_script");

   jobid  = lGetUlong(event, ET_intkey);
   taskid = lGetUlong(event, ET_intkey2);
   event_name = qevent_get_event_name(qevent_event);
   

   /* test if script is executable and valid file */
   if (!sge_is_file(script_file)) {
      ERROR((SGE_EVENT, "no script file: "SFQ"\n", script_file));
      DEXIT;
      return;
   }

   /* is file executable ? */
   if (!sge_is_executable(script_file)) {  
      ERROR((SGE_EVENT, "file not executable: "SFQ"\n", script_file));
      DEXIT;
      return;
   } 

   pid = fork();
   if (pid < 0) {
      ERROR((SGE_EVENT, "fork() error\n"));
      DEXIT;
      return;
   }

   if (pid > 0) {
      int exit_status;

#if !(defined(CRAY) || defined(INTERIX))
         struct rusage rusage;
#endif

#if defined(SVR3) || defined(_BSD)
         union wait status;
#else
         int status;
#endif
#if defined(CRAY) || defined(INTERIX)
         waitpid(pid, &status, 0);
#else
         wait3(&status, 0, &rusage);
#endif
#if defined(SVR3) || defined(_BSD)
         exit_status = status.w_retcode;
#else
         exit_status = status;
#endif

      if ( WEXITSTATUS(exit_status) == 0 ) {
         INFO((SGE_EVENT,"exit status of script: "sge_U32CFormat"\n", sge_u32c(WEXITSTATUS(exit_status))));
      } else {
         ERROR((SGE_EVENT,"exit status of script: "sge_U32CFormat"\n", sge_u32c(WEXITSTATUS(exit_status))));
      }
      DEXIT;
      return;
   } else {
      const char *basename = sge_basename( script_file, '/' );
      /*      SETPGRP;  */
      /*      sge_close_all_fds(NULL); */
      sprintf(buffer  ,sge_U32CFormat,sge_u32c(jobid));
      sprintf(buffer2 ,sge_U32CFormat,sge_u32c(taskid)); 
      execlp(script_file, basename, event_name, buffer, buffer2, (char *)0);
   }
   exit(1);
}
Beispiel #27
0
int
krb_renew_tgts(
lList *joblist 
) {
   krb5_error_code rc;
   static u_long32 next_time = 0;
   u_long32 now = sge_get_gmt();
   lListElem *job;
   krb5_context context = krb_context();
   krb5_timestamp time_now;
   krb_global_data_t *gsd = krb_gsd();

   DENTER(TOP_LAYER, "krb_renew_tgts");


   if ((now = sge_get_gmt())<next_time) {
      DEXIT;
      return 0;
   }

   if ((rc = krb5_timeofday(context, &time_now))) {
      ERROR((SGE_EVENT, MSG_KRB_KRB5TIMEOFDAYFAILEDX_S ,
	     error_message(rc)));
      DEXIT;
      return -1;
   }

   /* renew job TGT's */

   for_each(job, joblist) {

      krb5_error_code rc;
      krb5_creds ** tgt_creds = NULL;
      krb5_data tgtbuf;
      const char *tgtstr = NULL;

      tgtbuf.length = 0;

      /* get TGT out of job entry */

      if ((tgtstr = lGetString(job, JB_tgt))) {

         tgtbuf.data = krb_str2bin(tgtstr, NULL, &tgtbuf.length);

         if (tgtbuf.length) {

            /* decrypt the TGT using the daemon key */

            if ((rc = krb_decrypt_tgt_creds(&tgtbuf, &tgt_creds))) {

               ERROR((SGE_EVENT, MSG_KRB_COULDNOTDECRYPTTGTFORJOBXY_DS,
                      sge_u32c(lGetUlong(job, JB_job_number)),
                      error_message(rc)));

            }

	    if (rc == 0 && tgt_creds) {

               krb5_creds *tgt = *tgt_creds;

	       /*
		* If TGT is renewable and TGT expiration time is not past
		* and is within the SGE renewal threshold and the TGT
		* renewal period is not past, then renew the TGT
		*/

	       if (tgt->ticket_flags & KDC_OPT_RENEWABLE &&
		   tgt->times.endtime > time_now &&
		   tgt->times.renew_till > time_now &&
		   tgt->times.endtime < time_now + gsd->tgt_renew_threshold) {

		  krb5_creds *new_creds[2];
		  krb5_creds creds;

		  memset(new_creds, 0, sizeof(new_creds));
		  memset(&creds, 0 ,sizeof(creds));

		  /* renew the TGT */

		  if (((rc = krb5_copy_principal(context, (*tgt_creds)->server,
			&creds.server))) ||
		      ((rc = krb5_copy_principal(context, (*tgt_creds)->client,
			&creds.client))) ||
		      ((rc = krb5_get_cred_via_tkt(context, tgt,
		        FLAGS2OPTS(tgt->ticket_flags)|KDC_OPT_RENEW,
		        tgt->addresses, &creds, &new_creds[0])))) {

		     ERROR((SGE_EVENT, MSG_KRB_COULDNOTRENEWTGTFORJOBXY_DS, 
                  sge_u32c(lGetUlong(job, JB_job_number)),
			         error_message(rc)));

		  }

		  krb5_free_cred_contents(context, &creds);

		  if (rc == 0) {
                     krb5_data outbuf;

		     /* store the new TGT back into the job entry */

		     outbuf.length = 0;

		     if ((rc = krb_encrypt_tgt_creds(new_creds, &outbuf))) {

			ERROR((SGE_EVENT, MSG_KRB_COULDNOTECRYPTTGTFORJOBXY_DS,
                sge_u32c(lGetUlong(job, JB_job_number)),
			       error_message(rc)));

		     } else {

			lSetString(job, JB_tgt,
			      krb_bin2str(outbuf.data, outbuf.length, NULL));
                     }

		     /* if we are called by the execd, also store the
			new TGT in the credentials cache of the user */

                     if (!strcmp(prognames[EXECD], gsd->progname)) {
                         
                        int retries = MAX_NIS_RETRIES;
			struct passwd *pw = NULL;

			while (retries-- && !pw)
			   pw = getpwnam(lGetString(job, JB_owner));

			if (pw) {

			   if ((krb_store_forwarded_tgt(pw->pw_uid,
				    lGetUlong(job, JB_job_number),
				    new_creds))) {

			       ERROR((SGE_EVENT, MSG_KRB_COULDNOTSTORERENEWEDTGTFORXJOBY_SD,
                                      lGetString(job, JB_owner),
				      sge_u32c(lGetUlong(job, JB_job_number))));
                           }

                        } else {

			   ERROR((SGE_EVENT, MSG_KRB_COULDNOTGETUSERIDFORXY_SD , lGetString(job, JB_owner),
				  sge_u32c(lGetUlong(job, JB_job_number))));
                        }
		     }

		     if (outbuf.length)
			krb5_xfree(outbuf.data);

		  }

      if (!mconf_get_simulate_jobs()) {
         job_write_spool_file(job, 0, NULL, SPOOL_DEFAULT);;
      }

		  if (new_creds[0])
		     krb5_free_creds(context, new_creds[0]);

	       }
	    }
         }

         if (tgtbuf.length)
            krb5_xfree(tgtbuf.data);

         if (tgt_creds)
            krb5_free_tgt_creds(context, tgt_creds);

      }

   }
Beispiel #28
0
static void qevent_testsuite_mode(sge_evc_class_t *evc) 
{
#ifndef QEVENT_SHOW_ALL
   u_long32 timestamp;
   lCondition *where =NULL;
   lEnumeration *what = NULL;
 
   const int job_nm[] = {       
         JB_job_number,
         JB_host,
         JB_category,            
         JB_project, 
         JB_ja_tasks,
         JB_ja_structure,
         JB_ja_n_h_ids,
         JB_ja_u_h_ids,
         JB_ja_s_h_ids,
         JB_ja_o_h_ids,   
         JB_ja_template,
         NoName
      };

   const int jat_nm[] = {     
      JAT_status, 
      JAT_task_number,
      NoName
   };  
#endif
   
   DENTER(TOP_LAYER, "qevent_testsuite_mode");

   sge_mirror_initialize(evc, EV_ID_ANY, "qevent", true,
                         NULL, NULL, NULL, NULL, NULL);

#ifdef QEVENT_SHOW_ALL
   sge_mirror_subscribe(evc, SGE_TYPE_ALL, print_event, NULL, NULL, NULL, NULL);
#else /* QEVENT_SHOW_ALL */
   where = NULL; 
   what =  lIntVector2What(JB_Type, job_nm); 
   sge_mirror_subscribe(evc, SGE_TYPE_JOB, print_jatask_event, NULL, NULL, where, what);
   lFreeWhere(&where);
   lFreeWhat(&what);
   
   where = NULL; 
   what = lIntVector2What(JAT_Type, jat_nm); 
   sge_mirror_subscribe(evc, SGE_TYPE_JATASK, print_jatask_event, NULL, NULL, where, what);
   lFreeWhere(&where);
   lFreeWhat(&what);
 
   /* we want a 5 second event delivery interval */
   evc->ec_set_edtime(evc, 5);

   /* and have our events flushed immediately */
   evc->ec_set_flush(evc, sgeE_JATASK_MOD, true, 1);
   evc->ec_set_flush(evc, sgeE_JOB_FINAL_USAGE, true, 1);
   evc->ec_set_flush(evc, sgeE_JOB_ADD, true, 1);
   evc->ec_set_flush(evc, sgeE_JOB_DEL, true, 1);

#endif /* QEVENT_SHOW_ALL */
   
   while (!shut_me_down) {
      sge_mirror_error error = sge_mirror_process_events(evc);
      if (error == SGE_EM_TIMEOUT && !shut_me_down) {
         sleep(10);
         continue;
      }

#ifndef QEVENT_SHOW_ALL
      timestamp = sge_get_gmt();
      fprintf(stdout,"ECL_STATE (jobs_running=%ld:jobs_registered=%ld:ECL_TIME="sge_U32CFormat")\n",
              Global_jobs_running,Global_jobs_registered,sge_u32c(timestamp));
      fflush(stdout);  
#endif
   }

   sge_mirror_shutdown(evc);

   DEXIT;
}
Beispiel #29
0
/*-------------------------------------------------------------------------*/
int main(int argc, char **argv)
{
   int ret;
   int my_pid;
   int ret_val;
   int printed_points = 0;
   int max_enroll_tries;
   static char tmp_err_file_name[SGE_PATH_MAX];
   time_t next_prof_output = 0;
   int execd_exit_state = 0;
   lList **master_job_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *alp = NULL;

   DENTER_MAIN(TOP_LAYER, "execd");

#if defined(LINUX)
   gen_procList ();
#endif

   prof_mt_init();

   set_thread_name(pthread_self(),"Execd Thread");

   prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); 
   prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); 

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* This needs a better solution */
   umask(022);
      
   /* Initialize path for temporary logging until we chdir to spool */
   my_pid = getpid();
   sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid));
   log_state_set_log_file(tmp_err_file_name);

   /* exit func for SGE_EXIT() */
   sge_sig_handler_in_main_loop = 0;
   sge_setup_sig_handlers(EXECD);

   if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }
   ctx->set_exit_func(ctx, execd_exit_func);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /* prepare daemonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_prepare(ctx);
   }

   if ((ret=sge_occupy_first_three())>=0) {
      CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret));
      SGE_EXIT((void**)&ctx, 1);
   }

   lInit(nmv);

   /* unset XAUTHORITY if set */
   if (getenv("XAUTHORITY") != NULL) {
      sge_unsetenv("XAUTHORITY");
   }

   parse_cmdline_execd(argv);   
   
   /* exit if we can't get communication handle (bind port) */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[EXECD],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;

      if (max_enroll_tries <= 0 || shut_me_down) {
         /* exit after 30 seconds */
         if (printed_points != 0) {
            printf("\n");
         }
         CRITICAL((SGE_EVENT, MSG_COM_ERROR));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[EXECD],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
        if (max_enroll_tries < 27) {
           printf(".");
           printed_points++;
           fflush(stdout);
        }
      }
   }

   if (printed_points != 0) {
      printf("\n");
   }

   /*
    * now the commlib up and running. Set execd application status function 
    * ( commlib callback function for qping status information response 
    *   messages (SIRM) )
    */
   ret_val = cl_com_set_status_func(sge_execd_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)) );
   }

   /* test connection */
   {
      cl_com_SIRM_t* status = NULL;
      ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx),
                                               (char *)ctx->get_master(ctx, true),
                                               (char*)prognames[QMASTER], 1, &status);
      if (ret_val != CL_RETVAL_OK) {
         ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
         ERROR((SGE_EVENT, MSG_CONF_NOCONFBG));
      }
      cl_com_free_sirm_message(&status);
   }
   
   /* finalize daeamonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_finalize(ctx);
   }

   /* daemonizes if qmaster is unreachable */   
   sge_setup_sge_execd(ctx, tmp_err_file_name);

   /* are we using qidle or not */
   sge_ls_qidle(mconf_get_use_qidle());
   sge_ls_gnu_ls(1);
   
   DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle()));

   /* test load sensor (internal or external) */
   {
      lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx));
      lFreeList(&report_list);
   }
   
   /* here we have to wait for qmaster registration */
   while (sge_execd_register_at_qmaster(ctx, false) != 0) {
      if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) {
         /* This is no error */
         DPRINTF(("*****  got SGE_COM_ACCESS_DENIED from qmaster  *****\n"));
      }
      if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) {
         execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE;
         break;
      }
      if (shut_me_down != 0) {
         break;
      }
      sleep(30);
   }

   /* 
    * Terminate on SIGTERM or hard communication error
    */
   if (execd_exit_state != 0 || shut_me_down != 0) {
      sge_shutdown((void**)&ctx, execd_exit_state);
      DRETURN(execd_exit_state);
   }

   /*
    * We write pid file when we are connected to qmaster. Otherwise an old
    * execd might overwrite our pidfile.
    */
   sge_write_pid(EXECD_PID_FILE);

   /*
    * At this point we are sure we are the only sge_execd and we are connected
    * to the current qmaster. First we have to report any reaped children
    * that might exist.
    */
   starting_up();

   /*
    * Log a warning message if execd hasn't been started by a superuser
    */
   if (!sge_is_start_user_superuser()) {
      WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT));
   }   

#ifdef COMPILE_DC
   if (ptf_init()) {
      CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF));
      SGE_EXIT((void**)&ctx, 1);
   }
   INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF));
#endif

   master_job_list = object_type_get_master_list(SGE_TYPE_JOB);
   *master_job_list = lCreateList("Master_Job_List", JB_Type);
   job_list_read_from_disk(master_job_list, "Master_Job_List",
                           0, SPOOL_WITHIN_EXECD, 
                          job_initialize_job);
   

   /* clean up jobs hanging around (look in active_dir) */
   clean_up_old_jobs(ctx, 1);
   execd_trash_load_report();
   sge_set_flush_lr_flag(true);

   sge_sig_handler_in_main_loop = 1;

   if (thread_prof_active_by_id(pthread_self())) {
      prof_start(SGE_PROF_CUSTOM1, NULL);
      prof_start(SGE_PROF_CUSTOM2, NULL);
      prof_start(SGE_PROF_GDI_REQUEST, NULL);
   } else {
      prof_stop(SGE_PROF_CUSTOM1, NULL);
      prof_stop(SGE_PROF_CUSTOM2, NULL);
      prof_stop(SGE_PROF_GDI_REQUEST, NULL);
   }

   PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1);

   /* Start dispatching */
   execd_exit_state = sge_execd_process_messages(ctx);


   /*
    * This code is only reached when dispatcher terminates and execd goes down.
    */

   /* log if we received SIGPIPE signal */
   if (sge_sig_handler_sigpipe_received) {
       sge_sig_handler_sigpipe_received = 0;
       INFO((SGE_EVENT, "SIGPIPE received\n"));
   }

#if defined(LINUX)
   free_procList();
#endif
   lFreeList(master_job_list);

   PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1);
   if (prof_is_active(SGE_PROF_ALL)) {
     time_t now = (time_t)sge_get_gmt();

      if (now > next_prof_output) {
         prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n");
         prof_reset(SGE_PROF_ALL,NULL);
         next_prof_output = now + 60;
      }
   }
   sge_prof_cleanup();

   sge_shutdown((void**)&ctx, execd_exit_state);
   DRETURN(execd_exit_state);
}