コード例 #1
0
ファイル: sge_log.c プロジェクト: HPCKP/gridengine
/****** uti/sge_log/sge_do_log() ***********************************************
*  NAME
*     sge_do_log() -- Write message to log file 
*
*  SYNOPSIS
*     static void sge_do_log(u_long32 me, const char *progname,
*                            const char *unqualified_hostname,
*                            int aLevel, const char *aMessage)
*
*  FUNCTION
*     Appends one line of the form
*        <time>|<progname>|<unqualified_hostname>|<aLevel>|<aMessage>
*     to the file returned by log_state_get_log_file(). The file is opened
*     (and created if it does not exist) for every call and closed again
*     afterwards. Nothing is done unless "me" is QMASTER, EXECD, SCHEDD or
*     SHADOWD. If the line cannot be written completely, a diagnostic is
*     printed to stderr.
*
*  INPUTS
*     u_long32 me                      - program id of the caller
*     const char *progname             - program name, padded/truncated to
*                                        6 characters in the log line
*     const char *unqualified_hostname - host name written to the log line
*     int aLevel                       - log level, printed as one character
*     const char *aMessage             - log message
*
*  RESULT
*     void - none
*
*  NOTES
*     MT-NOTE: sge_do_log() is MT safe.
*
*******************************************************************************/
static void sge_do_log(u_long32 me, const char* progname, const char* unqualified_hostname,
                       int aLevel, const char *aMessage)
{
   int fd;

   if (me != QMASTER && me != EXECD && me != SCHEDD && me != SHADOWD) {
      return;
   }

   fd = SGE_OPEN3(log_state_get_log_file(), O_WRONLY | O_APPEND | O_CREAT, 0666);
   if (fd >= 0) {
      char msg2log[4*MAX_STRING_SIZE];
      dstring msg;
      size_t len;

      sge_dstring_init(&msg, msg2log, sizeof(msg2log));

      append_time((time_t)sge_get_gmt(), &msg, false);

      sge_dstring_sprintf_append(&msg, "|%6.6s|%s|%c|%s\n",
              progname,
              unqualified_hostname,
              aLevel,
              aMessage);

      len = strlen(msg2log);
      if (write(fd, msg2log, len) != (ssize_t)len) {
         /*
          * Remember errno right away: the calls needed to build the
          * diagnostic below may modify it, and the evaluation order of
          * function arguments is unspecified.
          */
         int write_errno = errno;

         /* we are in error logging here - the only chance to log this problem
          * might be to write it to stderr. The log line has been handed to
          * write() already, so "msg" can be reused for the error text.
          */
         fprintf(stderr, "can't log to file %s: %s\n", log_state_get_log_file(), sge_strerror(write_errno, &msg));
      }
      close(fd);
   }
} /* sge_do_log() */
コード例 #2
0
ファイル: qevent.c プロジェクト: HPCKP/gridengine
/*
 * Print the product name returned by feature_get_product_name(), the
 * usage heading and the qevent command line help to stdout.
 */
static void qevent_show_usage(void) {
   char version_buf[256];
   dstring version;

   sge_dstring_init(&version, version_buf, sizeof(version_buf));

   printf("%s\n", feature_get_product_name(FS_SHORT_VERSION, &version));
   printf("%s\n", MSG_SRC_USAGE);

   /* synopsis lines */
   fputs("qevent [-h|-help] -ts|-testsuite\n"
         "qevent [-h|-help] -sm|-subscribe\n"
         "qevent [-h|-help] -trigger EVENT SCRIPT [ -trigger EVENT SCRIPT, ... ]\n\n",
         stdout);

   /* option descriptions */
   fputs("   -h,  -help             show usage\n"
         "   -ts, -testsuite        run in testsuite mode\n"
         "   -sm, -subscribe        run in subscribe mode\n"
         "   -trigger EVENT SCRIPT  start SCRIPT (executable) when EVENT occurs\n"
         "\n",
         stdout);

   /* arguments handed to a trigger script */
   fputs("SCRIPT - path to a executable shell script\n"
         "         1. command line argument: event name\n"
         "         2. command line argument: jobid\n"
         "         3. command line argument: taskid\n",
         stdout);

   /* event names accepted by -trigger */
   fputs("EVENT  - One of the following event category:\n", stdout);
   printf("         %s      - job end event\n", qevent_get_event_name(QEVENT_JB_END));
   printf("         %s - job task end event\n", qevent_get_event_name(QEVENT_JB_TASK_END));
}
コード例 #3
0
ファイル: sge_log.c プロジェクト: ricrogz/gridscheduler
/****** uti/sge_log/sge_do_log() ***********************************************
*  NAME
*     sge_do_log() -- Write message to log file 
*
*  SYNOPSIS
*     static void sge_do_log(u_long32 me, const char *progname,
*                            const char *unqualified_hostname,
*                            int aLevel, const char *aMessage)
*
*  FUNCTION
*     Appends one line of the form
*        <time>|<progname>|<unqualified_hostname>|<aLevel>|<aMessage>
*     to the file returned by log_state_get_log_file(). The file is opened
*     (and created if it does not exist) for every call and closed again
*     afterwards. Nothing is done unless "me" is QMASTER, EXECD, SCHEDD or
*     SHADOWD. If the line cannot be written completely, a diagnostic is
*     printed to stderr.
*
*  INPUTS
*     u_long32 me                      - program id of the caller
*     const char *progname             - program name, padded/truncated to
*                                        6 characters in the log line
*     const char *unqualified_hostname - host name written to the log line
*     int aLevel                       - log level, printed as one character
*     const char *aMessage             - log message
*
*  RESULT
*     void - none
*
*  NOTES
*     MT-NOTE: sge_do_log() is MT safe.
*
*******************************************************************************/
static void sge_do_log(u_long32 me, const char* progname, const char* unqualified_hostname,
                       int aLevel, const char *aMessage) 
{
   int fd;

   if (me == QMASTER || me == EXECD || me == SCHEDD || me == SHADOWD) {
      if ((fd = SGE_OPEN3(log_state_get_log_file(), O_WRONLY | O_APPEND | O_CREAT, 0666)) >= 0) {
         char msg2log[4*MAX_STRING_SIZE];
         dstring msg;
         size_t len;

         sge_dstring_init(&msg, msg2log, sizeof(msg2log));

         append_time((time_t)sge_get_gmt(), &msg, false); 

         sge_dstring_sprintf_append(&msg, "|%6.6s|%s|%c|%s\n",
                 progname,
                 unqualified_hostname,
                 aLevel,
                 aMessage);

         len = strlen(msg2log);
         if (write(fd, msg2log, len) != (ssize_t)len) {
            /*
             * A failed or short write must not go unnoticed. This is the
             * logging path itself, so stderr is the only place left to
             * report to. errno is saved first because the calls below may
             * change it; it is printed numerically so that no
             * non-reentrant helper is needed.
             */
            int write_errno = errno;

            fprintf(stderr, "can't log to file %s: errno %d\n",
                    log_state_get_log_file(), write_errno);
         }
         close(fd);
      }
   }   

   return;
} /* sge_do_log() */
コード例 #4
0
/*
 * Adds the attribute "name" to xml_elem; "value" is converted to text
 * with the "%f" conversion.
 */
void xml_addAttributeD(lListElem *xml_elem, const char *name, double value) {
    /*
     * "%f" never switches to exponent notation, so large magnitudes need
     * many characters: sign + up to 309 integer digits (IEEE-754 double)
     * + '.' + 6 fraction digits + NUL = 318 bytes. The former 20 byte
     * buffer could not hold any value with |value| >= 1e12.
     */
    char buffer[320] = "";
    dstring string;

    sge_dstring_init(&string, buffer, sizeof(buffer));
    xml_addAttribute(xml_elem, name, sge_dstring_sprintf(&string, "%f", value));
}
コード例 #5
0
/*
 * Test driver: runs check_all() and the performance tests with a dynamic
 * dstring first and, only if those checks passed, with a static dstring.
 * The exit code reflects the result of the last check_all() call that ran.
 */
int main(int argc, char *argv[])
{
   char    static_buffer[MAX_STRING_SIZE];
   dstring static_dstring;
   dstring dynamic_dstring = DSTRING_INIT;
   bool all_passed;

   sge_dstring_init(&static_dstring, static_buffer, STATIC_SIZE);

   /* pass 1: dynamically growing dstring */
   puts("running all checks with a dynamic dstring");
   all_passed = check_all(&dynamic_dstring);
   test_dstring_performance(&dynamic_dstring, 100000, "test_data");
   test_dstring_performance_dynamic(100000, "test_data");
   printf("%s\n", sge_dstring_get_string(&dynamic_dstring));

   /* pass 2: dstring backed by a fixed buffer */
   if (all_passed) {
      printf("\n\nrunning all checks with a static dstring of length %d\n",
             STATIC_SIZE);
      all_passed = check_all(&static_dstring);
      test_dstring_performance(&static_dstring, 100000, "test_data");
      test_dstring_performance_static(100000, "test_data");
      printf("%s\n", sge_dstring_get_string(&static_dstring));
   }

   /* only the dynamic dstring is released here */
   sge_dstring_free(&dynamic_dstring);

   if (all_passed) {
      return EXIT_SUCCESS;
   }
   return EXIT_FAILURE;
}
コード例 #6
0
ファイル: shadowd.c プロジェクト: StephenDennis/gridengine
/*---------------------------------------------------------------------
 * parse_cmdline_shadowd
 *
 * Checks the command line of sge_shadowd. The only invocation handled
 * here is exactly one argument "-help": the product name, the usage
 * line and the description of the help option are printed to stdout.
 *
 * Returns 1 if the help text was printed, 0 otherwise.
 *---------------------------------------------------------------------*/
static int parse_cmdline_shadowd(
int argc,
char **argv 
) {
   char buffer[256];
   dstring ds;
   int help_requested;

   DENTER(TOP_LAYER, "parse_cmdline_shadowd");

   sge_dstring_init(&ds, buffer, sizeof(buffer));

   /*
   ** anything but a single "-help" argument is not handled here
   */
   help_requested = (argc == 2) && (strcmp(argv[1], "-help") == 0);
   if (!help_requested) {
      DRETURN(0);
   }

#define PRINTITD(o,d) print_option_syntax(stdout,o,d)

   printf("%s\n", feature_get_product_name(FS_SHORT_VERSION, &ds));
   printf("%s sge_shadowd [options]\n", MSG_GDI_USAGE_USAGESTRING);
   PRINTITD(MSG_GDI_USAGE_help_OPT , MSG_GDI_UTEXT_help_OPT );

   DRETURN(1);
}
コード例 #7
0
ファイル: qmon_main.c プロジェクト: HPCKP/gridengine
/*-------------------------------------------------------------------------*/
/*
 * Prints the qmon version line and the (localized) command line help to
 * stdout.
 *
 * w - widget handed to XmtLocalize2() for looking up the translations
 *
 * The localized texts come from a message catalog, not from this source
 * file. They are therefore printed through "%s" and never used as the
 * printf format itself, so a '%' inside a translation cannot be
 * interpreted as a conversion specification.
 */
static void qmonUsage(Widget w)
{
   DENTER(GUI_LAYER, "qmonUsage");

   printf("%s %s\n", GE_SHORTNAME, GDI_VERSION);
   printf("%s", XmtLocalize2(w, "usage: qmon\n", "qmon_usage", "usageTitle"));
   printf("	[-cmap]                           ");
   printf("%s", XmtLocalize2(w, "use own colormap\n", "qmon_usage", "cmapOption"));
   printf("	[-help]                           ");
   printf("%s", XmtLocalize2(w, "show this information and exit\n", 
                           "qmon_usage", "helpOption"));
   printf("	[-fontFamily {big|medium|small}]  ");
   printf("%s", XmtLocalize2(w, "use small/medium/big fonts\n", 
                           "qmon_usage", "fontFamilyOption"));
   printf("	[-nologo]                         ");
   printf("%s", XmtLocalize2(w, "startup without logo\n",
                           "qmon_usage", "nologoOption"));
   printf("%s", XmtLocalize2(w, "Additionally the default X commandline switches can be used.\nFor further information see the manual page X(1)\n", 
          "qmon_usage", "X11OptionInfo"));

   DEXIT;
}
コード例 #8
0
ファイル: tmpdir.c プロジェクト: HPCKP/gridengine
/*
 * Recursively removes the directory
 *    <dir>/<jobid>.<jataskid>.<queue_name>
 *
 * The removal is done between sge_switch2start_user() and
 * sge_switch2admin_user().
 *
 * dir        - parent directory; if NULL nothing is done
 * job_owner  - not used by this function
 * jobid      - job id used in the directory name
 * jataskid   - array task id used in the directory name
 * queue_name - queue name used in the directory name
 *
 * Returns 0 on success (or if dir is NULL), -1 if the path does not fit
 * into the path buffer or if sge_rmdir() reports an error.
 */
int sge_remove_tmpdir(const char *dir, const char *job_owner, u_long32 jobid, u_long32 jataskid, const char *queue_name)
{
   stringT tmpstr;
   char err_str_buffer[1024];
   dstring err_str;
   int path_len;

   DENTER(TOP_LAYER, "sge_remove_tmpdir");

   sge_dstring_init(&err_str, err_str_buffer, sizeof(err_str_buffer));

   if (!dir) {
      DRETURN(0);
   }

   /*
    * Build the path with a bounded write. A truncated path must never be
    * handed to the recursive delete below - it would name a different
    * directory than the one that was requested.
    */
   path_len = snprintf(tmpstr, sizeof(tmpstr), "%s/"sge_u32"."sge_u32".%s", dir, jobid, jataskid, queue_name);
   if (path_len < 0 || (size_t)path_len >= sizeof(tmpstr)) {
      ERROR((SGE_EVENT, MSG_FILE_RECURSIVERMDIR_SS, 
             dir, "path name too long"));
      DRETURN(-1);
   }

   DPRINTF(("recursively unlinking \"%s\"\n", tmpstr));
   sge_switch2start_user();
   if (sge_rmdir(tmpstr, &err_str)) {
      /* err_str wraps err_str_buffer, so the error text can be read from it */
      ERROR((SGE_EVENT, MSG_FILE_RECURSIVERMDIR_SS, 
             tmpstr, err_str_buffer));
      sge_switch2admin_user();
      DRETURN(-1);
   }
   sge_switch2admin_user();

   DRETURN(0);
}
コード例 #9
0
ファイル: qevent.c プロジェクト: HPCKP/gridengine
/*
 * Event callback that starts the configured trigger scripts:
 *    sgeE_JOB_DEL    -> QEVENT_JB_END
 *    sgeE_JATASK_DEL -> QEVENT_JB_TASK_END
 * All other event types are currently ignored.
 *
 * Returns SGE_EMA_FAILURE if the object type is SGE_TYPE_GLOBAL_CONFIG
 * (used to test the error handling of callbacks), SGE_EMA_OK otherwise.
 */
static sge_callback_result
analyze_jatask_event(sge_evc_class_t *evc, object_description *object_base,sge_object_type type, 
                     sge_event_action action, lListElem *event, void *clientdata)
{
   char text_storage[1024];
   dstring text;

   sge_dstring_init(&text, text_storage, sizeof(text_storage));

   if (lGetPosViaElem(event, ET_type, SGE_NO_ABORT) >= 0) {
      const u_long32 event_type = lGetUlong(event, ET_type);

      if (event_type == sgeE_JATASK_MOD) {
         lList *task_list = lGetList(event, ET_new_version);
         lListElem *first_task = lFirst(task_list);
         u_long status = lGetUlong(first_task, JAT_status);
         int is_running = (status == JRUNNING) || (status == JTRANSFERING);

         if (is_running) {
            /* nothing to do for a started task */
         }
      }

      /* sgeE_JOB_FINAL_USAGE and sgeE_JOB_ADD: no action */

      if (event_type == sgeE_JOB_DEL) {
         qevent_trigger_scripts(QEVENT_JB_END, qevent_get_option_struct(), event);
      }

      if (event_type == sgeE_JATASK_DEL) {
         qevent_trigger_scripts(QEVENT_JB_TASK_END, qevent_get_option_struct(), event);
      }
   }

   /* create a callback error to test error handling */
   return (type == SGE_TYPE_GLOBAL_CONFIG) ? SGE_EMA_FAILURE : SGE_EMA_OK;
}
コード例 #10
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Last resort diagnostics: appends a line with the current time, uid,
 * euid, pid and the text "s" to the file /tmp/shepherd.<pid>.
 * If that file cannot be opened the message is dropped silently.
 */
static void shepherd_panic(const char *s)
{
   char panic_path[255];
   FILE *out;

   sprintf(panic_path, "/tmp/shepherd."pid_t_fmt, getpid());

   out = fopen(panic_path, "a");
   if (out != NULL) {
      char time_storage[128];
      dstring time_ds;

      sge_dstring_init(&time_ds, time_storage, sizeof(time_storage));
      fprintf(out, "%s ["uid_t_fmt":"uid_t_fmt" "pid_t_fmt"]: PANIC: %s\n",
              sge_ctime(0, &time_ds), getuid(), geteuid(), getpid(), s);
      FCLOSE(out);
   }

FCLOSE_ERROR:
   return;
}
コード例 #11
0
/*
 * Measures how long it takes to set up a dstring on a stack buffer and
 * to format "<data>/<data>" into it, repeated "max" times, and prints the
 * elapsed wall clock time in seconds.
 */
static void test_dstring_performance_static(int max, const char *data)
{
   struct timeval start;
   struct timeval stop;
   double usec_part;
   double elapsed;
   int round = 0;

   gettimeofday(&start, NULL);
   while (round < max) {
      char storage[MAX_STRING_SIZE];
      dstring work;

      sge_dstring_init(&work, storage, sizeof(storage));
      sge_dstring_sprintf(&work, "%s/%s", data, data);
      round++;
   }
   gettimeofday(&stop, NULL);

   /* combine the second and microsecond differences into seconds */
   usec_part = stop.tv_usec - start.tv_usec;
   elapsed = stop.tv_sec - start.tv_sec + (usec_part/1000000);

   printf("%d static dstring creations took %.2fs\n", max, elapsed);
}
コード例 #12
0
/*
 * Callback that shows the "About Qmon" dialog.
 *
 * The dialog text is filled with the user name, the qualified host name
 * and the default cell taken from "ctx", and with a version string built
 * from GE_LONGNAME and GDI_VERSION.
 *
 * w   - widget used as parent for the message dialog and for localization
 * cld - not used
 * cad - not used
 */
void qmonAboutMsg(Widget w, XtPointer cld, XtPointer cad)
{
   char buffer[256];
   const char* username = ctx->get_username(ctx);
   const char* qualified_hostname = ctx->get_qualified_hostname(ctx);
   const char* default_cell = ctx->get_default_cell(ctx);

   DENTER(TOP_LAYER, "qmonAboutMsg");

   /* bounded write: the version text can never overrun the buffer */
   snprintf(buffer, sizeof(buffer), "%s %s", GE_LONGNAME, GDI_VERSION);  
   XmtDisplayMessage(w, "about_msg", "Help", header, 
                     "About Qmon", NULL, None, XmDIALOG_MODELESS,
                     XmDIALOG_INFORMATION, 
                     username, qualified_hostname, 
                     buffer, 
                     default_cell, 
                     XmtLocalize(w, mailto, "mailto_msg"), SFLN_ELN); 
   DEXIT;
}
コード例 #13
0
ファイル: qevent.c プロジェクト: HPCKP/gridengine
/*
 * Event callback that prints the text representation of "event" (as
 * produced by event_text()) on a line of its own to stdout and flushes
 * stdout.
 *
 * Returns SGE_EMA_FAILURE if the object type is SGE_TYPE_GLOBAL_CONFIG
 * (used to test the error handling of callbacks), SGE_EMA_OK otherwise.
 */
static sge_callback_result 
print_event(sge_evc_class_t *evc, object_description *object_base, sge_object_type type, 
            sge_event_action action, lListElem *event, void *clientdata)
{
   sge_callback_result result = SGE_EMA_OK;
   char text_storage[1024];
   dstring text;

   DENTER(TOP_LAYER, "print_event");

   sge_dstring_init(&text, text_storage, sizeof(text_storage));

   printf("%s\n", event_text(event, &text));
   fflush(stdout);

   /* create a callback error to test error handling */
   if (type == SGE_TYPE_GLOBAL_CONFIG) {
      result = SGE_EMA_FAILURE;
   }

   DEXIT;
   return result;
}
コード例 #14
0
/****** sge_gdi_packet/sge_gdi_packet_verify_version() ************************
*  NAME
*     sge_gdi_packet_verify_version() -- verify packet version
*
*  SYNOPSIS
*     bool sge_gdi_packet_verify_version(sge_gdi_packet_class_t *packet,
*                                        lList **alpp)
*
*  FUNCTION
*     This function is the replacement for the function
*     verify_request_version() which was part of the source code
*     before the packet structure was introduced.
*
*     It compares the version information of the provided "packet"
*     with the compiled-in version number GRM_GDI_VERSION.
*
*     If the versions differ, the version of the packet is looked up in
*     GRM_GDI_VERSION_ARRAY. If it is found there, the warning names the
*     release of the client, otherwise it shows the numeric version. In
*     both cases the message is also added to the answer list with the
*     status STATUS_EVERSION.
*
*  INPUTS
*     sge_gdi_packet_class_t *packet - packet
*     lList **alpp                   - answer list
*
*  RESULT
*     bool - error state
*        true  - same version
*        false - different version numbers
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_verify_version() is not MT safe
******************************************************************************/
bool
sge_gdi_packet_verify_version(sge_gdi_packet_class_t * packet, lList **alpp)
{
   const u_long32 packet_version = packet->version;
   const vdict_t *known_versions = GRM_GDI_VERSION_ARRAY;
   const vdict_t *entry;
   char *client_release = NULL;
   char product_storage[256];
   dstring product;

   DENTER(TOP_LAYER, "sge_gdi_packet_verify_version");

   if (packet_version == GRM_GDI_VERSION) {
      DRETURN(true);
   }

   sge_dstring_init(&product, product_storage, sizeof(product_storage));

   /*
    * The table ends with an entry whose version is 0. The scan does not
    * stop at the first hit, so the last matching entry wins.
    */
   entry = known_versions;
   while (entry->version) {
      if (entry->version == packet_version) {
         client_release = entry->release;
      }
      entry++;
   }

   if (client_release == NULL) {
      /* unknown client version: report the plain number */
      WARNING((SGE_EVENT, MSG_GDI_WRONG_GDI_SSIUS, packet->host,
               packet->commproc, (int)(packet->id), sge_u32c(packet_version),
               feature_get_product_name(FS_VERSION, &product)));
   } else {
      WARNING((SGE_EVENT, MSG_GDI_WRONG_GDI_SSISS, packet->host,
               packet->commproc, (int)(packet->id), client_release,
               feature_get_product_name(FS_VERSION, &product)));
   }
   answer_list_add(alpp, SGE_EVENT, STATUS_EVERSION, ANSWER_QUALITY_ERROR);

   DRETURN(false);
}
コード例 #15
0
/*-------------------------------------------------------------------------*/
/*
 * Puts a value into the input field widget "w".
 *
 * The value is read as a Cardinal from "address". A value of 0, or a
 * value for which sge_at_time() returns NULL, clears the field; otherwise
 * the text produced by sge_at_time() is shown.
 *
 * A warning is issued and nothing is changed if "type" is not
 * QmonQCardinal. Nothing is changed either if "size" is not the size of
 * a Cardinal.
 */
static void set_TimeInput(
Widget w,
XtPointer address,
XrmQuark type,
Cardinal size 
) {
   char time_text[128];
   dstring time_ds;
   String shown;
   int value;

   sge_dstring_init(&time_ds, time_text, sizeof(time_text));

   if (type != QmonQCardinal) {
      XmtWarningMsg("XmtDialogSetDialogValues", "TimeInput",
         "Type Mismatch: Widget '%s':\n\tCan't set widget values"
         " from a resource of type '%s'",
          XtName(w), XrmQuarkToString(type));
      return;
   }

   if (size != sizeof(Cardinal)) {
      return;
   }
   value = *(Cardinal*) address;

   /* time_ds writes into time_text, so the formatted text is read from there */
   if (value != 0 && sge_at_time(value, &time_ds) != NULL) {
      shown = time_text;
   } else {
      shown = "";
   }

   XmtInputFieldSetString(w, shown);
}
コード例 #16
0
/*
 * Adds a job element with the number "job_id" to the master job list and
 * writes it through the default spooling context. If LOCAL_TRANSACTION
 * is enabled, the write is wrapped into its own spooling transaction,
 * which is committed on success and rolled back otherwise. If the
 * file-level "delay" is positive, the function sleeps for delay * 1000
 * microseconds after the write.
 *
 * Returns the result of spool_write_object().
 */
static bool add_job(int job_id)
{
   lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB);
   lList *answers = NULL;
   lListElem *new_job;
   char key_storage[100];
   dstring key_ds;
   const char *spool_key;
   bool written;

   sge_dstring_init(&key_ds, key_storage, sizeof(key_storage));

   new_job = lAddElemUlong(&master_job_list, JB_job_number, job_id, JB_Type);
   spool_key = job_get_key(job_id, 0, NULL, &key_ds);

#if LOCAL_TRANSACTION
   spool_transaction(&answers, spool_get_default_context(),
                     STC_begin); 
   answer_list_output(&answers);
#endif

   written = spool_write_object(&answers, spool_get_default_context(),
                                new_job, spool_key, SGE_TYPE_JOB, false);
   answer_list_output(&answers);

   if (delay > 0) {
      sge_usleep(delay * 1000);
   }

#if LOCAL_TRANSACTION
   spool_transaction(&answers, spool_get_default_context(),
                     written ? STC_commit : STC_rollback); 
   answer_list_output(&answers);
#endif

   return written;
}
コード例 #17
0
ファイル: qevent.c プロジェクト: HPCKP/gridengine
/*
 * Event callback that reports job/task life cycle events on stdout and
 * maintains the counters Global_jobs_running and Global_jobs_registered:
 *
 *    sgeE_JATASK_MOD (task running or transferring) -> "JOB_START",
 *                                                       running++
 *    sgeE_JOB_FINAL_USAGE                           -> "JOB_FINISH",
 *                                                       running--
 *    sgeE_JOB_ADD                                   -> "JOB_ADD",
 *                                                       registered++
 *    sgeE_JOB_DEL                                   -> "JOB_DEL",
 *                                                       registered--
 *
 * Every line contains the job id, the task id and the event timestamp.
 *
 * Returns SGE_EMA_FAILURE if the object type is SGE_TYPE_GLOBAL_CONFIG
 * (used to test the error handling of callbacks), SGE_EMA_OK otherwise.
 */
static sge_callback_result
print_jatask_event(sge_evc_class_t *evc, object_description *object_base, sge_object_type type, 
                   sge_event_action action, lListElem *event, void *clientdata)
{
   char buffer[1024];
   dstring buffer_wrapper;

   DENTER(TOP_LAYER, "print_jatask_event");

   sge_dstring_init(&buffer_wrapper, buffer, sizeof(buffer));
   
   DPRINTF(("%s\n", event_text(event, &buffer_wrapper)));
   if (lGetPosViaElem(event, ET_type, SGE_NO_ABORT) >= 0) {
      /* named differently from the "type" parameter, which is checked below */
      u_long32 event_type = lGetUlong(event, ET_type);
      u_long32 timestamp = lGetUlong(event, ET_timestamp);
      
      if (event_type == sgeE_JATASK_MOD) { 
         lList *jat = lGetList(event,ET_new_version);
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         lListElem *ep = lFirst(jat);
         u_long job_status = lGetUlong(ep, JAT_status);
         int task_running = (job_status==JRUNNING || job_status==JTRANSFERING);

         if (task_running) {
            /* the ids are unsigned long, hence %lu */
            fprintf(stdout,"JOB_START (%lu.%lu:ECL_TIME="sge_U32CFormat")\n", job_id ,task_id,sge_u32c(timestamp));
            fflush(stdout);  
            Global_jobs_running++;
         }
      }
   
      if (event_type == sgeE_JOB_FINAL_USAGE) { 
         u_long job_id = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         fprintf(stdout,"JOB_FINISH (%lu.%lu:ECL_TIME="sge_U32CFormat")\n", job_id, task_id,sge_u32c(timestamp));
         Global_jobs_running--;
         fflush(stdout);  
      }
      if (event_type == sgeE_JOB_ADD) { 
         lList *jat = lGetList(event,ET_new_version);
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         lListElem *ep = lFirst(jat);
         const char* job_project = lGetString(ep, JB_project);
         if (job_project == NULL) {
            job_project = "NONE";
         }
         fprintf(stdout,"JOB_ADD (%lu.%lu:ECL_TIME="sge_U32CFormat":project=%s)\n", job_id, task_id, sge_u32c(timestamp),job_project);
         Global_jobs_registered++;
         fflush(stdout);  
      }
      if (event_type == sgeE_JOB_DEL) { 
         u_long job_id  = lGetUlong(event, ET_intkey);
         u_long task_id = lGetUlong(event, ET_intkey2);
         fprintf(stdout,"JOB_DEL (%lu.%lu:ECL_TIME="sge_U32CFormat")\n", job_id, task_id,sge_u32c(timestamp));
         Global_jobs_registered--;
         fflush(stdout);  
      }

   }
   /* create a callback error to test error handling */
   if(type == SGE_TYPE_GLOBAL_CONFIG) {
      DEXIT;
      return SGE_EMA_FAILURE;
   }
   
   DEXIT;
   return SGE_EMA_OK;
}
コード例 #18
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd_trace ********************************************************
*  NAME
*     shepherd_trace() -- Write line to trace file.
*
*  SYNOPSIS
*     int shepherd_trace(const char *format, ...) 
*
*  FUNCTION
*     Writes a line to the trace file, preceding it with a date, time, uid
*     and pid stamp. If the program runs in foreground mode the line is
*     also printed to stdout. The trace file is (re)opened if necessary and
*     closed again after the write unless the files are kept open.
*
*     The trace file pointer is protected by a mutex; thread cancellation
*     is disabled while the mutex is held.
*
*  INPUTS
*     format: The format string of the line to be written to the trace file.
*             If NULL, nothing is written.
*     ...: The parameters to the format string. See printf(3c).
*
*  RESULT
*     int - 0 if successful, 1 if an error occured (the trace file could
*           not be opened, or writing the line failed).
*******************************************************************************/
int shepherd_trace(const char *format, ...) 
{
   int ret = 1, old_cancelstate;
   struct stat statbuf;

   /* Protect the trace file pointer with a mutex */
   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancelstate);
   pthread_mutex_lock(&g_trace_mutex);

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_trace_fp && fstat(fileno(shepherd_trace_fp), &statbuf) == -1 && errno == EBADF)
   {
      shepherd_trace_fp = NULL;
   }

   if (shepherd_trace_fp == NULL)
   {
      shepherd_trace_fp = shepherd_trace_init_intern(st_trace);
   }

   if (shepherd_trace_fp != NULL)
   {
      char buffer[128], header_str[256];
      dstring ds, message = DSTRING_INIT;

      sge_dstring_init(&ds, buffer, sizeof(buffer));

      snprintf(header_str, sizeof(header_str), "%s ["uid_t_fmt":"pid_t_fmt"]: ",
               sge_ctime(0, &ds), geteuid(), getpid());
     
      if (format != NULL)
      {
         va_list     ap;
         const char *text;

         va_start(ap, format);
         sge_dstring_vsprintf(&message, format, ap);
         va_end(ap);
         text = sge_dstring_get_string(&message);

         /*
          * Keep the result of the write: it is the return value of this
          * function. (It used to be overwritten with 0 further down, so
          * write errors were never reported to the caller.)
          */
         ret = sh_str2file(header_str, text, shepherd_trace_fp);

         if (foreground)
         {
            /* never hand a NULL pointer to %s */
            printf("%s%s\n", header_str, text != NULL ? text : "");
            fflush(stdout);
         }
         sge_dstring_free(&message);
      }
      else
      {
         /* nothing to write, the trace file is available */
         ret = 0;
      }

      /* There are cases where we have to open and close the files 
       * for every write.
       */
      if (!g_keep_files_open)
      {
         shepherd_trace_exit();
      }
   }

   pthread_mutex_unlock(&g_trace_mutex);
   pthread_setcancelstate(old_cancelstate, NULL);
   return ret;
}
コード例 #19
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd_error ********************************************************
*  NAME
*     shepherd_error() -- Write a line to the error file and exit program.
*
*  SYNOPSIS
*     void shepherd_error(int do_exit, const char *format, ...)
*
*  FUNCTION
*     Formats the message and
*        - writes it to the trace file via shepherd_trace(),
*        - writes it to the error file, preceded by a date, time, uid and
*          pid stamp,
*        - prints it to stderr if the program runs in foreground mode,
*        - writes the current shepherd_state to the exit_status file,
*        - sends SIGTERM to the coshepherd, if there is one,
*        - passes it to write_to_qrsh() if the old interactive job support
*          is used and "qrsh_control_port" is configured.
*     Finally the program is terminated with shepherd_state as exit code
*     if "do_exit" is set.
*
*  INPUTS
*     do_exit: If not 0, the trace files are closed and exit() is called.
*     format: The format string of the line to be written to the error file.
*     ...: The parameters to the format string. See printf(3c).
*
*  RESULT
*     void - none
*******************************************************************************/
void shepherd_error(int do_exit, const char *format, ...)
{
   dstring     ds;
   dstring     message = DSTRING_INIT;
   char        buffer[128];
   char        header_str[256];
   const char *msg;
   const char *printable_msg;
   struct stat statbuf;

   if (format != NULL)
   {
      va_list     ap;

      va_start(ap, format);
      sge_dstring_vsprintf(&message, format, ap);
      va_end(ap);
   }

   /* stays valid until sge_dstring_free(&message) below */
   msg = sge_dstring_get_string(&message);
   printable_msg = (msg != NULL) ? msg : "";

   /*
    * The formatted message is data, not a format: a '%' in it must not be
    * interpreted a second time by shepherd_trace().
    */
   if (msg != NULL)
   {
      shepherd_trace("%s", msg);
   }
   else
   {
      shepherd_trace(NULL);
   }

   /*
    * Build the stamp unconditionally. It is needed for the error file and
    * for the foreground output, and the latter also happens when the
    * error file could not be opened.
    */
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   snprintf(header_str, sizeof(header_str), "%s ["uid_t_fmt":"pid_t_fmt"]: ",
            sge_ctime(0, &ds), geteuid(), getpid());

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_error_fp && fstat(fileno(shepherd_error_fp), &statbuf) == -1 && errno==EBADF)
   {
      shepherd_error_fp = NULL;
   }

   if (shepherd_error_fp == NULL)
   {
      shepherd_error_fp = shepherd_trace_init_intern(st_error);
   }

   if (shepherd_error_fp != NULL)
   {
      sh_str2file(header_str, msg, shepherd_error_fp);
   }

   if (foreground)
   {
      fprintf(stderr, "%s%s\n", header_str, printable_msg);
   }

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_exit_status_fp && fstat(fileno(shepherd_exit_status_fp), &statbuf) == -1 && errno==EBADF )
   {
      shepherd_exit_status_fp = NULL;
   }

   if (shepherd_exit_status_fp == NULL)
   {
      shepherd_exit_status_fp = shepherd_trace_init_intern(st_exit_status);
   }

   if (shepherd_exit_status_fp != NULL)
   {
      /* the exit_status file contains just the state number */
      snprintf(header_str, sizeof(header_str), "%d", shepherd_state);
      sh_str2file(header_str, NULL, shepherd_exit_status_fp);
   }

   if (coshepherd_pid > 0)
   {
      sge_switch2start_user();
      kill(coshepherd_pid, SIGTERM);
      sge_switch2admin_user();
   }   
     
   if (g_new_interactive_job_support == false && 
      search_conf_val("qrsh_control_port") != NULL)
   {
      char qrsh_msg[1024];
      snprintf(qrsh_msg, sizeof(qrsh_msg), "1:%s", printable_msg);
      write_to_qrsh(qrsh_msg);  
   }
   sge_dstring_free(&message);

   if (do_exit)
   {
      /* close all trace files before exit */
      shepherd_trace_exit();
      exit(shepherd_state);
   }

   /* There are cases where we have to open and close the files 
    * for every write.
    */
   if (!g_keep_files_open)
   {
      shepherd_error_exit();
   }
}
コード例 #20
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/*-----------------------------------------------------------------*/
/*
 * Writes "<header_str><str>\n" to fp and flushes fp.
 *
 * Either string may be NULL, in which case only the other one is written;
 * if both are NULL a note about the NULL arguments is written instead.
 * If the real user id is SGE_SUPERUSER_UID, the effective user id is
 * switched to SGE_SUPERUSER_UID for the duration of the write (work
 * around for CR 6293411, see shepherd_trace_exit()).
 *
 * If writing or flushing fails, the failure is reported through
 * shepherd_panic().
 *
 * Returns 0 if the line was written and flushed, 1 otherwise (also if fp
 * is NULL).
 */
static int sh_str2file(const char *header_str, const char *str, FILE* fp) 
{
   int     ret = 1;
   int     ret_fp = -1;
   int     ret_fl = EOF;
   int     saved_errno = 0;
   dstring ds;
   char    buffer[128];
   uid_t   old_euid = SGE_SUPERUSER_UID;

   if (fp == NULL) {
      return ret;
   }

   /*
    * Work around for CR 6293411:
    * See shepherd_trace_exit() for details.
    */
   if (getuid() == SGE_SUPERUSER_UID) {
      old_euid = geteuid();
      seteuid(SGE_SUPERUSER_UID);
   }

   if (!str && !header_str) {
      ret_fp = fprintf(fp, "function sh_str2file() called with "
                           "NULL arguments\n");
   } else if (!header_str && str) {
      ret_fp = fprintf(fp, "%s\n", str);
   } else if (header_str && !str) {
      ret_fp = fprintf(fp, "%s\n", header_str);
   } else {
      ret_fp = fprintf(fp, "%s%s\n", header_str, str);
   }

   if (ret_fp >= 0) {
      ret_fl = fflush(fp);
      if (ret_fl == 0) {
         ret = 0;
      } else {
         saved_errno = errno;
      }
   } else {
      /* remember the reason before seteuid() below can change errno */
      saved_errno = errno;
   }

   /*
    * Switch back to admin user?
    */
   if (old_euid != SGE_SUPERUSER_UID) {
      seteuid(old_euid);
   }

   /*
    * PANIC! Can't write to trace/error/exit_status file!
    * Only the call that really failed is reported: fflush() is not even
    * attempted when fprintf() failed.
    */
   if (ret_fp < 0) {
      sge_dstring_init(&ds, buffer, sizeof(buffer));
      sge_dstring_sprintf(&ds, "fprintf(%p,%s,%s) failed: %s", 
                          (void *)fp, header_str?header_str:"<null>", 
                          str?str:"<null>", strerror(saved_errno));
      shepherd_panic(buffer);
   } else if (ret_fl != 0) {
      sge_dstring_init(&ds, buffer, sizeof(buffer));
      sge_dstring_sprintf(&ds, "fflush(%p) failed: %s", 
                          (void *)fp, strerror(saved_errno));
      shepherd_panic(buffer);
   }

   return ret;
}
コード例 #21
0
ファイル: sge_monitor.c プロジェクト: HPCKP/gridengine
/****** uti/monitor/sge_monitor_status() ***************************************
*  NAME
*     sge_monitor_status() -- generates the status for qping / commlib
*
*  SYNOPSIS
*     u_long32 sge_monitor_status(char **info_message, u_long32 monitor_time) 
*
*  FUNCTION
*     This method creates the health monitoring output and returns the
*     monitoring info to the commlib. 
*
*     For every registered thread the time since its last wait time stamp
*     is compared with the warning and error timeouts of the thread and
*     reported with the state 'R', 'W' (warning timeout exceeded) or 'E'
*     (error timeout exceeded as well). Where available, the mallinfo data
*     is added. If monitor_time is not 0, the monitoring output lines of
*     all threads are appended as well.
*
*  INPUTS
*     char **info_message   - info_message pointer, has to point to a NULL string
*                             On return it points to a copy of the info text
*                             made with strdup().
*     u_long32 monitor_time - the configured monitoring interval
*
*  RESULT
*     u_long32 - 0 : everything is okay
*                1 : warning (exactly one thread exceeded its timeout)
*                2 : error (more than one thread exceeded its timeout)
*                3 : init problems (info_message is NULL)
*
*  NOTES
*     MT-NOTE: sge_monitor_status() is MT safe 
*
*******************************************************************************/
u_long32 sge_monitor_status(char **info_message, u_long32 monitor_time)
{
   u_long32 ret = 0;
   
   DENTER(GDI_LAYER, "sge_monitor_status");

   if (info_message == NULL) {
      DEXIT;
      return 3;
   }

   sge_mutex_lock("sge_monitor_status", SGE_FUNC, __LINE__, &global_mutex);

   sge_dstring_clear(&Info_Line);
   
   {/* this is the qping info section, it checks if each thread is still alive */
      int i;
      int error_count = 0;
      struct timeval now;
      double elapsed;
      gettimeofday(&now,NULL);
      
      for (i = 0; i < MAX_OUTPUT_LINES; i++) {
         sge_mutex_lock("sge_monitor_status", SGE_FUNC, __LINE__, &(Output[i].Output_Mutex));
         if (Output[i].name != NULL) {
            /*
             * The state is evaluated per thread. It has to start with 'R'
             * for each of them, otherwise a single late thread would mark
             * all threads reported after it as 'W'/'E', too.
             */
            char state = 'R';

            elapsed = now.tv_usec - Output[i].last_wait_time.tv_usec;
            elapsed = now.tv_sec - Output[i].last_wait_time.tv_sec + (elapsed /1000000);

            if (Output[i].warning_timeout !=  NO_WARNING) {
               if (Output[i].warning_timeout < elapsed) {
                  if (Output[i].error_timeout < elapsed) {
                     state = 'E';
                  }
                  else {
                     state = 'W';
                  }
                  error_count++;
               }
            } 
            sge_dstring_sprintf_append(&Info_Line, MSG_UTI_MONITOR_INFO_SCF, Output[i].name, state, elapsed);
         }
         sge_mutex_unlock("sge_monitor_status", SGE_FUNC, __LINE__, &(Output[i].Output_Mutex));
      }

      /* the overall result depends on the number of late threads only */
      if (error_count == 0) {
         sge_dstring_append(&Info_Line, MSG_UTI_MONITOR_OK);
      }
      else if (error_count == 1) {
         ret = 1;
         sge_dstring_append(&Info_Line, MSG_UTI_MONITOR_WARNING);
      }
      else {
         ret = 2;
         sge_dstring_append(&Info_Line, MSG_UTI_MONITOR_ERROR);
      }
      sge_dstring_append(&Info_Line, "\n");
   }

#if defined(LINUX) || defined(AIX43) || defined(AIX51) || defined(IRIX) || defined(SOLARIS) || defined(HP11)
   if (mallinfo_func_pointer != NULL) {
      struct mallinfo mallinfo_data = mallinfo_func_pointer();

      sge_dstring_sprintf_append(&Info_Line, MSG_UTI_MONITOR_SCHEXT_UUUUUUUUUU,
                                 mallinfo_data.arena,
                                 mallinfo_data.ordblks,
                                 mallinfo_data.smblks,
                                 mallinfo_data.hblks,
                                 mallinfo_data.hblkhd,
                                 mallinfo_data.usmblks,
                                 mallinfo_data.fsmblks,
                                 mallinfo_data.uordblks,
                                 mallinfo_data.fordblks,
                                 mallinfo_data.keepcost);
      sge_dstring_append(&Info_Line, "\n");
   }
#endif
 

   if (monitor_time != 0) { /* generates the output monitoring output data */
      int i;
      sge_dstring_append(&Info_Line, MSG_UTI_MONITOR_COLON); 
      sge_dstring_append(&Info_Line, "\n");

      for (i = 0; i < MAX_OUTPUT_LINES; i++) {
         sge_mutex_lock("sge_monitor_status", SGE_FUNC, __LINE__, &(Output[i].Output_Mutex));
         if (Output[i].name != NULL) {
            append_time(Output[i].update_time, &Info_Line, false);
            sge_dstring_append(&Info_Line, " | ");
            sge_dstring_append_dstring(&Info_Line, Output[i].output);
            sge_dstring_append(&Info_Line,"\n");
         }
         sge_mutex_unlock("sge_monitor_status", SGE_FUNC, __LINE__, &(Output[i].Output_Mutex));
      }
   }
   else {
      sge_dstring_append(&Info_Line, MSG_UTI_MONITOR_DISABLED);
      sge_dstring_append(&Info_Line, "\n");
   }

   /* the caller gets its own copy of the text built in Info_Line */
   *info_message = strdup(sge_dstring_get_string(&Info_Line));
  
   sge_mutex_unlock("sge_monitor_status", SGE_FUNC, __LINE__, &global_mutex);
   DEXIT;
   return ret;
}
コード例 #22
0
ファイル: sge_pe_task_mirror.c プロジェクト: HPCKP/gridengine
/****** Eventmirror/pe_task/pe_task_update_master_list() ***********************
*  NAME
*     pe_task_update_master_list() -- update parallel tasks of an array task
*
*  SYNOPSIS
*     bool 
*     pe_task_update_master_list(sge_object_type type, sge_event_action action, 
*                                lListElem *event, void *clientdata) 
*
*  FUNCTION
*     Update the list of parallel tasks of an array task
*     based on an event.
*     The function is called from the event mirroring interface.
*
*     The scaled usage list of a parallel task is not updated
*     by this function, as this data is maintained by a 
*     separate event.
*
*  INPUTS
*     sge_object_type type     - event type
*     sge_event_action action - action to perform
*     lListElem *event        - the raw event
*     void *clientdata        - client data
*
*  RESULT
*     sge_callback_result - SGE_EMA_OK if the update is successful, else SGE_EMA_FAILURE
*
*  NOTES
*     The function should only be called from the event mirror interface.
*
*  SEE ALSO
*     Eventmirror/--Eventmirror
*     Eventmirror/sge_mirror_update_master_list()
*******************************************************************************/
sge_callback_result
pe_task_update_master_list(sge_evc_class_t *evc, object_description *object_base, sge_object_type type, 
                           sge_event_action action, lListElem *event, void *clientdata)
{
   /* keys carried by the event */
   u_long32 job_number;
   u_long32 array_task_number;
   const char *task_name = NULL;

   /* objects looked up with these keys */
   lListElem *owning_job = NULL;
   lListElem *array_task = NULL;
   lListElem *task_elem = NULL;

   lList *task_list = NULL;
   const lDescr *task_descr = NULL;
   const char *task_key = NULL;

   /* PET_scaled_usage is swapped into this list while a modify event is applied */
   lList *parked_usage = NULL;

   char id_buffer[MAX_STRING_SIZE];
   dstring id_dstring;

   DENTER(TOP_LAYER, "pe_task_update_master_list");

   sge_dstring_init(&id_dstring, id_buffer, sizeof(id_buffer));

   job_number = lGetUlong(event, ET_intkey);
   array_task_number = lGetUlong(event, ET_intkey2);
   task_name = lGetString(event, ET_strkey);

   /* the job must already be part of the master job list */
   owning_job = job_list_locate(*sge_master_list(object_base, SGE_TYPE_JOB), job_number);
   if (!owning_job) {
      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJOBFORUPDATEIN_SS,
             job_get_id_string(job_number, 0, NULL, &id_dstring), SGE_FUNC));
      DEXIT;
      return SGE_EMA_FAILURE;
   }

   /* ... and so must the array task the parallel task belongs to */
   array_task = job_search_task(owning_job, NULL, array_task_number);
   if (!array_task) {
      ERROR((SGE_EVENT, MSG_JOB_CANTFINDJATASKFORUPDATEIN_SS,
             job_get_id_string(job_number, array_task_number, NULL, &id_dstring), SGE_FUNC));
      DEXIT;
      return SGE_EMA_FAILURE;
   }

   /* the parallel task itself may legitimately be missing (unless we modify it) */
   task_elem = ja_task_search_pe_task(array_task, task_name);

   task_list = lGetList(array_task, JAT_task_list);
   task_descr = lGetListDescr(lGetList(event, ET_new_version));

   if (action == SGE_EMA_MOD) {
      /*
       * A modify event must not touch PET_scaled_usage, which is maintained
       * by JOB_USAGE events: take it out of the task before the update.
       */
      if (!task_elem) {
         ERROR((SGE_EVENT, MSG_JOB_CANTFINDPETASKFORUPDATEIN_SS,
                job_get_id_string(job_number, array_task_number, task_name, &id_dstring), SGE_FUNC));
         DEXIT;
         return SGE_EMA_FAILURE;
      }
      lXchgList(task_elem, PET_scaled_usage, &parked_usage);
   }

   /* let the generic mirror code apply the event to the task list */
   task_key = job_get_id_string(job_number, array_task_number, task_name, &id_dstring);
   if (sge_mirror_update_master_list(&task_list, task_descr, task_elem,
                                     task_key, action, event) != SGE_EM_OK) {
      lFreeList(&parked_usage);
      DEXIT;
      return SGE_EMA_FAILURE;
   }

   if (action == SGE_EMA_MOD) {
      /* look the task up again and hand the parked usage list back to it */
      task_elem = ja_task_search_pe_task(array_task, task_name);
      if (!task_elem) {
         ERROR((SGE_EVENT, MSG_JOB_CANTFINDPETASKFORUPDATEIN_SS,
                job_get_id_string(job_number, array_task_number, task_name, &id_dstring), SGE_FUNC));
         lFreeList(&parked_usage);
         DEXIT;
         return SGE_EMA_FAILURE;
      }

      lXchgList(task_elem, PET_scaled_usage, &parked_usage);
      /* whatever the exchange left in parked_usage is no longer needed */
      lFreeList(&parked_usage);
   }

   /* the first pe task add event may have created a new task list: attach it */
   if (lGetList(array_task, JAT_task_list) == NULL && task_list != NULL) {
      lSetList(array_task, JAT_task_list, task_list);
   }

   DEXIT;
   return SGE_EMA_OK;
}
コード例 #23
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/****** err_trace/shepherd_trace_init_intern() *******************************
*  NAME
*     shepherd_trace_init_intern() -- Initialize shepherd's tracing. 
*
*  SYNOPSIS
*     static FILE* shepherd_trace_init_intern(st_shepherd_file_t shepherd_file)
*
*  FUNCTION
*     Opens the shepherd's trace file and sets the FD_CLOEXEC-flag so it will
*     be closed automatically in an exec()-call.
*     Must be called with euid=admin user to work properly!
*
*  INPUTS
*     st_shepherd_file_t shepherd_file - selects which shepherd file to open;
*                             it is used as index into g_shepherd_file_name
*                             and g_shepherd_file_path. The file is located in
*                             the working directory recorded on the first call.
* 
*  RESULT
*     FILE* - If successfully opened, the file pointer of shepherd's trace file.
*           - Otherwise NULL.
*******************************************************************************/
static FILE* shepherd_trace_init_intern(st_shepherd_file_t shepherd_file)
{
   /*
    * Opens (creating it if necessary) the shepherd file selected by
    * 'shepherd_file' in the directory that was the working directory on the
    * first call, marks the descriptor close-on-exec and wraps it in a FILE*.
    *
    * Returns the FILE* on success, NULL on any failure. Failures are reported
    * through shepherd_panic(); every failure path below is written so that it
    * is also correct if shepherd_panic() returns.
    */
   static char     path[SGE_PATH_MAX];
   static bool     called = false;
   SGE_STRUCT_STAT statbuf;
   dstring         ds;
   char            buffer[SGE_PATH_MAX+128];
   char            tmppath[SGE_PATH_MAX];
   int             fd       = -1;
   FILE            *fp      = NULL;
   int             do_chown = 0;
   int             saved_errno;
   int             path_len;

   /*
    * After changing into the job's cwd we need an absolute path to the
    * error/trace file, so the working directory is recorded once.
    * If getcwd() fails the buffer content is undefined: report the problem
    * and leave 'called' unset so that a later call tries again.
    */
   if (called == false) {
      if (getcwd(path, sizeof(path)) == NULL) {
         saved_errno = errno;
         path[0] = '\0';
         sge_dstring_init(&ds, buffer, sizeof(buffer));
         sge_dstring_sprintf(&ds, "getcwd() failed: %s", strerror(saved_errno));
         shepherd_panic(buffer);
         return NULL;
      }
      called = true;
   }

   /* never continue with a silently truncated file name */
   path_len = snprintf(tmppath, sizeof(tmppath), "%s/%s", path,
                       g_shepherd_file_name[shepherd_file]);
   if (path_len < 0 || (size_t)path_len >= sizeof(tmppath)) {
      sge_dstring_init(&ds, buffer, sizeof(buffer));
      sge_dstring_sprintf(&ds, "path of %s file is too long",
                          g_shepherd_file_name[shepherd_file]);
      shepherd_panic(buffer);
      return NULL;
   }
   sge_strlcpy(g_shepherd_file_path[shepherd_file], tmppath, SGE_PATH_MAX);

   /* If the file does not exist, create it. Otherwise just open it. */
   if (SGE_STAT(tmppath, &statbuf)) {
      fd = SGE_OPEN3(tmppath, O_RDWR | O_CREAT | O_APPEND, 0644);
      if (fd < 0) {
         /* keep errno before any other call can overwrite it */
         saved_errno = errno;
         sge_dstring_init(&ds, buffer, sizeof(buffer));
         sge_dstring_sprintf(&ds, "creat(%s) failed: %s", tmppath, strerror(saved_errno));
         shepherd_panic(buffer);
      }

      if (getuid() == SGE_SUPERUSER_UID) {
         /* We must give the file to the job owner later */
         do_chown = 1;
      } else {
         /* We are not root, so we have to own all files anyway. */
         do_chown = 0;
      }
   } else {
      /* The file already exists. We get here when
       * a) a exec() failed or
       * b) after the execution of prolog/job, when the job/epilog
       * tries to init the error/exit status files.
       *
       * In a root system we can just open the file, because we are either
       * root or the job user who owns the file.
       * In a admin user system we must set our euid to root to open it, then
       * it is the same as the root system.
       * In a test user system we are the owner of the file and can open it.
       *
       * When we are root (masked or not), we gave this file to the
       * prolog user/job user right after its creation. But we can have
       * 3 different users for prolog, job and epilog, so we must give
       * the file here to the next user.
       * This must be done via shepherd_trace_chown() in the shepherd
       * before we switch to this user there.
       * It can't be done here because we don't know if we are in 
       * case a) (exec failed) or case b) (after execution of prolog/job).
       */
      int  old_euid = SGE_SUPERUSER_UID;

      /*
       * Work around for CR 6293411:
       * See shepherd_trace_exit() for details.
       */
      if (getuid() == SGE_SUPERUSER_UID) {
         old_euid = geteuid();
         seteuid(SGE_SUPERUSER_UID);
      }

      fd = SGE_OPEN2(tmppath, O_RDWR | O_APPEND);
      if (fd < 0) {
         /* keep errno before any other call can overwrite it */
         saved_errno = errno;
         sge_dstring_init(&ds, buffer, sizeof(buffer));
         sge_dstring_sprintf(&ds, "open(%s) failed: %s",
                             tmppath, strerror(saved_errno));
         shepherd_panic(buffer);
      }
      do_chown = 0;

      /*
       * Switch back to admin user?
       */
      if (old_euid != SGE_SUPERUSER_UID) {
         seteuid(old_euid);
      }
   }

   /* Something went wrong. */
   if (fd < 0) {
      return NULL;
   }

   /* To avoid to block stdin, stdout or stderr, dup the fd until it is >= 3 */
   if (fd < 3) {
      dup_fd(&fd);
   }

   /* Set FD_CLOEXEC flag to automatically close the file in an exec() */
   if (!set_cloexec(fd)) {
      shepherd_panic("set_cloexec() failed");
      /* nobody else knows this descriptor: do not leak it */
      close(fd);
      return NULL;
   }

   /*
    * Now open a FILE* from the file descriptor, so we can use fprintf().
    */
   fp = fdopen(fd, "a");
   if (!fp) {
      saved_errno = errno;
      sge_dstring_init(&ds, buffer, sizeof(buffer));
      sge_dstring_sprintf(&ds, "can't open %s file \"%s\": %s\n",
                          g_shepherd_file_name[shepherd_file], tmppath, 
                          strerror(saved_errno));
      shepherd_panic(buffer);
      /* fdopen() failed, so the descriptor is still ours to close */
      close(fd);
      return NULL;
   }

   /* a file freshly created as root is handed over to the job owner */
   if (do_chown && strlen(g_job_owner) > 0) {
      shepherd_trace_chown_intern(g_job_owner, fp, shepherd_file);
   }
   return fp;
}
コード例 #24
0
ファイル: sge_complex_schedd.c プロジェクト: HPCKP/gridengine
/****** sge_select_queue/get_attribute() ***************************************
*  NAME
*     get_attribute() -- looks for an attribute, but only for one level (for host, global, or queue)
*
*  SYNOPSIS
*     static lListElem* get_attribute(const char *attrname, lList *config_attr, 
*     lList *actual_attr, lList *load_attr, lList *centry_list, lListElem 
*     *queue, lListElem *rep, u_long32 layer, double lc_factor, dstring *reason) 
*
*  FUNCTION
*     Extracts the attribute specified with 'attrname' and finds the 
*     more important one, if it is defined multiple times on the same 
*     level. It only cares about one level.
*     If the attribute is a consumable, one can specify a point in time and a duration.
*     This will get the caller the min amount of that resource during the time frame.
*
*  INPUTS
*     const char *attrname - attribute name one is looking for 
*     lList *config_attr   - user defined attributes (CE_Type)
*     lList *actual_attr   - current usage of consumables (RUE_Type)
*     lList *load_attr     - load attributes 
*     lList *centry_list   - the system wide attribute configuration 
*     lListElem *queue     - the current queue, or null, if one works on hosts 
*     u_long32 layer       - the current layer 
*     double lc_factor     - the load correction value 
*     dstring *reason      - space for error messages or NULL 
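*     bool zero_utilization - if true, a consumable's current utilization is taken as 0
*                             instead of being computed for the time interval
*     u_long32 start_time  - begin of the time interval for which the resource is requested
*     u_long32 duration    - the duration of the interval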
*
*  RESULT
*     static lListElem* - the element one was looking for or NULL
*
*******************************************************************************/
lListElem* get_attribute(const char *attrname, lList *config_attr, lList *actual_attr, lList *load_attr, 
   const lList *centry_list, lListElem *queue, u_long32 layer, double lc_factor, dstring *reason,
   bool zero_utilization, u_long32 start_time, u_long32 duration)
{
   /*
    * Builds a copy of the centry named 'attrname' for one level and fills it
    * from up to three sources, in this order:
    *   1. the value configured on this level (config_attr),
    *   2. a load value reported for this level (load_attr),
    *   3. the queue's own resource value (only if 'queue' is set).
    * Returns the element (the caller receives a copy made with lCopyElem())
    * or NULL if no source provided the attribute or an error occurred.
    */
   lListElem *actual_el=NULL;
   lListElem *load_el=NULL;
   lListElem *cplx_el=NULL;

   DENTER(BASIS_LAYER, "get_attribute");

   /* resource_attr is a complex_entry (CE_Type) */
   if (config_attr) {
      lListElem *temp = lGetElemStr(config_attr, CE_name, attrname);

      if (temp){ 
         /* start from the system wide definition, then take over the configured value */
         cplx_el = lCopyElem(lGetElemStr(centry_list, CE_name, attrname));
         if(!cplx_el){
            /* error */
            DRETURN(NULL);
         }
         lSetUlong(cplx_el, CE_dominant, layer | DOMINANT_TYPE_FIXED);
         lSetUlong(cplx_el, CE_pj_dominant, DOMINANT_TYPE_VALUE);  /* default, no value set */ 
         lSetDouble(cplx_el, CE_doubleval, lGetDouble(temp,CE_doubleval) ); 
         lSetString(cplx_el, CE_stringval, lGetString(temp,CE_stringval) ); 
      }
   }

   /* a configured consumable: the per job value is derived from the utilization */
   if (cplx_el && lGetUlong(cplx_el, CE_consumable) != CONSUMABLE_NO) {
      lSetUlong(cplx_el, CE_pj_dominant, layer | DOMINANT_TYPE_CONSUMABLE);
      lSetUlong(cplx_el, CE_dominant, DOMINANT_TYPE_VALUE);
      /* treat also consumables as fixed attributes when assuming an empty queuing system */
      if (sconf_get_qs_state() == QS_STATE_FULL) {
         if (actual_attr && (actual_el = lGetElemStr(actual_attr, RUE_name, attrname))){
            dstring ds;
            char as_str[20];
            double utilized = zero_utilization ? 0 : utilization_max(actual_el, start_time, duration, false);

            switch (lGetUlong(cplx_el, CE_relop)) {
               case CMPLXGE_OP:
               case CMPLXGT_OP:
                  /* for >= and > the utilization itself is stored */
                  lSetDouble(cplx_el, CE_pj_doubleval, utilized); 
                  break;

               case CMPLXEQ_OP:
               case CMPLXLT_OP:
               case CMPLXLE_OP:
               case CMPLXNE_OP:
               default:
                  /* otherwise the configured value minus the utilization is stored */
                  lSetDouble(cplx_el, CE_pj_doubleval, lGetDouble(cplx_el, CE_doubleval) - utilized); 
                  break;
            }
            /* as_str backs the dstring, so the formatted text is bounded by its size */
            sge_dstring_init(&ds, as_str, sizeof(as_str));
            sge_dstring_sprintf(&ds, "%8.3f", (float)lGetDouble(cplx_el, CE_pj_doubleval));
            lSetString(cplx_el,CE_pj_stringval, as_str);
         } else{
            /* a consumable without an entry in the actual list cannot be evaluated */
            sge_dstring_sprintf(reason, MSG_ATTRIB_ACTUALELEMENTTOATTRIBXMISSING_S, attrname);
            lFreeElem(&cplx_el);
            DRETURN(NULL);
         }
      } else{
         lSetDouble(cplx_el, CE_pj_doubleval, lGetDouble(cplx_el, CE_doubleval)); 
         lSetString(cplx_el,CE_pj_stringval, lGetString(cplx_el, CE_stringval));
      }
   }

   /** check for a load value */
   /* NOTE(review): is_attr_prior() receives the same element twice - confirm this is intended */
   if (load_attr && 
       (load_el = lGetElemStr(load_attr, HL_name, attrname)) &&
       (sconf_get_qs_state()==QS_STATE_FULL || lGetBool(load_el, HL_static)) &&
        (!is_attr_prior(cplx_el, cplx_el)))
   {
      lListElem *ep_nproc=NULL;
      int nproc=1;

      if (!cplx_el){
         cplx_el = lCopyElem(lGetElemStr(centry_list, CE_name, attrname));
         if (!cplx_el){
            /* error */
            DRETURN(NULL);
         }         
         lSetUlong(cplx_el, CE_dominant, DOMINANT_TYPE_VALUE);
         lSetUlong(cplx_el, CE_pj_dominant, DOMINANT_TYPE_VALUE);
      }

      /* number of processors, at least 1; needed to scale "np_" load values */
      if ((ep_nproc = lGetElemStr(load_attr, HL_name, LOAD_ATTR_NUM_PROC))) {
         const char *cp = lGetString(ep_nproc, HL_value);
         if (cp) {
            nproc = MAX(1, atoi(cp));
         }
      }

      {
         const char *load_value=NULL;
         u_long32 type;
         double dval;

         load_value = lGetString(load_el, HL_value);

         /* are we working on string values? if though, than it is easy */
         if ( (type = lGetUlong(cplx_el, CE_valtype)) == TYPE_STR || type == TYPE_CSTR || type == TYPE_HOST || type == TYPE_RESTR) {
            lSetString(cplx_el, CE_stringval, load_value);
            lSetUlong(cplx_el, CE_dominant, layer | DOMINANT_TYPE_LOAD);
         } else { /* working on numerical values */
            lListElem *job_load;
            char err_str[256];
            /* NOTE(review): sval is written below but never read in this function */
            char sval[100];
            u_long32 dom_type = DOMINANT_TYPE_LOAD;
            lList *load_adjustments = sconf_get_job_load_adjustments();
 
            job_load=lGetElemStr(load_adjustments, CE_name, attrname);

            /* load values that cannot be parsed as numbers are ignored */
            if (parse_ulong_val(&dval, NULL, type, load_value, NULL, 0)) {

               sge_strlcpy(sval, load_value, sizeof(sval));
               /* --------------------------------
                  look for 'name' in our load_adjustments list
               */
               if (job_load) {
                  const char *s;
                  double load_correction;

                  s = lGetString(job_load, CE_stringval);
                  if (!parse_ulong_val(&load_correction, NULL, type, s, err_str, 255)) {
                     ERROR((SGE_EVENT, MSG_SCHEDD_LOADADJUSTMENTSVALUEXNOTNUMERIC_S , attrname));
                  } else if (lc_factor) {
                     double old_dval;
                     u_long32 relop;
                     if (!strncmp(attrname, "np_", 3) && nproc != 1 ) {
                        DPRINTF(("fillComplexFromHost: dividing lc_factor for \"%s\" with value %f by %d to %f\n",
                                 attrname, lc_factor, nproc, lc_factor / nproc));
                        lc_factor /= nproc;
                     }
                     load_correction *= lc_factor;

                     /* it depends on relop in complex config whether load_correction is pos/neg */
                     if ( (relop = lGetUlong(cplx_el, CE_relop)) == CMPLXGE_OP || relop == CMPLXGT_OP){
                        old_dval = dval;
                        dval += load_correction;
                     }   
                     else{
                        old_dval = dval;
                        dval -= load_correction;
                     }

                     /*
                      * "%8.3f" of a very large double expands to several hundred
                      * characters; the output must be bounded by the buffer size.
                      */
                     snprintf(sval, sizeof(sval), "%8.3f", dval);
                     DPRINTF(("%s: uc: %f c(%f): %f\n", attrname, old_dval, lc_factor, dval));
                     dom_type = DOMINANT_TYPE_CLOAD;
                  }
               }

               /* we can have a user, who wants to override the incomming load value. This is no
                  problem for consumables, but for fixed values. A custom fixed value is a per
                  slot value (stored in CE_doubleval) and a load value is a per job value (stored
                  in CE_pj_doubleval). 
   
                  This code changes a fixed custom value from a per slot to a per job value!!
               */
               if ( !(lGetUlong(cplx_el, CE_dominant) == DOMINANT_TYPE_VALUE) && 
                     (lGetUlong(cplx_el, CE_pj_dominant) == DOMINANT_TYPE_VALUE)){
                  lSetDouble(cplx_el, CE_pj_doubleval, lGetDouble(cplx_el, CE_doubleval));
                  lSetString(cplx_el, CE_pj_stringval, lGetString(cplx_el, CE_stringval));
                  lSetUlong(cplx_el, CE_dominant, DOMINANT_TYPE_VALUE);
                  lSetUlong(cplx_el, CE_pj_dominant, layer | DOMINANT_TYPE_FIXED);
               } 
 
               /* take the load value unless the value already stored has priority */
               if (!is_attr_prior2(cplx_el, dval, CE_pj_doubleval, CE_pj_dominant)){
                  lSetString(cplx_el, CE_pj_stringval, load_value);
                  lSetUlong(cplx_el, CE_pj_dominant, layer | dom_type);
                  lSetDouble(cplx_el, CE_pj_doubleval, dval );
               }
            } /* end parsable numerical load value */
            lFreeList(&load_adjustments);
         } /* end numerical load value */
      } /* end block */
   }

   /* we are working on queue level, so we have to check for queue resource values */
   if (queue){
      bool created=false;
      if(!cplx_el){
         cplx_el = lCopyElem(lGetElemStr(centry_list, CE_name, attrname));
         if(!cplx_el){
            /* error */
            DRETURN(NULL);
         }         
         lSetUlong(cplx_el, CE_dominant, DOMINANT_TYPE_VALUE);
         lSetUlong(cplx_el, CE_pj_dominant, DOMINANT_TYPE_VALUE);
         created = true;
      }
      /* an element created only for this lookup is dropped again if the queue has no value */
      if (!get_queue_resource(cplx_el, queue, attrname) && created) {
         lFreeElem(&cplx_el);
      }
   }
   DRETURN(cplx_el);
}
コード例 #25
0
ファイル: qstat_cmdline.c プロジェクト: HPCKP/gridengine
/****
 **** qstat_usage (static)
 ****
 **** Displays the usage of qstat (or of qselect, if qselect_mode is set) on file fp.
 **** If what is NULL, the full usage is displayed.
 ****
 **** Returns always 1.
 ****
 **** If what is a pointer to an option-string,
 **** only usage for that option will be displayed.
 ****   ** not implemented yet! **
 ****/
int 
qstat_usage(int qselect_mode, FILE *fp, char *what) 
{
   dstring ds;
   char buffer[256];
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));

   fprintf(fp, "%s\n", feature_get_product_name(FS_SHORT_VERSION, &ds));
 
   if(!what) {
      /* display full usage */
      fprintf(fp, "%s %s [options]\n", MSG_SRC_USAGE ,qselect_mode?"qselect":"qstat");
      if (!qselect_mode) {
         fprintf(fp, "        [-ext]                            %s\n",MSG_QSTAT_USAGE_VIEWALSOSCHEDULINGATTRIBUTES);
      }
      if (!qselect_mode) {
         fprintf(fp, "        [-explain a|c|A|E]                %s\n",MSG_QSTAT_USAGE_EXPLAINOPT);
      }
      if (!qselect_mode) 
         fprintf(fp, "        [-f]                              %s\n",MSG_QSTAT_USAGE_FULLOUTPUT);
      if (!qselect_mode) 
         fprintf(fp, "        [-F [resource_attributes]]        %s\n",MSG_QSTAT_USAGE_FULLOUTPUTANDSHOWRESOURCESOFQUEUES);
      if (!qselect_mode) {
         fprintf(fp, "        [-g {c}]                          %s\n",MSG_QSTAT_USAGE_DISPLAYCQUEUESUMMARY);
         fprintf(fp, "        [-g {d}]                          %s\n",MSG_QSTAT_USAGE_DISPLAYALLJOBARRAYTASKS);
         fprintf(fp, "        [-g {t}]                          %s\n",MSG_QSTAT_USAGE_DISPLAYALLPARALLELJOBTASKS);
      }
      fprintf(fp, "        [-help]                           %s\n",MSG_COMMON_help_OPT_USAGE);
      if (!qselect_mode)
         fprintf(fp, "        [-j job_identifier_list ]         %s\n",MSG_QSTAT_USAGE_SHOWSCHEDULERJOBINFO);
      fprintf(fp, "        [-l resource_list]                %s\n",MSG_QSTAT_USAGE_REQUESTTHEGIVENRESOURCES);
      if (!qselect_mode) 
         fprintf(fp, "        [-ne]                             %s\n",MSG_QSTAT_USAGE_HIDEEMPTYQUEUES);
      if (!qselect_mode) {
         fprintf(fp, "        [-ncb]                            %s\n",MSG_QSTAT_USAGE_VIEWALSOBINDINGATTRIBUTES);
      }
      fprintf(fp, "        [-pe pe_list]                     %s\n",MSG_QSTAT_USAGE_SELECTONLYQUEESWITHONOFTHESEPE);
      fprintf(fp, "        [-q wc_queue_list]                %s\n",MSG_QSTAT_USAGE_PRINTINFOONGIVENQUEUE);
      fprintf(fp, "        [-qs {a|c|d|o|s|u|A|C|D|E|S}]     %s\n",MSG_QSTAT_USAGE_PRINTINFOCQUEUESTATESEL);
      if (!qselect_mode) 
         fprintf(fp, "        [-r]                              %s\n",MSG_QSTAT_USAGE_SHOWREQUESTEDRESOURCESOFJOB);
      if (!qselect_mode) {
         fprintf(fp, "        [-s {p|r|s|z|hu|ho|hs|hd|hj|ha|h|a}] %s\n",MSG_QSTAT_USAGE_SHOWPENDINGRUNNINGSUSPENDESZOMBIEJOBS);
         fprintf(fp, "                                          %s\n",MSG_QSTAT_USAGE_JOBSWITHAUSEROPERATORSYSTEMHOLD);
         fprintf(fp, "                                          %s\n",MSG_QSTAT_USAGE_JOBSWITHSTARTTIMEINFUTORE);
         fprintf(fp, "                                          %s\n",MSG_QSTAT_USAGE_HISABBREVIATIONFORHUHOHSHJHA);
         fprintf(fp, "                                          %s\n",MSG_QSTAT_USAGE_AISABBREVIATIONFOR);
      }
      if (!qselect_mode) 
         fprintf(fp, "        [-t]                              %s\n",MSG_QSTAT_USAGE_SHOWTASKINFO);
      if (!qselect_mode){  
         fprintf(fp, "        [-u user_list]                    %s\n",MSG_QSTAT_USAGE_VIEWONLYJOBSOFTHISUSER);
      }   
      fprintf(fp, "        [-U user_list]                    %s\n",MSG_QSTAT_USAGE_SELECTQUEUESWHEREUSERXHAVEACCESS);

      if (!qselect_mode) {
         fprintf(fp, "        [-urg]                            %s\n",MSG_QSTAT_URGENCYINFO );
         fprintf(fp, "        [-pri]                            %s\n",MSG_QSTAT_PRIORITYINFO );
         fprintf(fp, "        [-xml]                            %s\n", MSG_COMMON_xml_OPT_USAGE);
      }   
      
      if (getenv("MORE_INFO")) {
         fprintf(fp, SFNMAX, MSG_QSTAT_USAGE_ADDITIONALDEBUGGINGOPTIONS);
         fprintf(fp, "        [-dj]                             %s\n",MSG_QSTAT_USAGE_DUMPCOMPLETEJOBLISTTOSTDOUT);
         fprintf(fp, "        [-dq]                             %s\n",MSG_QSTAT_USAGE_DUMPCOMPLETEQUEUELISTTOSTDOUT);
      }
      fprintf(fp, "\n");
      fprintf(fp, "pe_list                  pe[,pe,...]\n");
      fprintf(fp, "job_identifier_list      [job_id|job_name|pattern]{, [job_id|job_name|pattern]}\n");
      fprintf(fp, "resource_list            resource[=value][,resource[=value],...]\n");
      fprintf(fp, "user_list                user|@group[,user|@group],...]\n");
      fprintf(fp, "resource_attributes      resource,resource,...\n");
      fprintf(fp, "wc_cqueue                %s\n", MSG_QSTAT_HELP_WCCQ);
      fprintf(fp, "wc_host                  %s\n", MSG_QSTAT_HELP_WCHOST);
      fprintf(fp, "wc_hostgroup             %s\n", MSG_QSTAT_HELP_WCHG);
      fprintf(fp, "wc_qinstance             wc_cqueue@wc_host\n");
      fprintf(fp, "wc_qdomain               wc_cqueue@wc_hostgroup\n");
      fprintf(fp, "wc_queue                 wc_cqueue|wc_qdomain|wc_qinstance\n");
      fprintf(fp, "wc_queue_list            wc_queue[,wc_queue,...]\n");
   } else {
      /* display option usage */
      fprintf(fp, MSG_QDEL_not_available_OPT_USAGE_S,what);
      fprintf(fp, "\n");
   }
   return 1;
}
コード例 #26
0
/****** Eventmirror/job/job_update_master_list() *****************************
*  NAME
*     job_update_master_list() -- update the master list of jobs
*
*  SYNOPSIS
*     bool job_update_master_list(sge_object_type type,
*                                     sge_event_action action,
*                                     lListElem *event, void *clientdata)
*
*  FUNCTION
*     Update the global master list of jobs
*     based on an event.
*     The function is called from the event mirroring interface.
*
*     A jobs array tasks are not updated by this function,
*     as they are maintained by separate events.
*     In addition, some scheduler specific attributes, that
*     are only used in scheduler, are not updated.
*
*  INPUTS
*     sge_object_type type     - event type
*     sge_event_action action - action to perform
*     lListElem *event        - the raw event
*     void *clientdata        - client data
*
*  RESULT
*     sge_callback_result - SGE_EMA_OK if the update is successful, else SGE_EMA_FAILURE
*
*  NOTES
*     The function should only be called from the event mirror interface.
*
*  SEE ALSO
*     Eventmirror/--Eventmirror
*     Eventmirror/sge_mirror_update_master_list()
*     Eventmirror/job/job_update_master_list_usage()
*******************************************************************************/
sge_callback_result
job_update_master_list(sge_evc_class_t *evc, object_description *object_base, sge_object_type type, 
                       sge_event_action action, lListElem *event, void *clientdata)
{
   lList **master_jobs;
   const lDescr *job_descr;
   u_long32 job_number;
   lListElem *known_job = NULL;

   /* JB_ja_tasks of the known job is swapped into this list during a modify event */
   lList *kept_tasks = NULL;

   char id_buffer[MAX_STRING_SIZE];
   dstring id_dstring;

   DENTER(TOP_LAYER, "job_update_master_list");

   sge_dstring_init(&id_dstring, id_buffer, sizeof(id_buffer));

   master_jobs = sge_master_list(object_base, SGE_TYPE_JOB);
   job_descr = lGetListDescr(lGetList(event, ET_new_version)); 
   job_number = lGetUlong(event, ET_intkey);
   known_job = job_list_locate(*master_jobs, job_number);

   if (action == SGE_EMA_MOD) {
      const u_long32 event_type = lGetUlong(event, ET_type);
      lListElem *incoming_job;

      /* a modify event needs a job to modify */
      if (!known_job) {
         ERROR((SGE_EVENT, MSG_JOB_CANTFINDJOBFORUPDATEIN_SS,
                job_get_id_string(job_number, 0, NULL, &id_dstring), "job_update_master_list"));
         DRETURN(SGE_EMA_FAILURE);
      }

      /*
       * JOB_USAGE and JOB_FINAL_USAGE events are sent for jobs, ja_tasks and
       * pe_tasks and only contain the usage list, so they are handled by a
       * dedicated function and are finished here.
       */
      if (event_type == sgeE_JOB_USAGE || event_type == sgeE_JOB_FINAL_USAGE) {
         const bool usage_ok = job_update_master_list_usage(*master_jobs, event);
         DRETURN(usage_ok ? SGE_EMA_OK : SGE_EMA_FAILURE);
      }

      /*
       * The true modify event. Several fields may not be overwritten:
       * - JB_ja_tasks, the task list, is maintained by JATASK events
       * - JB_host and JB_category are scheduler internal attributes
       * known_job is non-NULL here, so only the incoming element is checked.
       */
      incoming_job = lFirst(lGetList(event, ET_new_version));
      if (incoming_job != NULL) {
         /* job update events do not contain the ja_tasks: keep the old ones aside */
         lXchgList(known_job, JB_ja_tasks, &kept_tasks);
         lSetHost(incoming_job, JB_host, lGetHost(known_job, JB_host));
         lSetRef(incoming_job, JB_category, lGetRef(known_job, JB_category));
      }
   }

   /* let the generic mirror code apply the event to the master job list */
   if (sge_mirror_update_master_list(master_jobs, job_descr, known_job,
                                     job_get_id_string(job_number, 0, NULL, &id_dstring),
                                     action, event) != SGE_EM_OK) {
      lFreeList(&kept_tasks);
      DRETURN(SGE_EMA_FAILURE);
   }

   /* after a modify event the kept task list goes back into the replaced job */
   if (action == SGE_EMA_MOD && kept_tasks != NULL) {
      known_job = job_list_locate(*master_jobs, job_number);
      if (!known_job) {
         ERROR((SGE_EVENT, MSG_JOB_CANTFINDJOBFORUPDATEIN_SS,
                job_get_id_string(job_number, 0, NULL, &id_dstring), "job_update_master_list"));
         lFreeList(&kept_tasks);
         DRETURN(SGE_EMA_FAILURE);
      }

      lXchgList(known_job, JB_ja_tasks, &kept_tasks);
      /* whatever the exchange left in kept_tasks is no longer needed */
      lFreeList(&kept_tasks);
   }

   DRETURN(SGE_EMA_OK);
}
コード例 #27
0
/****** qmaster/threads/sge_scheduler_main() **********************************
*  NAME
*     sge_scheduler_main() -- main function of the scheduler thread 
*
*  SYNOPSIS
*     void * sge_scheduler_main(void *arg) 
*
*  FUNCTION
*     Main function of the scheduler thread, 
*
*  INPUTS
*     void *arg - pointer to the thread settings (type cl_thread_settings_t*)
*
*  RESULT
*     void * - always NULL 
*
*  NOTES
*     MT-NOTE: sge_scheduler_main() is MT safe 
*
*     MT-NOTE: this is a thread function. Do NOT use this function
*     MT-NOTE: in any other way!
*
*  SEE ALSO
*     qmaster/threads/sge_scheduler_initialize() 
*     qmaster/threads/sge_scheduler_cleanup_thread() 
*     qmaster/threads/sge_scheduler_terminate() 
*     qmaster/threads/sge_scheduler_main() 
*******************************************************************************/
void *
sge_scheduler_main(void *arg)
{
   time_t next_prof_output = 0;
   monitoring_t monitor;
   sge_gdi_ctx_class_t *ctx = NULL;
   sge_evc_class_t *evc = NULL;
   lList *alp = NULL;
   sge_where_what_t where_what;
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   bool do_shutdown = false;
   bool do_endlessly = true;
   bool local_ret = true;

   DENTER(TOP_LAYER, "sge_scheduler_main");

   memset(&where_what, 0, sizeof(where_what));

   /*
    * startup
    */
   if (local_ret) {
      /* initialize commlib thread */
      cl_thread_func_startup(thread_config);

      /* initialize monitoring */
      sge_monitor_init(&monitor, thread_config->thread_name, SCH_EXT, SCT_WARNING, SCT_ERROR);
      sge_qmaster_thread_init(&ctx, SCHEDD, SCHEDD_THREAD, true);

      /* register at profiling module */
      set_thread_name(pthread_self(), "Scheduler Thread");
      conf_update_thread_profiling("Scheduler Thread");
      DPRINTF((SFN" started\n", thread_config->thread_name));

      /* initialize schedd_runlog logging */
      schedd_set_schedd_log_file(ctx);
   }

   /* set profiling parameters */
   prof_set_level_name(SGE_PROF_EVENTMASTER, NULL, NULL);
   prof_set_level_name(SGE_PROF_SPOOLING, NULL, NULL);
   prof_set_level_name(SGE_PROF_CUSTOM0, "scheduler", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM1, "pending ticket calculation", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM3, "job sorting", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM4, "job dispatching", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM5, "send orders", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM6, "scheduler event loop", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM7, "copy lists", NULL);
   prof_set_level_name(SGE_PROF_SCHEDLIB4, NULL, NULL);

   /* set-up needed for 'schedule' file */
   serf_init(schedd_serf_record_func, schedd_serf_newline);
   schedd_set_serf_log_file(ctx);

   /*
    * prepare event client/mirror mechanism
    */
   if (local_ret) {
      local_ret = sge_gdi2_evc_setup(&evc, ctx, EV_ID_SCHEDD, &alp, "scheduler");
      DPRINTF(("prepared event client/mirror mechanism\n"));
   }

   /*
    * register as event mirror
    */
   if (local_ret) {
      sge_mirror_initialize(evc, EV_ID_SCHEDD, "scheduler",
                            false, &event_update_func, &sge_mod_event_client,
                            &sge_add_event_client, &sge_remove_event_client,
                            &sge_handle_event_ack);
      evc->ec_register(evc, false, NULL, &monitor);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_RELEASED);
      DPRINTF(("registered at event mirror\n"));
   }

   /*
    * subscribe necessary data
    */
   if (local_ret) {
      ensure_valid_what_and_where(&where_what);
      subscribe_scheduler(evc, &where_what);
      DPRINTF(("subscribed necessary data from event master\n"));
   }

   /* 
    * schedulers main loop
    */
   if (local_ret) {
      while (do_endlessly) {
         bool handled_events = false;
         lList *event_list = NULL;
         int execute = 0;
         double prof_copy = 0.0;
         double prof_total = 0.0;
         double prof_init = 0.0;
         double prof_free = 0.0;
         double prof_run = 0.0;
         lList *orders = NULL;

         if (sconf_get_profiling()) {
            prof_start(SGE_PROF_OTHER, NULL);
            prof_start(SGE_PROF_PACKING, NULL);
            prof_start(SGE_PROF_EVENTCLIENT, NULL);
            prof_start(SGE_PROF_MIRROR, NULL);
            prof_start(SGE_PROF_GDI, NULL);
            prof_start(SGE_PROF_HT_RESIZE, NULL);
            prof_start(SGE_PROF_CUSTOM0, NULL);
            prof_start(SGE_PROF_CUSTOM1, NULL);
            prof_start(SGE_PROF_CUSTOM3, NULL);
            prof_start(SGE_PROF_CUSTOM4, NULL);
            prof_start(SGE_PROF_CUSTOM5, NULL);
            prof_start(SGE_PROF_CUSTOM6, NULL);
            prof_start(SGE_PROF_CUSTOM7, NULL);
            prof_start(SGE_PROF_SCHEDLIB4, NULL);
         } else {
            prof_stop(SGE_PROF_OTHER, NULL);
            prof_stop(SGE_PROF_PACKING, NULL);
            prof_stop(SGE_PROF_EVENTCLIENT, NULL);
            prof_stop(SGE_PROF_MIRROR, NULL);
            prof_stop(SGE_PROF_GDI, NULL);
            prof_stop(SGE_PROF_HT_RESIZE, NULL);
            prof_stop(SGE_PROF_CUSTOM0, NULL);
            prof_stop(SGE_PROF_CUSTOM1, NULL);
            prof_stop(SGE_PROF_CUSTOM3, NULL);
            prof_stop(SGE_PROF_CUSTOM4, NULL);
            prof_stop(SGE_PROF_CUSTOM5, NULL);
            prof_stop(SGE_PROF_CUSTOM6, NULL);
            prof_stop(SGE_PROF_CUSTOM7, NULL);
            prof_stop(SGE_PROF_SCHEDLIB4, NULL);
         }

         /*
          * Wait for new events
          */
         MONITOR_IDLE_TIME(sge_scheduler_wait_for_event(evc, &event_list), (&monitor), mconf_get_monitor_time(), 
                           mconf_is_monitor_message());

         /* If we lost connection we have to register again */
         if (evc->ec_need_new_registration(evc)) {
            lFreeList(&event_list);
            if (evc->ec_register(evc, false, NULL, &monitor) == true) {
               DPRINTF(("re-registered at event master!\n"));
            }
         }

         if (event_list != NULL) {
            /* check for shutdown */
            do_shutdown = (lGetElemUlong(event_list, ET_type, sgeE_SHUTDOWN) != NULL) ? true : false;

            /* update mirror and free data */
            if (do_shutdown == false && sge_mirror_process_event_list(evc, event_list) == SGE_EM_OK) {
               handled_events = true;
               DPRINTF(("events handled\n"));
            } else {
               DPRINTF(("events contain shutdown event - ignoring events\n"));
            }
            lFreeList(&event_list);
         }
 
         /* if we actually got events, start the scheduling run and further event processing */
         if (handled_events == true) {
            lList *answer_list = NULL;
            scheduler_all_data_t copy;
            lList *master_cqueue_list = *(object_type_get_master_list(SGE_TYPE_CQUEUE));
            lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB);
            lList *master_userset_list = *object_type_get_master_list(SGE_TYPE_USERSET);
            lList *master_project_list = *object_type_get_master_list(SGE_TYPE_PROJECT);
            lList *master_exechost_list= *object_type_get_master_list(SGE_TYPE_EXECHOST);
            lList *master_rqs_list= *object_type_get_master_list(SGE_TYPE_RQS);
            lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY);
            lList *master_ckpt_list = *object_type_get_master_list(SGE_TYPE_CKPT);
            lList *master_user_list = *object_type_get_master_list(SGE_TYPE_USER);
            lList *master_ar_list = *object_type_get_master_list(SGE_TYPE_AR);
            lList *master_pe_list = *object_type_get_master_list(SGE_TYPE_PE);
            lList *master_hgrp_list = *object_type_get_master_list(SGE_TYPE_HGROUP);
            lList *master_sharetree_list = *object_type_get_master_list(SGE_TYPE_SHARETREE);

            /* delay scheduling for test purposes, see issue GE-3306 */
            if (SGE_TEST_DELAY_SCHEDULING > 0) {
               sleep(SGE_TEST_DELAY_SCHEDULING);
            }

            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM6);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            if (__CONDITION(INFOPRINT)) {
               dstring ds;
               char buffer[128];

               sge_dstring_init(&ds, buffer, sizeof(buffer));
               DPRINTF(("================[SCHEDULING-EPOCH %s]==================\n",
                        sge_at_time(0, &ds)));
               sge_dstring_free(&ds);
            }

            /*
             * If there were new events then
             * copy/filter data necessary for the scheduler run
             * and run the scheduler method
             */
            memset(&copy, 0, sizeof(copy));

            copy.dept_list = lSelect("", master_userset_list, where_what.where_dept, where_what.what_acldept);
            copy.acl_list = lSelect("", master_userset_list, where_what.where_acl, where_what.what_acldept);

            DPRINTF(("RAW CQ:%d, J:%d, H:%d, C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d\n",
               lGetNumberOfElem(master_cqueue_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(master_centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_ckpt_list),
               lGetNumberOfElem(master_user_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_rqs_list),
               lGetNumberOfElem(master_ar_list),
               lGetNumberOfNodes(NULL, master_sharetree_list, STN_children),
               lGetNumberOfLeafs(NULL, master_sharetree_list, STN_children)
            ));

            sge_rebuild_job_category(master_job_list, master_userset_list,
                                        master_project_list, master_rqs_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_init = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            sge_before_dispatch(evc);

            /* prepare data for the scheduler itself */
            copy.host_list = lCopyList("", master_exechost_list);

            /*
             * Within the scheduler we do only need QIs
             */
            {
               lListElem *cqueue = NULL;
               lEnumeration *what_queue3 = NULL;

               for_each(cqueue, master_cqueue_list) {
                  lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
                  lList *t;

                  if (!qinstance_list) {
                     continue;
                  }

                  /* all_queue_list contains all queue instances with state and full queue name only */
                  if (!what_queue3) {
                     what_queue3 = lWhat("%T(%I%I)", lGetListDescr(qinstance_list), QU_full_name, QU_state);
                  }
                  t = lSelect("t", qinstance_list, NULL, what_queue3);
                  if (t) {
                     if (copy.all_queue_list == NULL) {
                        copy.all_queue_list = lCreateList("all", lGetListDescr(t));
                     }
                     lAppendList(copy.all_queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue, where_what.what_queue2);
                  if (t) {
                     if (copy.queue_list == NULL) {
                        copy.queue_list = lCreateList("enabled", lGetListDescr(t));
                     }
                     lAppendList(copy.queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue2, where_what.what_queue2);
                  if (t) {
                     if (copy.dis_queue_list == NULL) {
                        copy.dis_queue_list = lCreateList("disabled", lGetListDescr(t));
                     }
                     lAppendList(copy.dis_queue_list, t);
                     lFreeList (&t);
                  }
               }
               if (what_queue3) {
                  lFreeWhat(&what_queue3);
               }
            }

            if (sconf_is_job_category_filtering()) {
               copy.job_list = sge_category_job_copy(copy.queue_list, &orders, evc->monitor_next_run);
            } else {
               copy.job_list = lCopyList("", master_job_list);
            }

            /* no need to copy these lists, they are read only used */
            copy.centry_list = master_centry_list;
            copy.ckpt_list = master_ckpt_list;
            copy.hgrp_list = master_hgrp_list;

            /* these lists need to be copied because they are modified during scheduling run */
            copy.share_tree = lCopyList("", master_sharetree_list);
            copy.pe_list = lCopyList("", master_pe_list);
            copy.user_list = lCopyList("", master_user_list);
            copy.project_list = lCopyList("", master_project_list);
            copy.rqs_list = lCopyList("", master_rqs_list);
            copy.ar_list = lCopyList("", master_ar_list);

            /* report number of reduced and raw (in brackets) lists */
            DPRINTF(("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
               lGetNumberOfElem(copy.queue_list),
               lGetNumberOfElem(copy.all_queue_list),
               lGetNumberOfElem(copy.job_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(copy.host_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(copy.centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(copy.pe_list),
               lGetNumberOfElem(copy.ckpt_list),
               lGetNumberOfElem(copy.user_list),
               lGetNumberOfElem(copy.project_list),
               lGetNumberOfElem(copy.rqs_list),
               lGetNumberOfElem(copy.ar_list),
               lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
               lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
            ));

            if (getenv("SGE_ND")) {
               printf("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, "
                  "P:%d, CKPT:%d, US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
                  lGetNumberOfElem(copy.queue_list),
                  lGetNumberOfElem(copy.all_queue_list),
                  lGetNumberOfElem(copy.job_list),
                  lGetNumberOfElem(master_job_list),
                  lGetNumberOfElem(copy.host_list),
                  lGetNumberOfElem(master_exechost_list),
                  lGetNumberOfElem(copy.centry_list),
                  lGetNumberOfElem(copy.acl_list),
                  lGetNumberOfElem(copy.dept_list),
                  lGetNumberOfElem(copy.pe_list),
                  lGetNumberOfElem(copy.ckpt_list),
                  lGetNumberOfElem(copy.user_list),
                  lGetNumberOfElem(copy.project_list),
                  lGetNumberOfElem(copy.rqs_list),
                  lGetNumberOfElem(copy.ar_list),
                  lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
                  lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
                 );
            } else {
               schedd_log("-------------START-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            scheduler_method(evc, &answer_list, &copy, &orders);
            answer_list_output(&answer_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_run = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            /* .. which gets deleted after using */
            lFreeList(&(copy.host_list));
            lFreeList(&(copy.queue_list));
            lFreeList(&(copy.dis_queue_list));
            lFreeList(&(copy.all_queue_list));
            lFreeList(&(copy.job_list));
            lFreeList(&(copy.acl_list));
            lFreeList(&(copy.dept_list));
            lFreeList(&(copy.pe_list));
            lFreeList(&(copy.share_tree));
            lFreeList(&(copy.user_list));
            lFreeList(&(copy.project_list));
            lFreeList(&(copy.rqs_list));
            lFreeList(&(copy.ar_list));

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_free = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);

            /* 
             * need to sync with event master thread
             * if schedd configuration changed then settings in evm can be adjusted
             */
            if (sconf_is_new_config()) {
               /* set scheduler interval / event delivery interval */
               u_long32 interval = sconf_get_schedule_interval();
               if (evc->ec_get_edtime(evc) != interval) {
                  evc->ec_set_edtime(evc, interval);
               }

               /* set job / ja_task event flushing */
               set_job_flushing(evc);

               /* no need to ec_commit here - we do it when resetting the busy state */

               /* now we handled the new schedd config - no need to do it twice */
               sconf_reset_new_config();
            }

            /* block till master handled all GDI orders */
            sge_schedd_block_until_orders_processed(evc->get_gdi_ctx(evc), NULL);
            schedd_order_destroy();

            /*
             * Stop profiling for "schedd run total" and the subcategories
             */
            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM6);
            prof_total = prof_get_measurement_wallclock(SGE_PROF_CUSTOM6, true, NULL);

            if (prof_is_active(SGE_PROF_CUSTOM6)) {
               PROFILING((SGE_EVENT, "PROF: schedd run took: %.3f s (init: %.3f s, copy: %.3f s, "
                          "run:%.3f, free: %.3f s, jobs: %d, categories: %d/%d)",
                           prof_total, prof_init, prof_copy, prof_run, prof_free,
                           lGetNumberOfElem(*object_type_get_master_list(SGE_TYPE_JOB)), sge_category_count(),
                           sge_cs_category_count() ));
            }
            if (getenv("SGE_ND") != NULL) {
               printf("--------------STOP-SCHEDULER-RUN-------------\n");
            } else {
               schedd_log("--------------STOP-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            thread_output_profiling("scheduler thread profiling summary:\n", &next_prof_output);

            sge_monitor_output(&monitor);
         }

         /* reset the busy state */
         evc->ec_set_busy(evc, 0);
         evc->ec_commit(evc, NULL);

         /* stop logging into schedd_runlog (enabled via -tsm) */
         evc->monitor_next_run = false;

         /*
          * pthread cancelation point
          *
          * sge_scheduler_cleanup_thread() is the last function which should
          * be called so it is pushed first
          */
         pthread_cleanup_push(sge_scheduler_cleanup_thread, (void *) &ctx);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_monitor,
                              (void *)&monitor);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_event_client,
                              (void *)evc);
         cl_thread_func_testcancel(thread_config);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         DPRINTF(("passed cancelation point\n"));
      }
コード例 #28
0
ファイル: shadowd.c プロジェクト: StephenDennis/gridengine
/*----------------------------------------------------------------------------*/
/*
 * main() -- entry point of the shadow daemon (sge_shadowd)
 *
 * Sequence:
 *   1. read optional overrides for the recovery timing from the environment
 *      (SGE_CHECK_INTERVAL, SGE_GET_ACTIVE_INTERVAL, SGE_DELAY_TIME,
 *      SGE_TEST_HEARTBEAT_TIMEOUT)
 *   2. set up the GDI context and signal handlers
 *   3. refuse to start if the pid files (unqualified and aliased host name)
 *      point to an already running shadowd
 *   4. change into the qmaster spool directory, switch to the admin user,
 *      move the temporary error log into messages_shadowd.<host>, daemonize
 *      and write the pid file
 *   5. loop until shut_me_down is set: read the qmaster heartbeat file every
 *      check_interval seconds; if the heartbeat did not change for
 *      get_active_interval (+ delay) seconds and this host is a valid shadow
 *      host, take the qmaster lock and start a qmaster
 *
 * argc, argv - command line, handed to parse_cmdline_shadowd()
 *
 * Returns EXIT_SUCCESS after sge_shutdown(); every error path leaves through
 * SGE_EXIT().
 */
int
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

   /* recovery control values, may be overridden from the environment below */
   static int check_interval = CHECK_INTERVAL;
   static int get_active_interval = GET_ACTIVE_INTERVAL;
   static int delay_time = DELAY_TIME;
   static int sge_test_heartbeat = 0;

   char binpath[SGE_PATH_MAX];
   char oldqmaster[SGE_PATH_MAX];

   char shadow_err_file[SGE_PATH_MAX];
   char qmaster_out_file[SGE_PATH_MAX];

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));

   /* initialize recovery control variables */
   {
      char *s;
      int val;

      if ((s = getenv("SGE_CHECK_INTERVAL")) != NULL &&
          sscanf(s, "%d", &val) == 1) {
         check_interval = val;
      }
      if ((s = getenv("SGE_GET_ACTIVE_INTERVAL")) != NULL &&
          sscanf(s, "%d", &val) == 1) {
         get_active_interval = val;
      }
      if ((s = getenv("SGE_DELAY_TIME")) != NULL &&
          sscanf(s, "%d", &val) == 1) {
         delay_time = val;
      }
      if ((s = getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) != NULL &&
          sscanf(s, "%d", &val) == 1) {
         sge_test_heartbeat = val;
      }
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* log to a temporary file until the final log file name is known */
   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx),
               ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx),
               ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   /* from here on the spool directory is mandatory */
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   /* log file names are relative to the spool directory we changed into above */
   snprintf(shadow_err_file, sizeof(shadow_err_file), "messages_shadowd.%s",
            ctx->get_unqualified_hostname(ctx));
   snprintf(qmaster_out_file, sizeof(qmaster_out_file), "messages_qmaster.%s",
            ctx->get_unqualified_hostname(ctx));

   /* carry over what was logged so far, then switch to the final log file */
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      /* hand the commlib file descriptors (if available) to sge_daemonize() */
      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();

      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have two heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;
                     int log_fd;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];

                        snprintf(qmaster_name, sizeof(qmaster_name), "%s%s",
                                 SGE_PREFIX, prognames[QMASTER]);
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        log_fd = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        if (log_fd == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error and fall back to our own
                            *    stdout/stderr
                            */
                           out = 1;
                           err = 2;
                        } else {
                           out = log_fd;
                           err = log_fd;
                        }

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }

                        /*
                         * Close only the descriptor we opened ourselves; in
                         * the fallback case 'out' is our own stdout and must
                         * stay open.
                         */
                        if (log_fd != -1) {
                           close(log_fd);
                        }

                        /*
                         * NOTE(review): the lock taken by qmaster_lock() above
                         * is not released on this path; presumably the started
                         * qmaster removes it - confirm.
                         */
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and heartbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}