Example #1
0
int main(int argc, char **argv) {
   lList *pcmdline = NULL;
   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *ar_lp = NULL;

   lListElem *ar = NULL;

   DENTER_MAIN(TOP_LAYER, "qrsub");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSUB);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSUB, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      goto error_exit;
   }

   /*
   ** stage 1 of commandline parsing
   */
   {
      dstring file = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_ar_request file */
      get_root_file_path(&file, cell_root, SGE_COMMON_DEF_AR_REQ_FILE);
      if ((alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, 
         FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE)) == NULL) {
         /* arguments from $HOME/.sge_ar_request file */
         if (get_user_home_file_path(&file, SGE_HOME_DEF_AR_REQ_FILE, user, &alp)) {
            lFreeList(&alp);
            alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, 
            FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE);
         }
      }
      sge_dstring_free(&file); 

      if (alp) {
         answer_list_output(&alp);
         lFreeList(&pcmdline);
         goto error_exit;
      }
   }
   
   alp = cull_parse_cmdline(QRSUB, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS);

   if (answer_list_print_err_warn(&alp, NULL, "qrsub: ", MSG_WARNING) > 0) {
      lFreeList(&pcmdline);
      goto error_exit;
   }
   
   if (!pcmdline) {
      /* no command line option is present: print help to stderr */
      sge_usage(QRSUB, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      goto error_exit;
   }

   /*
   ** stage 2 of command line parsing
   */
   ar = lCreateElem(AR_Type);

   if (!sge_parse_qrsub(ctx, pcmdline, &alp, &ar)) {
      answer_list_output(&alp);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   ar_lp = lCreateList(NULL, AR_Type);
   lAppendElem(ar_lp, ar);

   alp = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION, &ar_lp, NULL, NULL);
   lFreeList(&ar_lp);
   answer_list_on_error_print_or_exit(&alp, stdout);
   if (answer_list_has_error(&alp)) {
      sge_gdi2_shutdown((void**)&ctx);
      sge_prof_cleanup();
      if (answer_list_has_status(&alp, STATUS_NOTOK_DOAGAIN)) {
         DRETURN(25);
      } else {
         DRETURN(1);
      }
   }

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   DRETURN(0);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
Example #2
0
int main(int argc, char *argv[])
{
   qevent_options enabled_options;
   dstring errors = DSTRING_INIT;
   int i, gdi_setup;
   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL; 
   sge_evc_class_t *evc = NULL;

   DENTER_MAIN(TOP_LAYER, "qevent");

/*    sge_mt_init(); */

   /* dump pid to file */
   qevent_dump_pid_file();

   /* parse command line */
   enabled_options.error_message = &errors;
   qevent_set_option_struct(&enabled_options);
   qevent_parse_command_line(argc, argv, &enabled_options);

   

   /* check if help option is set */
   if (enabled_options.help_option) {
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* are there command line parsing errors ? */
   if (sge_dstring_get_string(enabled_options.error_message)) {
      ERROR((SGE_EVENT, "%s", sge_dstring_get_string(enabled_options.error_message) ));
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }


   log_state_set_log_gui(1);
   sge_setup_sig_handlers(QEVENT);

   /* setup event client */
   gdi_setup = sge_gdi2_setup(&ctx, QEVENT, MAIN_THREAD, &alp);
   if (gdi_setup != AE_OK) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }
   /* TODO: how is the memory we allocate here released ???, SGE_EXIT doesn't */
   if (false == sge_gdi2_evc_setup(&evc, ctx, EV_ID_ANY, &alp, NULL)) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* ok, start over ... */
   /* check for testsuite option */
   
   if (enabled_options.testsuite_option) {
      /* only for testsuite */
      qevent_testsuite_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* check for subscribe option */
   if (enabled_options.subscribe_option) {
      /* only for testsuite */
      qevent_subscribe_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   if (enabled_options.trigger_option_count > 0) {
      lCondition *where =NULL;
      lEnumeration *what = NULL;

      sge_mirror_initialize(evc, EV_ID_ANY, "sge_mirror -trigger", true, 
                            NULL, NULL, NULL, NULL, NULL);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_ACK);

      /* put out information about -trigger option */
      for (i=0;i<enabled_options.trigger_option_count;i++) {
         INFO((SGE_EVENT, "trigger script for %s events: %s\n",
                         qevent_get_event_name((enabled_options.trigger_option_events)[i]), 
                         (enabled_options.trigger_option_scripts)[i]));
         switch((enabled_options.trigger_option_events)[i]) {
            case QEVENT_JB_END:
                  
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I %I %I %I %I %I %I %I)", JB_Type, JB_job_number, JB_ja_tasks, 
                                                              JB_ja_structure, JB_ja_n_h_ids, JB_ja_u_h_ids, 
                                                              JB_ja_s_h_ids,JB_ja_o_h_ids, JB_ja_template);
                  
                  /* register for job events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JOB, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JOB_DEL,true, 1);

                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */
                /*  evc->ec_unsubscribe(evc, sgeE_JOB_LIST); */
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD);
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD_SCHED_PRIORITY);
                  evc->ec_unsubscribe(evc, sgeE_JOB_USAGE);
                  evc->ec_unsubscribe(evc, sgeE_JOB_FINAL_USAGE);
               /*   evc->ec_unsubscribe(evc, sgeE_JOB_ADD); */

                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
            case QEVENT_JB_TASK_END:
            
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I)", JAT_Type, JAT_status);
                  /* register for JAT events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JATASK, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JATASK_DEL,true, 1);
                  
                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */ 
                  evc->ec_unsubscribe(evc, sgeE_JATASK_ADD);
                  evc->ec_unsubscribe(evc, sgeE_JATASK_MOD);
                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
         }        
      }

      while(!shut_me_down) {
         sge_mirror_error error = sge_mirror_process_events(evc);
         if (error == SGE_EM_TIMEOUT && !shut_me_down ) {
            sleep(10);
            continue;
         }
      }

      sge_mirror_shutdown(evc);

      sge_dstring_free(enabled_options.error_message);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
      return 0;
   }


   ERROR((SGE_EVENT, "no option selected\n" ));
   qevent_show_usage();
   sge_dstring_free(enabled_options.error_message);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   return 1;
}
Example #3
0
int main(int argc, char **argv) {
   int ret = 0;
   lList *pcmdline = NULL;
   lList *answer_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   qrstat_env_t qrstat_env;

   DENTER_MAIN(TOP_LAYER, "qrsub");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSTAT);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSTAT, MAIN_THREAD, &answer_list) != AE_OK) {
      answer_list_output(&answer_list);
      goto error_exit;
   }

   qrstat_filter_init(&qrstat_env);
   qrstat_filter_set_ctx(&qrstat_env, ctx);

   /*
    * stage 1: commandline parsing
    */
   {
      dstring file = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_qrstat file */
      get_root_file_path(&file, cell_root, SGE_COMMON_DEF_QRSTAT_FILE);
      if (sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list) == true) {
         /* arguments from $HOME/.sge_qrstat file */
         if (get_user_home_file_path(&file, SGE_HOME_DEF_QRSTAT_FILE, user, &answer_list)) {
            sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list);
         }
      }
      sge_dstring_free(&file); 

      if (answer_list) {
         answer_list_output(&answer_list);
         lFreeList(&pcmdline);
         sge_prof_cleanup();
         SGE_EXIT((void**)&ctx, 1);
      }
   }

   answer_list = cull_parse_cmdline(QRSTAT, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS);
   if (answer_list != NULL) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }
  
   /* 
    * stage 2: evalutate switches and modify qrstat_env
    */
   if (!sge_parse_qrstat(ctx, &answer_list, &qrstat_env, &pcmdline)) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   /* 
    * stage 3: fetch data from master 
    */
   {
      answer_list = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_GET, &qrstat_env.ar_list, 
                     qrstat_env.where_AR_Type, qrstat_env.what_AR_Type, false);

      if (answer_list_has_error(&answer_list)) {
         answer_list_output(&answer_list);
         goto error_exit;
      }
   }

   /*
    * stage 4: create output in correct format
    */
   {
      qrstat_report_handler_t *handler = NULL;

      if (qrstat_env.is_xml) {
         handler = qrstat_create_report_handler_xml(&qrstat_env, &answer_list);
      } else {
         handler = qrstat_create_report_handler_stdout(&qrstat_env, &answer_list);
      }
      if (!qrstat_print(&answer_list, handler, &qrstat_env)) {
         ret = 1;
      }
      if (qrstat_env.is_xml) {
         qrstat_destroy_report_handler_xml(&handler, &answer_list); 
      } else {
         qrstat_destroy_report_handler_stdout(&handler, &answer_list); 
      }
   }

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   DRETURN(ret);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
Example #4
0
int 
main(int argc, char **argv) 
{
   lList *opts_cmdline = NULL;
   lList *opts_defaults = NULL;
   lList *opts_scriptfile = NULL;
   lList *opts_all = NULL;
   lListElem *job = NULL;
   lList *alp = NULL;
   lListElem *ep;
   int exit_status = 0;
   int just_verify;
   int tmp_ret;
   int wait_for_job = 0, is_immediate = 0;
   dstring session_key_out = DSTRING_INIT;
   dstring diag = DSTRING_INIT;
   dstring jobid = DSTRING_INIT;
   u_long32 start, end, step;
   u_long32 num_tasks;
   int count, stat;
   char *jobid_string = NULL;
   bool has_terse;
   drmaa_attr_values_t *jobids = NULL;

   u_long32 prog_number = 0;
   u_long32 myuid = 0;
   const char *sge_root = NULL;
   const char *cell_root = NULL;
   const char *username = NULL;
   const char *qualified_hostname = NULL;
   const char *unqualified_hostname = NULL;
   const char *mastername = NULL;

   DENTER_MAIN(TOP_LAYER, "qsub");

   prof_mt_init();

   /* Set up the program information name */
   sge_setup_sig_handlers(QSUB);

   DPRINTF(("Initializing JAPI\n"));

   if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag)
                                                      != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "\n");
      fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
              sge_dstring_get_string(&diag));
      fprintf(stderr, "\n");
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   prog_number = ctx->get_who(ctx);
   myuid = ctx->get_uid(ctx);
   sge_root = ctx->get_sge_root(ctx);
   cell_root = ctx->get_cell_root(ctx);
   username = ctx->get_username(ctx);
   qualified_hostname = ctx->get_qualified_hostname(ctx);
   unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   mastername = ctx->get_master(ctx, false);

   /*
    * read switches from the various defaults files
    */
   opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * append the commandline switches to the list
    */
   opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp,
                                          argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * show usage if -help was in commandline
    */
   if (opt_list_has_X(opts_cmdline, "-help")) {
      sge_usage(QSUB, stdout);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /*
    * We will only read commandline options from scripfile if the script
    * itself should not be handled as binary
    */
   if (opt_list_is_X_true(opts_cmdline, "-b") ||
       (!opt_list_has_X(opts_cmdline, "-b") &&
        opt_list_is_X_true(opts_defaults, "-b"))) {
      DPRINTF(("Skipping options from script due to -b option\n"));
   } else {
      opt_list_append_opts_from_script(prog_number,
                                       &opts_scriptfile, &alp, 
                                       opts_cmdline, environ);
      tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S,
                                           MSG_WARNING);
      if (tmp_ret > 0) {
         DEXIT;
         SGE_EXIT((void**)&ctx, tmp_ret);
      }
   }

   /*
    * Merge all commandline options and interprete them
    */
   opt_list_merge_command_lines(&opts_all, &opts_defaults, 
                                &opts_scriptfile, &opts_cmdline);

   /*
    * Check if -terse is requested
    */
   has_terse = opt_list_has_X(opts_all, "-terse");

   /* If "-sync y" is set, wait for the job to end. */   
   /* Remove all -sync switches since cull_parse_job_parameter()
    * doesn't know what to do with them. */
   while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
      if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
         wait_for_job = 1;
      }
      
      lRemoveElem(opts_all, &ep);
   }

   if (wait_for_job) {
      DPRINTF(("Wait for job end\n"));
   }

   alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, 
                                  qualified_hostname, opts_all, &job);

   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
      answer_list_output(&alp);
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* Check if job is immediate */
   is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
   DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

   DPRINTF(("Everything ok\n"));

   if (lGetUlong(job, JB_verify)) {
      cull_show_job(job, 0, false);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   if (is_immediate || wait_for_job) {
      pthread_t sigt;
      
      qsub_setup_sig_handlers(); 

      if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 " error preparing signal handling thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
      
      if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                                       DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
         const char *msg = sge_dstring_get_string(&diag);
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 msg?msg:" error starting event client thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
   }
   
   job_get_submit_task_ids(job, &start, &end, &step);
   num_tasks = (end - start) / step + 1;

   if (num_tasks > 1) {
      int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag);
      if (error != DRMAA_ERRNO_SUCCESS) {
         /* No active session here means that japi_enable_job_wait() was
          * interrupted by the signal handler, in which case we just break out
          * quietly. */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the JAPI error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid));
      
      jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
   }
   else if (num_tasks == 1) {
      int error = japi_run_job(&jobid, &job, &diag);
      
      if (error != DRMAA_ERRNO_SUCCESS) {
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the DRMAA error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      jobid_string = strdup(sge_dstring_get_string(&jobid));
      DPRINTF(("job id is: %s\n", jobid_string));

      sge_dstring_free(&jobid);
   }
   else {
      fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
      fprintf(stderr, "\n");
      
      exit_status = 1;
      goto Error;
   }
  
   /* only success message is printed to stdout */

   just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY || 
                  lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
   DPRINTF(("Just verifying job\n"));

   if (!just_verify) {
      const char *output = sge_dstring_get_string(&diag); 

      /* print the tersed output */
      if (has_terse) {
         printf("%s", jobid_string);
      } else if (output != NULL) {
        printf("%s", output);
      } else {
        printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
      }
      printf("\n");
   } else {
      printf(MSG_JOB_VERIFYFOUNDQ);
      printf("\n");
   }   

   if ((wait_for_job || is_immediate) && !just_verify) {
      int event;

      if (is_immediate) {
         fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

         /* We only need to wait for the first task to be scheduled to be able
          * to say that the job is running. */
         tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                             DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                             NULL, &diag);

         if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S,
                  jobid_string);
            fprintf(stderr, "\n");
         }
         /* A job finish event here means that the job was rejected. */
         else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) &&
                  (event == JAPI_JOB_FINISH)) {
            fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);
            
            exit_status = 1;
            goto Error;
         }
         else {
         /* Since we told japi_wait to wait forever, we know that if it gets
          * a timeout, it's because it's been interrupted to exit, in which
          * case we don't complain.  Same for no active session. */
            if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
               fprintf(stderr, "\n");
               fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S,
                       sge_dstring_get_string(&diag));
               fprintf(stderr, "\n");
            }

            exit_status = 1;
            goto Error;
         }
      }
         
      if (wait_for_job) {
         /* Rather than using japi_synchronize on ALL for bulk jobs, we use
          * japi_wait on ANY num_tasks times because with synchronize, we would
          * have to wait for all the tasks to finish before we know if any
          * finished. */
         for (count = 0; count < num_tasks; count++) {
            /* Since there's only one running job in the session, we can just
             * wait for ANY. */
            if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                          DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                          NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
               if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                   (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                  fprintf(stderr, "\n");
                  fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                  fprintf(stderr, "\n");
               }
               
               exit_status = 1;
               goto Error;
            }
            
            /* report how job finished */
            /* If the job is an array job, use the first non-zero exit code as
             * the exit code for qsub. */
            if (exit_status == 0) {
               exit_status = report_exit_status(stat,
                                              sge_dstring_get_string(&jobid));
            }
            /* If we've already found a non-zero exit code, just print the exit
             * info for the task. */
            else {
               report_exit_status(stat, sge_dstring_get_string(&jobid));
            }               
         }
      }
   }

Error:
   FREE(jobid_string);
   lFreeList(&alp);
   lFreeList(&opts_all);
   
   if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
      if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
         fprintf(stderr, "\n");
      }
      else {
         struct timespec ts;
         /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
          * because the signal handler thread called japi_exit().  We know this
          * because if the call to japi_init() fails, we just exit directly.
          * If the call to japi_init() succeeds, then we have an active session,
          * so coming here because of an error would not result in the
          * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
         DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));
         
         sge_relative_timespec(15, &ts);
         sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
         
         while (!exited) {
            if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
               DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
               break;
            }
         }
         
         sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
      }
   }

   sge_prof_cleanup();

   /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
    * supended, SGE_EXIT() hangs. */
   exit(exit_status);
   DEXIT;
   return exit_status;
}
Example #5
0
/*----------------------------------------------------------------------------*/
int 
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
/*    const char *cp; */
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

#if 1

static int check_interval = CHECK_INTERVAL;
static int get_active_interval = GET_ACTIVE_INTERVAL;
static int delay_time = DELAY_TIME;
static int sge_test_heartbeat = 0;

char binpath[SGE_PATH_MAX];
char oldqmaster[SGE_PATH_MAX];

char shadow_err_file[SGE_PATH_MAX];
char qmaster_out_file[SGE_PATH_MAX];

#endif

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   /* initialize recovery control variables */
   {
      char *s;
      int val;
      if ((s=getenv("SGE_CHECK_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         check_interval = val;
      if ((s=getenv("SGE_GET_ACTIVE_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         get_active_interval = val;
      if ((s=getenv("SGE_DELAY_TIME")) &&
          sscanf(s, "%d", &val) == 1)
         delay_time = val;
      if ((s=getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) &&
          sscanf(s, "%d", &val) == 1)
         sge_test_heartbeat = val;
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   sprintf(shadow_err_file, "messages_shadowd.%s", ctx->get_unqualified_hostname(ctx));
   sprintf(qmaster_out_file, "messages_qmaster.%s", ctx->get_unqualified_hostname(ctx));
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();


      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have to heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];

                        strcpy(qmaster_name, SGE_PREFIX);
                        strcat(qmaster_name, prognames[QMASTER]); 
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        out = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        err = out;
                        if (out == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error
                            */
                           out = 1;
                           err = 2;
                        } 

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }
                        close(out);
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and hearbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}
Example #6
0
int main(int argc, char **argv) {
   int ret = STATUS_OK;
   lList *alp = NULL;
   lList *request_list = NULL;
   lList *cmdline = NULL;
   lListElem *aep;
   int all_jobs = 0;
   int all_users = 0;
   u_long32 gdi_cmd = SGE_GDI_MOD; 
   int tmp_ret;
   int me_who;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "qalter");

   prof_mt_init();

   /*
   ** get command name: qalter or qresub
   */
   if (!strcmp(sge_basename(argv[0], '/'), "qresub")) {
      DPRINTF(("QRESUB\n"));
      me_who = QRESUB;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qhold")) {
      DPRINTF(("QHOLD\n"));
      me_who = QHOLD;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qrls")) {
      DPRINTF(("QRLS\n"));
      me_who = QRLS;
   } else {
      DPRINTF(("QALTER\n"));
      me_who = QALTER;
   } 

   log_state_set_log_gui(1);
   sge_setup_sig_handlers(me_who);

   if (sge_gdi2_setup(&ctx, me_who, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
   ** begin to work
   */
   opt_list_append_opts_from_qalter_cmdline(me_who, &cmdline, &alp, argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, MSG_QALTER, MSG_QALTER, MSG_QALTERWARNING);
   
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }
   
   /* handling the case that no command line parameter was specified */
   if ((me_who == QHOLD || me_who == QRLS) && lGetNumberOfElem(cmdline) == 1) {
      /* -h option is set implicitly for QHOLD and QRLS */
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if ((me_who == QRESUB || me_who == QALTER) && lGetNumberOfElem(cmdline) == 0) {
      /* qresub and qalter have nothing set */ 
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if (opt_list_has_X(cmdline, "-help")) {
      /* -help was specified */
      sge_usage(me_who, stdout);
      SGE_EXIT((void**)&ctx, 0);
   }
   
   alp = qalter_parse_job_parameter(me_who, cmdline, &request_list, &all_jobs, 
                                    &all_users);

   DPRINTF(("all_jobs = %d, all_user = %d\n", all_jobs, all_users));

   if (request_list && verify) {
      /* 
         got a request list containing one element 
         for each job to be modified 
         save jobid all fields contain the same fields
         so we may use show_job() with the first job
         in our list 
         The jobid's in our request list get printed before
         show_job()
      */
      cull_show_job(lFirst(request_list), FLG_QALTER, false);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
   }

   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if ((me_who == QALTER) ||
       (me_who == QHOLD) ||
       (me_who == QRLS) 
      ) {
      DPRINTF(("QALTER\n"));
      gdi_cmd = SGE_GDI_MOD;
   } else if (me_who == QRESUB){
      DPRINTF(("QRESUB\n"));
      gdi_cmd = SGE_GDI_COPY;
   } else {
      printf("unknown binary name.\n");
      SGE_EXIT((void**)&ctx, 1);
   }

   if (all_jobs)
      gdi_cmd |= SGE_GDI_ALL_JOBS;
   if (all_users)
      gdi_cmd |= SGE_GDI_ALL_USERS;

   alp = ctx->gdi(ctx, SGE_JB_LIST, gdi_cmd, &request_list, NULL, NULL); 
   for_each (aep, alp) {
      printf("%s\n", lGetString(aep, AN_text));
      if (ret == STATUS_OK) {
         ret = lGetUlong(aep, AN_status);
      }
   }
Example #7
0
/*-------------------------------------------------------------------------*/
int main(int argc, char **argv)
{
   int ret;
   int my_pid;
   int ret_val;
   int printed_points = 0;
   int max_enroll_tries;
   static char tmp_err_file_name[SGE_PATH_MAX];
   time_t next_prof_output = 0;
   int execd_exit_state = 0;
   lList **master_job_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *alp = NULL;

   DENTER_MAIN(TOP_LAYER, "execd");

#if defined(LINUX)
   gen_procList ();
#endif

   prof_mt_init();

   set_thread_name(pthread_self(),"Execd Thread");

   prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); 
   prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); 

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* This needs a better solution */
   umask(022);
      
   /* Initialize path for temporary logging until we chdir to spool */
   my_pid = getpid();
   sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid));
   log_state_set_log_file(tmp_err_file_name);

   /* exit func for SGE_EXIT() */
   sge_sig_handler_in_main_loop = 0;
   sge_setup_sig_handlers(EXECD);

   if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }
   ctx->set_exit_func(ctx, execd_exit_func);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /* prepare daemonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_prepare(ctx);
   }

   if ((ret=sge_occupy_first_three())>=0) {
      CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret));
      SGE_EXIT((void**)&ctx, 1);
   }

   lInit(nmv);

   /* unset XAUTHORITY if set */
   if (getenv("XAUTHORITY") != NULL) {
      sge_unsetenv("XAUTHORITY");
   }

   parse_cmdline_execd(argv);   
   
   /* exit if we can't get communication handle (bind port) */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[EXECD],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;

      if (max_enroll_tries <= 0 || shut_me_down) {
         /* exit after 30 seconds */
         if (printed_points != 0) {
            printf("\n");
         }
         CRITICAL((SGE_EVENT, MSG_COM_ERROR));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[EXECD],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
        if (max_enroll_tries < 27) {
           printf(".");
           printed_points++;
           fflush(stdout);
        }
      }
   }

   if (printed_points != 0) {
      printf("\n");
   }

   /*
    * now the commlib up and running. Set execd application status function 
    * ( commlib callback function for qping status information response 
    *   messages (SIRM) )
    */
   ret_val = cl_com_set_status_func(sge_execd_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)) );
   }

   /* test connection */
   {
      cl_com_SIRM_t* status = NULL;
      ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx),
                                               (char *)ctx->get_master(ctx, true),
                                               (char*)prognames[QMASTER], 1, &status);
      if (ret_val != CL_RETVAL_OK) {
         ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
         ERROR((SGE_EVENT, MSG_CONF_NOCONFBG));
      }
      cl_com_free_sirm_message(&status);
   }
   
   /* finalize daeamonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_finalize(ctx);
   }

   /* daemonizes if qmaster is unreachable */   
   sge_setup_sge_execd(ctx, tmp_err_file_name);

   /* are we using qidle or not */
   sge_ls_qidle(mconf_get_use_qidle());
   sge_ls_gnu_ls(1);
   
   DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle()));

   /* test load sensor (internal or external) */
   {
      lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx));
      lFreeList(&report_list);
   }
   
   /* here we have to wait for qmaster registration */
   while (sge_execd_register_at_qmaster(ctx, false) != 0) {
      if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) {
         /* This is no error */
         DPRINTF(("*****  got SGE_COM_ACCESS_DENIED from qmaster  *****\n"));
      }
      if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) {
         execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE;
         break;
      }
      if (shut_me_down != 0) {
         break;
      }
      sleep(30);
   }

   /* 
    * Terminate on SIGTERM or hard communication error
    */
   if (execd_exit_state != 0 || shut_me_down != 0) {
      sge_shutdown((void**)&ctx, execd_exit_state);
      DRETURN(execd_exit_state);
   }

   /*
    * We write pid file when we are connected to qmaster. Otherwise an old
    * execd might overwrite our pidfile.
    */
   sge_write_pid(EXECD_PID_FILE);

   /*
    * At this point we are sure we are the only sge_execd and we are connected
    * to the current qmaster. First we have to report any reaped children
    * that might exist.
    */
   starting_up();

   /*
    * Log a warning message if execd hasn't been started by a superuser
    */
   if (!sge_is_start_user_superuser()) {
      WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT));
   }   

#ifdef COMPILE_DC
   if (ptf_init()) {
      CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF));
      SGE_EXIT((void**)&ctx, 1);
   }
   INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF));
#endif

   master_job_list = object_type_get_master_list(SGE_TYPE_JOB);
   *master_job_list = lCreateList("Master_Job_List", JB_Type);
   job_list_read_from_disk(master_job_list, "Master_Job_List",
                           0, SPOOL_WITHIN_EXECD, 
                          job_initialize_job);
   

   /* clean up jobs hanging around (look in active_dir) */
   clean_up_old_jobs(ctx, 1);
   execd_trash_load_report();
   sge_set_flush_lr_flag(true);

   sge_sig_handler_in_main_loop = 1;

   if (thread_prof_active_by_id(pthread_self())) {
      prof_start(SGE_PROF_CUSTOM1, NULL);
      prof_start(SGE_PROF_CUSTOM2, NULL);
      prof_start(SGE_PROF_GDI_REQUEST, NULL);
   } else {
      prof_stop(SGE_PROF_CUSTOM1, NULL);
      prof_stop(SGE_PROF_CUSTOM2, NULL);
      prof_stop(SGE_PROF_GDI_REQUEST, NULL);
   }

   PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1);

   /* Start dispatching */
   execd_exit_state = sge_execd_process_messages(ctx);


   /*
    * This code is only reached when dispatcher terminates and execd goes down.
    */

   /* log if we received SIGPIPE signal */
   if (sge_sig_handler_sigpipe_received) {
       sge_sig_handler_sigpipe_received = 0;
       INFO((SGE_EVENT, "SIGPIPE received\n"));
   }

#if defined(LINUX)
   free_procList();
#endif
   lFreeList(master_job_list);

   PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1);
   if (prof_is_active(SGE_PROF_ALL)) {
     time_t now = (time_t)sge_get_gmt();

      if (now > next_prof_output) {
         prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n");
         prof_reset(SGE_PROF_ALL,NULL);
         next_prof_output = now + 60;
      }
   }
   sge_prof_cleanup();

   sge_shutdown((void**)&ctx, execd_exit_state);
   DRETURN(execd_exit_state);
}