コード例 #1
0
ファイル: qdel.c プロジェクト: HPCKP/gridengine
int main(int argc, char **argv) {
   /* lListElem *rep, *nxt_rep, *jep, *aep, *jrep, *idep; */
   int ret = 0;
   lListElem *aep, *idep;
   lList *jlp = NULL, *alp = NULL, *pcmdline = NULL, *ref_list = NULL, *user_list=NULL;
   u_long32 force = 0;
   int wait;
   unsigned long status = 0;
   bool have_master_privileges;
   cl_com_handle_t* handle = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "qdel");

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QDEL, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      goto error_exit;
   }

   if (!sge_parse_cmdline_qdel(++argv, environ, &pcmdline, &alp)) {
      /*
      ** high level parsing error! show answer list
      */
      answer_list_output(&alp);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   if (!sge_parse_qdel(&pcmdline, &ref_list, &force, &user_list, &alp)) {
      /*
      ** low level parsing error! show answer list
      */
      answer_list_output(&alp);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   DPRINTF(("force     = "sge_u32"\n", force));
   
   if (user_list) {
      lListElem *id;

      if (lGetNumberOfElem(ref_list) == 0){
         id = lAddElemStr(&ref_list, ID_str, "0", ID_Type);
         lSetList(id, ID_user_list, user_list);
      } else {
         for_each(id, ref_list){
            lSetList(id, ID_user_list, user_list);
         }
      }
   }
コード例 #2
0
/*
 * str2nm_converter: for every command line argument, print the argument
 * together with the value lStr2NmGenerator() yields for it against nmv.
 * Arguments are handled from the last one down to the first.
 *
 * Returns -1 when called without arguments, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    int idx;

    DENTER_MAIN(CULL_LAYER, "str2nm_converter");

    /* nothing to convert: say goodbye and report failure */
    if (argc == 1) {
      printf("Pfirdi God !\n");
      return -1;
    }

    /* walk the argument vector backwards, skipping argv[0] */
    for (idx = argc - 1; idx >= 1; idx--) {
       char *name = argv[idx];

       printf("%s -> %d\n", name, lStr2NmGenerator(name, nmv));
    }

    DEXIT;
    return 0;
}
コード例 #3
0
/*
 * Test driver for resource utilization.
 *
 * Runs test_normal_utilization() followed by test_extensive_utilization()
 * and returns the sum of their results; a non-zero sum is reported on
 * stdout as a failure.
 */
int main(int argc, char *argv[])
{
   int failures;

   DENTER_MAIN(TOP_LAYER, "test_resource_utilization");

   sge_mt_init();
   lInit(nmv);

   /* keep the two test groups strictly in this order */
   failures = test_normal_utilization();
   failures += test_extensive_utilization();

   if (failures == 0) {
      return 0;
   }

   printf("\ntest failed!\n");
   return failures;
}
コード例 #4
0
/*
 * Test driver for load formula validation and evaluation.
 *
 * Builds a complex entry list holding a single "num_proc" entry (value 4)
 * and a host carrying a copy of it, then
 *   - checks that every formula in the "accepted" table validates and that
 *     scaled_mixed_load() yields the expected value, and
 *   - checks that every formula in the "rejected" table fails validation.
 *
 * Returns the total number of failed checks.
 */
int main(int argc, char *argv[])
{
   int pos_failures = 0;
   int neg_failures = 0;
   int idx;
   lList *answers = NULL;

   /* formulas expected to validate, with the value expected from evaluation */
   filter_test_t accepted[] = {
      {"num_proc", 4},
      {"$num_proc", 4},
      {"$num_proc*2", 8},
      {"$num_proc*0.5", 2.0},
      {"num_proc*2", 8},
      {"num_proc+1", 5},
      {"$num_proc-2", 2},
      {"$num_proc+0.1", 4.1},
      {"1+$num_proc+0.1", 5.1},
      {NULL, 0}
   };

   /* formulas expected to be rejected by validation */
   filter_test_t rejected[] = {
      {"2*num_proc", 0},
      {"2,0+num_proc", 0},
      {"none", 0},
      {NULL, 0}
   };

   lList *centries;
   lList *host_centries;
   lListElem *num_proc;
   lListElem *exec_host;

   DENTER_MAIN(TOP_LAYER, "test_sge_load_formula");

   lInit(nmv);

   /* complex entry list with the single entry "num_proc" = 4 */
   centries = lCreateList("", CE_Type);
   num_proc = lCreateElem(CE_Type);
   lSetString(num_proc, CE_name, "num_proc");
   lSetString(num_proc, CE_stringval, "4");
   lSetDouble(num_proc, CE_doubleval, 4);
   lAppendElem(centries, num_proc);

   /* host whose consumable config list holds a copy of that entry */
   host_centries = lCreateList("", CE_Type);
   lAppendElem(host_centries, lCopyElem(num_proc));
   exec_host = lCreateElem(EH_Type);
   lSetList(exec_host, EH_consumable_config_list, host_centries);

   /* positive tests: the table ends at the entry with a NULL formula */
   for (idx = 0; accepted[idx].formula != NULL; idx++) {
      filter_test_t *tc = &accepted[idx];
      double result;

      if (!validate_load_formula(tc->formula, &answers, centries, "load_formula")) {
         answer_list_output(&answers);
         pos_failures++;
      }

      /* evaluation is checked even when validation already failed */
      result = scaled_mixed_load(tc->formula, NULL, exec_host, centries);
      if (result != tc->value) {
         printf("got %f, but expected %f(%g,%g)\n", result, tc->value, result, tc->value);
         pos_failures++;
      }
   }

   /* negative tests: validation reporting success counts as a failure */
   for (idx = 0; rejected[idx].formula != NULL; idx++) {
      filter_test_t *tc = &rejected[idx];

      if (validate_load_formula(tc->formula, &answers, centries, "load_formula") == true) {
         printf("load_formula \"%s\" returned no error\n", tc->formula);
         neg_failures++;
      }
      lFreeList(&answers);
   }

   lFreeList(&centries);
   lFreeElem(&exec_host);

   printf("\n");
   printf("%d positiv test(s) failed\n", pos_failures);
   printf("%d negativ test(s) failed\n", neg_failures);

   DRETURN(pos_failures + neg_failures);
}
コード例 #5
0
/*
 * test_berkeleydb_mt <url> <threads> [<delay>]
 *
 * Creates and starts a spooling context for <url>, then runs work() in
 * <threads> additional threads plus once in the calling thread, waits for
 * all started threads and shuts the spooling context down again.
 *
 * Exits with a failure status when the arguments are unusable, when memory
 * for the thread bookkeeping cannot be allocated or when the spooling
 * context cannot be created/started.  Returns EXIT_FAILURE when not all
 * requested threads could be started, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
   const char *url;
   int i, threads;
   int started = 0;          /* number of threads actually created */
   int ret = EXIT_SUCCESS;
   pthread_t *t;
   int *args;

   lList *answer_list = NULL;
   lListElem *spooling_context;

   DENTER_MAIN(TOP_LAYER, "test_berkeleydb_mt");

   /* parse commandline parameters */
   if (argc < 3) {
      ERROR((SGE_EVENT, "usage: test_berkeleydb_mt <url> <threads> [<delay>]\n"));
      ERROR((SGE_EVENT, "       <url>     = path or host:database\n"));
      ERROR((SGE_EVENT, "       <threads> = number of threads\n"));
      ERROR((SGE_EVENT, "       <delay>   = delay after writing [ms]\n"));
      SGE_EXIT(NULL, 1);
   }

   url = argv[1];
   threads = atoi(argv[2]);

   /* a negative count would turn into a huge allocation size below */
   if (threads < 0) {
      ERROR((SGE_EVENT, "invalid number of threads: %s\n", argv[2]));
      SGE_EXIT(NULL, 1);
   }

   if (argc > 3) {
      delay = atoi(argv[3]);
   }

   /* allocate memory for pthreads and arguments */
   t = (pthread_t *)malloc(threads * sizeof(*t));
   args = (int *)malloc(threads * sizeof(*args));

   /* malloc(0) may legitimately return NULL, so only check for threads > 0 */
   if (threads > 0 && (t == NULL || args == NULL)) {
      ERROR((SGE_EVENT, "out of memory allocating data for %d threads\n", threads));
      sge_free(&t);
      sge_free(&args);
      SGE_EXIT(NULL, EXIT_FAILURE);
   }

   DPRINTF(("writing to database %s from %d threads\n", url, threads));

   /* initialize spooling */
   spooling_context = spool_create_dynamic_context(&answer_list, NULL, url, NULL);
   answer_list_output(&answer_list);
   if (spooling_context == NULL) {
      SGE_EXIT(NULL, EXIT_FAILURE);
   }

   spool_set_default_context(spooling_context);

   if (!spool_startup_context(&answer_list, spooling_context, true)) {
      answer_list_output(&answer_list);
      SGE_EXIT(NULL, EXIT_FAILURE);
   }
   answer_list_output(&answer_list);

   /* let n threads do parallel spooling; each gets its 1-based number */
   for (i = 0; i < threads; i++) {
      args[i] = i + 1;
      if (pthread_create(&(t[i]), NULL, work, (void*)(&args[i])) != 0) {
         /* t[i] is not a valid thread id: stop here, join only what runs */
         ERROR((SGE_EVENT, "could not create thread %d\n", i + 1));
         ret = EXIT_FAILURE;
         break;
      }
      started++;
   }

   /* also work in current thread */
   work((void *)0);

   /* wait for termination of all threads that were really started */
   for (i = 0; i < started; i++) {
      pthread_join(t[i], NULL);
   }

   /* shutdown spooling */
   spool_shutdown_context(&answer_list, spooling_context);
   answer_list_output(&answer_list);

   /* args is only released after the join: the threads point into it */
   sge_free(&t);
   sge_free(&args);

   DEXIT;
   return ret;
}
コード例 #6
0
ファイル: qmon_main.c プロジェクト: HPCKP/gridengine
/*-------------------------------------------------------------------------*/
/*
 * qmon entry point.
 *
 * Installs the signal handler, performs the SGE setup (in help mode when the
 * first argument is "-help"), initializes Xmt/Xt with the application
 * resources, loads dialog resources, preferences, graphics contexts, colors
 * and icons, creates the main control window, starts polling and finally
 * enters the Xt main loop.
 */
int main(
int argc,
char **argv 
) {
   Widget StartupWindow = 0;
   Arg  args[10];
   Cardinal ac = 0;
#ifdef L10N
   char *lang;
#endif   
/*    static char app_name[1024]; */

   int i;
   XrmDatabase qmon_database;
   static char progname[256];

   DENTER_MAIN(TOP_LAYER, "qmon_main");

#ifndef L10N
   setlocale(LC_ALL, "C");
   putenv("LANG=C"); 
   putenv("LC_ALL=C"); 
#endif

   /* INSTALL SIGNAL HANDLER */
   qmonInstSignalHandler();

   /*
   ** argv[0] is caller controlled and may be longer than progname,
   ** so copy at most sizeof(progname) - 1 bytes and terminate explicitly
   */
   strncpy(progname, argv[0], sizeof(progname) - 1);
   progname[sizeof(progname) - 1] = '\0';

   /* GENERAL SGE SETUP */
   if (!(argc > 1 && !strcmp(argv[1], "-help"))) {
      qmonInitSge(&ctx, progname, 0);
   } else {  
      /* -help */
      qmonInitSge(&ctx, progname, 1);
   }

   SGE_ROOT = ctx->get_sge_root(ctx);

   /*
   ** Attention !!! args[] above is a fixed array of 10 entries,
   ** enlarge it if you add additional args
   */
   ac = 0;
   XtSetArg(args[ac], XmtNconfigDir, SGE_ROOT); ac++;
   XtSetArg(args[ac], XmtNconfigPath, "%R/locale/%L/%N%S:%R/locale/%l/%N%S:%R/locale/%l_%t.%c/%N%S:%R/qmon/%N%S"); ac++;
/*    XtSetArg(args[ac], XmtNpixmapFilePath, "%R/qmon/PIXMAPS/%N.xpm"); ac++; */
/*    XtSetArg(args[ac], XmtNcontextHelpFile, "qmon_help"); ac++; */
   XtSetArg(args[ac], XtNtitle, "QMON +++ Main Control"); ac++;
   
   /* 
   ** SETUP XMT, here qmon_version is checked, 
   ** so here an exit is possible 
   */
   AppShell = XmtInitialize( &AppContext, APP_NAME,
                             NULL, 0,
                             &argc, argv, 
                             qmon_fallbacks,
                             args, ac);

   sigint_id = XtAppAddSignal(AppContext, sigint_callback, NULL);
   
#if 0
   /*
   ** log the actions performed by qmon
   */
   XtAppAddActionHook(AppContext, TraceActions, NULL);
#endif

#ifdef L10N
   /*
   ** Internationalization:
   ** The qmon_messages.ad file is installed under 
   ** $SGE_ROOT/qmon/locale/<LANG>/qmon_messages.ad
   ** Read in the _Messages_ catalogue
   ** (skipped when the language resolves to "POSIX" or "C")
   */
   if (((lang = getenv("LC_MESSAGES")) || (lang = getenv("LC_ALL")) ||
         (lang = getenv("LANG"))) && lang && strcasecmp(lang, "POSIX") &&
         strcasecmp(lang, "C")) {
      DPRINTF(("lang: '%s'\n", lang));
      if (!strcasecmp(lang, "relabel"))   
         lang = "C";
      XmtLoadResourceFile(AppShell, "qmon_messages", False, True);
   }   
#endif

#if 0   
   strcpy(app_name, "QMON +++ Main Control");
   if (strcmp(uti_state_get_default_cell(), "default")) {
      strcat(app_name, " @ ");
      strncat(app_name, uti_state_get_default_cell(), 1000);
   }

   XtVaSetValues(AppShell, 
              XtNtitle, XmtLocalize(AppShell, app_name,
                                    "QMON +++ Main Control"), NULL);
#endif   
   XtVaSetValues(AppShell, 
              XtNtitle, XmtLocalize(AppShell, "QMON +++ Main Control",
                                    "QMON +++ Main Control"), NULL);
   
   /*
   ** we must shift the usage here for internationalization
   */
   if (helpset) {
      qmonUsage(AppShell);
      qmonExitFunc(0);
   }
   
   /* 
   ** get the dialog resource files, they override any settings from the
   ** Qmon app default file concerning dialogue descriptions
   */
   qmon_database = XtDatabase(XtDisplay(AppShell));
   for (i=0; qmon_dialogs[i]; i++) {
      XrmPutLineResource(&qmon_database, qmon_dialogs[i]);
   }
#if 0
   /*
   ** Debugging:
   ** write contents of Resource DB to file DB.TXT in cwd
   */
   XrmPutFileDatabase(qmon_database, "DB.TXT");
#endif   

   /* 
   ** read qmon preferences file ~/.qmon_preferences, it contains
   ** customization info for Queue and Job Control dialogues
   */
   qmonReadPreferences();
   
   /*
   ** display of startup screen ?
   */
   if (!nologo) {
      /* show the user we're starting up */
      StartupWindow = qmonStartupWindow(AppShell);
   }
   
   /* 
   ** INITIALIZE Graphics Contexts 
   */
   qmonCreateGC(AppShell);

   /* 
   ** Allocate Pixel values 
   */
   qmonAllocColor(AppShell);

   /* 
   ** Cache all Icons 
   */
   qmonLoadIcons();

   /* 
   ** set the close button callback 
   ** cause the close button to call the qmonExitCB() 
   ** set the icon and iconName after qmonLoadIcons()
   */
   XmtCreatePixmapIcon(AppShell, qmonGetIcon("mcicon"), None);
   XtVaSetValues(AppShell, XtNiconName, "qmon:Main Control", NULL);
   XmtAddDeleteCallback(AppShell, XmDO_NOTHING, qmonExitCB, NULL);

   /* 
   ** CREATE MainControl 
   */
   MainControl = qmonCreateMainControl(AppShell);

   /* 
   ** install context help 
   */
   XmtHelpInstallContextHelp(AppShell, XmtHelpContextHelpCallback, NULL);
/*    XmtHelpParseFile(AppShell, "qmon_help"); */


   /* 
   ** initialize QmonMirrorList entries 
   */
   qmonMirrorListInit();
   
   /* 
   ** setup timers 
   */
   qmonStartPolling(AppContext);
   
#ifdef HAS_EDITRES
    /* 
    ** Plug in editres protocol handler 
    */
    XtAddEventHandler (AppShell, (EventMask)0, True,
        _XEditResCheckMessages, (XtPointer)NULL);
#endif


   /* 
   ** Popdown startup screen and destroy it
   ** (only created above when nologo is not set)
   */
   if (!nologo) {
      sleep(1);
      XtDestroyWidget(StartupWindow);
   }   


   XtRealizeWidget(AppShell);
   XtAppMainLoop(AppContext);

   return 0;
}
コード例 #7
0
ファイル: qrstat.c プロジェクト: valhallasw/son-of-gridengine
/*
 * qrstat entry point.
 *
 * Stage 1 collects switches from the cell-wide and per-user default files
 *         and from the command line,
 * stage 2 evaluates them into qrstat_env,
 * stage 3 fetches the SGE_AR_LIST objects via GDI,
 * stage 4 prints them through the XML or the stdout report handler.
 *
 * Returns 0 on success and 1 when printing failed; every other failure
 * leaves through error_exit (or SGE_EXIT) with status 1.
 */
int main(int argc, char **argv) {
   int ret = 0;
   lList *pcmdline = NULL;
   lList *answer_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   qrstat_env_t qrstat_env;

   /* register under the real program name (was the copy-pasted "qrsub") */
   DENTER_MAIN(TOP_LAYER, "qrstat");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSTAT);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSTAT, MAIN_THREAD, &answer_list) != AE_OK) {
      answer_list_output(&answer_list);
      goto error_exit;
   }

   qrstat_filter_init(&qrstat_env);
   qrstat_filter_set_ctx(&qrstat_env, ctx);

   /*
    * stage 1: commandline parsing
    */
   {
      dstring file = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_qrstat file */
      get_root_file_path(&file, cell_root, SGE_COMMON_DEF_QRSTAT_FILE);
      if (sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list) == true) {
         /* arguments from $HOME/.sge_qrstat file */
         if (get_user_home_file_path(&file, SGE_HOME_DEF_QRSTAT_FILE, user, &answer_list)) {
            sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list);
         }
      }
      sge_dstring_free(&file); 

      /*
       * any answer left behind by the file parsing ends the program;
       * NOTE(review): unlike error_exit this path does not call
       * sge_gdi2_shutdown() itself - confirm SGE_EXIT() covers it
       */
      if (answer_list) {
         answer_list_output(&answer_list);
         lFreeList(&pcmdline);
         sge_prof_cleanup();
         SGE_EXIT((void**)&ctx, 1);
      }
   }

   answer_list = cull_parse_cmdline(QRSTAT, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS);
   if (answer_list != NULL) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }
  
   /* 
    * stage 2: evaluate switches and modify qrstat_env
    */
   if (!sge_parse_qrstat(ctx, &answer_list, &qrstat_env, &pcmdline)) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   /* 
    * stage 3: fetch data from master 
    */
   {
      answer_list = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_GET, &qrstat_env.ar_list, 
                     qrstat_env.where_AR_Type, qrstat_env.what_AR_Type, false);

      if (answer_list_has_error(&answer_list)) {
         answer_list_output(&answer_list);
         goto error_exit;
      }
   }

   /*
    * stage 4: create output in correct format
    */
   {
      qrstat_report_handler_t *handler = NULL;

      if (qrstat_env.is_xml) {
         handler = qrstat_create_report_handler_xml(&qrstat_env, &answer_list);
      } else {
         handler = qrstat_create_report_handler_stdout(&qrstat_env, &answer_list);
      }
      if (!qrstat_print(&answer_list, handler, &qrstat_env)) {
         ret = 1;
      }
      /* destroy the handler with the function matching its creator */
      if (qrstat_env.is_xml) {
         qrstat_destroy_report_handler_xml(&handler, &answer_list); 
      } else {
         qrstat_destroy_report_handler_stdout(&handler, &answer_list); 
      }
   }

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   DRETURN(ret);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
コード例 #8
0
ファイル: qsub.c プロジェクト: BlueBolt/BB_GridEngine
int 
main(int argc, char **argv) 
{
   lList *opts_cmdline = NULL;
   lList *opts_defaults = NULL;
   lList *opts_scriptfile = NULL;
   lList *opts_all = NULL;
   lListElem *job = NULL;
   lList *alp = NULL;
   lListElem *ep;
   int exit_status = 0;
   int just_verify;
   int tmp_ret;
   int wait_for_job = 0, is_immediate = 0;
   dstring session_key_out = DSTRING_INIT;
   dstring diag = DSTRING_INIT;
   dstring jobid = DSTRING_INIT;
   u_long32 start, end, step;
   u_long32 num_tasks;
   int count, stat;
   char *jobid_string = NULL;
   bool has_terse;
   drmaa_attr_values_t *jobids = NULL;

   u_long32 prog_number = 0;
   u_long32 myuid = 0;
   const char *sge_root = NULL;
   const char *cell_root = NULL;
   const char *username = NULL;
   const char *qualified_hostname = NULL;
   const char *unqualified_hostname = NULL;
   const char *mastername = NULL;

   DENTER_MAIN(TOP_LAYER, "qsub");

   prof_mt_init();

   /* Set up the program information name */
   sge_setup_sig_handlers(QSUB);

   DPRINTF(("Initializing JAPI\n"));

   if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag)
                                                      != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "\n");
      fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
              sge_dstring_get_string(&diag));
      fprintf(stderr, "\n");
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   prog_number = ctx->get_who(ctx);
   myuid = ctx->get_uid(ctx);
   sge_root = ctx->get_sge_root(ctx);
   cell_root = ctx->get_cell_root(ctx);
   username = ctx->get_username(ctx);
   qualified_hostname = ctx->get_qualified_hostname(ctx);
   unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   mastername = ctx->get_master(ctx, false);

   /*
    * read switches from the various defaults files
    */
   opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * append the commandline switches to the list
    */
   opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp,
                                          argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * show usage if -help was in commandline
    */
   if (opt_list_has_X(opts_cmdline, "-help")) {
      sge_usage(QSUB, stdout);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /*
    * We will only read commandline options from scripfile if the script
    * itself should not be handled as binary
    */
   if (opt_list_is_X_true(opts_cmdline, "-b") ||
       (!opt_list_has_X(opts_cmdline, "-b") &&
        opt_list_is_X_true(opts_defaults, "-b"))) {
      DPRINTF(("Skipping options from script due to -b option\n"));
   } else {
      opt_list_append_opts_from_script(prog_number,
                                       &opts_scriptfile, &alp, 
                                       opts_cmdline, environ);
      tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S,
                                           MSG_WARNING);
      if (tmp_ret > 0) {
         DEXIT;
         SGE_EXIT((void**)&ctx, tmp_ret);
      }
   }

   /*
    * Merge all commandline options and interprete them
    */
   opt_list_merge_command_lines(&opts_all, &opts_defaults, 
                                &opts_scriptfile, &opts_cmdline);

   /*
    * Check if -terse is requested
    */
   has_terse = opt_list_has_X(opts_all, "-terse");

   /* If "-sync y" is set, wait for the job to end. */   
   /* Remove all -sync switches since cull_parse_job_parameter()
    * doesn't know what to do with them. */
   while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
      if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
         wait_for_job = 1;
      }
      
      lRemoveElem(opts_all, &ep);
   }

   if (wait_for_job) {
      DPRINTF(("Wait for job end\n"));
   }

   alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, 
                                  qualified_hostname, opts_all, &job);

   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
      answer_list_output(&alp);
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* Check if job is immediate */
   is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
   DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

   DPRINTF(("Everything ok\n"));

   if (lGetUlong(job, JB_verify)) {
      cull_show_job(job, 0, false);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   if (is_immediate || wait_for_job) {
      pthread_t sigt;
      
      qsub_setup_sig_handlers(); 

      if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 " error preparing signal handling thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
      
      if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                                       DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
         const char *msg = sge_dstring_get_string(&diag);
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 msg?msg:" error starting event client thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
   }
   
   job_get_submit_task_ids(job, &start, &end, &step);
   num_tasks = (end - start) / step + 1;

   if (num_tasks > 1) {
      int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag);
      if (error != DRMAA_ERRNO_SUCCESS) {
         /* No active session here means that japi_enable_job_wait() was
          * interrupted by the signal handler, in which case we just break out
          * quietly. */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the JAPI error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid));
      
      jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
   }
   else if (num_tasks == 1) {
      int error = japi_run_job(&jobid, &job, &diag);
      
      if (error != DRMAA_ERRNO_SUCCESS) {
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the DRMAA error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      jobid_string = strdup(sge_dstring_get_string(&jobid));
      DPRINTF(("job id is: %s\n", jobid_string));

      sge_dstring_free(&jobid);
   }
   else {
      fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
      fprintf(stderr, "\n");
      
      exit_status = 1;
      goto Error;
   }
  
   /* only success message is printed to stdout */

   just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY || 
                  lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
   DPRINTF(("Just verifying job\n"));

   if (!just_verify) {
      const char *output = sge_dstring_get_string(&diag); 

      /* print the tersed output */
      if (has_terse) {
         printf("%s", jobid_string);
      } else if (output != NULL) {
        printf("%s", output);
      } else {
        printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
      }
      printf("\n");
   } else {
      printf(MSG_JOB_VERIFYFOUNDQ);
      printf("\n");
   }   

   if ((wait_for_job || is_immediate) && !just_verify) {
      int event;

      if (is_immediate) {
         fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

         /* We only need to wait for the first task to be scheduled to be able
          * to say that the job is running. */
         tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                             DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                             NULL, &diag);

         if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S,
                  jobid_string);
            fprintf(stderr, "\n");
         }
         /* A job finish event here means that the job was rejected. */
         else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) &&
                  (event == JAPI_JOB_FINISH)) {
            fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);
            
            exit_status = 1;
            goto Error;
         }
         else {
         /* Since we told japi_wait to wait forever, we know that if it gets
          * a timeout, it's because it's been interrupted to exit, in which
          * case we don't complain.  Same for no active session. */
            if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
               fprintf(stderr, "\n");
               fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S,
                       sge_dstring_get_string(&diag));
               fprintf(stderr, "\n");
            }

            exit_status = 1;
            goto Error;
         }
      }
         
      if (wait_for_job) {
         /* Rather than using japi_synchronize on ALL for bulk jobs, we use
          * japi_wait on ANY num_tasks times because with synchronize, we would
          * have to wait for all the tasks to finish before we know if any
          * finished. */
         for (count = 0; count < num_tasks; count++) {
            /* Since there's only one running job in the session, we can just
             * wait for ANY. */
            if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                          DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                          NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
               if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                   (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                  fprintf(stderr, "\n");
                  fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                  fprintf(stderr, "\n");
               }
               
               exit_status = 1;
               goto Error;
            }
            
            /* report how job finished */
            /* If the job is an array job, use the first non-zero exit code as
             * the exit code for qsub. */
            if (exit_status == 0) {
               exit_status = report_exit_status(stat,
                                              sge_dstring_get_string(&jobid));
            }
            /* If we've already found a non-zero exit code, just print the exit
             * info for the task. */
            else {
               report_exit_status(stat, sge_dstring_get_string(&jobid));
            }               
         }
      }
   }

Error:
   FREE(jobid_string);
   lFreeList(&alp);
   lFreeList(&opts_all);
   
   if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
      if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
         fprintf(stderr, "\n");
      }
      else {
         struct timespec ts;
         /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
          * because the signal handler thread called japi_exit().  We know this
          * because if the call to japi_init() fails, we just exit directly.
          * If the call to japi_init() succeeds, then we have an active session,
          * so coming here because of an error would not result in the
          * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
         DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));
         
         sge_relative_timespec(15, &ts);
         sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
         
         while (!exited) {
            if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
               DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
               break;
            }
         }
         
         sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
      }
   }

   sge_prof_cleanup();

   /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
    * supended, SGE_EXIT() hangs. */
   exit(exit_status);
   DEXIT;
   return exit_status;
}
コード例 #9
0
ファイル: example2.c プロジェクト: HPCKP/gridengine
int main(int argc, char *argv[])
{
   lList *queuelist = NULL, *joblist = NULL;

   enum {
      MATCH_REQUEST,
      LOOP_JOBS_QUEUES
/*                              SUBWHERE */
   };
   int scene, fulfilled, numdiddeldum;

   lList *erglist = NULL;
   lListElem *job, *queue;
   lCondition *where = NULL;

   lEnumeration *w0, *w1, *nothing, *all;
   lList *queuecomplexes = NULL, *attributes = NULL;
   lListElem *attribute, *request;
   const char *operator;

   DENTER_MAIN(TOP_LAYER, "example2");

   if (argc != 3)
      usage();

   sscanf(argv[1], "%d", &scene);
   sscanf(argv[2], "%d", &numdiddeldum);

   /* neccessary for comfortable output  */
   lInit(nmv);

   queuelist = buildQueueList();
   printf("\n\nQUEUELIST\n\n");
   lWriteList(queuelist);

   COMPLEXLIST = buildComplexList();
   printf("\n\nCOMPLEXLIST\n\n");
   lWriteList(COMPLEXLIST);

   joblist = buildJobList(numdiddeldum);
   printf("\n\nJOBLIST\n\n");
   lWriteList(joblist);

   printf("\n******** BEGIN PROCESSING *********\n\n");

   switch (scene) {

   case MATCH_REQUEST:

      /* 
         find for each job in the joblist all queues that 
         suffer the request of this job 
         ( implemented with quick list functions )
       */

      for_each(job, joblist) {

         printf("\n-------------------------------"
                "-------------------------------\n");
         printf("*** job %s may get started in the following queues ***\n\n",
                lGetString(job, J_name));

         for_each(queue, queuelist) {
            if (matchRequest(lGetList(queue, Q_complexname),
                             lGetList(job, J_hardrequest))) {
               if (!erglist)
                  if (!(erglist = lCreateList("erglist", QueueT))) {
                     printf("case MATCH_REQUEST: lCreateList"
                            " failure\n");
                     exit(-1);
                  }
               lAppendElem(erglist, lCopyElem(queue));
            }
         }
         printf("\n\n**** ERGLIST ****\n\n");
         if (erglist) {
            lWriteList(erglist);
            lFreeList(&erglist);
            erglist = NULL;
         }
      }

      break;

   case LOOP_JOBS_QUEUES:

      /* 
         find for each job in the joblist all queues that 
         suffer the request of this job 
         ( implemented with mighty database-like functions )
       */

      for_each(job, joblist) {

         printf("\n--------------------------------------------------------------\n");
         printf("*** job %s may get started in the following queues ***\n\n",
                lGetString(job, J_name));

         for_each(queue, queuelist) {

            /*
               build a list of the complexes of the queue

               therefore: build a subset of the complex list 
               with the queues complex name list as a selector

               join the complex name list of the queue      
               ( join field: N_complexname) 
               with the global complex list                                         
               ( join field: C_name )

               select nothing from the queue's complex name list (w0)
               and only the attributes of the global complex list (w1)

               every valid combination is needed, so the 
               where parameter is NULL for both lists 
             */

            w0 = lWhat("%T(NONE)", ComplexNameT);       /* NIX */
            w1 = lWhat("%T(%I)", ComplexT, C_attribute);

            queuecomplexes = lJoin("queuecomplexes",
                 N_complexname, lGetList(queue, Q_complexname), NULL, w0,
                                   C_name, COMPLEXLIST, NULL, w1);

            lFreeWhat(&w0);
            lFreeWhat(&w1);

            /* 
               try to find a hard request of this job 
               that is not fulfilled by the queue's complexes
             */
            fulfilled = 1;

            for_each(request, lGetList(job, J_hardrequest)) {

               /* 
                  build a flat complex attribute list with only
                  these attributes of the request

                  this produces a attribute list of all complexes 
                */

               nothing = lWhat("%T(NONE)", lGetListDescr(queuecomplexes));
               all = lWhat("%T(ALL)", ComplexAttributeT);
               where = lWhere("%T( %I == %s )", ComplexAttributeT,
                              A_name,
                              lGetString(request, R_name));

               attributes = lJoinSublist("attributelist",
                              C_attribute, queuecomplexes, NULL, nothing,
                                         ComplexAttributeT, where, all);

               lFreeWhere(&where);
               lFreeWhat(&nothing);
               lFreeWhat(&all);

               /* 
                  if we get an empty list then the queue has 
                  no complex fulfilling the request of this job
                */
               /* 
                  right now the lJoinSublist function returns
                  an empty list (no elements) if there was no sublist
                */
               if (lGetNumberOfElem(attributes) == 0) {
                  fulfilled = 0;
                  break;        /* leave request loop */
               }

               /* 
                  if there are attributes the values of at least one
                  complex of the queue must fulfill the request
                */
               for_each(attribute, attributes) {

                  operator = lGetString(request, R_operator);

                  if (strcmp(operator, "==") == 0) {
                     fulfilled = (!strcmp(
                                          lGetString(attribute, A_value),
                                          lGetString(request, R_value)));

                  }
                  else if (strcmp(operator, "!=") == 0) {
                     fulfilled = (strcmp(
                                           lGetString(attribute, A_value),
                                           lGetString(request, R_value)));

                  }             /* else if .. ( for all operators ) */

                  if (fulfilled)
                     break;     /* leave attribute loop */
               }
コード例 #10
0
ファイル: test_heartbeat.c プロジェクト: HPCKP/gridengine
/*
 * Test driver for the qmaster heartbeat file helpers.
 *
 * Default mode: removes the heartbeat file, then repeatedly calls
 * inc_qmaster_heartbeat() followed by get_qmaster_heartbeat() and checks
 * that both report the same value. The loop stops the second time the
 * value read back is 1.
 *
 * "-only-write <file>" mode (exactly two arguments): increments the given
 * heartbeat file once, prints the value read back and exits with 0.
 *
 * Returns 0 on success. On error the heartbeat file is removed and the
 * negated internal error code is returned (see the table in the loop).
 */
int main(int argc, char* argv[])
{
   int return_value = 0;
   int i;
   int runs = 0;
   char* filename = "test.txt";
   int   timeout  = 15;
   int todo = 0;
   struct timeval now;
   struct timeval last_time;
   int do_stop = 0;
   /* initialized so that a failing inc_qmaster_heartbeat() which leaves
    * its output untouched never makes us read an indeterminate value */
   int beat_val = 0;
   int only_write = 0;

   DENTER_MAIN(TOP_LAYER, "test_sge_qmaster_heartbeat");

   /* initialize last_time */
   gettimeofday(&last_time, NULL);

   if (argc==3) {
      if (strcmp(argv[1],"-only-write") == 0) {
         printf("only writing heartbeat file once\n");
         only_write=1;
         filename = argv[2];
      }
   }

   if ( only_write == 0) {
      /* delete file */
      unlink(filename);
   }
   
   /* now run till we start from 1 */
   while ( do_stop == 0 ) {
      return_value = inc_qmaster_heartbeat(filename, timeout, &beat_val);
      i            = get_qmaster_heartbeat(filename, timeout);
      if ( only_write == 1) {
         printf("incremented heartbeat file %s\n", filename);
         printf("heartbeat value is %d\n", i);
         exit(0);
      }

      todo++;
      /*
       * Only compare the two values when the increment itself succeeded;
       * otherwise the error code of inc_qmaster_heartbeat() would be lost.
       * The code is stored negated because it is negated again on return,
       * which yields the documented exit value 20.
       */
      if (return_value == 0 && beat_val != i) {
         printf("heartbeat value not correct\n");
         do_stop = 1;
         return_value = -20;
      }

      if (i <= 0) {
         printf("get_qmaster_heartbeat() returned %d\n", i);
         return_value = -100 + i;
      } else {
         if ( return_value != 0 && return_value != -20) {
            printf("(%d) inc_qmaster_heartbeat() returned %d\n", i, return_value);
         }
      }

      /* on error:  
       *
       * exit value >= 100:  get_qmaster_heartbeat() returned:  - (exit value - 100)
       * exit value < 100:   inc_qmaster_heartbeat() returned:  - (exit value)
       * exit value == 20:   unexpected heartbeat value
       */
      if (return_value != 0) {
         unlink(filename);
         DEXIT;
         return (-return_value);
      }
      /* the first time we see 1 is the initial write, the second time
       * means the counter has started over */
      if (i==1 && runs++ != 0) {
         do_stop = 1;
      }
      gettimeofday(&now,NULL);
      /* print progress at most once per second, and once more when done;
       * the percentage is computed against 99999 iterations */
      if (now.tv_sec != last_time.tv_sec || do_stop != 0 ) {
         printf("%6.2f %% done\n", (double)(((double)todo/99999.0)*100.0));
         fflush(stdout);
         last_time.tv_sec = now.tv_sec;
      }
   }
   
   /* delete file */
   unlink(filename);

   DEXIT;
   return 0;
} /* main() */
コード例 #11
0
/*
 * loadcheck: prints load values of the local host, one "name value" pair
 * per line.
 *
 * Command line:
 *    -cb             only run check_core_binding() and return 1
 *    -int            print memory values without the "M" unit suffix and
 *                    with precision 0
 *    -loadval NAME   restrict the output to the load value called NAME
 *
 * Any other argument calls usage().
 *
 * Returns 0 on success, 1 if the memory indices could not be retrieved
 * (and also 1 after the -cb check, as the code has always done).
 */
int main(int argc, char *argv[])
{
   double avg[3];
   int loads;
   char *name = NULL;
#if defined(PLPA_LINUX) || defined(BINDING_SOLARIS)
   dstring msocket   = DSTRING_INIT;
   dstring mcore     = DSTRING_INIT;
   dstring mtopology = DSTRING_INIT;
#endif

#ifdef SGE_LOADMEM
   sge_mem_info_t mem_info;
#endif

#ifdef SGE_LOADCPU
   double total = 0.0;
#endif

   int i, pos = 0, print_as_int = 0, precision = 0, core_binding = 0;
   char *m = "";

#ifndef WINDOWS
   DENTER_MAIN(TOP_LAYER, "loadcheck");
#endif

#ifdef __SGE_COMPILE_WITH_GETTEXT__   
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */
   if (argc == 2 && !strcmp(argv[1], "-cb")) {
      core_binding = 1;
   } else {
      for (i = 1; i < argc;) {
         if (!strcmp(argv[i], "-int"))
            print_as_int = 1;
         else if (!strcmp(argv[i], "-loadval")) {
            /* remember the index of the requested load value name */
            if (i + 1 < argc)
               pos=i+1;
            else
               usage();
            i++;
         }
         else
            usage();
         i++;
      }
   }   
   
   if (core_binding) {
      check_core_binding();
#ifndef WINDOWS
      DEXIT;
#endif
      return 1;
   } else if (print_as_int) {
      m = "";
      precision = 0;
   }   
   else {
      m = "M";
      precision = 6;
   }   

   /* pos == 0 means "no -loadval filter": every value is printed */
   if ((pos && !strcmp("arch", argv[pos])) || !pos) {
      const char *arch = "";
#if defined(WINDOWS)
      arch = "win32-x86";
#else
      arch = sge_get_arch();
#endif 
      printf("arch            %s\n", arch);
   }
      
   if ((pos && !strcmp("num_proc", argv[pos])) || !pos) {
      int nprocs = 1;
#if defined(WINDOWS)
      SYSTEM_INFO system_info;
      char        buf[100];

      GetSystemInfo(&system_info);
      nprocs = system_info.dwNumberOfProcessors;
      sprintf(buf, "num_proc        %d", nprocs);
      fflush(stdout);
      write(1, (const void*)buf, (unsigned int)strlen(buf));
      /* terminate the line with a single 0x0a byte; the former literal
       * "\0x0a" started with a NUL, so a NUL byte was written instead */
      write(1, (const void*)"\n", (unsigned int)1);
#else
      nprocs = sge_nprocs();
      printf("num_proc        %d\n", nprocs);
#endif
   }

#if defined(PLPA_LINUX) || defined(BINDING_SOLARIS)
   fill_socket_core_topology(&msocket, &mcore, &mtopology);
   if ((pos && !strcmp("m_socket", argv[pos])) || !pos) {
      printf("m_socket        %s\n", sge_dstring_get_string(&msocket));
   }
   if ((pos && !strcmp("m_core", argv[pos])) || !pos) {
      printf("m_core          %s\n", sge_dstring_get_string(&mcore));
   }
   if ((pos && !strcmp("m_topology", argv[pos])) || !pos) {
      printf("m_topology      %s\n", sge_dstring_get_string(&mtopology));
   }   
#else 
   /* no topology support compiled in: report "-" for each value */
   if ((pos && !strcmp("m_socket", argv[pos])) || !pos) {
      printf("m_socket        -\n");
   }
   if ((pos && !strcmp("m_core", argv[pos])) || !pos) {
      printf("m_core          -\n");
   }
   if ((pos && !strcmp("m_topology", argv[pos])) || !pos) {
      printf("m_topology      -\n");
   }   
#endif 

#if defined(WINDOWS)
   loads = 0;
   avg[0] = avg[1] = avg[2] = 0;
#else
   loads = sge_getloadavg(avg, 3);
#endif

   /* each average is only printed when sge_getloadavg() reported enough
    * samples for it */
   if (loads>0 && ((pos && !strcmp("load_short", argv[pos])) || !pos)) 
      printf("load_short      %.2f\n", avg[0]);
   if (loads>1 && ((pos && !strcmp("load_medium", argv[pos])) || !pos)) 
      printf("load_medium     %.2f\n", avg[1]);
   if (loads>2 && ((pos && !strcmp("load_long", argv[pos])) || !pos))
      printf("load_long       %.2f\n", avg[2]);
      
   if (pos)
      name = argv[pos];
   else
      name = NULL;

#ifdef SGE_LOADMEM
   /* memory load report */
   memset(&mem_info, 0, sizeof(sge_mem_info_t));
   if (sge_loadmem(&mem_info)) {
      fprintf(stderr, "%s\n", MSG_SYSTEM_RETMEMORYINDICESFAILED);
#ifndef WINDOWS
      DEXIT;
#endif
#if defined(PLPA_LINUX) || defined(BINDING_SOLARIS)
      sge_dstring_free(&mcore);
      sge_dstring_free(&msocket);
      sge_dstring_free(&mtopology);
#endif
      return 1;
   }

   print_mem_load(LOAD_ATTR_MEM_FREE, name, precision, mem_info.mem_free, m); 
   print_mem_load(LOAD_ATTR_SWAP_FREE, name, precision, mem_info.swap_free, m); 
   print_mem_load(LOAD_ATTR_VIRTUAL_FREE, name, precision, mem_info.mem_free  + mem_info.swap_free, m); 

   print_mem_load(LOAD_ATTR_MEM_TOTAL, name, precision, mem_info.mem_total, m); 
   print_mem_load(LOAD_ATTR_SWAP_TOTAL, name, precision, mem_info.swap_total, m); 
   print_mem_load(LOAD_ATTR_VIRTUAL_TOTAL, name, precision, mem_info.mem_total + mem_info.swap_total, m);

   print_mem_load(LOAD_ATTR_MEM_USED, name, precision, mem_info.mem_total - mem_info.mem_free, m); 
   print_mem_load(LOAD_ATTR_SWAP_USED, name, precision, mem_info.swap_total - mem_info.swap_free, m); 
   print_mem_load(LOAD_ATTR_VIRTUAL_USED, name, precision,(mem_info.mem_total + mem_info.swap_total) - 
                                          (mem_info.mem_free  + mem_info.swap_free), m); 
#  ifdef IRIX
   /* NOTE(review): swap_rsvd is printed under the SWAP_USED name - confirm
    * this is intended and not meant to use a dedicated attribute name */
   print_mem_load(LOAD_ATTR_SWAP_USED, name, precision, mem_info.swap_rsvd, m); 
#  endif
#endif /* SGE_LOADMEM */

#ifdef SGE_LOADCPU
   /* sample twice, one second apart; only the second result is used */
   loads = sge_getcpuload(&total);
   sleep(1);
   loads = sge_getcpuload(&total);

   if (loads != -1) {
      print_mem_load("cpu", name,  1, total, "%");
   }
#endif /* SGE_LOADCPU */
#ifndef WINDOWS
   DEXIT;
#endif
#if defined(PLPA_LINUX) || defined(BINDING_SOLARIS)
   sge_dstring_free(&mcore);
   sge_dstring_free(&msocket);
   sge_dstring_free(&mtopology);
#endif
   return 0;
}
コード例 #12
0
ファイル: shadowd.c プロジェクト: StephenDennis/gridengine
/*----------------------------------------------------------------------------*/
/*
 * sge_shadowd entry point.
 *
 * After setup (context, signal handlers, pid-file check, switch to the
 * admin user, daemonizing) the daemon wakes up every check_interval
 * seconds and reads the qmaster heartbeat file. When the heartbeat did
 * not change for at least get_active_interval (+ delay) seconds and
 * check_if_valid_shadow() returns 0, it takes the qmaster lock and starts
 * a qmaster on this host.
 *
 * The intervals can be overridden through the environment variables
 * SGE_CHECK_INTERVAL, SGE_GET_ACTIVE_INTERVAL, SGE_DELAY_TIME and
 * SGE_TEST_HEARTBEAT_TIMEOUT.
 */
int 
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
/*    const char *cp; */
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

#if 1

static int check_interval = CHECK_INTERVAL;
static int get_active_interval = GET_ACTIVE_INTERVAL;
static int delay_time = DELAY_TIME;
static int sge_test_heartbeat = 0;

char binpath[SGE_PATH_MAX];
char oldqmaster[SGE_PATH_MAX];

char shadow_err_file[SGE_PATH_MAX];
char qmaster_out_file[SGE_PATH_MAX];

#endif

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   /* initialize recovery control variables */
   {
      char *s;
      int val;
      if ((s=getenv("SGE_CHECK_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         check_interval = val;
      if ((s=getenv("SGE_GET_ACTIVE_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         get_active_interval = val;
      if ((s=getenv("SGE_DELAY_TIME")) &&
          sscanf(s, "%d", &val) == 1)
         delay_time = val;
      if ((s=getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) &&
          sscanf(s, "%d", &val) == 1)
         sge_test_heartbeat = val;
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx), 
               ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx), 
               ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   /* both log files are relative to the qmaster spool directory we
    * changed into above */
   snprintf(shadow_err_file, sizeof(shadow_err_file), "messages_shadowd.%s",
            ctx->get_unqualified_hostname(ctx));
   snprintf(qmaster_out_file, sizeof(qmaster_out_file), "messages_qmaster.%s",
            ctx->get_unqualified_hostname(ctx));
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      /* hand the commlib file descriptors to sge_daemonize() */
      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();


      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have two heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];
                        int log_fd;

                        snprintf(qmaster_name, sizeof(qmaster_name), "%s%s",
                                 SGE_PREFIX, prognames[QMASTER]);
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        log_fd = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        if (log_fd == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error
                            */
                           out = 1;
                           err = 2;
                        } else {
                           out = log_fd;
                           err = log_fd;
                        }

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }
                        /* only close what we opened ourselves; in the
                         * fallback case "out" is our own fd 1 */
                        if (log_fd != -1) {
                           close(log_fd);
                        }
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and heartbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}
コード例 #13
0
ファイル: qalter.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Shared entry point of qalter, qresub, qhold and qrls. Which of the four
 * personalities is active is derived from the basename of argv[0].
 *
 * The visible part parses the command line into a request list, optionally
 * only shows the resulting job (when `verify` is set), and otherwise sends
 * the request list to qmaster via ctx->gdi() and prints the answers.
 *
 * NOTE(review): this snippet ends inside the function body; the final
 * cleanup and return are not visible here.
 */
int main(int argc, char **argv) {
   int ret = STATUS_OK;
   lList *alp = NULL;
   lList *request_list = NULL;
   lList *cmdline = NULL;
   lListElem *aep;
   int all_jobs = 0;
   int all_users = 0;
   u_long32 gdi_cmd = SGE_GDI_MOD; 
   int tmp_ret;
   int me_who;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "qalter");

   prof_mt_init();

   /*
   ** get command name: qalter, qresub, qhold or qrls
   ** (anything unrecognized is treated as qalter)
   */
   if (!strcmp(sge_basename(argv[0], '/'), "qresub")) {
      DPRINTF(("QRESUB\n"));
      me_who = QRESUB;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qhold")) {
      DPRINTF(("QHOLD\n"));
      me_who = QHOLD;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qrls")) {
      DPRINTF(("QRLS\n"));
      me_who = QRLS;
   } else {
      DPRINTF(("QALTER\n"));
      me_who = QALTER;
   } 

   log_state_set_log_gui(1);
   sge_setup_sig_handlers(me_who);

   if (sge_gdi2_setup(&ctx, me_who, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
   ** begin to work
   */
   opt_list_append_opts_from_qalter_cmdline(me_who, &cmdline, &alp, argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, MSG_QALTER, MSG_QALTER, MSG_QALTERWARNING);
   
   /* a positive result is used directly as the exit status */
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }
   
   /* handling the case that no command line parameter was specified */
   if ((me_who == QHOLD || me_who == QRLS) && lGetNumberOfElem(cmdline) == 1) {
      /* -h option is set implicitly for QHOLD and QRLS */
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if ((me_who == QRESUB || me_who == QALTER) && lGetNumberOfElem(cmdline) == 0) {
      /* qresub and qalter have nothing set */ 
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if (opt_list_has_X(cmdline, "-help")) {
      /* -help was specified */
      sge_usage(me_who, stdout);
      SGE_EXIT((void**)&ctx, 0);
   }
   
   /* fills request_list, all_jobs and all_users; the returned answer
    * list is checked further below */
   alp = qalter_parse_job_parameter(me_who, cmdline, &request_list, &all_jobs, 
                                    &all_users);

   DPRINTF(("all_jobs = %d, all_user = %d\n", all_jobs, all_users));

   /* `verify` is not declared in this function; it comes from outside
    * this block */
   if (request_list && verify) {
      /* 
         got a request list containing one element 
         for each job to be modified 
         save jobid all fields contain the same fields
         so we may use show_job() with the first job
         in our list 
         The jobid's in our request list get printed before
         show_job()
      */
      cull_show_job(lFirst(request_list), FLG_QALTER, false);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
   }

   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /* qalter, qhold and qrls modify jobs; qresub copies them */
   if ((me_who == QALTER) ||
       (me_who == QHOLD) ||
       (me_who == QRLS) 
      ) {
      DPRINTF(("QALTER\n"));
      gdi_cmd = SGE_GDI_MOD;
   } else if (me_who == QRESUB){
      DPRINTF(("QRESUB\n"));
      gdi_cmd = SGE_GDI_COPY;
   } else {
      printf("unknown binary name.\n");
      SGE_EXIT((void**)&ctx, 1);
   }

   if (all_jobs)
      gdi_cmd |= SGE_GDI_ALL_JOBS;
   if (all_users)
      gdi_cmd |= SGE_GDI_ALL_USERS;

   alp = ctx->gdi(ctx, SGE_JB_LIST, gdi_cmd, &request_list, NULL, NULL); 
   /* print every answer; ret keeps the first status that is not STATUS_OK */
   for_each (aep, alp) {
      printf("%s\n", lGetString(aep, AN_text));
      if (ret == STATUS_OK) {
         ret = lGetUlong(aep, AN_status);
      }
   }
コード例 #14
0
ファイル: qrsub.c プロジェクト: HPCKP/gridengine
/*
 * qrsub entry point: builds an advance reservation request from the
 * default request files and the command line, submits it to qmaster and
 * prints the answer.
 *
 * Exit status: 0 on success, 25 when the answer list carries
 * STATUS_NOTOK_DOAGAIN, 1 for every other failure.
 */
int main(int argc, char **argv) {
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *answers = NULL;
   lList *options = NULL;
   lList *ar_list = NULL;
   lListElem *reservation = NULL;
   int submit_failed = 0;
   int exit_code = 0;

   DENTER_MAIN(TOP_LAYER, "qrsub");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSUB);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSUB, MAIN_THREAD, &answers) != AE_OK) {
      answer_list_output(&answers);
      goto error_exit;
   }

   /*
   ** stage 1 of commandline parsing: default request files
   */
   {
      dstring defaults_path = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_ar_request file */
      get_root_file_path(&defaults_path, cell_root, SGE_COMMON_DEF_AR_REQ_FILE);
      answers = parse_script_file(QRSUB, sge_dstring_get_string(&defaults_path),
                                  "", &options, environ,
                                  FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE);

      /* arguments from $HOME/.sge_ar_request file, only consulted when
       * the cluster wide file produced no answers */
      if (answers == NULL &&
          get_user_home_file_path(&defaults_path, SGE_HOME_DEF_AR_REQ_FILE,
                                  user, &answers)) {
         lFreeList(&answers);
         answers = parse_script_file(QRSUB, sge_dstring_get_string(&defaults_path),
                                     "", &options, environ,
                                     FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE);
      }
      sge_dstring_free(&defaults_path);

      if (answers != NULL) {
         answer_list_output(&answers);
         lFreeList(&options);
         goto error_exit;
      }
   }

   answers = cull_parse_cmdline(QRSUB, argv+1, environ, &options, FLG_USE_PSEUDOS);

   if (answer_list_print_err_warn(&answers, NULL, "qrsub: ", MSG_WARNING) > 0) {
      lFreeList(&options);
      goto error_exit;
   }

   if (options == NULL) {
      /* no command line option is present: print help to stderr */
      sge_usage(QRSUB, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      goto error_exit;
   }

   /*
   ** stage 2 of command line parsing: fill the reservation element
   */
   reservation = lCreateElem(AR_Type);

   if (!sge_parse_qrsub(ctx, options, &answers, &reservation)) {
      answer_list_output(&answers);
      lFreeList(&options);
      goto error_exit;
   }

   /* wrap the single element into a list and send it to qmaster */
   ar_list = lCreateList(NULL, AR_Type);
   lAppendElem(ar_list, reservation);

   answers = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION,
                      &ar_list, NULL, NULL);
   lFreeList(&ar_list);
   answer_list_on_error_print_or_exit(&answers, stdout);

   submit_failed = answer_list_has_error(&answers) ? 1 : 0;

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();

   if (submit_failed) {
      /* 25 tells the caller that the request may be tried again */
      exit_code = answer_list_has_status(&answers, STATUS_NOTOK_DOAGAIN) ? 25 : 1;
   }
   DRETURN(exit_code);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
コード例 #15
0
/****** qmaster/sge_qmaster_main/main() ****************************************
*  NAME
*     main() -- qmaster entry point 
*
*  SYNOPSIS
*     int main(int argc, char* argv[]) 
*
*  FUNCTION
*     Qmaster entry point.
*
*     NOTE: The main thread must block all signals before any additional thread
*     is created. Failure to do so will ruin signal handling!
*
*  INPUTS
*     int argc     - number of commandline arguments 
*     char* argv[] - commandline arguments 
*
*  RESULT
*     0 - success 
*
*  NOTES
*     We check whether 'SGE_ROOT' is set before we daemonize. Once qmaster is
*     a daemon, we are no longer connected to a terminal and hence can not
*     output an error message to stdout or stderr.
*
*     We need to inovke 'prepare_enroll()' *before* the user id is switched via
*     'become_admin_user()'. This is because qmaster must be able to bind a so
*     called reserved port (requires root privileges) if configured to do so.
*
*******************************************************************************/
int main(int argc, char* argv[])
{
   int max_enroll_tries;
   int ret_val;
   int file_descriptor_settings_result = 0;
   bool has_daemonized = false;
   sge_gdi_ctx_class_t *ctx = NULL;
   u_long32 start_time = sge_get_gmt();
   monitoring_t monitor;

   DENTER_MAIN(TOP_LAYER, "qmaster");

   sge_monitor_init(&monitor, "MAIN", NONE_EXT, MT_WARNING, MT_ERROR);
   prof_mt_init();

   sge_get_root_dir(true, NULL, 0, true);
   
#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   sge_init_language_func((gettext_func_type)gettext, (setlocale_func_type)setlocale, (bindtextdomain_func_type)bindtextdomain, (textdomain_func_type)textdomain);
   sge_init_language(NULL,NULL);   
#endif 

   /* 
    * qmaster doesn't support any commandline anymore,
    * but we should show version string and -help option 
    */
   if (argc != 1) {
      sigset_t sig_set;
      sigfillset(&sig_set);
      pthread_sigmask(SIG_SETMASK, &sig_set, NULL);
      sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, true);
      sge_process_qmaster_cmdline(argv);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
    * daemonize qmaster
    * set file descriptor limits
    * and initialize libraries to be used in multi threaded environment
    * also take care that finished child processed of this process become
    * zombie jobs
    */
   has_daemonized = sge_daemonize_qmaster();
   file_descriptor_settings_result = set_file_descriptor_limit();
#if !defined(INTERIX) && !defined(CYGWIN)
   init_sig_action_and_mask();
#endif

   /* init qmaster threads without becomming admin user */
   sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, false);

   ctx->set_daemonized(ctx, has_daemonized);

   /* this must be done as root user to be able to bind ports < 1024 */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;
      if (max_enroll_tries <= 0) {
         /* exit after 30 seconds */
         CRITICAL((SGE_EVENT, MSG_QMASTER_COMMUNICATION_ERRORS ));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
      }
   }

   /*
    * now the commlib up and running. Set qmaster application status function 
    * (commlib callback function for qping status information response 
    *  messages (SIRM))
    */
   ret_val = cl_com_set_status_func(sge_qmaster_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
   }

   /* 
    * now we become admin user change into the correct root directory set the
    * the target for logging messages
    */
   sge_become_admin_user(ctx->get_admin_user(ctx));
   sge_chdir_exit(ctx->get_qmaster_spool_dir(ctx), 1);
   log_state_set_log_file(ERR_FILE);
   ctx->set_exit_func(ctx, sge_exit_func);

#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /*
    * We do increment the heartbeat manually here. This is the 'startup heartbeat'. 
    * The first time the hearbeat will be incremented through the heartbeat event 
    * handler is after about HEARTBEAT_INTERVAL seconds. The hardbeat event handler
    * is setup during the initialisazion of the timer thread.
    */
   inc_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, HEARTBEAT_INTERVAL, NULL);
     
   /*
    * Event master module has to be initialized already here because
    * sge_setup_qmaster() might already access it although event delivery
    * thread is not running.
    *
    * Corresponding shutdown is done in sge_event_master_terminate();
    *
    * EB: In my opinion the init function should called in
    * sge_event_master_initialize(). Is it possible to move that call?
    */ 
   sge_event_master_init();

   sge_setup_qmaster(ctx, argv);

#ifndef USE_POLL
   if (file_descriptor_settings_result == 1) {
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_LARGER_THAN_LIMIT_U, sge_u32c(FD_SETSIZE)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE1_U, sge_u32c(FD_SETSIZE - 20)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE2));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE3));
   }
#endif

   /*
    * Setup all threads and initialize corresponding modules. 
    * Order is important!
    */
   sge_signaler_initialize(ctx);
   sge_event_master_initialize(ctx);
   sge_timer_initialize(ctx, &monitor);
   sge_worker_initialize(ctx);
#if 0
   sge_test_initialize(ctx);
#endif
   sge_listener_initialize(ctx);
   sge_scheduler_initialize(ctx, NULL);
#ifndef NO_JNI
   sge_jvm_initialize(ctx, NULL);
#endif

   INFO((SGE_EVENT, "qmaster startup took "sge_u32" seconds", sge_get_gmt() - start_time));

   /*
    * Block till signal from signal thread arrives us
    */
   sge_thread_wait_for_signal();

   /* 
    * Shutdown all threads and shutdown corresponding modules.
    * Order is important!
    */
#ifndef NO_JNI
   sge_jvm_terminate(ctx, NULL);
#endif
   sge_scheduler_terminate(ctx, NULL);
   sge_listener_terminate();
#if 0
   sge_test_terminate(ctx);
#endif
   sge_worker_terminate(ctx);
   sge_timer_terminate();
   sge_event_master_terminate();
   sge_signaler_terminate();

   /*
    * Remaining shutdown operations
    */
   sge_clean_lists();
   sge_monitor_free(&monitor);

   sge_shutdown((void**)&ctx, sge_qmaster_get_exit_state());
   sge_prof_cleanup();

   DEXIT;
   return 0;
} /* main() */
コード例 #16
0
ファイル: qevent.c プロジェクト: HPCKP/gridengine
int main(int argc, char *argv[])
{
   qevent_options enabled_options;
   dstring errors = DSTRING_INIT;
   int i, gdi_setup;
   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL; 
   sge_evc_class_t *evc = NULL;

   DENTER_MAIN(TOP_LAYER, "qevent");

/*    sge_mt_init(); */

   /* dump pid to file */
   qevent_dump_pid_file();

   /* parse command line */
   enabled_options.error_message = &errors;
   qevent_set_option_struct(&enabled_options);
   qevent_parse_command_line(argc, argv, &enabled_options);

   

   /* check if help option is set */
   if (enabled_options.help_option) {
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* are there command line parsing errors ? */
   if (sge_dstring_get_string(enabled_options.error_message)) {
      ERROR((SGE_EVENT, "%s", sge_dstring_get_string(enabled_options.error_message) ));
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }


   log_state_set_log_gui(1);
   sge_setup_sig_handlers(QEVENT);

   /* setup event client */
   gdi_setup = sge_gdi2_setup(&ctx, QEVENT, MAIN_THREAD, &alp);
   if (gdi_setup != AE_OK) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }
   /* TODO: how is the memory we allocate here released ???, SGE_EXIT doesn't */
   if (false == sge_gdi2_evc_setup(&evc, ctx, EV_ID_ANY, &alp, NULL)) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* ok, start over ... */
   /* check for testsuite option */
   
   if (enabled_options.testsuite_option) {
      /* only for testsuite */
      qevent_testsuite_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* check for subscribe option */
   if (enabled_options.subscribe_option) {
      /* only for testsuite */
      qevent_subscribe_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   if (enabled_options.trigger_option_count > 0) {
      lCondition *where =NULL;
      lEnumeration *what = NULL;

      sge_mirror_initialize(evc, EV_ID_ANY, "sge_mirror -trigger", true, 
                            NULL, NULL, NULL, NULL, NULL);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_ACK);

      /* put out information about -trigger option */
      for (i=0;i<enabled_options.trigger_option_count;i++) {
         INFO((SGE_EVENT, "trigger script for %s events: %s\n",
                         qevent_get_event_name((enabled_options.trigger_option_events)[i]), 
                         (enabled_options.trigger_option_scripts)[i]));
         switch((enabled_options.trigger_option_events)[i]) {
            case QEVENT_JB_END:
                  
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I %I %I %I %I %I %I %I)", JB_Type, JB_job_number, JB_ja_tasks, 
                                                              JB_ja_structure, JB_ja_n_h_ids, JB_ja_u_h_ids, 
                                                              JB_ja_s_h_ids,JB_ja_o_h_ids, JB_ja_template);
                  
                  /* register for job events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JOB, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JOB_DEL,true, 1);

                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */
                /*  evc->ec_unsubscribe(evc, sgeE_JOB_LIST); */
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD);
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD_SCHED_PRIORITY);
                  evc->ec_unsubscribe(evc, sgeE_JOB_USAGE);
                  evc->ec_unsubscribe(evc, sgeE_JOB_FINAL_USAGE);
               /*   evc->ec_unsubscribe(evc, sgeE_JOB_ADD); */

                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
            case QEVENT_JB_TASK_END:
            
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I)", JAT_Type, JAT_status);
                  /* register for JAT events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JATASK, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JATASK_DEL,true, 1);
                  
                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */ 
                  evc->ec_unsubscribe(evc, sgeE_JATASK_ADD);
                  evc->ec_unsubscribe(evc, sgeE_JATASK_MOD);
                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
         }        
      }

      while(!shut_me_down) {
         sge_mirror_error error = sge_mirror_process_events(evc);
         if (error == SGE_EM_TIMEOUT && !shut_me_down ) {
            sleep(10);
            continue;
         }
      }

      sge_mirror_shutdown(evc);

      sge_dstring_free(enabled_options.error_message);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
      return 0;
   }


   ERROR((SGE_EVENT, "no option selected\n" ));
   qevent_show_usage();
   sge_dstring_free(enabled_options.error_message);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   return 1;
}
コード例 #17
0
ファイル: execd.c プロジェクト: BlueBolt/BB_GridEngine
/*-------------------------------------------------------------------------*/
int main(int argc, char **argv)
{
   int ret;
   int my_pid;
   int ret_val;
   int printed_points = 0;
   int max_enroll_tries;
   static char tmp_err_file_name[SGE_PATH_MAX];
   time_t next_prof_output = 0;
   int execd_exit_state = 0;
   lList **master_job_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *alp = NULL;

   DENTER_MAIN(TOP_LAYER, "execd");

#if defined(LINUX)
   gen_procList ();
#endif

   prof_mt_init();

   set_thread_name(pthread_self(),"Execd Thread");

   prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); 
   prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); 

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* This needs a better solution */
   umask(022);
      
   /* Initialize path for temporary logging until we chdir to spool */
   my_pid = getpid();
   sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid));
   log_state_set_log_file(tmp_err_file_name);

   /* exit func for SGE_EXIT() */
   sge_sig_handler_in_main_loop = 0;
   sge_setup_sig_handlers(EXECD);

   if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }
   ctx->set_exit_func(ctx, execd_exit_func);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /* prepare daemonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_prepare(ctx);
   }

   if ((ret=sge_occupy_first_three())>=0) {
      CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret));
      SGE_EXIT((void**)&ctx, 1);
   }

   lInit(nmv);

   /* unset XAUTHORITY if set */
   if (getenv("XAUTHORITY") != NULL) {
      sge_unsetenv("XAUTHORITY");
   }

   parse_cmdline_execd(argv);   
   
   /* exit if we can't get communication handle (bind port) */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[EXECD],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;

      if (max_enroll_tries <= 0 || shut_me_down) {
         /* exit after 30 seconds */
         if (printed_points != 0) {
            printf("\n");
         }
         CRITICAL((SGE_EVENT, MSG_COM_ERROR));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[EXECD],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
        if (max_enroll_tries < 27) {
           printf(".");
           printed_points++;
           fflush(stdout);
        }
      }
   }

   if (printed_points != 0) {
      printf("\n");
   }

   /*
    * now the commlib up and running. Set execd application status function 
    * ( commlib callback function for qping status information response 
    *   messages (SIRM) )
    */
   ret_val = cl_com_set_status_func(sge_execd_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)) );
   }

   /* test connection */
   {
      cl_com_SIRM_t* status = NULL;
      ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx),
                                               (char *)ctx->get_master(ctx, true),
                                               (char*)prognames[QMASTER], 1, &status);
      if (ret_val != CL_RETVAL_OK) {
         ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
         ERROR((SGE_EVENT, MSG_CONF_NOCONFBG));
      }
      cl_com_free_sirm_message(&status);
   }
   
   /* finalize daeamonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_finalize(ctx);
   }

   /* daemonizes if qmaster is unreachable */   
   sge_setup_sge_execd(ctx, tmp_err_file_name);

   /* are we using qidle or not */
   sge_ls_qidle(mconf_get_use_qidle());
   sge_ls_gnu_ls(1);
   
   DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle()));

   /* test load sensor (internal or external) */
   {
      lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx));
      lFreeList(&report_list);
   }
   
   /* here we have to wait for qmaster registration */
   while (sge_execd_register_at_qmaster(ctx, false) != 0) {
      if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) {
         /* This is no error */
         DPRINTF(("*****  got SGE_COM_ACCESS_DENIED from qmaster  *****\n"));
      }
      if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) {
         execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE;
         break;
      }
      if (shut_me_down != 0) {
         break;
      }
      sleep(30);
   }

   /* 
    * Terminate on SIGTERM or hard communication error
    */
   if (execd_exit_state != 0 || shut_me_down != 0) {
      sge_shutdown((void**)&ctx, execd_exit_state);
      DRETURN(execd_exit_state);
   }

   /*
    * We write pid file when we are connected to qmaster. Otherwise an old
    * execd might overwrite our pidfile.
    */
   sge_write_pid(EXECD_PID_FILE);

   /*
    * At this point we are sure we are the only sge_execd and we are connected
    * to the current qmaster. First we have to report any reaped children
    * that might exist.
    */
   starting_up();

   /*
    * Log a warning message if execd hasn't been started by a superuser
    */
   if (!sge_is_start_user_superuser()) {
      WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT));
   }   

#ifdef COMPILE_DC
   if (ptf_init()) {
      CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF));
      SGE_EXIT((void**)&ctx, 1);
   }
   INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF));
#endif

   master_job_list = object_type_get_master_list(SGE_TYPE_JOB);
   *master_job_list = lCreateList("Master_Job_List", JB_Type);
   job_list_read_from_disk(master_job_list, "Master_Job_List",
                           0, SPOOL_WITHIN_EXECD, 
                          job_initialize_job);
   

   /* clean up jobs hanging around (look in active_dir) */
   clean_up_old_jobs(ctx, 1);
   execd_trash_load_report();
   sge_set_flush_lr_flag(true);

   sge_sig_handler_in_main_loop = 1;

   if (thread_prof_active_by_id(pthread_self())) {
      prof_start(SGE_PROF_CUSTOM1, NULL);
      prof_start(SGE_PROF_CUSTOM2, NULL);
      prof_start(SGE_PROF_GDI_REQUEST, NULL);
   } else {
      prof_stop(SGE_PROF_CUSTOM1, NULL);
      prof_stop(SGE_PROF_CUSTOM2, NULL);
      prof_stop(SGE_PROF_GDI_REQUEST, NULL);
   }

   PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1);

   /* Start dispatching */
   execd_exit_state = sge_execd_process_messages(ctx);


   /*
    * This code is only reached when dispatcher terminates and execd goes down.
    */

   /* log if we received SIGPIPE signal */
   if (sge_sig_handler_sigpipe_received) {
       sge_sig_handler_sigpipe_received = 0;
       INFO((SGE_EVENT, "SIGPIPE received\n"));
   }

#if defined(LINUX)
   free_procList();
#endif
   lFreeList(master_job_list);

   PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1);
   if (prof_is_active(SGE_PROF_ALL)) {
     time_t now = (time_t)sge_get_gmt();

      if (now > next_prof_output) {
         prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n");
         prof_reset(SGE_PROF_ALL,NULL);
         next_prof_output = now + 60;
      }
   }
   sge_prof_cleanup();

   sge_shutdown((void**)&ctx, execd_exit_state);
   DRETURN(execd_exit_state);
}