Example #1
0
/****** uti/prog/sge_get_alias_path() *****************************************
*  NAME
*     sge_get_alias_path() -- Return the path of the 'alias_file' 
*
*  SYNOPSIS
*     const char* sge_get_alias_path(void) 
*
*  FUNCTION
*     Return the path of the 'alias_file' 
*
*  NOTES
*     MT-NOTE: sge_get_alias_path() is MT safe
*
******************************************************************************/
const char *sge_get_alias_path(void) 
{
   const char *sge_root, *sge_cell;
   char *cp;
   int len;
   SGE_STRUCT_STAT sbuf;

   DENTER_(TOP_LAYER, "sge_get_alias_path");

   sge_root = sge_get_root_dir(1, NULL, 0, 1);
   sge_cell = sge_get_default_cell();

   if (SGE_STAT(sge_root, &sbuf)) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_SGEROOTNOTFOUND_S , sge_root));
      SGE_EXIT(NULL, 1);
   }

   len = strlen(sge_root) + strlen(sge_cell) + strlen(COMMON_DIR) + strlen(ALIAS_FILE) + 5;
   if (!(cp = malloc(len))) {
      CRITICAL((SGE_EVENT, MSG_MEMORY_MALLOCFAILEDFORPATHTOHOSTALIASFILE ));
      SGE_EXIT(NULL, 1);
   }

   sprintf(cp, "%s/%s/%s/%s", sge_root, sge_cell, COMMON_DIR, ALIAS_FILE);
   DRETURN_(cp);
}
Example #2
0
/****** qmaster/setup_qmaster/sge_setup_qmaster() ******************************
*  NAME
*     sge_setup_qmaster() -- setup qmaster
*
*  SYNOPSIS
*     int sge_setup_qmaster(char* anArgv[])
*
*  FUNCTION
*     Process commandline arguments. Remove qmaster lock file. Write qmaster
*     host to the 'act_qmaster' file. Initialize qmaster and reporting. Write
*     qmaster PID file.
*
*     NOTE: Before this function is invoked, qmaster must become admin user.
*
*  INPUTS
*     char* anArgv[] - commandline argument vector
*
*  RESULT
*     0 - success
*
*  NOTES
*     MT-NOTE: sge_setup_qmaster() is NOT MT safe!
*     MT-NOTE:
*     MT-NOTE: This function must be called exclusively, with the qmaster main
*     MT-NOTE: thread being the *only* active thread. In other words, do not
*     MT-NOTE: invoke this function after any additional thread (directly or
*     MT-NOTE: indirectly) has been created.
*
*     Do *not* write the qmaster pid file, before 'qmaster_init()' did return
*     successfully. Otherwise, if we do have a running qmaster and a second
*     qmaster is started (illegally) on the same host, the second qmaster will
*     overwrite the pid of the qmaster started first. The second qmaster will
*     detect it's insubordinate doing and terminate itself, thus leaving behind
*     a useless pid.
*
*******************************************************************************/
int sge_setup_qmaster(sge_gdi_ctx_class_t *ctx, char* anArgv[])
{
    char err_str[1024];
    const char *qualified_hostname = ctx->get_qualified_hostname(ctx);
    const char *act_qmaster_file = ctx->get_act_qmaster_file(ctx);

    DENTER(TOP_LAYER, "sge_setup_qmaster");

    umask(022); /* this needs a better solution */

    process_cmdline(anArgv);

    INFO((SGE_EVENT, SFNMAX, MSG_STARTUP_BEGINWITHSTARTUP));

    qmaster_unlock(QMASTER_LOCK_FILE);

    if (write_qm_name(qualified_hostname, act_qmaster_file, err_str)) {
        ERROR((SGE_EVENT, "%s\n", err_str));
        SGE_EXIT(NULL, 1);
    }

    qmaster_init(ctx, anArgv);

    sge_write_pid(QMASTER_PID_FILE);

    DEXIT;
    return 0;
} /* sge_setup_qmaster() */
Example #3
0
/****** qmaster/setup_qmaster/sge_qmaster_thread_init() ************************
*  NAME
*     sge_qmaster_thread_init() -- Initialize a qmaster thread.
*
*  SYNOPSIS
*     int sge_qmaster_thread_init(bool switch_to_admin_user)
*
*  FUNCTION
*     Subsume functions which need to be called immediately after thread
*     startup. This function does make sure that the thread local data
*     structures do contain reasonable values.
*
*  INPUTS
*     bool switch_to_admin_user - become admin user if set to true
*
*  RESULT
*     0 - success
*
*  NOTES
*     MT-NOTE: sge_qmaster_thread_init() is MT safe
*     MT-NOTE:
*     MT-NOTE: sge_qmaster_thread_init() should be invoked at the beginning
*     MT-NOTE: of a thread function.
*
*******************************************************************************/
int
sge_qmaster_thread_init(sge_gdi_ctx_class_t **ctx_ref, u_long32 prog_id,
                        u_long32 thread_id, bool switch_to_admin_user)
{
    const char *admin_user = NULL;
    lList *alp = NULL;
    sge_gdi_ctx_class_t *ctx = NULL;

    DENTER(TOP_LAYER, "sge_qmaster_thread_init");

    lInit(nmv);

    if (sge_setup2(ctx_ref, prog_id, thread_id, &alp, true) != AE_OK) {
        answer_list_output(&alp);
        SGE_EXIT((void**)ctx_ref, 1);
    }
    ctx = *ctx_ref;
    ctx->reresolve_qualified_hostname(ctx);
    DEBUG((SGE_EVENT,"%s: qualified hostname \"%s\"\n", SGE_FUNC, ctx->get_qualified_hostname(ctx)));
    admin_user = ctx->get_admin_user(ctx);

    if (switch_to_admin_user == true) {
        char str[MAX_STRING_SIZE];
        if (sge_set_admin_username(admin_user, str) == -1) {
            CRITICAL((SGE_EVENT, SFNMAX, str));
            SGE_EXIT((void**)ctx_ref, 1);
        }

        if (sge_switch2admin_user()) {
            CRITICAL((SGE_EVENT, SFNMAX, MSG_ERROR_CANTSWITCHTOADMINUSER));
            SGE_EXIT((void**)ctx_ref, 1);
        }
    }

    DEXIT;
    return 0;
} /* sge_qmaster_thread_init() */
Example #4
0
/****** uti/prog/sge_get_root_dir() *******************************************
*  NAME
*     sge_get_root_dir() -- SGE/SGEEE installation directory 
*
*  SYNOPSIS
*     const char* sge_get_root_dir(int do_exit, 
*                                  char *buffer, 
*                                  size_t size,
*                                  int do_error_log ) 
*
*  FUNCTION
*     This function returns the installation directory of SGE/SGEEE.
*     This directory is defined by the SGE_ROOT environment variable 
*     of the calling process. 
*     If the environment variable does not exist or is not set then
*     this function will handle this as error and return NULL 
*     (do_exit = 0). If 'do_exit' is 1 and an error occures, the 
*     function will terminate the  calling application.
*
*  INPUTS
*     int do_exit - Terminate the application in case of an error
*     char *buffer - buffer to be used for error message
*     size_t size - size of buffer
*     int do_error_log - enable/disable error logging
*
*  RESULT
*     const char* - Root directory of the SGE/SGEEE installation
*
*  NOTES
*     MT-NOTE: sge_get_arch() is MT safe
*******************************************************************************/
const char *sge_get_root_dir(int do_exit, char *buffer, size_t size, int do_error_log)
{
   char *sge_root; 
   char *s;

   DENTER_(TOP_LAYER, "sge_get_root_dir");

   /*
    * Read some env variables
    */
   sge_root = getenv("SGE_ROOT");

   /*
    * Check the env variables
    */
   if (sge_root) {
      s = sge_root;
   } else {
      goto error;
   } 
   if (!s || strlen(s)==0) { 
      goto error;
   } else {
      /*
       * Get rid of trailing slash
       */ 
      if (s[strlen(s)-1] == '/') { 
         s[strlen(s)-1] = '\0';
      }
   }
   DRETURN_(s);

error:
   if (do_error_log) {
      if (buffer != NULL) {
         sge_strlcpy(buffer, MSG_SGEROOTNOTSET, size);
      } else {
         CRITICAL((SGE_EVENT, MSG_SGEROOTNOTSET));
      }
   }

   DEXIT_;
   if (do_exit) {
      SGE_EXIT(NULL, 1);   
   }
   return NULL;
}
Example #5
0
/*---------------------------------------------------------------------
 * parse_cmdline_execd
 *---------------------------------------------------------------------*/
static void parse_cmdline_execd(char **argv)
{
   lList *ref_list = NULL, *alp = NULL, *pcmdline = NULL;
   lListElem *aep;
   u_long32 help = 0;

   DENTER(TOP_LAYER, "parse_cmdline_execd");
            
   alp = sge_parse_cmdline_execd(argv+1, &pcmdline);
   if(alp) {
      /* 
      ** high level parsing error! show answer list
      */
      for_each(aep, alp) {
         fprintf(stderr, "%s", lGetString(aep, AN_text));
      }
      lFreeList(&alp);
      lFreeList(&pcmdline);
      /* TODO: replace with alpp and DRETURN */
      SGE_EXIT(NULL, 1);
   }
Example #6
0
/****** qmaster/setup_qmaster/process_cmdline() ********************************
*  NAME
*     process_cmdline() -- Handle command line arguments
*
*  SYNOPSIS
*     static void process_cmdline(char **anArgv)
*
*  FUNCTION
*     Handle command line arguments. Parse argument vector and handle options.
*
*  INPUTS
*     char **anArgv - pointer to agrument vector
*
*  RESULT
*     void - none
*
*  NOTES
*     MT-NOTE: process_cmdline() is NOT MT safe.
*
*******************************************************************************/
static void process_cmdline(char **anArgv)
{
    lList *alp, *pcmdline;
    lListElem *aep;
    u_long32 help = 0;

    DENTER(TOP_LAYER, "process_cmdline");

    alp = pcmdline = NULL;

    alp = parse_cmdline_qmaster(&anArgv[1], &pcmdline);
    if(alp) {
        /*
        ** high level parsing error! show answer list
        */
        for_each(aep, alp) {
            fprintf(stderr, "%s", lGetString(aep, AN_text));
        }
        lFreeList(&alp);
        lFreeList(&pcmdline);
        SGE_EXIT(NULL, 1);
    }
Example #7
0
/****** uti/stdio/sge_peopen() ************************************************
*  NAME
*     sge_peopen_r() -- Advanced popen()
*
*  SYNOPSIS
*     pid_t sge_peopen_r(const char *shell, int login_shell,
*                        const char *command, const char *user,
*                        char **env, FILE **fp_in, FILE **fp_out,
*                        FILE **fp_err)
*
*  FUNCTION
*     Advanced popen() with additional parameters:
*        - free shell usage
*        - login shell if wanted
*        - user under which to start (for root only)
*        - stdin and stderr file pointers
*        - wait for exactly the process we started
*     File descriptors have to be closed with sge_peclose().
*
*     This function is reentrant as long as env is not provided to
*     this function. This means that the function can be used in 
*     multi thread processed as long as env is not used. 
*
*  INPUTS
*     const char *shell   - which shell to use
*     int login_shell     - make it a login shell?
*     const char *command - name of the program
*     const char *user    - user under which to start (for root only)
*     char **env          - env variables to add to child
*     FILE **fp_in        - file input stream
*     FILE **fp_out       - file output stream
*     FILE **fp_err       - file error stream
*
*  RESULT
*     pid_t - process id
*
*  NOTES
*     MT-NOTE: sge_peopen() is MT safe 
*
*     DO NOT ADD ASYNC SIGNAL UNSAFE FUNCTIONS BETWEEN FORK AND EXEC
*     DUE TO THE FACT THAT THIS FUNCTION WILL BE USED IN QMASTER
*     (MULTITHREADED ENVIRONMENT) THIS MIGHT CAUSE A DEADLOCK 
*     IN A MASTER THREAD. 
*
*  SEE ALSO
*     uti/stdio/sge_peclose()
******************************************************************************/ 
pid_t sge_peopen_r(const char *shell, int login_shell, const char *command,
                   const char *user, char **env,  FILE **fp_in, FILE **fp_out,
                   FILE **fp_err, bool null_stderr)
{
   pid_t pid;
   int pipefds[3][2];
   int i;
   char arg0[256];
#if defined(SOLARIS)
   char err_str[256];
#endif
   struct passwd *pw = NULL;
   uid_t myuid;
   uid_t tuid;
 
   DENTER(TOP_LAYER, "sge_peopen_r");

   if (sge_has_admin_user()) {
      sge_switch2start_user();
   }
   myuid = geteuid();
   tuid = myuid;

   /* 
    * open pipes - close on failure 
    */
   for (i = 0; i < 3; i++) {
      if (pipe(pipefds[i]) != 0) {
         while (--i >= 0) {
            close(pipefds[i][0]);
            close(pipefds[i][1]);
         }
         ERROR((SGE_EVENT, MSG_SYSTEM_FAILOPENPIPES_SS, command, strerror(errno)));
         if (sge_has_admin_user()) {
            sge_switch2admin_user();
         }
         DRETURN(-1);
      }
   }

   /*
    * set arg0 for exec call correctly to that
    * either a normal shell or a login shell will be started
    */
   if (login_shell) {
      strcpy(arg0, "-");
   } else {
      strcpy(arg0, "");
   }
   strcat(arg0, shell);
   DPRINTF(("arg0 = %s\n", arg0));
   DPRINTF(("arg1 = -c\n"));
   DPRINTF(("arg2 = %s\n", command));


   /*
    * prepare the change of the user which might be done after fork()
    * if a user name is provided.
    *
    * this has to be done before the fork() afterwards it might cause
    * a deadlock of the child because getpwnam() is not async-thread safe.
    */
   if (user) {
      struct passwd pw_struct;
      int size = get_pw_buffer_size();
      char *buffer = sge_malloc(size);

      /*
       * get information about the target user
       */
      if (buffer != NULL) {
         pw = sge_getpwnam_r(user, &pw_struct, buffer, size);
         if (pw == NULL) {
            ERROR((SGE_EVENT, MSG_SYSTEM_NOUSERFOUND_SS, user, strerror(errno)));
            FREE(buffer);
            if (sge_has_admin_user()) {
               sge_switch2admin_user();
            }
            DRETURN(-1);
         }
      } else {
         ERROR((SGE_EVENT, MSG_UTI_MEMPWNAM));
         FREE(buffer);
         if (sge_has_admin_user()) {
            sge_switch2admin_user();
         }
         DRETURN(-1);
      }

      DPRINTF(("was able to resolve user\n"));

      /* 
       * only prepare change of user if target user is different from current one
       */
      if (myuid != pw->pw_uid) {
#if !(defined(WIN32) || defined(INTERIX)) /* var not needed */
         int res;
#endif 

         if (myuid != SGE_SUPERUSER_UID) {
            DPRINTF(("only root is allowed to switch to a different user\n"));
            ERROR((SGE_EVENT, MSG_SYSTEM_NOROOTRIGHTSTOSWITCHUSER));
            FREE(buffer);
            DRETURN(-2);
         }                             

         DPRINTF(("Before initgroups\n"));

#if !(defined(WIN32) || defined(INTERIX))  /* initgroups not called */
         res = initgroups(pw->pw_name, pw->pw_gid);
#  if defined(SVR3) || defined(sun)
         if (res < 0)
#  else
         if (res)
#  endif
         {
            ERROR((SGE_EVENT, MSG_SYSTEM_INITGROUPSFORUSERFAILED_ISS, res, user, strerror(errno)));
            FREE(buffer);
            SGE_EXIT(NULL, 1);
         }

         DPRINTF(("Initgroups was successful\n"));

#endif /* WIN32 */
      }
      DPRINTF(("user = %s\n", user));
      DPRINTF(("myuid = %d\n", (int)myuid));
      if (pw != NULL) {
         tuid = pw->pw_uid;
         DPRINTF(("target uid = %d\n", (int)tuid));
      }

      FREE(buffer);
   }

   DPRINTF(("Now process will fork\n"));

#if defined(SOLARIS)
   pid = sge_smf_contract_fork(err_str, 256);
#else
   pid = fork();
#endif

   /* 
    * in the child pid is 0 
    */
   if (pid == 0) {  
      /*
       * close all fd's except that ones mentioned in keep_open 
       */
      int keep_open[6];
      keep_open[0] = 0;
      keep_open[1] = 1;
      keep_open[2] = 2;
      keep_open[3] = pipefds[0][0];
      keep_open[4] = pipefds[1][1];
      keep_open[5] = pipefds[2][1];
      sge_close_all_fds(keep_open, 6);

      /* 
       * shall we redirect stderr to /dev/null? Then
       *    - open "/dev/null"
       *    - set stderr to "dev/null"
       *    - close the stderr-pipe
       * otherwise
       *    - redirect stderr to the pipe
       */
      if (null_stderr) {
         int fd = open("/dev/null", O_WRONLY);

         if (fd != -1) {
            close(2);
            dup(fd);
            close(pipefds[2][1]);
         } else {
            SGE_EXIT(NULL, 1);
         }
      } else {
         close(2);
         dup(pipefds[2][1]);
      }

      /* 
       * redirect stdin and stdout to the pipes 
       */
      close(0);
      close(1);
      dup(pipefds[0][0]);
      dup(pipefds[1][1]);

      if (pw != NULL) {
         int lret = setuid(tuid);
         if (lret) {
            SGE_EXIT(NULL, 1);
         }
      }

      /*
       * set the environment if we got one as argument
       */
      if (env != NULL) {
         if (pw != NULL) {
            addenv("HOME", pw->pw_dir);
            addenv("SHELL", pw->pw_shell);
            addenv("USER", pw->pw_name);
            addenv("LOGNAME", pw->pw_name);
         }
         addenv("PATH", SGE_DEFAULT_PATH);
         for(; *env; env++) {
            putenv(*env);
         }
      }
      execlp(shell, arg0, "-c", command, NULL);
   }
 
   if (pid < 0) {
      for (i=0; i<3; i++) {
         close(pipefds[i][0]);
         close(pipefds[i][1]);
      }
#if defined(SOLARIS)
      if (pid < -1 && err_str) {
          ERROR((SGE_EVENT, MSG_SMF_FORK_FAILED_SS, "sge_peopen()", err_str));
      }
#endif
      if (sge_has_admin_user()) {
         sge_switch2admin_user();
      }
      DRETURN(-1);
   }
 
   /* close the childs ends of the pipes */
   close(pipefds[0][0]);
   close(pipefds[1][1]);
   close(pipefds[2][1]);
  
   /* return filehandles for stdin and stdout */
   *fp_in  = fdopen(pipefds[0][1], "a");
   *fp_out = fdopen(pipefds[1][0], "r");

   /* is stderr redirected to /dev/null? */
   if (null_stderr) {
      /* close the pipe and return NULL as filehandle */
      close(pipefds[2][0]);
      *fp_err = NULL;
   } else {
      /* return filehandle for stderr */
      *fp_err = fdopen(pipefds[2][0], "r");
   }

   if (sge_has_admin_user()) {
      sge_switch2admin_user();
   }
   DRETURN(pid);
}
Example #8
0
bool sge_parse_qrsub(sge_gdi_ctx_class_t *ctx, lList *pcmdline, lList **alpp, lListElem **ar)
{
   lListElem *ep = NULL, *next_ep = NULL;
   lList *lp = NULL;
   DENTER(TOP_LAYER, "sge_parse_qrsub");

   /*  -help 	 print this help */
   if ((ep = lGetElemStr(pcmdline, SPA_switch, "-help"))) {
      lRemoveElem(pcmdline, &ep);
      sge_usage(QRSUB, stdout);
      DEXIT;
      SGE_EXIT((void **)&ctx, 0);
   }

   /*  -a date_time 	 start time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-a"))) {
      lSetUlong(*ar, AR_start_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -e date_time 	 end time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG*/
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-e"))) {
      lSetUlong(*ar, AR_end_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -d time 	 duration in TIME format SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-d"))) {
      lSetUlong(*ar, AR_duration, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -w e/v 	 validate availability of AR request, default e SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-w"))) {
      lSetUlong(*ar, AR_verify, lGetInt(ep, SPA_argval_lIntT));
      lRemoveElem(pcmdline, &ep);
   }
  
   /*  -N name 	 AR name SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-N"))) {
      lSetString(*ar, AR_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
      
   /*  -A account_string 	 AR name in accounting record SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-A"))) {
      lSetString(*ar, AR_account, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
     
   /*  -l resource_list 	 request the given resources  SGE_LIST */
   parse_list_simple(pcmdline, "-l", *ar, AR_resource_list, 0, 0, FLG_LIST_APPEND);
   centry_list_remove_duplicates(lGetList(*ar, AR_resource_list));

   /*  -u wc_user 	       access list SGE_LIST */
   /*  -u ! wc_user TBD: Think about eval_expression support in compare allowed and excluded lists */
   parse_list_simple(pcmdline, "-u", *ar, AR_acl_list, ARA_name, 0, FLG_LIST_MERGE);
   /*  -u ! list separation */
   lp = lGetList(*ar,  AR_acl_list);
   next_ep = lFirst(lp);
   while ((ep = next_ep)) {
      bool is_xacl = false;
      const char *name = lGetString(ep, ARA_name);

      next_ep = lNext(ep);
      if (name[0] == '!') { /* move this element to xacl_list */
         is_xacl = true;
         name++;
      }

      if (!is_hgroup_name(name)) {
         struct passwd *pw;
         struct passwd pw_struct;
         char *buffer;
         int size;
         stringT group;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         pw = sge_getpwnam_r(name, &pw_struct, buffer, size);
         
         if (pw == NULL) {
           answer_list_add_sprintf(alpp, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, MSG_USER_XISNOKNOWNUSER_S, name);
           FREE(buffer);
           DRETURN(false);
         }
         sge_gid2group(pw->pw_gid, group, MAX_STRING_SIZE, MAX_NIS_RETRIES);
         lSetString(ep, ARA_group, group);
         FREE(buffer);
      }

      if (is_xacl) {
         lListElem *new_ep = lAddSubStr(*ar, ARA_name, name, AR_xacl_list, ARA_Type);
         lSetString(new_ep, ARA_group, lGetString(ep, ARA_group));
         lRemoveElem(lp, &ep);
      }

   }

   /*  -q wc_queue_list 	 reserve in queue(s) SGE_LIST */
   parse_list_simple(pcmdline, "-q", *ar, AR_queue_list, 0, 0, FLG_LIST_APPEND);

  /*    -pe pe_name slot_range reserve slot range for parallel jobs */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-pe"))) {
      lSetString(*ar, AR_pe, lGetString(ep, SPA_argval_lStringT)); /* SGE_STRING, */
      lSwapList(*ar, AR_pe_range, ep, SPA_argval_lListT);       /* SGE_LIST */
      lRemoveElem(pcmdline, &ep);
   }
   /*   AR_master_queue_list  -masterq wc_queue_list, SGE_LIST bind master task to queue(s) */
   parse_list_simple(pcmdline, "-masterq", *ar, AR_master_queue_list, 0, 0, FLG_LIST_APPEND);

   /*  -ckpt ckpt-name 	 reserve in queue with ckpt method SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-ckpt"))) {
      lSetString(*ar, AR_checkpoint_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -m b/e/a/n 	 define mail notification events SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-m"))) {
      u_long32 ul;
      u_long32 old_mail_opts;

      ul = lGetInt(ep, SPA_argval_lIntT);
      if  ((ul & NO_MAIL)) {
         lSetUlong(*ar, AR_mail_options, 0);
      } else {
         old_mail_opts = lGetUlong(*ar, AR_mail_options);
         lSetUlong(*ar, AR_mail_options, ul | old_mail_opts);
      }
      lRemoveElem(pcmdline, &ep);
   }

   /*   -M user[@host],... 	 notify these e-mail addresses SGE_LIST*/
   parse_list_simple(pcmdline, "-M", *ar, AR_mail_list, MR_host, MR_user, FLG_LIST_MERGE);

   /*  -he yes/no 	 hard error handling SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-he"))) {
      lSetUlong(*ar, AR_error_handling, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*   -now 	 reserve in queues with qtype interactive  SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-now"))) {
      u_long32 ar_now = lGetUlong(*ar, AR_type);
      if(lGetInt(ep, SPA_argval_lIntT)) {
         JOB_TYPE_SET_IMMEDIATE(ar_now);
      } else {
         JOB_TYPE_CLEAR_IMMEDIATE(ar_now);
      }

      lSetUlong(*ar, AR_type, ar_now);

      lRemoveElem(pcmdline, &ep);
   }

  /* Remove the script elements. They are not stored in the ar structure */
  if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPT))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTLEN))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTPTR))) {
      lRemoveElem(pcmdline, &ep);
   }

   ep = lFirst(pcmdline);   
   if(ep) {
      const char *option = lGetString(ep,SPA_switch);
      /* as jobarg are stored no switch values, need to be filtered */ 
      if(sge_strnullcmp(option, "jobarg") != 0) {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENTX_S,
                              lGetString(ep,SPA_switch)); 
      } else {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENT);
       }
      DRETURN(false);
   }

   if (lGetUlong(*ar, AR_start_time) == 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_start_time, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_duration));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) == 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_end_time, duration_add_offset(lGetUlong(*ar, AR_start_time), lGetUlong(*ar, AR_duration)));
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) == 0) {
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   }

   DRETURN(true);
}
Example #9
0
static bool 
sge_parse_qrstat(sge_gdi_ctx_class_t *ctx, lList **answer_list,
                 qrstat_env_t *qrstat_env, lList **cmdline)
{
   bool ret = true;
   
   DENTER(TOP_LAYER, "sge_parse_qrstat");

   qrstat_env->is_summary = true;
   while (lGetNumberOfElem(*cmdline)) {
      u_long32 value;
   
      /* -help */
      if (opt_list_has_X(*cmdline, "-help")) {
         sge_usage(QRSTAT, stdout);
         DEXIT;
         SGE_EXIT((void**)&ctx, 0);
      }

      /* -u */
      while (parse_multi_stringlist(cmdline, "-u", answer_list, 
                                    &(qrstat_env->user_list), ST_Type, ST_name)) {
         continue;
      }

      /* -explain */
      while (parse_flag(cmdline, "-explain", answer_list, &value)) {
         qrstat_filter_add_core_attributes(qrstat_env);
         qrstat_filter_add_explain_attributes(qrstat_env);
         qrstat_env->is_explain = (value > 0) ? true : false;
         continue;
      }

      /* -xml */
      while (parse_flag(cmdline, "-xml", answer_list, &value)) {
         qrstat_filter_add_core_attributes(qrstat_env);
         qrstat_filter_add_xml_attributes(qrstat_env);
         qrstat_env->is_xml = (value > 0) ? true : false;
         continue;
      }

      /* -ar */
      while (parse_u_longlist(cmdline, "-ar", answer_list, &(qrstat_env->ar_id_list))) {         
         qrstat_filter_add_core_attributes(qrstat_env);
         qrstat_filter_add_ar_attributes(qrstat_env);
         qrstat_filter_add_ar_where(qrstat_env);
         qrstat_env->is_summary = false;
         continue;      
      }

      if (lGetNumberOfElem(*cmdline)) {
         sge_usage(QRSTAT, stdout);
         answer_list_add(answer_list, MSG_PARSE_TOOMANYOPTIONS, 
                         STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR);
         ret = false;
         break;
      }
   } 

   if (qrstat_env->is_summary) {
      char user[128] = "";
      if (sge_uid2user(geteuid(), user, sizeof(user), MAX_NIS_RETRIES)) {
         answer_list_add_sprintf(answer_list, STATUS_ESEMANTIC,
                                 ANSWER_QUALITY_CRITICAL,
                                 MSG_SYSTEM_RESOLVEUSER_U, (u_long32)geteuid());
         ret = false;
      } else {
         str_list_transform_user_list(&(qrstat_env->user_list), answer_list, user);
         qrstat_filter_add_core_attributes(qrstat_env);
         qrstat_filter_add_u_where(qrstat_env);
      }
   }

   DRETURN(ret);
}
Example #10
0
int main(int argc, char **argv) {
   int ret = 0;
   lList *pcmdline = NULL;
   lList *answer_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   qrstat_env_t qrstat_env;

   DENTER_MAIN(TOP_LAYER, "qrsub");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSTAT);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSTAT, MAIN_THREAD, &answer_list) != AE_OK) {
      answer_list_output(&answer_list);
      goto error_exit;
   }

   qrstat_filter_init(&qrstat_env);
   qrstat_filter_set_ctx(&qrstat_env, ctx);

   /*
    * stage 1: commandline parsing
    */
   {
      dstring file = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_qrstat file */
      get_root_file_path(&file, cell_root, SGE_COMMON_DEF_QRSTAT_FILE);
      if (sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list) == true) {
         /* arguments from $HOME/.sge_qrstat file */
         if (get_user_home_file_path(&file, SGE_HOME_DEF_QRSTAT_FILE, user, &answer_list)) {
            sge_parse_from_file_qrstat(sge_dstring_get_string(&file), &pcmdline, &answer_list);
         }
      }
      sge_dstring_free(&file); 

      if (answer_list) {
         answer_list_output(&answer_list);
         lFreeList(&pcmdline);
         sge_prof_cleanup();
         SGE_EXIT((void**)&ctx, 1);
      }
   }

   answer_list = cull_parse_cmdline(QRSTAT, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS);
   if (answer_list != NULL) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }
  
   /* 
    * stage 2: evalutate switches and modify qrstat_env
    */
   if (!sge_parse_qrstat(ctx, &answer_list, &qrstat_env, &pcmdline)) {
      answer_list_output(&answer_list);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   /* 
    * stage 3: fetch data from master 
    */
   {
      answer_list = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_GET, &qrstat_env.ar_list, 
                     qrstat_env.where_AR_Type, qrstat_env.what_AR_Type, false);

      if (answer_list_has_error(&answer_list)) {
         answer_list_output(&answer_list);
         goto error_exit;
      }
   }

   /*
    * stage 4: create output in correct format
    */
   {
      qrstat_report_handler_t *handler = NULL;

      if (qrstat_env.is_xml) {
         handler = qrstat_create_report_handler_xml(&qrstat_env, &answer_list);
      } else {
         handler = qrstat_create_report_handler_stdout(&qrstat_env, &answer_list);
      }
      if (!qrstat_print(&answer_list, handler, &qrstat_env)) {
         ret = 1;
      }
      if (qrstat_env.is_xml) {
         qrstat_destroy_report_handler_xml(&handler, &answer_list); 
      } else {
         qrstat_destroy_report_handler_stdout(&handler, &answer_list); 
      }
   }

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   DRETURN(ret);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
Example #11
0
int 
main(int argc, char **argv) 
{
   lList *opts_cmdline = NULL;
   lList *opts_defaults = NULL;
   lList *opts_scriptfile = NULL;
   lList *opts_all = NULL;
   lListElem *job = NULL;
   lList *alp = NULL;
   lListElem *ep;
   int exit_status = 0;
   int just_verify;
   int tmp_ret;
   int wait_for_job = 0, is_immediate = 0;
   dstring session_key_out = DSTRING_INIT;
   dstring diag = DSTRING_INIT;
   dstring jobid = DSTRING_INIT;
   u_long32 start, end, step;
   u_long32 num_tasks;
   int count, stat;
   char *jobid_string = NULL;
   bool has_terse;
   drmaa_attr_values_t *jobids = NULL;

   u_long32 prog_number = 0;
   u_long32 myuid = 0;
   const char *sge_root = NULL;
   const char *cell_root = NULL;
   const char *username = NULL;
   const char *qualified_hostname = NULL;
   const char *unqualified_hostname = NULL;
   const char *mastername = NULL;

   DENTER_MAIN(TOP_LAYER, "qsub");

   prof_mt_init();

   /* Set up the program information name */
   sge_setup_sig_handlers(QSUB);

   DPRINTF(("Initializing JAPI\n"));

   if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag)
                                                      != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "\n");
      fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
              sge_dstring_get_string(&diag));
      fprintf(stderr, "\n");
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   prog_number = ctx->get_who(ctx);
   myuid = ctx->get_uid(ctx);
   sge_root = ctx->get_sge_root(ctx);
   cell_root = ctx->get_cell_root(ctx);
   username = ctx->get_username(ctx);
   qualified_hostname = ctx->get_qualified_hostname(ctx);
   unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   mastername = ctx->get_master(ctx, false);

   /*
    * read switches from the various defaults files
    */
   opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * append the commandline switches to the list
    */
   opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp,
                                          argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * show usage if -help was in commandline
    */
   if (opt_list_has_X(opts_cmdline, "-help")) {
      sge_usage(QSUB, stdout);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /*
    * We will only read commandline options from scripfile if the script
    * itself should not be handled as binary
    */
   if (opt_list_is_X_true(opts_cmdline, "-b") ||
       (!opt_list_has_X(opts_cmdline, "-b") &&
        opt_list_is_X_true(opts_defaults, "-b"))) {
      DPRINTF(("Skipping options from script due to -b option\n"));
   } else {
      opt_list_append_opts_from_script(prog_number,
                                       &opts_scriptfile, &alp, 
                                       opts_cmdline, environ);
      tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S,
                                           MSG_WARNING);
      if (tmp_ret > 0) {
         DEXIT;
         SGE_EXIT((void**)&ctx, tmp_ret);
      }
   }

   /*
    * Merge all commandline options and interprete them
    */
   opt_list_merge_command_lines(&opts_all, &opts_defaults, 
                                &opts_scriptfile, &opts_cmdline);

   /*
    * Check if -terse is requested
    */
   has_terse = opt_list_has_X(opts_all, "-terse");

   /* If "-sync y" is set, wait for the job to end. */   
   /* Remove all -sync switches since cull_parse_job_parameter()
    * doesn't know what to do with them. */
   while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
      if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
         wait_for_job = 1;
      }
      
      lRemoveElem(opts_all, &ep);
   }

   if (wait_for_job) {
      DPRINTF(("Wait for job end\n"));
   }

   alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, 
                                  qualified_hostname, opts_all, &job);

   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
      answer_list_output(&alp);
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* Check if job is immediate */
   is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
   DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

   DPRINTF(("Everything ok\n"));

   if (lGetUlong(job, JB_verify)) {
      cull_show_job(job, 0, false);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   if (is_immediate || wait_for_job) {
      pthread_t sigt;
      
      qsub_setup_sig_handlers(); 

      if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 " error preparing signal handling thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
      
      if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                                       DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
         const char *msg = sge_dstring_get_string(&diag);
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 msg?msg:" error starting event client thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
   }
   
   job_get_submit_task_ids(job, &start, &end, &step);
   num_tasks = (end - start) / step + 1;

   if (num_tasks > 1) {
      int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag);
      if (error != DRMAA_ERRNO_SUCCESS) {
         /* No active session here means that japi_enable_job_wait() was
          * interrupted by the signal handler, in which case we just break out
          * quietly. */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the JAPI error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid));
      
      jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
   }
   else if (num_tasks == 1) {
      int error = japi_run_job(&jobid, &job, &diag);
      
      if (error != DRMAA_ERRNO_SUCCESS) {
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the DRMAA error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      jobid_string = strdup(sge_dstring_get_string(&jobid));
      DPRINTF(("job id is: %s\n", jobid_string));

      sge_dstring_free(&jobid);
   }
   else {
      fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
      fprintf(stderr, "\n");
      
      exit_status = 1;
      goto Error;
   }
  
   /* only success message is printed to stdout */

   just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY || 
                  lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
   DPRINTF(("Just verifying job\n"));

   if (!just_verify) {
      const char *output = sge_dstring_get_string(&diag); 

      /* print the tersed output */
      if (has_terse) {
         printf("%s", jobid_string);
      } else if (output != NULL) {
        printf("%s", output);
      } else {
        printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
      }
      printf("\n");
   } else {
      printf(MSG_JOB_VERIFYFOUNDQ);
      printf("\n");
   }   

   if ((wait_for_job || is_immediate) && !just_verify) {
      int event;

      if (is_immediate) {
         fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

         /* We only need to wait for the first task to be scheduled to be able
          * to say that the job is running. */
         tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                             DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                             NULL, &diag);

         if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S,
                  jobid_string);
            fprintf(stderr, "\n");
         }
         /* A job finish event here means that the job was rejected. */
         else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) &&
                  (event == JAPI_JOB_FINISH)) {
            fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);
            
            exit_status = 1;
            goto Error;
         }
         else {
         /* Since we told japi_wait to wait forever, we know that if it gets
          * a timeout, it's because it's been interrupted to exit, in which
          * case we don't complain.  Same for no active session. */
            if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
               fprintf(stderr, "\n");
               fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S,
                       sge_dstring_get_string(&diag));
               fprintf(stderr, "\n");
            }

            exit_status = 1;
            goto Error;
         }
      }
         
      if (wait_for_job) {
         /* Rather than using japi_synchronize on ALL for bulk jobs, we use
          * japi_wait on ANY num_tasks times because with synchronize, we would
          * have to wait for all the tasks to finish before we know if any
          * finished. */
         for (count = 0; count < num_tasks; count++) {
            /* Since there's only one running job in the session, we can just
             * wait for ANY. */
            if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                          DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                          NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
               if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                   (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                  fprintf(stderr, "\n");
                  fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                  fprintf(stderr, "\n");
               }
               
               exit_status = 1;
               goto Error;
            }
            
            /* report how job finished */
            /* If the job is an array job, use the first non-zero exit code as
             * the exit code for qsub. */
            if (exit_status == 0) {
               exit_status = report_exit_status(stat,
                                              sge_dstring_get_string(&jobid));
            }
            /* If we've already found a non-zero exit code, just print the exit
             * info for the task. */
            else {
               report_exit_status(stat, sge_dstring_get_string(&jobid));
            }               
         }
      }
   }

Error:
   FREE(jobid_string);
   lFreeList(&alp);
   lFreeList(&opts_all);
   
   if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
      if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
         fprintf(stderr, "\n");
      }
      else {
         struct timespec ts;
         /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
          * because the signal handler thread called japi_exit().  We know this
          * because if the call to japi_init() fails, we just exit directly.
          * If the call to japi_init() succeeds, then we have an active session,
          * so coming here because of an error would not result in the
          * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
         DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));
         
         sge_relative_timespec(15, &ts);
         sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
         
         while (!exited) {
            if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
               DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
               break;
            }
         }
         
         sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
      }
   }

   sge_prof_cleanup();

   /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
    * supended, SGE_EXIT() hangs. */
   exit(exit_status);
   DEXIT;
   return exit_status;
}
Example #12
0
int main(int argc, char **argv) {
   lList *pcmdline = NULL;
   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *ar_lp = NULL;

   lListElem *ar = NULL;

   DENTER_MAIN(TOP_LAYER, "qrsub");

   /* Set up the program information name */
   sge_setup_sig_handlers(QRSUB);

   log_state_set_log_gui(1);

   if (sge_gdi2_setup(&ctx, QRSUB, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      goto error_exit;
   }

   /*
   ** stage 1 of commandline parsing
   */
   {
      dstring file = DSTRING_INIT;
      const char *user = ctx->get_username(ctx);
      const char *cell_root = ctx->get_cell_root(ctx);

      /* arguments from SGE_ROOT/common/sge_ar_request file */
      get_root_file_path(&file, cell_root, SGE_COMMON_DEF_AR_REQ_FILE);
      if ((alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, 
         FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE)) == NULL) {
         /* arguments from $HOME/.sge_ar_request file */
         if (get_user_home_file_path(&file, SGE_HOME_DEF_AR_REQ_FILE, user, &alp)) {
            lFreeList(&alp);
            alp = parse_script_file(QRSUB, sge_dstring_get_string(&file), "", &pcmdline, environ, 
            FLG_HIGHER_PRIOR | FLG_IGN_NO_FILE);
         }
      }
      sge_dstring_free(&file); 

      if (alp) {
         answer_list_output(&alp);
         lFreeList(&pcmdline);
         goto error_exit;
      }
   }
   
   alp = cull_parse_cmdline(QRSUB, argv+1, environ, &pcmdline, FLG_USE_PSEUDOS);

   if (answer_list_print_err_warn(&alp, NULL, "qrsub: ", MSG_WARNING) > 0) {
      lFreeList(&pcmdline);
      goto error_exit;
   }
   
   if (!pcmdline) {
      /* no command line option is present: print help to stderr */
      sge_usage(QRSUB, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      goto error_exit;
   }

   /*
   ** stage 2 of command line parsing
   */
   ar = lCreateElem(AR_Type);

   if (!sge_parse_qrsub(ctx, pcmdline, &alp, &ar)) {
      answer_list_output(&alp);
      lFreeList(&pcmdline);
      goto error_exit;
   }

   ar_lp = lCreateList(NULL, AR_Type);
   lAppendElem(ar_lp, ar);

   alp = ctx->gdi(ctx, SGE_AR_LIST, SGE_GDI_ADD | SGE_GDI_RETURN_NEW_VERSION, &ar_lp, NULL, NULL);
   lFreeList(&ar_lp);
   answer_list_on_error_print_or_exit(&alp, stdout);
   if (answer_list_has_error(&alp)) {
      sge_gdi2_shutdown((void**)&ctx);
      sge_prof_cleanup();
      if (answer_list_has_status(&alp, STATUS_NOTOK_DOAGAIN)) {
         DRETURN(25);
      } else {
         DRETURN(1);
      }
   }

   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   DRETURN(0);

error_exit:
   sge_gdi2_shutdown((void**)&ctx);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   DRETURN(1);
}
Example #13
0
/*-------------------------------------------------------------------*/
void sge_setup_sge_execd(sge_gdi_ctx_class_t *ctx, const char* tmp_err_file_name)
{
   char err_str[MAX_STRING_SIZE];
   int allowed_get_conf_errors     = 5;
   char* spool_dir = NULL;
   const char *unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   const char *admin_user = ctx->get_admin_user(ctx);

   DENTER(TOP_LAYER, "sge_setup_sge_execd");

   /* TODO: is this the right place to switch the user ?
            ports below 1024 ok */
   /*
   ** switch to admin user
   */
   if (sge_set_admin_username(admin_user, err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      /* TODO: remove */
      SGE_EXIT(NULL, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_ERROR_CANTSWITCHTOADMINUSER));
      /* TODO: remove */
      SGE_EXIT(NULL, 1);
   }

   while (gdi2_wait_for_conf(ctx, &Execd_Config_List)) {
      if (allowed_get_conf_errors-- <= 0) {
         CRITICAL((SGE_EVENT, SFNMAX, MSG_EXECD_CANT_GET_CONFIGURATION_EXIT));
         /* TODO: remove */
         SGE_EXIT(NULL, 1);
      }
      sleep(1);
      ctx->get_master(ctx, true);
   }
   sge_show_conf();         


   /* get aliased hostname */
   /* TODO: is this call needed ? */
   ctx->reresolve_qualified_hostname(ctx);
   spool_dir = mconf_get_execd_spool_dir();

   DPRINTF(("chdir(\"/\")----------------------------\n"));
   sge_chdir_exit("/",1);
   DPRINTF(("Making directories----------------------------\n"));
   sge_mkdir(spool_dir, 0755, 1, 0);
   DPRINTF(("chdir(\"%s\")----------------------------\n", spool_dir));
   sge_chdir_exit(spool_dir,1);
   sge_mkdir(unqualified_hostname, 0755, 1, 0);
   DPRINTF(("chdir(\"%s\",me.unqualified_hostname)--------------------------\n",
            unqualified_hostname));
   sge_chdir_exit(unqualified_hostname, 1); 
   /* having passed the  previous statement we may 
      log messages into the ERR_FILE  */
   if ( tmp_err_file_name != NULL) {
      sge_copy_append((char*)tmp_err_file_name, ERR_FILE, SGE_MODE_APPEND);
   }
   sge_switch2start_user();
   if ( tmp_err_file_name != NULL) {
      unlink(tmp_err_file_name);
   }
   sge_switch2admin_user();
   log_state_set_log_as_admin_user(1);
   sprintf(execd_messages_file, "%s/%s/%s", spool_dir, 
           unqualified_hostname, ERR_FILE);
   log_state_set_log_file(execd_messages_file);

   sprintf(execd_spool_dir, "%s/%s", spool_dir, 
           unqualified_hostname);
   
   DPRINTF(("Making directories----------------------------\n"));
   sge_mkdir(EXEC_DIR, 0775, 1, 0);
   sge_mkdir(JOB_DIR, 0775, 1, 0);
   sge_mkdir(ACTIVE_DIR,  0775, 1, 0);

#if defined(PLPA_LINUX) || defined(SOLARIS86) || defined(SOLARISAMD64)
   /* initialize processor topology */
   if (initialize_topology() != true) {
      DPRINTF(("Couldn't initizalize topology-----------------------\n"));
   }
#endif

   sge_free(&spool_dir);
   DRETURN_VOID;
}
Example #14
0
/****** qmaster/sge_qmaster_main/main() ****************************************
*  NAME
*     main() -- qmaster entry point 
*
*  SYNOPSIS
*     int main(int argc, char* argv[]) 
*
*  FUNCTION
*     Qmaster entry point.
*
*     NOTE: The main thread must block all signals before any additional thread
*     is created. Failure to do so will ruin signal handling!
*
*  INPUTS
*     int argc     - number of commandline arguments 
*     char* argv[] - commandline arguments 
*
*  RESULT
*     0 - success 
*
*  NOTES
*     We check whether 'SGE_ROOT' is set before we daemonize. Once qmaster is
*     a daemon, we are no longer connected to a terminal and hence can not
*     output an error message to stdout or stderr.
*
*     We need to inovke 'prepare_enroll()' *before* the user id is switched via
*     'become_admin_user()'. This is because qmaster must be able to bind a so
*     called reserved port (requires root privileges) if configured to do so.
*
*******************************************************************************/
int main(int argc, char* argv[])
{
   int max_enroll_tries;
   int ret_val;
   int file_descriptor_settings_result = 0;
   bool has_daemonized = false;
   sge_gdi_ctx_class_t *ctx = NULL;
   u_long32 start_time = sge_get_gmt();
   monitoring_t monitor;

   DENTER_MAIN(TOP_LAYER, "qmaster");

   sge_monitor_init(&monitor, "MAIN", NONE_EXT, MT_WARNING, MT_ERROR);
   prof_mt_init();

   sge_get_root_dir(true, NULL, 0, true);
   
#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   sge_init_language_func((gettext_func_type)gettext, (setlocale_func_type)setlocale, (bindtextdomain_func_type)bindtextdomain, (textdomain_func_type)textdomain);
   sge_init_language(NULL,NULL);   
#endif 

   /* 
    * qmaster doesn't support any commandline anymore,
    * but we should show version string and -help option 
    */
   if (argc != 1) {
      sigset_t sig_set;
      sigfillset(&sig_set);
      pthread_sigmask(SIG_SETMASK, &sig_set, NULL);
      sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, true);
      sge_process_qmaster_cmdline(argv);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
    * daemonize qmaster
    * set file descriptor limits
    * and initialize libraries to be used in multi threaded environment
    * also take care that finished child processed of this process become
    * zombie jobs
    */
   has_daemonized = sge_daemonize_qmaster();
   file_descriptor_settings_result = set_file_descriptor_limit();
#if !defined(INTERIX) && !defined(CYGWIN)
   init_sig_action_and_mask();
#endif

   /* init qmaster threads without becomming admin user */
   sge_qmaster_thread_init(&ctx, QMASTER, MAIN_THREAD, false);

   ctx->set_daemonized(ctx, has_daemonized);

   /* this must be done as root user to be able to bind ports < 1024 */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;
      if (max_enroll_tries <= 0) {
         /* exit after 30 seconds */
         CRITICAL((SGE_EVENT, MSG_QMASTER_COMMUNICATION_ERRORS ));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[QMASTER],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
      }
   }

   /*
    * now the commlib up and running. Set qmaster application status function 
    * (commlib callback function for qping status information response 
    *  messages (SIRM))
    */
   ret_val = cl_com_set_status_func(sge_qmaster_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
   }

   /* 
    * now we become admin user change into the correct root directory set the
    * the target for logging messages
    */
   sge_become_admin_user(ctx->get_admin_user(ctx));
   sge_chdir_exit(ctx->get_qmaster_spool_dir(ctx), 1);
   log_state_set_log_file(ERR_FILE);
   ctx->set_exit_func(ctx, sge_exit_func);

#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /*
    * We do increment the heartbeat manually here. This is the 'startup heartbeat'. 
    * The first time the hearbeat will be incremented through the heartbeat event 
    * handler is after about HEARTBEAT_INTERVAL seconds. The hardbeat event handler
    * is setup during the initialisazion of the timer thread.
    */
   inc_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, HEARTBEAT_INTERVAL, NULL);
     
   /*
    * Event master module has to be initialized already here because
    * sge_setup_qmaster() might already access it although event delivery
    * thread is not running.
    *
    * Corresponding shutdown is done in sge_event_master_terminate();
    *
    * EB: In my opinion the init function should called in
    * sge_event_master_initialize(). Is it possible to move that call?
    */ 
   sge_event_master_init();

   sge_setup_qmaster(ctx, argv);

#ifndef USE_POLL
   if (file_descriptor_settings_result == 1) {
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_LARGER_THAN_LIMIT_U, sge_u32c(FD_SETSIZE)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE1_U, sge_u32c(FD_SETSIZE - 20)));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE2));
      WARNING((SGE_EVENT, MSG_QMASTER_FD_SETSIZE_COMPILE_MESSAGE3));
   }
#endif

   /*
    * Setup all threads and initialize corresponding modules. 
    * Order is important!
    */
   sge_signaler_initialize(ctx);
   sge_event_master_initialize(ctx);
   sge_timer_initialize(ctx, &monitor);
   sge_worker_initialize(ctx);
#if 0
   sge_test_initialize(ctx);
#endif
   sge_listener_initialize(ctx);
   sge_scheduler_initialize(ctx, NULL);
#ifndef NO_JNI
   sge_jvm_initialize(ctx, NULL);
#endif

   INFO((SGE_EVENT, "qmaster startup took "sge_u32" seconds", sge_get_gmt() - start_time));

   /*
    * Block till signal from signal thread arrives us
    */
   sge_thread_wait_for_signal();

   /* 
    * Shutdown all threads and shutdown corresponding modules.
    * Order is important!
    */
#ifndef NO_JNI
   sge_jvm_terminate(ctx, NULL);
#endif
   sge_scheduler_terminate(ctx, NULL);
   sge_listener_terminate();
#if 0
   sge_test_terminate(ctx);
#endif
   sge_worker_terminate(ctx);
   sge_timer_terminate();
   sge_event_master_terminate();
   sge_signaler_terminate();

   /*
    * Remaining shutdown operations
    */
   sge_clean_lists();
   sge_monitor_free(&monitor);

   sge_shutdown((void**)&ctx, sge_qmaster_get_exit_state());
   sge_prof_cleanup();

   DEXIT;
   return 0;
} /* main() */
Example #15
0
/*----------------------------------------------------------------------------*/
int 
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
/*    const char *cp; */
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

#if 1

static int check_interval = CHECK_INTERVAL;
static int get_active_interval = GET_ACTIVE_INTERVAL;
static int delay_time = DELAY_TIME;
static int sge_test_heartbeat = 0;

char binpath[SGE_PATH_MAX];
char oldqmaster[SGE_PATH_MAX];

char shadow_err_file[SGE_PATH_MAX];
char qmaster_out_file[SGE_PATH_MAX];

#endif

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   /* initialize recovery control variables */
   {
      char *s;
      int val;
      if ((s=getenv("SGE_CHECK_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         check_interval = val;
      if ((s=getenv("SGE_GET_ACTIVE_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         get_active_interval = val;
      if ((s=getenv("SGE_DELAY_TIME")) &&
          sscanf(s, "%d", &val) == 1)
         delay_time = val;
      if ((s=getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) &&
          sscanf(s, "%d", &val) == 1)
         sge_test_heartbeat = val;
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      sprintf(shadowd_pidfile, "%s/"SHADOWD_PID_FILE, ctx->get_qmaster_spool_dir(ctx), 
              ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   sprintf(shadow_err_file, "messages_shadowd.%s", ctx->get_unqualified_hostname(ctx));
   sprintf(qmaster_out_file, "messages_qmaster.%s", ctx->get_unqualified_hostname(ctx));
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();


      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have to heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];

                        strcpy(qmaster_name, SGE_PREFIX);
                        strcat(qmaster_name, prognames[QMASTER]); 
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        out = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        err = out;
                        if (out == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error
                            */
                           out = 1;
                           err = 2;
                        } 

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }
                        close(out);
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and hearbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}
Example #16
0
int main(int argc, char **argv) {
   int ret = STATUS_OK;
   lList *alp = NULL;
   lList *request_list = NULL;
   lList *cmdline = NULL;
   lListElem *aep;
   int all_jobs = 0;
   int all_users = 0;
   u_long32 gdi_cmd = SGE_GDI_MOD; 
   int tmp_ret;
   int me_who;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "qalter");

   prof_mt_init();

   /*
   ** get command name: qalter or qresub
   */
   if (!strcmp(sge_basename(argv[0], '/'), "qresub")) {
      DPRINTF(("QRESUB\n"));
      me_who = QRESUB;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qhold")) {
      DPRINTF(("QHOLD\n"));
      me_who = QHOLD;
   } else if (!strcmp(sge_basename(argv[0], '/'), "qrls")) {
      DPRINTF(("QRLS\n"));
      me_who = QRLS;
   } else {
      DPRINTF(("QALTER\n"));
      me_who = QALTER;
   } 

   log_state_set_log_gui(1);
   sge_setup_sig_handlers(me_who);

   if (sge_gdi2_setup(&ctx, me_who, MAIN_THREAD, &alp) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /*
   ** begin to work
   */
   opt_list_append_opts_from_qalter_cmdline(me_who, &cmdline, &alp, argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, MSG_QALTER, MSG_QALTER, MSG_QALTERWARNING);
   
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }
   
   /* handling the case that no command line parameter was specified */
   if ((me_who == QHOLD || me_who == QRLS) && lGetNumberOfElem(cmdline) == 1) {
      /* -h option is set implicitly for QHOLD and QRLS */
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if ((me_who == QRESUB || me_who == QALTER) && lGetNumberOfElem(cmdline) == 0) {
      /* qresub and qalter have nothing set */ 
      sge_usage(me_who, stderr);
      fprintf(stderr, "%s\n", MSG_PARSE_NOOPTIONARGUMENT);
      SGE_EXIT((void**)&ctx, 1);
   } else if (opt_list_has_X(cmdline, "-help")) {
      /* -help was specified */
      sge_usage(me_who, stdout);
      SGE_EXIT((void**)&ctx, 0);
   }
   
   alp = qalter_parse_job_parameter(me_who, cmdline, &request_list, &all_jobs, 
                                    &all_users);

   DPRINTF(("all_jobs = %d, all_user = %d\n", all_jobs, all_users));

   if (request_list && verify) {
      /* 
         got a request list containing one element 
         for each job to be modified 
         save jobid all fields contain the same fields
         so we may use show_job() with the first job
         in our list 
         The jobid's in our request list get printed before
         show_job()
      */
      cull_show_job(lFirst(request_list), FLG_QALTER, false);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
   }

   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if ((me_who == QALTER) ||
       (me_who == QHOLD) ||
       (me_who == QRLS) 
      ) {
      DPRINTF(("QALTER\n"));
      gdi_cmd = SGE_GDI_MOD;
   } else if (me_who == QRESUB){
      DPRINTF(("QRESUB\n"));
      gdi_cmd = SGE_GDI_COPY;
   } else {
      printf("unknown binary name.\n");
      SGE_EXIT((void**)&ctx, 1);
   }

   if (all_jobs)
      gdi_cmd |= SGE_GDI_ALL_JOBS;
   if (all_users)
      gdi_cmd |= SGE_GDI_ALL_USERS;

   alp = ctx->gdi(ctx, SGE_JB_LIST, gdi_cmd, &request_list, NULL, NULL); 
   for_each (aep, alp) {
      printf("%s\n", lGetString(aep, AN_text));
      if (ret == STATUS_OK) {
         ret = lGetUlong(aep, AN_status);
      }
   }
/****** sge_hostname/sge_get_qmaster_port() ************************************
*  NAME
*     sge_get_qmaster_port() -- get qmaster port
*
*  SYNOPSIS
*     int sge_get_qmaster_port(bool *from_services) 
*
*  FUNCTION
*     This function returns the TCP/IP port of the qmaster daemon. It returns
*     a cached value from a previous run. The cached value will be refreshed
*     every 10 Minutes. The port may come from environment variable
*     SGE_QMASTER_PORT or the services files entry "sge_qmaster".
*
*  INPUTS
*     bool *from_services - Pointer to a boolean which is set to true
*                           when the port value comes from the services file.
*
*  RESULT
*     int - port of qmaster
*
*******************************************************************************/
#define SGE_PORT_CACHE_TIMEOUT 60*10   /* 10 Min. */
int sge_get_qmaster_port(bool *from_services) {
   char* port = NULL;
   int int_port = -1;

   struct timeval now;
   static long next_timeout = 0;
   static int cached_port = -1;
   static bool is_port_from_services_file = false;

   DENTER(GDI_LAYER, "sge_get_qmaster_port");

   sge_mutex_lock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);

   /* check for reresolve timeout */
   gettimeofday(&now,NULL);

   if (next_timeout > 0 ) {
      DPRINTF(("reresolve port timeout in "sge_U32CFormat"\n", sge_u32c( next_timeout - now.tv_sec)));
   }

   /* get port from cache when next_timeout for re-resolving is not reached */
   if (cached_port >= 0 && next_timeout > now.tv_sec) {
      int_port = cached_port;
      if (from_services != NULL) {
         *from_services = is_port_from_services_file;
      }
      DPRINTF(("returning cached port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
      DEXIT;
      return int_port;
   }

   /* get port from environment variable SGE_QMASTER_PORT */
   port = getenv("SGE_QMASTER_PORT");   
   if (port != NULL) {
      int_port = atoi(port);
      is_port_from_services_file = false;
   }

   /* get port from services file */
   if (int_port <= 0) {
      char buffer[2048];
      struct servent se_result;
      struct servent* se_help = NULL;

      se_help = sge_getservbyname_r(&se_result, "sge_qmaster", buffer, sizeof(buffer));
      if (se_help != NULL) {
         int_port = ntohs(se_help->s_port);
         if (int_port > 0) {
            /* we found a port in the services */
            is_port_from_services_file = true;
            if (from_services != NULL) {
               *from_services = is_port_from_services_file;
            }
         }
      }
   }

   if (int_port <= 0 ) {
      ERROR((SGE_EVENT, MSG_UTI_CANT_GET_ENV_OR_PORT_SS, "SGE_QMASTER_PORT", "sge_qmaster"));
      if ( cached_port > 0 ) {
         WARNING((SGE_EVENT, MSG_UTI_USING_CACHED_PORT_SU, "sge_qmaster", sge_u32c(cached_port) ));
         int_port = cached_port; 
      } else {
         sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
         SGE_EXIT(NULL, 1);
      }
   } else {
      DPRINTF(("returning port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      /* set new timeout time */
      gettimeofday(&now,NULL);
      next_timeout = now.tv_sec + SGE_PORT_CACHE_TIMEOUT;

      /* set new port value */
      cached_port = int_port;
   }

   sge_mutex_unlock("get_qmaster_port_mutex", SGE_FUNC, __LINE__, &get_qmaster_port_mutex);
   
   DEXIT;
   return int_port;
}
Example #18
0
/* 
 * TODO: CLEANUP
 *
 * This function is DEPRECATED and should be removed with the next
 * major release.
 *
 * This function can't be used in multi threaded environments because it
 * might cause a deadlock in the executing qmaster thread.
 * Use sge_peopen_r() instead.
 */
pid_t sge_peopen(const char *shell, int login_shell, const char *command,
                 const char *user, char **env,  FILE **fp_in, FILE **fp_out,
                 FILE **fp_err, bool null_stderr)
{
   pid_t pid;
   int pipefds[3][2];
   const char *could_not = MSG_SYSTEM_EXECBINSHFAILED;
   const char *not_root = MSG_SYSTEM_NOROOTRIGHTSTOSWITCHUSER;
   int i;
   char arg0[256];
   char err_str[256];
#if !(defined(WIN32) || defined(INTERIX)) /* var not needed */
   int res;
#endif /* WIN32 */
   uid_t myuid;
 
   DENTER(TOP_LAYER, "sge_peopen");
 
   /* open pipes - close on failure */
   for (i=0; i<3; i++) {
      if (pipe(pipefds[i]) != 0) {
         while (--i >= 0) {
            close(pipefds[i][0]);
            close(pipefds[i][1]);
         }
         ERROR((SGE_EVENT, MSG_SYSTEM_FAILOPENPIPES_SS,
                command, strerror(errno)));
         DEXIT;
         return -1;
      }
   }

#if defined(SOLARIS)
   pid = sge_smf_contract_fork(err_str, 256);
#else
   pid = fork();
#endif
   if (pid == 0) {  /* child */
      int keep_open[6];
      keep_open[0] = 0;
      keep_open[1] = 1;
      keep_open[2] = 2;
      keep_open[3] = pipefds[0][0];
      keep_open[4] = pipefds[1][1];
      keep_open[5] = pipefds[2][1];
      sge_close_all_fds(keep_open, 6);
      /* shall we redirect stderr to /dev/null? */
      if (null_stderr) {
         /* open /dev/null */
         int fd = open("/dev/null", O_WRONLY);
         if (fd == -1) {
            sprintf(err_str, MSG_ERROROPENINGFILEFORWRITING_SS, "/dev/null", 
                    strerror(errno));
            sprintf(err_str, "\n");
            write(2, err_str, strlen(err_str));
            SGE_EXIT(NULL, 1);
         }

         /* set stderr to /dev/null */
         close(2);
         dup(fd);

         /* we don't need the stderr the pipe - close it */
         close(pipefds[2][1]);
      } else {
         /* redirect stderr to the pipe */
         close(2);
         dup(pipefds[2][1]);
      }

      /* redirect stdin and stdout to the pipes */
      close(0);
      close(1);
      dup(pipefds[0][0]);
      dup(pipefds[1][1]);
 
      if (user) {
         struct passwd *pw;
         struct passwd pw_struct;
         char *buffer;
         int size;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         if (!(pw=sge_getpwnam_r(user, &pw_struct, buffer, size))) {
            sprintf(err_str, MSG_SYSTEM_NOUSERFOUND_SS , user, strerror(errno));            
            sprintf(err_str, "\n");
            write(2, err_str, strlen(err_str));
            FREE(buffer);
            SGE_EXIT(NULL, 1);
         }
 
         myuid = geteuid();
 
         if (myuid != pw->pw_uid) {
 
            /* Only change user if we differ from the wanted user */
            if(myuid != SGE_SUPERUSER_UID) {
               write(2, not_root, sizeof(not_root));
               FREE(buffer);
               SGE_EXIT(NULL, 1);
            }                             
            sprintf(err_str, "%s %d\n", pw->pw_name, (int)pw->pw_gid);
            write(2, err_str, strlen(err_str));
#if !(defined(WIN32) || defined(INTERIX))  /* initgroups not called */
            res = initgroups(pw->pw_name,pw->pw_gid);
#  if defined(SVR3) || defined(sun)
            if (res < 0)
#  else
            if (res)
#  endif
            {
               sprintf(err_str, MSG_SYSTEM_INITGROUPSFORUSERFAILED_ISS ,
                     res, user, strerror(errno));
               sprintf(err_str, "\n");
               write(2, err_str, strlen(err_str));
               FREE(buffer);
               SGE_EXIT(NULL, 1);
            }
#endif /* WIN32 */
 
            if (setuid(pw->pw_uid)) {
               sprintf(err_str, MSG_SYSTEM_SWITCHTOUSERFAILED_SS , user,
                     strerror(errno));
               sprintf(err_str, "\n");
               write(2, err_str, strlen(err_str));
               FREE(buffer);
               SGE_EXIT(NULL, 1);
            }
         }
 
         addenv("HOME", pw->pw_dir);
         addenv("SHELL", pw->pw_shell);
         addenv("USER", pw->pw_name);
         addenv("LOGNAME", pw->pw_name);
         addenv("PATH", SGE_DEFAULT_PATH);

         FREE(buffer);
      }
 
      if (login_shell)
         strcpy(arg0, "-");
      else
         strcpy(arg0, "");
      strcat(arg0, shell);
 
      if (env)
         for(; *env; env++)
            putenv(*env);
 
      execlp(shell, arg0, "-c", command, NULL);
 
      write(2, could_not, sizeof(could_not));
      SGE_EXIT(NULL, 1);
   }
 
   if (pid < 0) {
      for (i=0; i<3; i++) {
         close(pipefds[i][0]);
         close(pipefds[i][1]);
      }
#if defined(SOLARIS)
      if (pid < -1 && err_str) {
          ERROR((SGE_EVENT, MSG_SMF_FORK_FAILED_SS, "sge_peopen()", err_str));
      }
#endif
      /* fork could have failed, report it */
      ERROR((SGE_EVENT, MSG_SMF_FORK_FAILED_SS, "sge_peopen()", strerror(errno)));
      DEXIT;
      return -1;
   }
 
   /* close the childs ends of the pipes */
   close(pipefds[0][0]);
   close(pipefds[1][1]);
   close(pipefds[2][1]);
  
   /* return filehandles for stdin and stdout */
   *fp_in  = fdopen(pipefds[0][1], "a");
   *fp_out = fdopen(pipefds[1][0], "r");

   /* is stderr redirected to /dev/null? */
   if (null_stderr) {
      /* close the pipe and return NULL as filehandle */
      close(pipefds[2][0]);
      *fp_err = NULL;
   } else {
      /* return filehandle for stderr */
      *fp_err = fdopen(pipefds[2][0], "r");
   }

   DEXIT;
   return pid;
}
int main(int argc, char *argv[])
{
   const char *url;
   int i, threads;
   pthread_t *t;
   int *args;

   lList *answer_list = NULL;
   lListElem *spooling_context;

   DENTER_MAIN(TOP_LAYER, "test_berkeleydb_mt");

   /* parse commandline parameters */
   if (argc < 3) {
      ERROR((SGE_EVENT, "usage: test_berkeleydb_mt <url> <threads> [<delay>]\n"));
      ERROR((SGE_EVENT, "       <url>     = path or host:database\n"));
      ERROR((SGE_EVENT, "       <threads> = number of threads\n"));
      ERROR((SGE_EVENT, "       <delay>   = delay after writing [ms]\n"));
      SGE_EXIT(NULL, 1);
   }

   url = argv[1];
   threads = atoi(argv[2]);

   if (argc > 3) {
      delay = atoi(argv[3]);
   }

   /* allocate memory for pthreads and arguments */
   t = (pthread_t *)malloc(threads * sizeof(pthread_t));
   args = (int *)malloc(threads * sizeof(int));

   DPRINTF(("writing to database %s from %d threads\n", url, threads));

   /* initialize spooling */
   spooling_context = spool_create_dynamic_context(&answer_list, NULL, url, NULL);
   answer_list_output(&answer_list);
   if (spooling_context == NULL) {
      SGE_EXIT(NULL, EXIT_FAILURE);
   }

   spool_set_default_context(spooling_context);

   if (!spool_startup_context(&answer_list, spooling_context, true)) {
      answer_list_output(&answer_list);
      SGE_EXIT(NULL, EXIT_FAILURE);
   }
   answer_list_output(&answer_list);

   /* let n threads to parallel spooling */
   for (i = 0; i < threads; i++) {
      args[i] = i + 1;     
      pthread_create(&(t[i]), NULL, work, (void*)(&args[i]));
   }

   /* also work in current thread */
   work((void *)0);

   /* wait for termination of all threads */
   for (i = 0; i < threads; i++) {
      pthread_join(t[i], NULL);
   }

   /* shutdown spooling */
   spool_shutdown_context(&answer_list, spooling_context);
   answer_list_output(&answer_list);

   sge_free(&t);

   DEXIT;
   return EXIT_SUCCESS;
}
int sge_get_execd_port(void) {
   char* port = NULL;
   int int_port = -1;

   struct timeval now;
   static long next_timeout = 0;
   static int cached_port = -1;

   DENTER(TOP_LAYER, "sge_get_execd_port");

   sge_mutex_lock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
   
   /* check for reresolve timeout */
   gettimeofday(&now,NULL);

   if ( next_timeout > 0 ) {
      DPRINTF(("reresolve port timeout in "sge_U32CFormat"\n", sge_u32c( next_timeout - now.tv_sec)));
   }
   if ( cached_port >= 0 && next_timeout > now.tv_sec ) {
      int_port = cached_port;
      DPRINTF(("returning cached port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
      return int_port;
   }

   port = getenv("SGE_EXECD_PORT");   
   if (port != NULL) {
      int_port = atoi(port);
   }

   /* get port from services file */
   if (int_port <= 0) {
      char buffer[2048];
      struct servent se_result;
      struct servent* se_help = NULL;

      se_help = sge_getservbyname_r(&se_result, "sge_execd", buffer, sizeof(buffer));
      if (se_help != NULL) {
         int_port = ntohs(se_help->s_port);
      }
   }

   if (int_port <= 0 ) {
      ERROR((SGE_EVENT, MSG_UTI_CANT_GET_ENV_OR_PORT_SS, "SGE_EXECD_PORT" , "sge_execd"));
      if ( cached_port > 0 ) {
         WARNING((SGE_EVENT, MSG_UTI_USING_CACHED_PORT_SU, "sge_execd", sge_u32c(cached_port) ));
         int_port = cached_port; 
      } else {
         sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);
         SGE_EXIT(NULL, 1);
      }
   } else {
      DPRINTF(("returning port value: "sge_U32CFormat"\n", sge_u32c(int_port)));
      /* set new timeout time */
      gettimeofday(&now,NULL);
      next_timeout = now.tv_sec + SGE_PORT_CACHE_TIMEOUT;

      /* set new port value */
      cached_port = int_port;
   } 

   sge_mutex_unlock("get_execd_port_mutex", SGE_FUNC, __LINE__, &get_execd_port_mutex);

   DEXIT;
   return int_port;

}
Example #21
0
int main(int argc, char *argv[])
{
   qevent_options enabled_options;
   dstring errors = DSTRING_INIT;
   int i, gdi_setup;
   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL; 
   sge_evc_class_t *evc = NULL;

   DENTER_MAIN(TOP_LAYER, "qevent");

/*    sge_mt_init(); */

   /* dump pid to file */
   qevent_dump_pid_file();

   /* parse command line */
   enabled_options.error_message = &errors;
   qevent_set_option_struct(&enabled_options);
   qevent_parse_command_line(argc, argv, &enabled_options);

   

   /* check if help option is set */
   if (enabled_options.help_option) {
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* are there command line parsing errors ? */
   if (sge_dstring_get_string(enabled_options.error_message)) {
      ERROR((SGE_EVENT, "%s", sge_dstring_get_string(enabled_options.error_message) ));
      qevent_show_usage();
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }


   log_state_set_log_gui(1);
   sge_setup_sig_handlers(QEVENT);

   /* setup event client */
   gdi_setup = sge_gdi2_setup(&ctx, QEVENT, MAIN_THREAD, &alp);
   if (gdi_setup != AE_OK) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }
   /* TODO: how is the memory we allocate here released ???, SGE_EXIT doesn't */
   if (false == sge_gdi2_evc_setup(&evc, ctx, EV_ID_ANY, &alp, NULL)) {
      answer_list_output(&alp);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* ok, start over ... */
   /* check for testsuite option */
   
   if (enabled_options.testsuite_option) {
      /* only for testsuite */
      qevent_testsuite_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   /* check for subscribe option */
   if (enabled_options.subscribe_option) {
      /* only for testsuite */
      qevent_subscribe_mode(evc);
      sge_dstring_free(enabled_options.error_message);
      SGE_EXIT((void**)&ctx, 0);
   }

   if (enabled_options.trigger_option_count > 0) {
      lCondition *where =NULL;
      lEnumeration *what = NULL;

      sge_mirror_initialize(evc, EV_ID_ANY, "sge_mirror -trigger", true, 
                            NULL, NULL, NULL, NULL, NULL);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_ACK);

      /* put out information about -trigger option */
      for (i=0;i<enabled_options.trigger_option_count;i++) {
         INFO((SGE_EVENT, "trigger script for %s events: %s\n",
                         qevent_get_event_name((enabled_options.trigger_option_events)[i]), 
                         (enabled_options.trigger_option_scripts)[i]));
         switch((enabled_options.trigger_option_events)[i]) {
            case QEVENT_JB_END:
                  
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I %I %I %I %I %I %I %I)", JB_Type, JB_job_number, JB_ja_tasks, 
                                                              JB_ja_structure, JB_ja_n_h_ids, JB_ja_u_h_ids, 
                                                              JB_ja_s_h_ids,JB_ja_o_h_ids, JB_ja_template);
                  
                  /* register for job events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JOB, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JOB_DEL,true, 1);

                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */
                /*  evc->ec_unsubscribe(evc, sgeE_JOB_LIST); */
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD);
                  evc->ec_unsubscribe(evc, sgeE_JOB_MOD_SCHED_PRIORITY);
                  evc->ec_unsubscribe(evc, sgeE_JOB_USAGE);
                  evc->ec_unsubscribe(evc, sgeE_JOB_FINAL_USAGE);
               /*   evc->ec_unsubscribe(evc, sgeE_JOB_ADD); */

                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
            case QEVENT_JB_TASK_END:
            
                  /* build mask for the job structure to contain only the needed elements */
                  where = NULL; 
                  what = lWhat("%T(%I)", JAT_Type, JAT_status);
                  /* register for JAT events */ 
                  sge_mirror_subscribe(evc, SGE_TYPE_JATASK, analyze_jatask_event, NULL, NULL, where, what);
                  evc->ec_set_flush(evc, sgeE_JATASK_DEL,true, 1);
                  
                  /* the mirror interface registers more events, than we need,
                     thus we free the ones, we do not need */ 
                  evc->ec_unsubscribe(evc, sgeE_JATASK_ADD);
                  evc->ec_unsubscribe(evc, sgeE_JATASK_MOD);
                  /* free the what and where mask */
                  lFreeWhere(&where);
                  lFreeWhat(&what);
               break;
         }        
      }

      while(!shut_me_down) {
         sge_mirror_error error = sge_mirror_process_events(evc);
         if (error == SGE_EM_TIMEOUT && !shut_me_down ) {
            sleep(10);
            continue;
         }
      }

      sge_mirror_shutdown(evc);

      sge_dstring_free(enabled_options.error_message);
      sge_prof_cleanup();
      SGE_EXIT((void**)&ctx, 0);
      return 0;
   }


   ERROR((SGE_EVENT, "no option selected\n" ));
   qevent_show_usage();
   sge_dstring_free(enabled_options.error_message);
   sge_prof_cleanup();
   SGE_EXIT((void**)&ctx, 1);
   return 1;
}
Example #22
0
/*-------------------------------------------------------------------------*/
int main(int argc, char **argv)
{
   int ret;
   int my_pid;
   int ret_val;
   int printed_points = 0;
   int max_enroll_tries;
   static char tmp_err_file_name[SGE_PATH_MAX];
   time_t next_prof_output = 0;
   int execd_exit_state = 0;
   lList **master_job_list = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;
   lList *alp = NULL;

   DENTER_MAIN(TOP_LAYER, "execd");

#if defined(LINUX)
   gen_procList ();
#endif

   prof_mt_init();

   set_thread_name(pthread_self(),"Execd Thread");

   prof_set_level_name(SGE_PROF_CUSTOM1, "Execd Thread", NULL); 
   prof_set_level_name(SGE_PROF_CUSTOM2, "Execd Dispatch", NULL); 

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* This needs a better solution */
   umask(022);
      
   /* Initialize path for temporary logging until we chdir to spool */
   my_pid = getpid();
   sprintf(tmp_err_file_name,"%s."sge_U32CFormat"", TMP_ERR_FILE_EXECD, sge_u32c(my_pid));
   log_state_set_log_file(tmp_err_file_name);

   /* exit func for SGE_EXIT() */
   sge_sig_handler_in_main_loop = 0;
   sge_setup_sig_handlers(EXECD);

   if (sge_setup2(&ctx, EXECD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }
   ctx->set_exit_func(ctx, execd_exit_func);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   /* prepare daemonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_prepare(ctx);
   }

   if ((ret=sge_occupy_first_three())>=0) {
      CRITICAL((SGE_EVENT, MSG_FILE_REDIRECTFD_I, ret));
      SGE_EXIT((void**)&ctx, 1);
   }

   lInit(nmv);

   /* unset XAUTHORITY if set */
   if (getenv("XAUTHORITY") != NULL) {
      sge_unsetenv("XAUTHORITY");
   }

   parse_cmdline_execd(argv);   
   
   /* exit if we can't get communication handle (bind port) */
   max_enroll_tries = 30;
   while (cl_com_get_handle(prognames[EXECD],1) == NULL) {
      ctx->prepare_enroll(ctx);
      max_enroll_tries--;

      if (max_enroll_tries <= 0 || shut_me_down) {
         /* exit after 30 seconds */
         if (printed_points != 0) {
            printf("\n");
         }
         CRITICAL((SGE_EVENT, MSG_COM_ERROR));
         SGE_EXIT((void**)&ctx, 1);
      }
      if (cl_com_get_handle(prognames[EXECD],1) == NULL) {
        /* sleep when prepare_enroll() failed */
        sleep(1);
        if (max_enroll_tries < 27) {
           printf(".");
           printed_points++;
           fflush(stdout);
        }
      }
   }

   if (printed_points != 0) {
      printf("\n");
   }

   /*
    * now the commlib up and running. Set execd application status function 
    * ( commlib callback function for qping status information response 
    *   messages (SIRM) )
    */
   ret_val = cl_com_set_status_func(sge_execd_application_status);
   if (ret_val != CL_RETVAL_OK) {
      ERROR((SGE_EVENT, cl_get_error_text(ret_val)) );
   }

   /* test connection */
   {
      cl_com_SIRM_t* status = NULL;
      ret_val = cl_commlib_get_endpoint_status(ctx->get_com_handle(ctx),
                                               (char *)ctx->get_master(ctx, true),
                                               (char*)prognames[QMASTER], 1, &status);
      if (ret_val != CL_RETVAL_OK) {
         ERROR((SGE_EVENT, cl_get_error_text(ret_val)));
         ERROR((SGE_EVENT, MSG_CONF_NOCONFBG));
      }
      cl_com_free_sirm_message(&status);
   }
   
   /* finalize daeamonize */
   if (!getenv("SGE_ND")) {
      sge_daemonize_finalize(ctx);
   }

   /* daemonizes if qmaster is unreachable */   
   sge_setup_sge_execd(ctx, tmp_err_file_name);

   /* are we using qidle or not */
   sge_ls_qidle(mconf_get_use_qidle());
   sge_ls_gnu_ls(1);
   
   DPRINTF(("use_qidle: %d\n", mconf_get_use_qidle()));

   /* test load sensor (internal or external) */
   {
      lList *report_list = sge_build_load_report(ctx->get_qualified_hostname(ctx), ctx->get_binary_path(ctx));
      lFreeList(&report_list);
   }
   
   /* here we have to wait for qmaster registration */
   while (sge_execd_register_at_qmaster(ctx, false) != 0) {
      if (sge_get_com_error_flag(EXECD, SGE_COM_ACCESS_DENIED, true)) {
         /* This is no error */
         DPRINTF(("*****  got SGE_COM_ACCESS_DENIED from qmaster  *****\n"));
      }
      if (sge_get_com_error_flag(EXECD, SGE_COM_ENDPOINT_NOT_UNIQUE, false)) {
         execd_exit_state = SGE_COM_ENDPOINT_NOT_UNIQUE;
         break;
      }
      if (shut_me_down != 0) {
         break;
      }
      sleep(30);
   }

   /* 
    * Terminate on SIGTERM or hard communication error
    */
   if (execd_exit_state != 0 || shut_me_down != 0) {
      sge_shutdown((void**)&ctx, execd_exit_state);
      DRETURN(execd_exit_state);
   }

   /*
    * We write pid file when we are connected to qmaster. Otherwise an old
    * execd might overwrite our pidfile.
    */
   sge_write_pid(EXECD_PID_FILE);

   /*
    * At this point we are sure we are the only sge_execd and we are connected
    * to the current qmaster. First we have to report any reaped children
    * that might exist.
    */
   starting_up();

   /*
    * Log a warning message if execd hasn't been started by a superuser
    */
   if (!sge_is_start_user_superuser()) {
      WARNING((SGE_EVENT, MSG_SWITCH_USER_NOT_ROOT));
   }   

#ifdef COMPILE_DC
   if (ptf_init()) {
      CRITICAL((SGE_EVENT, MSG_EXECD_NOSTARTPTF));
      SGE_EXIT((void**)&ctx, 1);
   }
   INFO((SGE_EVENT, MSG_EXECD_STARTPDCANDPTF));
#endif

   master_job_list = object_type_get_master_list(SGE_TYPE_JOB);
   *master_job_list = lCreateList("Master_Job_List", JB_Type);
   job_list_read_from_disk(master_job_list, "Master_Job_List",
                           0, SPOOL_WITHIN_EXECD, 
                          job_initialize_job);
   

   /* clean up jobs hanging around (look in active_dir) */
   clean_up_old_jobs(ctx, 1);
   execd_trash_load_report();
   sge_set_flush_lr_flag(true);

   sge_sig_handler_in_main_loop = 1;

   if (thread_prof_active_by_id(pthread_self())) {
      prof_start(SGE_PROF_CUSTOM1, NULL);
      prof_start(SGE_PROF_CUSTOM2, NULL);
      prof_start(SGE_PROF_GDI_REQUEST, NULL);
   } else {
      prof_stop(SGE_PROF_CUSTOM1, NULL);
      prof_stop(SGE_PROF_CUSTOM2, NULL);
      prof_stop(SGE_PROF_GDI_REQUEST, NULL);
   }

   PROF_START_MEASUREMENT(SGE_PROF_CUSTOM1);

   /* Start dispatching */
   execd_exit_state = sge_execd_process_messages(ctx);


   /*
    * This code is only reached when dispatcher terminates and execd goes down.
    */

   /* log if we received SIGPIPE signal */
   if (sge_sig_handler_sigpipe_received) {
       sge_sig_handler_sigpipe_received = 0;
       INFO((SGE_EVENT, "SIGPIPE received\n"));
   }

#if defined(LINUX)
   free_procList();
#endif
   lFreeList(master_job_list);

   PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM1);
   if (prof_is_active(SGE_PROF_ALL)) {
     time_t now = (time_t)sge_get_gmt();

      if (now > next_prof_output) {
         prof_output_info(SGE_PROF_ALL, false, "profiling summary:\n");
         prof_reset(SGE_PROF_ALL,NULL);
         next_prof_output = now + 60;
      }
   }
   sge_prof_cleanup();

   sge_shutdown((void**)&ctx, execd_exit_state);
   DRETURN(execd_exit_state);
}