/****** SCHEDD/order_remove_order_and_immediate()*******************************
*  NAME
*     order_remove_order_and_immediate() -- add a remove order for the job task
*
*  SYNOPSIS
*     int order_remove_order_and_immediate(lListElem *job, lListElem *ja_task,
                                 order_t *orders)
*
*  FUNCTION
*     Generates an order of type ORT_remove_immediate_job for the given job
*     task.  Also removes the ORT_start_job order for this task from the order
*     list.
*
*  INPUTS
*     lListElem *job       - The job to remove  (JB_Type)
*     lListElem *ja_task   - The task to remove (JAT_Type)
*     order_t *orders      - The order structurestructure  for this scheduler pass be removed
*
*  RESULT
*     int - Error code: 0 = OK, 1 = Errors
*
*  NOTES
*     MT-NOTE: order_remove_order_and_immediate() is MT safe
*
*******************************************************************************/
static void 
order_remove_order_and_immediate( lListElem *job, lListElem *ja_task, order_t *orders) 
{
   /* The possibility exists that this task is part of an array task, that it
    * already has earned an order to be scheduled, and that one or more other
    * tasks in this same job were not scheduled, resulting in this delete
    * order.  In this case, we have to remove the schedule order before we add
    * the delete order.  Otherwise, the qmaster will think we're trying to do
    * an ORT_remove_immediate_job on a non-idle task. */   

   /* Warning: we have a problem, if we send start orders during the dispatch run. We
    * might have started a array task of an immediate array job without verifying, that
    * the whole job can run
    */  
   lList *orderList = orders->jobStartOrderList;
   lCondition *where = lWhere("%T(%I==%u && %I==%u && %I==%u)", OR_Type,
                      OR_type, ORT_start_job,
                      OR_job_number, lGetUlong(job, JB_job_number),
                      OR_ja_task_number, lGetUlong(ja_task, JAT_task_number));
   lListElem *ep = lFindFirst (orderList, where);
   
   DENTER(TOP_LAYER, "order_remove_order_and_immediate");
   
   if (ep != NULL) {
      DPRINTF (("Removing job start order for job task "sge_u32"."sge_u32"\n",
                lGetUlong(job, JB_job_number),
                lGetUlong(ja_task, JAT_task_number)));
      lRemoveElem(orderList, &ep);
   }
   
   order_remove_immediate(job, ja_task, orders);
   lFreeWhere(&where);
   
   DEXIT;
}
Exemplo n.º 2
0
/****** sgeobj/var/var_list_delete_string() ***********************************
*  NAME
*     var_list_delete_string -- delete a variable
*
*  SYNOPSIS
*     void var_list_delete_string(lList **varl, 
*                                 const char *name);
*
*  FUNCTION
*     Deletes the variable <name> from the list <varl> if it exists.
*
*  INPUTS
*     lList **varl      - VA_Type list
*     const char *name  - the name of the variable
*
*  SEE ALSO
*     sgeobj/var/var_list_set_string() 
******************************************************************************/
void var_list_delete_string(lList **varl, const char *name)
{
   lListElem *elem;

   DENTER(TOP_LAYER, "var_list_delete_string");
   if (varl == NULL || name == NULL) {
      DRETURN_VOID;
   }
   elem = lGetElemStr(*varl, VA_variable, name);
   if (elem != NULL) {
      lRemoveElem(*varl, &elem);
   }
   DEXIT;
}
Exemplo n.º 3
0
/****** cull/db/lSplit() ******************************************************
*  NAME
*     lSplit() -- Splits a list into two list 
*
*  SYNOPSIS
*     int lSplit(lList **slp, lList **ulp, const char *ulp_name, 
*                const lCondition *cp) 
*
*  FUNCTION
*     Unchains the list elements from the list 'slp' NOT fullfilling
*     the condition 'cp' and returns a list containing the 
*     unchained elements in 'ulp' 
*
*  INPUTS
*     lList **slp          - source list pointer 
*     lList **ulp          - unchained list pointer 
*     const char *ulp_name - 'ulp' list name 
*     const lCondition *cp - selects rows within 'slp' 
*
*  RESULT
*     int - error status
*         0 - OK
*        -1 - Error 
******************************************************************************/
int lSplit(lList **slp, lList **ulp, const char *ulp_name, 
           const lCondition *cp) 
{

   lListElem *ep, *next;
   int has_been_allocated = 0;

   DENTER(TOP_LAYER, "lSplit");

   /*
      iterate through the source list call lCompare and chain all elems
      that don't fullfill the condition into a new list.
    */
   if (!slp) {
      DEXIT;
      return -1;
   }

   for (ep = lFirst(*slp); ep; ep = next) {
      next = ep->next;          /* this is important, cause the elem is dechained */

      if (!lCompare(ep, cp)) {
         if (ulp && !*ulp) {
            *ulp = lCreateList(ulp_name ? ulp_name : "ulp", (*slp)->descr);
            if (!*ulp) {
               DEXIT;
               return -1;
            }
            has_been_allocated = 1;
         }
         if (ulp) {
            ep = lDechainElem(*slp, ep);
            lAppendElem(*ulp, ep);
         } else {
            lRemoveElem(*slp, &ep);
         }
      }
   }

   /* if no elements remain, free the list and return NULL */
   if (*slp && lGetNumberOfElem(*slp) == 0) {
      lFreeList(slp);
   }
   if (has_been_allocated && *ulp && lGetNumberOfElem(*ulp) == 0) {
      lFreeList(ulp);
   }

   DEXIT;
   return 0;
}
Exemplo n.º 4
0
/**
* @brief remove potentially dangerous environment variables
*
* The function removes potentially dangerous environment variables from
* an environment list (e.g. from job or pe task submission).
* The following variables are removed:
*  - LD_PRELOAD
*  - all flavours of LD_LIBRARY_PATH, SHLIB_PATH etc.
*    unless qmaster param ENABLE_SUBMIT_LIB_PATH is set to TRUE
* We generate an INFO message to be output by the submit client.
*
* @param env_list       the environment list
* @param answer_list    answer_list to report removal of variables
*/
void var_list_filter_env_list(lList *env_list, lList **answer_list)
{
   lListElem *ep;

   /* LD_PRELOAD */
   ep = lGetElemStr(env_list, VA_variable, "LD_PRELOAD");
   if (ep != NULL) {
      lRemoveElem(env_list, &ep);
      answer_list_add_sprintf(answer_list, STATUS_ESYNTAX, ANSWER_QUALITY_INFO,
                              MSG_REMOVED_ENV_VAR_S, "LD_PRELOAD");
   }

   /* other potentially dangerous variables */
   if (!mconf_get_enable_submit_lib_path()) {
      static const char *lib_path_names[] = {
         "LD_LIBRARY_PATH",
         "LD_LIBRARY_PATH_32",
         "LD_LIBRARY_PATH_64",
         "LIBPATH",
         "SHLIB_PATH",
         "DYLD_LIBRARY_PATH",
         "LD_ORIGIN_PATH",
         "LD_CONFIG",
         NULL
      };
      const char *var_name;
      int i;
      for (i = 0; (var_name = lib_path_names[i]) != NULL; i++) {
         ep = lGetElemStr(env_list, VA_variable, var_name);
         if (ep != NULL) {
            lRemoveElem(env_list, &ep);
            answer_list_add_sprintf(answer_list, STATUS_ESYNTAX, ANSWER_QUALITY_INFO,
                                    MSG_REMOVED_ENV_VAR_S, var_name);
         }
      }
   }
}
Exemplo n.º 5
0
/****** sgeobj/var/var_list_remove_prefix_vars() ******************************
*  NAME
*     var_list_remove_prefix_vars() -- remove vars with certain prefix 
*
*  SYNOPSIS
*     void var_list_remove_prefix_vars(lList **varl, 
*                                      const char *prefix) 
*
*  FUNCTION
*     Remove all entries from "varl" where the name
*     beginns with "prefix" 
*
*  INPUTS
*     lList **varl       - VA_Type list 
*     const char *prefix - prefix string (e.g. VAR_PREFIX) 
*
*  SEE ALSO
*     sgeobj/var/var_list_remove_prefix_vars()
******************************************************************************/
void var_list_remove_prefix_vars(lList **varl, const char *prefix)
{
   int prefix_len = strlen(prefix);
   lListElem *var_elem = NULL;
   lListElem *next_var_elem = NULL;

   DENTER(TOP_LAYER, "var_list_remove_prefix_vars");
   next_var_elem = lFirst(*varl);
   while((var_elem = next_var_elem)) {
      const char *prefix_name = lGetString(var_elem, VA_variable);
      next_var_elem = lNext(var_elem);

      if (!strncmp(prefix_name, prefix, prefix_len)) {
         lRemoveElem(*varl, &var_elem);
      } 
   }
   DEXIT;
   return;
}
/****** SCHEDD/remove_immediate_job()*******************************************
*  NAME
*     remove_immediate_job() -- test for and remove immediate job which can't
*                                be scheduled
*
*  SYNOPSIS
*     int remove_immediate_job(lList *job_list, lListElem *job, order_t *orders,
                               int remove_orders)
*
*  FUNCTION
*     Removes immediate jobs which cannot be scheduled from the given job list.
*     This is done by generating an order of type ORT_remove_immediate_job.  If
*     remove_orders is set, the ORT_start_job orders are first removed from the
*     order list before adding the remove order.
*
*  INPUTS
*     lList     *job_list     - The list of jobs from which the job should be
*                               removed (JB_Type)
*     lListElem *job          - The job to remove (JB_Type)
*     order_t *orders         - The order structure for this scheduler pass
*     int       remove_orders - Whether the ORT_start_job orders should also be
*                               be removed
*
*  NOTES
*     MT-NOTE: remove_immediate_job() is MT safe
*
*******************************************************************************/
void 
remove_immediate_job(lList *job_list, lListElem *job, order_t *orders, int remove_orders) 
{
   lListElem *ja_task; 
   lListElem *range = NULL;
   lList *range_list = NULL;
   u_long32 ja_task_id;

   DENTER (TOP_LAYER, "remove_immediate_job");

   for_each (ja_task, lGetList(job, JB_ja_tasks)) {
      if (remove_orders) {
         order_remove_order_and_immediate(job, ja_task, orders);
      }
      else {
         order_remove_immediate(job, ja_task, orders);
      }
   }
   
   range_list = lGetList(job, JB_ja_n_h_ids);
   
   for_each(range, range_list) {
      for(ja_task_id = lGetUlong(range, RN_min);
          ja_task_id <= lGetUlong(range, RN_max);
          ja_task_id += lGetUlong(range, RN_step)) {  
         ja_task = job_get_ja_task_template_pending(job, ja_task_id);
         
         /* No need to remove the orders here because tasks in JB_ja_n_h_ids
          * haven't been scheduled, and hence don't have start orders to
          * remove. */
         order_remove_immediate(job, ja_task, orders);
      }
   }
   lRemoveElem(job_list, &job);
   
   DEXIT;
}
Exemplo n.º 7
0
/* ------------------------------------------------------------

   sge_add_manop() - adds an manop list to the global manager/operator
                     list

   if the invoking process is the qmaster 
   the added manop list is spooled in the MANAGER_FILE/OPERATOR_FILE

*/
int sge_add_manop(
sge_gdi_ctx_class_t *ctx,
lListElem *ep,
lList **alpp,
char *ruser,
char *rhost,
u_long32 target  /* may be SGE_UM_LIST or SGE_UO_LIST */
) {
   const char *manop_name;
   const char *object_name;
   lList **lpp = NULL;
   lListElem *added;
   int pos;
   int key;
   lDescr *descr = NULL;
   ev_event eve = sgeE_EVENTSIZE; 

   DENTER(TOP_LAYER, "sge_add_manop");

   if ( !ep || !ruser || !rhost ) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   switch (target) {
   case SGE_UM_LIST:
      lpp = object_type_get_master_list(SGE_TYPE_MANAGER);
      object_name = MSG_OBJ_MANAGER;
      key = UM_name;
      descr = UM_Type;
      eve = sgeE_MANAGER_ADD;
      break;
   case SGE_UO_LIST:
      lpp = object_type_get_master_list(SGE_TYPE_OPERATOR);
      object_name = MSG_OBJ_OPERATOR;
      key = UO_name;
      descr = UO_Type;
      eve = sgeE_OPERATOR_ADD;
      break;
   default :
      DPRINTF(("unknown target passed to %s\n", SGE_FUNC));
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   /* ep is no acl element, if ep has no UM_name/UO_name */
   if ((pos = lGetPosViaElem(ep, key, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
            lNm2Str(key), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   manop_name = lGetPosString(ep, pos);
   if (!manop_name) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   if (lGetElemStr(*lpp, key, manop_name)) {
      ERROR((SGE_EVENT, MSG_SGETEXT_ALREADYEXISTS_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EEXIST;
   }

   /* update in interal lists */
   added = lAddElemStr(lpp, key, manop_name, descr);

   /* update on file */
   if(!sge_event_spool(ctx, alpp, 0, eve, 
                       0, 0, manop_name, NULL, NULL,
                       added, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, object_name, manop_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
   
      /* remove element from list */
      lRemoveElem(*lpp, &added);

      DEXIT;
      return STATUS_EDISK;
   }

   INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS,
            ruser, rhost, manop_name, object_name));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DEXIT;
   return STATUS_OK;
}
Exemplo n.º 8
0
bool sge_parse_qrsub(sge_gdi_ctx_class_t *ctx, lList *pcmdline, lList **alpp, lListElem **ar)
{
   lListElem *ep = NULL, *next_ep = NULL;
   lList *lp = NULL;
   DENTER(TOP_LAYER, "sge_parse_qrsub");

   /*  -help 	 print this help */
   if ((ep = lGetElemStr(pcmdline, SPA_switch, "-help"))) {
      lRemoveElem(pcmdline, &ep);
      sge_usage(QRSUB, stdout);
      DEXIT;
      SGE_EXIT((void **)&ctx, 0);
   }

   /*  -a date_time 	 start time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-a"))) {
      lSetUlong(*ar, AR_start_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -e date_time 	 end time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG*/
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-e"))) {
      lSetUlong(*ar, AR_end_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -d time 	 duration in TIME format SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-d"))) {
      lSetUlong(*ar, AR_duration, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -w e/v 	 validate availability of AR request, default e SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-w"))) {
      lSetUlong(*ar, AR_verify, lGetInt(ep, SPA_argval_lIntT));
      lRemoveElem(pcmdline, &ep);
   }
  
   /*  -N name 	 AR name SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-N"))) {
      lSetString(*ar, AR_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
      
   /*  -A account_string 	 AR name in accounting record SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-A"))) {
      lSetString(*ar, AR_account, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
     
   /*  -l resource_list 	 request the given resources  SGE_LIST */
   parse_list_simple(pcmdline, "-l", *ar, AR_resource_list, 0, 0, FLG_LIST_APPEND);
   centry_list_remove_duplicates(lGetList(*ar, AR_resource_list));

   /*  -u wc_user 	       access list SGE_LIST */
   /*  -u ! wc_user TBD: Think about eval_expression support in compare allowed and excluded lists */
   parse_list_simple(pcmdline, "-u", *ar, AR_acl_list, ARA_name, 0, FLG_LIST_MERGE);
   /*  -u ! list separation */
   lp = lGetList(*ar,  AR_acl_list);
   next_ep = lFirst(lp);
   while ((ep = next_ep)) {
      bool is_xacl = false;
      const char *name = lGetString(ep, ARA_name);

      next_ep = lNext(ep);
      if (name[0] == '!') { /* move this element to xacl_list */
         is_xacl = true;
         name++;
      }

      if (!is_hgroup_name(name)) {
         struct passwd *pw;
         struct passwd pw_struct;
         char *buffer;
         int size;
         stringT group;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         pw = sge_getpwnam_r(name, &pw_struct, buffer, size);
         
         if (pw == NULL) {
           answer_list_add_sprintf(alpp, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, MSG_USER_XISNOKNOWNUSER_S, name);
           FREE(buffer);
           DRETURN(false);
         }
         sge_gid2group(pw->pw_gid, group, MAX_STRING_SIZE, MAX_NIS_RETRIES);
         lSetString(ep, ARA_group, group);
         FREE(buffer);
      }

      if (is_xacl) {
         lListElem *new_ep = lAddSubStr(*ar, ARA_name, name, AR_xacl_list, ARA_Type);
         lSetString(new_ep, ARA_group, lGetString(ep, ARA_group));
         lRemoveElem(lp, &ep);
      }

   }

   /*  -q wc_queue_list 	 reserve in queue(s) SGE_LIST */
   parse_list_simple(pcmdline, "-q", *ar, AR_queue_list, 0, 0, FLG_LIST_APPEND);

  /*    -pe pe_name slot_range reserve slot range for parallel jobs */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-pe"))) {
      lSetString(*ar, AR_pe, lGetString(ep, SPA_argval_lStringT)); /* SGE_STRING, */
      lSwapList(*ar, AR_pe_range, ep, SPA_argval_lListT);       /* SGE_LIST */
      lRemoveElem(pcmdline, &ep);
   }
   /*   AR_master_queue_list  -masterq wc_queue_list, SGE_LIST bind master task to queue(s) */
   parse_list_simple(pcmdline, "-masterq", *ar, AR_master_queue_list, 0, 0, FLG_LIST_APPEND);

   /*  -ckpt ckpt-name 	 reserve in queue with ckpt method SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-ckpt"))) {
      lSetString(*ar, AR_checkpoint_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -m b/e/a/n 	 define mail notification events SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-m"))) {
      u_long32 ul;
      u_long32 old_mail_opts;

      ul = lGetInt(ep, SPA_argval_lIntT);
      if  ((ul & NO_MAIL)) {
         lSetUlong(*ar, AR_mail_options, 0);
      } else {
         old_mail_opts = lGetUlong(*ar, AR_mail_options);
         lSetUlong(*ar, AR_mail_options, ul | old_mail_opts);
      }
      lRemoveElem(pcmdline, &ep);
   }

   /*   -M user[@host],... 	 notify these e-mail addresses SGE_LIST*/
   parse_list_simple(pcmdline, "-M", *ar, AR_mail_list, MR_host, MR_user, FLG_LIST_MERGE);

   /*  -he yes/no 	 hard error handling SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-he"))) {
      lSetUlong(*ar, AR_error_handling, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*   -now 	 reserve in queues with qtype interactive  SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-now"))) {
      u_long32 ar_now = lGetUlong(*ar, AR_type);
      if(lGetInt(ep, SPA_argval_lIntT)) {
         JOB_TYPE_SET_IMMEDIATE(ar_now);
      } else {
         JOB_TYPE_CLEAR_IMMEDIATE(ar_now);
      }

      lSetUlong(*ar, AR_type, ar_now);

      lRemoveElem(pcmdline, &ep);
   }

  /* Remove the script elements. They are not stored in the ar structure */
  if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPT))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTLEN))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTPTR))) {
      lRemoveElem(pcmdline, &ep);
   }

   ep = lFirst(pcmdline);   
   if(ep) {
      const char *option = lGetString(ep,SPA_switch);
      /* as jobarg are stored no switch values, need to be filtered */ 
      if(sge_strnullcmp(option, "jobarg") != 0) {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENTX_S,
                              lGetString(ep,SPA_switch)); 
      } else {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENT);
       }
      DRETURN(false);
   }

   if (lGetUlong(*ar, AR_start_time) == 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_start_time, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_duration));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) == 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_end_time, duration_add_offset(lGetUlong(*ar, AR_start_time), lGetUlong(*ar, AR_duration)));
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) == 0) {
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   }

   DRETURN(true);
}
Exemplo n.º 9
0
/****** qmaster/ckpt/sge_del_ckpt() *******************************************
*
*  NAME
*     sge_del_ckpt -- delete ckpt object in Master_Ckpt_List 
*
*  SYNOPSIS
*     int sge_del_ckpt(lListElem *ep, lList **alpp, char *ruser, char *rhost);
* 
*  FUNCTION
*     This function will be called from the framework which will
*     add/modify/delete generic gdi objects.
*     The purpose of this function is it to delete ckpt objects. 
*
*
*  INPUTS
*     ep          - element which should be deleted
*     alpp        - reference to an answer list.
*     ruser       - username of person who invoked this gdi request
*     rhost       - hostname of the host where someone initiated an gdi call
*
*  RESULT
*     [alpp] - error messages will be added to this list
*     0 - success
*     STATUS_EUNKNOWN - an error occured
******************************************************************************/ 
int sge_del_ckpt(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, char *ruser, char *rhost) 
{
   lListElem *found;
   int pos;
   const char *ckpt_name;
   lList **lpp = object_type_get_master_list(SGE_TYPE_CKPT);

   DENTER(TOP_LAYER, "sge_del_ckpt");

   if ( !ep || !ruser || !rhost ) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   /* ep is no ckpt element, if ep has no CK_name */
   if ((pos = lGetPosViaElem(ep, CK_name, SGE_NO_ABORT)) < 0) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
            lNm2Str(CK_name), SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   ckpt_name = lGetPosString(ep, pos);
   if (!ckpt_name) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }                    
   found = ckpt_list_locate(*lpp, ckpt_name);

   if (!found) {
      ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, MSG_OBJ_CKPT, ckpt_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EEXIST;
   }

   /* 
    * Try to find references in other objects
    */
   {
      lList *local_answer_list = NULL;

      if (ckpt_is_referenced(found, &local_answer_list, *(object_type_get_master_list(SGE_TYPE_JOB)),
                             *(object_type_get_master_list(SGE_TYPE_CQUEUE)))) {
         lListElem *answer = lFirst(local_answer_list);

         ERROR((SGE_EVENT, "denied: %s", lGetString(answer, AN_text)));
         answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN,
                         ANSWER_QUALITY_ERROR);
         lFreeList(&local_answer_list);
         DEXIT;
         return STATUS_EUNKNOWN;
      }
   }

   /* remove ckpt file 1st */
   if (!sge_event_spool(ctx, alpp, 0, sgeE_CKPT_DEL, 0, 0, ckpt_name, NULL, NULL,
                        NULL, NULL, NULL, true, true)) {
      ERROR((SGE_EVENT, MSG_CANTSPOOL_SS, MSG_OBJ_CKPT, ckpt_name));
      answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EDISK;
   }

   /* now we can remove the element */
   lRemoveElem(*lpp, &found);

   INFO((SGE_EVENT, MSG_SGETEXT_REMOVEDFROMLIST_SSSS,
            ruser, rhost, ckpt_name, MSG_OBJ_CKPT));
   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   DEXIT;
   return STATUS_OK;
}                     
Exemplo n.º 10
0
int 
main(int argc, char **argv) 
{
   lList *opts_cmdline = NULL;
   lList *opts_defaults = NULL;
   lList *opts_scriptfile = NULL;
   lList *opts_all = NULL;
   lListElem *job = NULL;
   lList *alp = NULL;
   lListElem *ep;
   int exit_status = 0;
   int just_verify;
   int tmp_ret;
   int wait_for_job = 0, is_immediate = 0;
   dstring session_key_out = DSTRING_INIT;
   dstring diag = DSTRING_INIT;
   dstring jobid = DSTRING_INIT;
   u_long32 start, end, step;
   u_long32 num_tasks;
   int count, stat;
   char *jobid_string = NULL;
   bool has_terse;
   drmaa_attr_values_t *jobids = NULL;

   u_long32 prog_number = 0;
   u_long32 myuid = 0;
   const char *sge_root = NULL;
   const char *cell_root = NULL;
   const char *username = NULL;
   const char *qualified_hostname = NULL;
   const char *unqualified_hostname = NULL;
   const char *mastername = NULL;

   DENTER_MAIN(TOP_LAYER, "qsub");

   prof_mt_init();

   /* Set up the program information name */
   sge_setup_sig_handlers(QSUB);

   DPRINTF(("Initializing JAPI\n"));

   if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag)
                                                      != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "\n");
      fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
              sge_dstring_get_string(&diag));
      fprintf(stderr, "\n");
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   prog_number = ctx->get_who(ctx);
   myuid = ctx->get_uid(ctx);
   sge_root = ctx->get_sge_root(ctx);
   cell_root = ctx->get_cell_root(ctx);
   username = ctx->get_username(ctx);
   qualified_hostname = ctx->get_qualified_hostname(ctx);
   unqualified_hostname = ctx->get_unqualified_hostname(ctx);
   mastername = ctx->get_master(ctx, false);

   /*
    * read switches from the various defaults files
    */
   opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * append the commandline switches to the list
    */
   opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp,
                                          argv + 1, environ);
   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   /*
    * show usage if -help was in commandline
    */
   if (opt_list_has_X(opts_cmdline, "-help")) {
      sge_usage(QSUB, stdout);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   /*
    * We will only read commandline options from scripfile if the script
    * itself should not be handled as binary
    */
   if (opt_list_is_X_true(opts_cmdline, "-b") ||
       (!opt_list_has_X(opts_cmdline, "-b") &&
        opt_list_is_X_true(opts_defaults, "-b"))) {
      DPRINTF(("Skipping options from script due to -b option\n"));
   } else {
      opt_list_append_opts_from_script(prog_number,
                                       &opts_scriptfile, &alp, 
                                       opts_cmdline, environ);
      tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S,
                                           MSG_WARNING);
      if (tmp_ret > 0) {
         DEXIT;
         SGE_EXIT((void**)&ctx, tmp_ret);
      }
   }

   /*
    * Merge all commandline options and interprete them
    */
   opt_list_merge_command_lines(&opts_all, &opts_defaults, 
                                &opts_scriptfile, &opts_cmdline);

   /*
    * Check if -terse is requested
    */
   has_terse = opt_list_has_X(opts_all, "-terse");

   /* If "-sync y" is set, wait for the job to end. */   
   /* Remove all -sync switches since cull_parse_job_parameter()
    * doesn't know what to do with them. */
   while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
      if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
         wait_for_job = 1;
      }
      
      lRemoveElem(opts_all, &ep);
   }

   if (wait_for_job) {
      DPRINTF(("Wait for job end\n"));
   }

   alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname, 
                                  qualified_hostname, opts_all, &job);

   tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
   if (tmp_ret > 0) {
      DEXIT;
      SGE_EXIT((void**)&ctx, tmp_ret);
   }

   if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
      answer_list_output(&alp);
      DEXIT;
      SGE_EXIT((void**)&ctx, 1);
   }

   /* Check if job is immediate */
   is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
   DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

   DPRINTF(("Everything ok\n"));

   if (lGetUlong(job, JB_verify)) {
      cull_show_job(job, 0, false);
      DEXIT;
      SGE_EXIT((void**)&ctx, 0);
   }

   if (is_immediate || wait_for_job) {
      pthread_t sigt;
      
      qsub_setup_sig_handlers(); 

      if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 " error preparing signal handling thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
      
      if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                                       DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
         const char *msg = sge_dstring_get_string(&diag);
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                 msg?msg:" error starting event client thread");
         fprintf(stderr, "\n");
         
         exit_status = 1;
         goto Error;
      }
   }
   
   job_get_submit_task_ids(job, &start, &end, &step);
   num_tasks = (end - start) / step + 1;

   if (num_tasks > 1) {
      int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, &diag);
      if (error != DRMAA_ERRNO_SUCCESS) {
         /* No active session here means that japi_enable_job_wait() was
          * interrupted by the signal handler, in which case we just break out
          * quietly. */
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the JAPI error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      DPRINTF(("job id is: %ld\n", jobids->it.ji.jobid));
      
      jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
   }
   else if (num_tasks == 1) {
      int error = japi_run_job(&jobid, &job, &diag);
      
      if (error != DRMAA_ERRNO_SUCCESS) {
         if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                    sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
         }
         
         /* BUGFIX: Issuezilla #1013
          * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
          * back into a GDI error code.  This is the easy solution.  The
          * correct solution would be to address issue #859, presumably by
          * having JAPI reuse the GDI error codes instead of the DRMAA error
          * codes. */
         if (error == DRMAA_ERRNO_TRY_LATER) {
            exit_status = STATUS_NOTOK_DOAGAIN;
         }
         else {
            exit_status = 1;
         }
         
         goto Error;
      }

      jobid_string = strdup(sge_dstring_get_string(&jobid));
      DPRINTF(("job id is: %s\n", jobid_string));

      sge_dstring_free(&jobid);
   }
   else {
      fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
      fprintf(stderr, "\n");
      
      exit_status = 1;
      goto Error;
   }
  
   /* only success message is printed to stdout */

   just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY || 
                  lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
   DPRINTF(("Just verifying job\n"));

   if (!just_verify) {
      const char *output = sge_dstring_get_string(&diag); 

      /* print the tersed output */
      if (has_terse) {
         printf("%s", jobid_string);
      } else if (output != NULL) {
        printf("%s", output);
      } else {
        printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
      }
      printf("\n");
   } else {
      printf(MSG_JOB_VERIFYFOUNDQ);
      printf("\n");
   }   

   if ((wait_for_job || is_immediate) && !just_verify) {
      int event;

      if (is_immediate) {
         fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

         /* We only need to wait for the first task to be scheduled to be able
          * to say that the job is running. */
         tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                             DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                             NULL, &diag);

         if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S,
                  jobid_string);
            fprintf(stderr, "\n");
         }
         /* A job finish event here means that the job was rejected. */
         else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) &&
                  (event == JAPI_JOB_FINISH)) {
            fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);
            
            exit_status = 1;
            goto Error;
         }
         else {
         /* Since we told japi_wait to wait forever, we know that if it gets
          * a timeout, it's because it's been interrupted to exit, in which
          * case we don't complain.  Same for no active session. */
            if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
               fprintf(stderr, "\n");
               fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S,
                       sge_dstring_get_string(&diag));
               fprintf(stderr, "\n");
            }

            exit_status = 1;
            goto Error;
         }
      }
         
      if (wait_for_job) {
         /* Rather than using japi_synchronize on ALL for bulk jobs, we use
          * japi_wait on ANY num_tasks times because with synchronize, we would
          * have to wait for all the tasks to finish before we know if any
          * finished. */
         for (count = 0; count < num_tasks; count++) {
            /* Since there's only one running job in the session, we can just
             * wait for ANY. */
            if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                          DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                          NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
               if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                   (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                  fprintf(stderr, "\n");
                  fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                  fprintf(stderr, "\n");
               }
               
               exit_status = 1;
               goto Error;
            }
            
            /* report how job finished */
            /* If the job is an array job, use the first non-zero exit code as
             * the exit code for qsub. */
            if (exit_status == 0) {
               exit_status = report_exit_status(stat,
                                              sge_dstring_get_string(&jobid));
            }
            /* If we've already found a non-zero exit code, just print the exit
             * info for the task. */
            else {
               report_exit_status(stat, sge_dstring_get_string(&jobid));
            }               
         }
      }
   }

Error:
   FREE(jobid_string);
   lFreeList(&alp);
   lFreeList(&opts_all);
   
   if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
      if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
         fprintf(stderr, "\n");
         fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
         fprintf(stderr, "\n");
      }
      else {
         struct timespec ts;
         /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
          * because the signal handler thread called japi_exit().  We know this
          * because if the call to japi_init() fails, we just exit directly.
          * If the call to japi_init() succeeds, then we have an active session,
          * so coming here because of an error would not result in the
          * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
         DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));
         
         sge_relative_timespec(15, &ts);
         sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
         
         while (!exited) {
            if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
               DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
               break;
            }
         }
         
         sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
      }
   }

   sge_prof_cleanup();

   /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
    * supended, SGE_EXIT() hangs. */
   exit(exit_status);
   DEXIT;
   return exit_status;
}
Exemplo n.º 11
0
/************************************************************
  sge_mod_sharetree - Master code

  Modify a share tree
  lListElem *ep,   This is the new share tree 
  lList **lpp,     list to change 
 ************************************************************/
int sge_mod_sharetree(sge_gdi_ctx_class_t *ctx,
                      lListElem *ep, lList **lpp, lList **alpp, 
                      char *ruser, char *rhost ) 
{
   int ret;
   int prev_version;
   int adding; 
   lList *found = NULL;
   object_description *object_base = object_type_get_object_description();

   DENTER(TOP_LAYER, "sge_mod_sharetree");

   /* do some checks */
   if (!ep || !ruser || !rhost) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DEXIT;
      return STATUS_EUNKNOWN;
   }

   ret = check_sharetree(alpp, ep, *object_base[SGE_TYPE_USER].list, 
                         *object_base[SGE_TYPE_PROJECT].list, 
                         NULL, &found);
   lFreeList(&found);
   if (ret) {
      /* alpp gets filled by check_sharetree() */
      DRETURN(STATUS_EUNKNOWN);
   }

   /* check for presence of sharetree list and create if neccesary */
   if (!*lpp) {
      prev_version = 0;
      *lpp = lCreateList("sharetree list", STN_Type);
      adding = 1;
   } else if (lGetNumberOfElem(*lpp) == 0) {
      prev_version = 0;
      adding = 1;
   } else {
      lListElem *first = lFirst(*lpp);
      /* real action: change user or project
         We simply replace the old element with the new one. If there is no
         old we simle add the new. */
     
      prev_version = lGetUlong(lFirst(*lpp), STN_version); 
      lRemoveElem(*lpp, &first);
      adding = 0;
   }

   lSetUlong(ep, STN_version, prev_version+1);

   id_sharetree(alpp, ep, 0, NULL);

   /* now insert new element */
   lAppendElem(*lpp, lCopyElem(ep));
  
   /* write sharetree to file */
   if (!sge_event_spool(ctx,
                        alpp, 0, sgeE_NEW_SHARETREE,
                        0, 0, NULL, NULL, NULL,
                        ep, NULL, NULL, true, true)) {

      /* answer list gets filled in sge_event_spool() */
      DEXIT;
      return ret;
   }

   if (adding) {
      INFO((SGE_EVENT, MSG_STREE_ADDSTREE_SSII, 
         ruser, rhost, lGetNumberOfNodes(ep, NULL, STN_children), lGetNumberOfLeafs(ep, NULL, STN_children)));
   } else {
      INFO((SGE_EVENT, MSG_STREE_MODSTREE_SSII, 
         ruser, rhost, lGetNumberOfNodes(ep, NULL, STN_children), lGetNumberOfLeafs(ep, NULL, STN_children)));
   }

   answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);

   DEXIT;
   return STATUS_OK;
}
Exemplo n.º 12
0
/************************************************************************
 Master routine for job exit

 We need a rusage struct filled.
 In normal cases this is done by the execd, sending this structure
 to notify master about job finish.

 In case of an error noticed by the master which needs the job to be 
 removed we can fill this structure by hand. We need:

 rusage->job_number
 rusage->qname to clean up the queue (if we didn't find it we nevertheless
               clean up the job

 for functions regarding rusage see sge_rusage.c
 ************************************************************************/
void sge_job_exit(sge_gdi_ctx_class_t *ctx, lListElem *jr, lListElem *jep, lListElem *jatep, monitoring_t *monitor) 
{
   lListElem *queueep = NULL;
   const char *err_str = NULL;
   const char *qname = NULL;  
   const char *hostname = MSG_OBJ_UNKNOWNHOST;
   u_long32 jobid, jataskid;
   lListElem *hep = NULL;
   u_long32 timestamp;
   object_description *object_base = object_type_get_object_description();

   u_long32 failed, general_failure;
   lList *saved_gdil;

   DENTER(TOP_LAYER, "sge_job_exit");

   /* JG: TODO: we'd prefer some more precise timestamp, e.g. from jr */
   timestamp = sge_get_gmt();
                     
   qname = lGetString(jr, JR_queue_name);
   if (qname == NULL) {
      qname = (char *)MSG_OBJ_UNKNOWNQ;
   }

   err_str = lGetString(jr, JR_err_str);
   if (err_str == NULL) {
      err_str = MSG_UNKNOWNREASON;
   }

   jobid = lGetUlong(jr, JR_job_number);
   jataskid = lGetUlong(jr, JR_ja_task_number);
   failed = lGetUlong(jr, JR_failed);
   general_failure = lGetUlong(jr, JR_general_failure);

   cancel_job_resend(jobid, jataskid);

   /* This only has a meaning for Hibernator jobs. The job pid must
    * be saved accross restarts, since jobs get their old pid
    */
   lSetUlong(jatep, JAT_pvm_ckpt_pid, lGetUlong(jr, JR_job_pid));

   DPRINTF(("reaping job "sge_u32"."sge_u32" in queue >%s< job_pid %d\n", 
      jobid, jataskid, qname, (int) lGetUlong(jatep, JAT_pvm_ckpt_pid)));

   queueep = cqueue_list_locate_qinstance(*object_base[SGE_TYPE_CQUEUE].list, qname);
   if (queueep == NULL) {
      ERROR((SGE_EVENT, MSG_JOB_WRITEJFINISH_S, qname));
   }

   sge_job_remove_enforce_limit_trigger(jobid, jataskid);

   /* retrieve hostname for later use */
   if (queueep != NULL) {
      hostname = lGetHost(queueep, QU_qhostname);
   }

   if (failed) {        /* a problem occured */
      WARNING((SGE_EVENT, MSG_JOB_FAILEDONHOST_UUSSSS, sge_u32c(jobid), 
               sge_u32c(jataskid), 
               hostname,
               general_failure ? MSG_GENERAL : "",
               get_sstate_description(failed), err_str));
   } else {
      INFO((SGE_EVENT, MSG_JOB_JFINISH_UUS,  sge_u32c(jobid), sge_u32c(jataskid), hostname));
   }

   /*-------------------------------------------------*/

   /* test if this job is in state JRUNNING or JTRANSFERING */
   if (lGetUlong(jatep, JAT_status) != JRUNNING && 
       lGetUlong(jatep, JAT_status) != JTRANSFERING) {
      ERROR((SGE_EVENT, MSG_JOB_JEXITNOTRUN_UU, sge_u32c(lGetUlong(jep, JB_job_number)), sge_u32c(jataskid)));
      DRETURN_VOID;
   }

   saved_gdil = lCopyList("cpy", lGetList(jatep, JAT_granted_destin_identifier_list));

   /*
    * case 1: job being trashed because 
    *    --> failed starting interactive job
    *    --> job was deleted
    *    --> a failed batch job that explicitely shall not enter error state
    */
   if (((lGetUlong(jatep, JAT_state) & JDELETED) == JDELETED) ||
         (failed && !lGetString(jep, JB_exec_file)) ||
         (failed && general_failure==GFSTATE_JOB && JOB_TYPE_IS_NO_ERROR(lGetUlong(jep, JB_type)))) {
      reporting_create_acct_record(ctx, NULL, jr, jep, jatep, false);
      /* JG: TODO: we need more information in the log message */
      reporting_create_job_log(NULL, timestamp, JL_DELETED, MSG_EXECD, hostname, jr, jep, jatep, NULL, MSG_LOG_JREMOVED);

      sge_commit_job(ctx, jep, jatep, jr, COMMIT_ST_FINISHED_FAILED_EE, COMMIT_DEFAULT | COMMIT_NEVER_RAN, monitor);

      if (lGetUlong(jep, JB_ar) != 0 && (lGetUlong(jatep, JAT_state) & JDELETED) == JDELETED) {
         /* get AR and remove it if no other jobs are debited */
         lList *master_ar_list = *object_base[SGE_TYPE_AR].list;
         lListElem *ar = ar_list_locate(master_ar_list, lGetUlong(jep, JB_ar));

         if (ar != NULL && lGetUlong(ar, AR_state) == AR_DELETED) {
            lListElem *ar_queue;
            u_long32 ar_id = lGetUlong(ar, AR_id);

            for_each(ar_queue, lGetList(ar, AR_reserved_queues)) {
               if (qinstance_slots_used(ar_queue) != 0) {
                  break;
               }
            }
            if (ar_queue == NULL) {
               /* no jobs registered in advance reservation */
               dstring buffer = DSTRING_INIT;

               sge_dstring_sprintf(&buffer, sge_U32CFormat,
                                   sge_u32c(ar_id));

               ar_do_reservation(ar, false);

               reporting_create_ar_log_record(NULL, ar, ARL_DELETED, 
                                              "AR deleted",
                                              timestamp);
               reporting_create_ar_acct_records(NULL, ar, timestamp); 

               lRemoveElem(master_ar_list, &ar);

               sge_event_spool(ctx, NULL, 0, sgeE_AR_DEL, 
                      ar_id, 0, sge_dstring_get_string(&buffer), NULL, NULL,
                      NULL, NULL, NULL, true, true);
               sge_dstring_free(&buffer);
            }
         }
Exemplo n.º 13
0
/****** cull/db/lJoinSublist() ************************************************
*  NAME
*     lJoinSublist() -- Join a list with one of its sublists 
*
*  SYNOPSIS
*     lList* lJoinSublist(const char *name, 
*                         int nm0, 
*                         const lList *lp, 
*                         const lCondition *cp0, 
*                         const lEnumeration *enp0, 
*                         const lDescr *sldp, 
*                         const lCondition *cp1, 
*                         const lEnumeration *enp1) 
*
*  FUNCTION
*     Joins a list and one of its sublists together. The other 
*     parameters are equal to them from lJoin(). In the enumeration
*     'enp0' the sublist field neither may be selected nor 'enp0'
*     may be NULL. 
*
*  INPUTS
*     const char *name         - new list name 
*     int nm0                  - 
*     const lList *lp          - list 
*     const lCondition *cp0    - selects rows within 'lp' 
*     const lEnumeration *enp0 - selects columns within 'lp' 
*     const lDescr *sldp       - sublist descriptor pointer 
*     const lCondition *cp1    - selects rows within 'sldp' 
*     const lEnumeration *enp1 - selects columns within 'enp1' 
*
*  RESULT
*     lList* - Joined list 
******************************************************************************/
lList *lJoinSublist(const char *name, int nm0, const lList *lp, 
                    const lCondition *cp0, const lEnumeration *enp0,
                    const lDescr *sldp, const lCondition *cp1, 
                    const lEnumeration *enp1) 
{
   lList *dlp, *tlp, *joinedlist, *sublist;
   lListElem *ep;
   lDescr *dp; 
   const lDescr *tdp;
   int i, pos;

   DENTER(CULL_LAYER, "lJoinSublist");

   /* check different pointers */
   if (!name || !lp || !enp0 || !sldp || !enp1) {
      LERROR(LENULLARGS);
      DEXIT;
      return NULL;
   }

   /* make sure that nm0 is a sublist field of lp */
   if (!(tdp = lGetListDescr(lp))) {
      LERROR(LEDESCRNULL);
      DEXIT;
      return NULL;
   }
   if ((pos = lGetPosInDescr(tdp, nm0)) < 0) {
      LERROR(LENAMENOT);
      DEXIT;
      return NULL;
   }

   if (mt_get_type(tdp[pos].mt) != lListT) {
      LERROR(LEINCTYPE);
      DEXIT;
      return NULL;
   }

   /* is nm0 enumerated in enp0 ? */
   if (enp0[0].pos == WHAT_ALL) {
      LERROR(LEFALSEFIELD);
      DEXIT;
      return NULL;
   }
   for (i = 0; enp0[i].nm != NoName; i++)
      if (enp0[i].nm == nm0) {
         LERROR(LEFALSEFIELD);
         DEXIT;
         return NULL;
      }

   /* create destination list */
   if (!(dp = lJoinDescr(lGetListDescr(lp), sldp, enp0, enp1))) {
      LERROR(LEJOINDESCR);
      DEXIT;
      return NULL;
   }
   if (!(dlp = lCreateList(name, dp))) {
      sge_free(&dp);
      LERROR(LECREATELIST);
      DEXIT;
      return NULL;
   }
   /* free dp it has been copied in lCreateList */
   sge_free(&dp);

   /* create a temporary list to be used by lJoin */
   if (!(tlp = lCreateList("lJoinSublist: tlp", lGetListDescr(lp)))) {
      lFreeList(&dlp);
      LERROR(LECREATELIST);
      DEXIT;
      return NULL;
   }

   for_each_where(ep, lp, cp0) {
      /* is there a sublist for the join */
      if ((sublist = lGetList(ep, nm0)) != NULL) {

         /* put each element in the tlp to be used by lJoin */
         if (lAppendElem(tlp, lCopyElem(ep)) == -1) {
            lFreeList(&tlp);
            lFreeList(&dlp);
            LERROR(LEAPPENDELEM);
            DEXIT;
            return NULL;
         }

         /* join the tlp with one element together with its sublist */
         joinedlist = lJoin("lJoinSublist: joinedlist", nm0, tlp, NULL, enp0,
                            NoName, sublist, cp1, enp1);

         if (!joinedlist) {
            lFreeList(&tlp);
            lFreeList(&dlp);
            LERROR(LEJOIN);
            DEXIT;
            return NULL;
         }

         /* joinedlist is freed in lAddList */
         if (joinedlist && lAddList(dlp, &joinedlist) == -1) {
            LERROR(LEADDLIST);
            lFreeList(&tlp);
            lFreeList(&dlp);
            DEXIT;
            return NULL;
         }

         /* dechain the only element from tlp and free it (copy) */
         lRemoveElem(tlp, &(tlp->first));
      }
   }
   /* temporary list has to be freed */
   lFreeList(&tlp);

   /* RETURN AN EMPTY LIST OR NULL THAT'S THE QUESTION */

   if (lGetNumberOfElem(dlp) == 0) {
      lFreeList(&dlp);
   }

   DEXIT;
   return dlp;
}
Exemplo n.º 14
0
static void do_test(bool unique_hash, bool non_unique_hash, 
                    int num_objects, int num_names)
{
   lList *lp = NULL;
   lList *copy;
   lListElem *ep;
   clock_t now, start;
   struct tms tms_buffer;
#ifdef MALLINFO
   struct mallinfo meminfo;
#endif
   int i;
#ifdef HASH_STATISTICS
   cull_htable cht;
   dstring stat_dstring = DSTRING_INIT;
#endif

   /* measurement data */
   double  prof_create,  /* create objects */
           prof_copy,    /* copy list including all hashtables */
           prof_rau,     /* random access by unique attrib */
           prof_inu,     /* iterate by non unique attrib */
           prof_curo,    /* change unique attrib of random object */
           prof_cnuro,   /* change non unique attrib of random object */
           prof_dru,     /* delete random object by unique attribute */
           prof_dinu;    /* delete by iterating over non unique attrib */
   int     objs_dru,     /* objects deleted by random unique */
           objs_dinu;    /* objects deleted by iterate non unique */

   /* create list and hash tables */
   lp = lCreateList("test list ", DESCR);
   if (unique_hash) {
      cull_hash_new(lp, NM_ULONG, true);
   }
   if (non_unique_hash) {
      cull_hash_new(lp, NM_STRING, false);
   }
#ifdef HASH_STATISTICS
   cht = lp->descr[1].ht;
   printf("%s\n", cull_hash_statistics(cht, &stat_dstring));
#endif
   start = times(&tms_buffer);

   /* TEST: build objects */
   for (i = 0; i < num_objects; i++) {
      ep = lAddElemUlong(&lp, NM_ULONG, i, DESCR);
      lSetString(ep, NM_STRING, names[rand() % num_names]);
   }
   /* measure time */
   now = times(&tms_buffer);
   prof_create = (now - start) * 1.0 / clk_tck;

#ifdef HASH_STATISTICS
   printf("%s\n", cull_hash_statistics(cht, &stat_dstring));
#endif


   /* measure memory usage */
#ifdef MALLINFO
   meminfo = mallinfo();
#endif
  
   /* TEST: copy list */
   start = times(&tms_buffer);
   copy = lCopyList("copy", lp);

   /* measure time */
   now = times(&tms_buffer);
   prof_copy = (now - start) * 1.0 / clk_tck;
   lFreeList(&copy);

   /* TEST: random access by unique attrib */
   start = times(&tms_buffer);
   for (i = 0; i < num_objects; i++) {
      ep = lGetElemUlong(lp, NM_ULONG, rand() % num_objects);
   }

   /* measure time */
   now = times(&tms_buffer);
   prof_rau = (now - start) * 1.0 / clk_tck;
   start = now;

   /* TEST: iterate by non unique attrib */
   for (i = 0; i < num_names; i++) {
      const void *iterator = NULL;
      lListElem *next_ep;

      next_ep = lGetElemStrFirst(lp, NM_STRING, names[i], &iterator);
      while ((ep = next_ep) != NULL) {
         next_ep = lGetElemStrNext(lp, NM_STRING, names[i], &iterator);
      }
   }

   /* measure time */
   now = times(&tms_buffer);
   prof_inu = (now - start) * 1.0 / clk_tck;
   start = now;

   /* TEST: change unique attribute of random object */
   /* measure time */
   for (i = 0; i < num_objects; i++) {
      /* object to access */
      int unique = rand() % num_objects;
      /* new value to set, make sure we stay unique */
      int newval = unique + num_objects;

      /* search object */
      ep = lGetElemUlong(lp, NM_ULONG, unique);

      /* set a new value */
      lSetUlong(ep, NM_ULONG, newval);

      /* restore old value */
      lSetUlong(ep, NM_ULONG, unique);
   }

   now = times(&tms_buffer);
   prof_curo = (now - start) * 1.0 / clk_tck;
   start = now;

   /* TEST: change non unique attribute of random object */
   for (i = 0; i < num_objects; i++) {
      /* search object */
      ep = lGetElemUlong(lp, NM_ULONG, rand() % num_objects);

      /* set a new value */
      lSetString(ep, NM_STRING, names[rand() % num_names]);
   }
   /* measure time */
   now = times(&tms_buffer);
   prof_cnuro = (now - start) * 1.0 / clk_tck;
   start = now;

   /* TEST: delete random object */
   objs_dru = 0;
   for (i = 0; i < num_objects / 2; i++) {
      /* search object */
      ep = lGetElemUlong(lp, NM_ULONG, rand() % num_objects);
      /* if same rand showed up earlier, object does no longer exist! */
      if (ep != NULL) {
         lRemoveElem(lp, &ep);
         objs_dru++;
      }
   }   
   /* measure time */
   now = times(&tms_buffer);
   prof_dru = (now - start) * 1.0 / clk_tck;
   start = now;

   /* TEST: delete all objects having the same non unique attribute */
   objs_dinu = 0;
   for (i = 0; i < num_names; i++) {
      const void *iterator = NULL;
      lListElem *next_ep;

      next_ep = lGetElemStrFirst(lp, NM_STRING, names[i], &iterator);
      while ((ep = next_ep) != NULL) {
         next_ep = lGetElemStrNext(lp, NM_STRING, names[i], &iterator);
         lRemoveElem(lp, &ep);
         objs_dinu++;
      }
   }

   /* measure time */
   now = times(&tms_buffer);
   prof_dinu = (now - start) * 1.0 / clk_tck;
   start = now;

   printf(DATA_FORMAT, 
          unique_hash ? " * " : "   ",
          non_unique_hash ? " * " : "   ",
          prof_create, 
          prof_copy, 
          prof_rau, prof_inu, 
          prof_curo, prof_cnuro,
          prof_dru, objs_dru, prof_dinu, objs_dinu,
#ifdef MALLINFO
          (meminfo.usmblks + meminfo.uordblks) / 1024
#else
          0L
#endif
          );

#ifdef HASH_STATISTICS
printf("%s\n", cull_hash_statistics(cht, &stat_dstring));
   sge_dstring_free(&stat_dstring);
#endif
   lFreeList(&lp);
}