Example #1
0
int add_walltime_remaining(
   
  int         index,
  attribute  *pattr,
  tlist_head *phead)

  {
  int            len;
  char           buf[MAXPATHLEN];
  char          *pname;
  svrattrl      *pal;
  resource      *pres;
  
  int            found = 0;
  unsigned long  remaining = 0;
  unsigned long  upperBound = 0;

  /* encode walltime remaining, this is custom because walltime 
   * remaining isn't an attribute */
  if ((pattr + JOB_ATR_state)->at_val.at_char != 'R')
    {
    /* only for running jobs, do nothing */
    return(PBSE_NONE);
    }
  
  if (((pattr + JOB_ATR_resource)->at_val.at_list.ll_next != NULL) &&
      ((pattr + JOB_ATR_resource)->at_flags & ATR_VFLAG_SET))
    {
    pres = (resource *)GET_NEXT((pattr + JOB_ATR_resource)->at_val.at_list);
    
    if ((pattr + JOB_ATR_comp_time)->at_flags & ATR_VFLAG_SET)
      upperBound = (pattr + JOB_ATR_comp_time)->at_val.at_long;
    else
      upperBound = (unsigned long)time_now;
    
    /* find the walltime resource */
    for (;pres != NULL;pres = (resource *)GET_NEXT(pres->rs_link))
      {
      pname = pres->rs_defin->rs_name;
      
      if (strcmp(pname, "walltime") == 0)
        {
        /* found walltime */
        unsigned long value = (unsigned long)pres->rs_value.at_val.at_long;
        remaining = value - (time_now - (pattr + index)->at_val.at_long);
        found = TRUE;
        break;
        }
      }
    }
  
  if (found == TRUE)
    {
    snprintf(buf,sizeof(buf),"%lu",remaining);
    
    len = strlen(buf) + 1;
    pal = attrlist_create("Walltime","Remaining",len);
    
    if (pal != NULL)
      {
      memcpy(pal->al_value,buf,len);
      pal->al_flags = ATR_VFLAG_SET;
      append_link(phead,&pal->al_link,pal);
      }
    }

  return(PBSE_NONE);
  } /* END add_walltime_remaining() */
Example #2
0
int encode_svrstate(

  pbs_attribute  *pattr,   /* ptr to pbs_attribute */
  tlist_head     *phead,   /* head of attrlist list */
  const char    *atname,  /* pbs_attribute name */
  const char    *rsname,  /* null */
  int             mode,    /* encode mode */
  int             perm)    /* only used for resources */

  {
  svrattrl *pal;
  const char *psname;

  if (pattr == NULL)
    {
    /* FAILURE */

    return(-1);
    }

  if ((mode == ATR_ENCODE_SAVE)   ||
      (pattr->at_val.at_long <= SV_STATE_DOWN) ||
      (pattr->at_val.at_long > SV_STATE_SHUTSIG))
    {
    /* SUCCESS */

    return(0);  /* don't bother to encode it */
    }

  psname = svr_state_names[pattr->at_val.at_long];

  if (pattr->at_val.at_long == SV_STATE_RUN)
    {
    pthread_mutex_lock(server.sv_attr_mutex);
    if (server.sv_attr[SRV_ATR_scheduling].at_val.at_long == 0)
      psname = svr_idle;
    else 
      {
      pthread_mutex_lock(scheduler_sock_jobct_mutex);
      if (scheduler_sock != -1)
        psname = svr_sched;
      pthread_mutex_unlock(scheduler_sock_jobct_mutex);
      }
    pthread_mutex_unlock(server.sv_attr_mutex);
    }

  pal = attrlist_create(atname, rsname, strlen(psname) + 1);

  if (pal == NULL)
    {
    /* FAILURE */

    return(-1);
    }

  strcpy(pal->al_value, psname);

  pal->al_flags = pattr->at_flags;

  append_link(phead, &pal->al_link, pal);

  /* SUCCESS */

  return(1);
  }  /* END encode_svrstate() */
Example #3
0
int
decode_DIS_svrattrl(int sock, tlist_head *phead)
  {
  unsigned int i;
  unsigned int hasresc;
  size_t  ls;
  unsigned int data_len;
  unsigned int numattr;
  svrattrl       *psvrat = NULL;
  int  rc;
  size_t  tsize;


  numattr = disrui(sock, &rc); /* number of attributes in set */

  if (rc) return rc;

  for (i = 0; i < numattr; ++i)
    {

    data_len = disrui(sock, &rc); /* here it is used */

    if (rc) return rc;

    tsize = sizeof(svrattrl) + data_len;

    if ((psvrat = (svrattrl *)malloc(tsize)) == 0)
      return DIS_NOMALLOC;

    CLEAR_LINK(psvrat->al_link);

    psvrat->al_atopl.next = 0;

    psvrat->al_tsize = tsize;

    psvrat->al_name  = (char *)psvrat + sizeof(svrattrl);

    psvrat->al_resc  = 0;

    psvrat->al_value = 0;

    psvrat->al_nameln = 0;

    psvrat->al_rescln = 0;

    psvrat->al_valln  = 0;

    psvrat->al_flags  = 0;

    if ((rc = disrfcs(sock, &ls, data_len, psvrat->al_name)))
      break;

    *(psvrat->al_name + ls++) = '\0';

    psvrat->al_nameln = (int)ls;

    data_len -= ls;

    hasresc = disrui(sock, &rc);

    if (rc) 
	  break;

    if (hasresc)
      {
      psvrat->al_resc = psvrat->al_name + ls;

      if ((rc = disrfcs(sock, &ls, data_len, psvrat->al_resc)))
        break;

      *(psvrat->al_resc + ls++) = '\0';

      psvrat->al_rescln = (int)ls;

      data_len -= ls;
      }

    psvrat->al_value  = psvrat->al_name + psvrat->al_nameln +

                        psvrat->al_rescln;

    if ((rc = disrfcs(sock, &ls, data_len, psvrat->al_value)))
      break;

    *(psvrat->al_value + ls++) = '\0';

    psvrat->al_valln = (int)ls;

    psvrat->al_op = (enum batch_op)disrui(sock, &rc);

    if (rc) break;

    append_link(phead, &psvrat->al_link, psvrat);
    }

  if (rc)
    {
    (void)free(psvrat);
    }

  return (rc);
  }
Example #4
0
static int status_que(

  pbs_queue            *pque,     /* ptr to que to status */
  struct batch_request *preq,
  tlist_head           *pstathd)  /* head of list to append status to */

  {
  struct brp_status *pstat;
  svrattrl          *pal;
  int                bad = 0;

  if ((preq->rq_perm & ATR_DFLAG_RDACC) == 0)
    {
    return(PBSE_PERM);
    }

  /* ok going to do status, update count and state counts from qu_qs */

  pque->qu_attr[QA_ATR_TotalJobs].at_val.at_long = pque->qu_numjobs;

  pque->qu_attr[QA_ATR_TotalJobs].at_flags |= ATR_VFLAG_SET;

  update_state_ct(
    &pque->qu_attr[QA_ATR_JobsByState],
    pque->qu_njstate,
    pque->qu_jobstbuf);

  /* allocate status sub-structure and fill in header portion */

  pstat = (struct brp_status *)calloc(1, sizeof(struct brp_status));

  if (pstat == NULL)
    {
    return(PBSE_SYSTEM);
    }

  memset(pstat, 0, sizeof(struct brp_status));
  pstat->brp_objtype = MGR_OBJ_QUEUE;

  strcpy(pstat->brp_objname, pque->qu_qs.qu_name);

  CLEAR_LINK(pstat->brp_stlink);
  CLEAR_HEAD(pstat->brp_attr);

  append_link(pstathd, &pstat->brp_stlink, pstat);

  /* add attributes to the status reply */

  pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

  if (status_attrib(
        pal,
        que_attr_def,
        pque->qu_attr,
        QA_ATR_LAST,
        preq->rq_perm,
        &pstat->brp_attr,
        &bad,
        1) != 0)   /* IsOwner == TRUE */
    {
    return(bad);
    }

  return(PBSE_NONE);
  }  /* END status_que() */
Example #5
0
int req_stat_svr(

  struct batch_request *preq) /* ptr to the decoded request */

  {
  svrattrl             *pal;

  struct batch_reply   *preply;

  struct brp_status    *pstat;
  int                   bad = 0;
  char                  nc_buf[128];
  int                   numjobs;
  int                   netrates[3];

  memset(netrates, 0, sizeof(netrates));

  /* update count and state counts from sv_numjobs and sv_jobstates */
  lock_sv_qs_mutex(server.sv_qs_mutex, __func__);
  numjobs = server.sv_qs.sv_numjobs;
  unlock_sv_qs_mutex(server.sv_qs_mutex, __func__);
  
  pthread_mutex_lock(server.sv_attr_mutex);
  server.sv_attr[SRV_ATR_TotalJobs].at_val.at_long = numjobs;
  server.sv_attr[SRV_ATR_TotalJobs].at_flags |= ATR_VFLAG_SET;

  pthread_mutex_lock(server.sv_jobstates_mutex);

  update_state_ct(
    &server.sv_attr[SRV_ATR_JobsByState],
    server.sv_jobstates,
    server.sv_jobstbuf);
  
  pthread_mutex_unlock(server.sv_jobstates_mutex);

  netcounter_get(netrates);
  snprintf(nc_buf, 127, "%d %d %d", netrates[0], netrates[1], netrates[2]);

  if (server.sv_attr[SRV_ATR_NetCounter].at_val.at_str != NULL)
    free(server.sv_attr[SRV_ATR_NetCounter].at_val.at_str);
  server.sv_attr[SRV_ATR_NetCounter].at_val.at_str = strdup(nc_buf);
  if (server.sv_attr[SRV_ATR_NetCounter].at_val.at_str != NULL)
    server.sv_attr[SRV_ATR_NetCounter].at_flags |= ATR_VFLAG_SET;
  pthread_mutex_unlock(server.sv_attr_mutex);

  /* allocate a reply structure and a status sub-structure */

  preply = &preq->rq_reply;
  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  pstat = (struct brp_status *)calloc(1, sizeof(struct brp_status));

  if (pstat == NULL)
    {
    reply_free(preply);

    req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);
    pthread_mutex_unlock(server.sv_attr_mutex);

    return(PBSE_SYSTEM);
    }

  CLEAR_LINK(pstat->brp_stlink);

  strcpy(pstat->brp_objname, server_name);

  pstat->brp_objtype = MGR_OBJ_SERVER;

  CLEAR_HEAD(pstat->brp_attr);

  append_link(&preply->brp_un.brp_status, &pstat->brp_stlink, pstat);

  /* add attributes to the status reply */

  pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

  if (status_attrib(
        pal,
        svr_attr_def,
        server.sv_attr,
        SRV_ATR_LAST,
        preq->rq_perm,
        &pstat->brp_attr,
        &bad,
        1))    /* IsOwner == TRUE */
    {
    reply_badattr(PBSE_NOATTR, bad, pal, preq);
    }
  else
    {
    reply_send_svr(preq);
    }
    

  return(PBSE_NONE);
  }  /* END req_stat_svr() */
Example #6
0
void req_commit(

  struct batch_request *preq)  /* I */

  {
  job   *pj;

  pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "committing job");
    }

  if (pj == NULL)
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM)
    {
    log_err(errno, "req_commit", "cannot commit job in unexpected state");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

    return;
    }

  /* move job from new job list to "all" job list, set to running state */

  delete_link(&pj->ji_alljobs);

  append_link(&svr_alljobs, &pj->ji_alljobs, pj);

  /*
  ** Set JOB_SVFLG_HERE to indicate that this is Mother Superior.
  */

  pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE;

  pj->ji_qs.ji_state = JOB_STATE_RUNNING;

  pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN;

  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM;

  pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn);

  pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

  /* For MOM - start up the job (blocks) */

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "starting job execution");
    }

  start_exec(pj);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "job execution started");
    }

  /* if start request fails, reply with failure string */

  if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING)
    {
    char tmpLine[1024];

    if ((pj->ji_hosts != NULL) &&
        (pj->ji_nodekill >= 0) &&
        (pj->ji_hosts[pj->ji_nodekill].hn_host != NULL))
      {
      sprintf(tmpLine, "start failed on node %s",
              pj->ji_hosts[pj->ji_nodekill].hn_host);
      }
    else
      {
      sprintf(tmpLine, "start failed on unknown node");
      }

    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
        tmpLine);
      }

    reply_text(preq, 0, tmpLine);
    }
  else
    {
    reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Commit);
    }

  job_save(pj, SAVEJOB_FULL);

  /* NOTE: we used to flag JOB_ATR_errpath, JOB_ATR_outpath,
   * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure
   * pbs_server got these attr values.  This worked fine before TORQUE modified
   * job launched into an async process.  At 2.0.0p6, a new attribute "SEND" flag
   * was added to handle this process. */

  return;
  }  /* END req_commit() */
int encode_state(

  pbs_attribute *pattr, /*struct pbs_attribute being encoded  */
  tlist_head    *ph, /*head of a list of  "svrattrl" structs 
                       which are to be returned*/
  char          *aname, /*pbs_attribute's name    */
  char          *rname, /*resource's name (null if none)  */
  int            mode, /*mode code, unused here   */
  int            perm) /* only used for resources */

  {
  int       i;
  svrattrl *pal;
  short     state;

  char      state_str[MAX_ENCODE_BFR];

  if (!pattr)
    {
    return -(PBSE_INTERNAL);
    }

  if (!(pattr->at_flags & ATR_VFLAG_SET))
    {
    /* SUCCESS - pbs_attribute not set */

    return(0);
    }

  state = pattr->at_val.at_short & (INUSE_SUBNODE_MASK | INUSE_UNKNOWN);

  if (!state)
    {
    strcpy(state_str, ND_free);
    }
  else
    {
    state_str[0] = '\0';

    for (i = 0;ns[i].name;i++)
      {
      if (state & ns[i].bit)
        {
        if (state_str[0] != '\0')
          strcat(state_str, ",");

        strcat(state_str, ns[i].name);
        }
      }
    }

  pal = attrlist_create(aname, rname, (int)strlen(state_str) + 1);

  if (pal == NULL)
    {
    return -(PBSE_SYSTEM);
    }

  strcpy(pal->al_value, state_str);

  pal->al_flags = ATR_VFLAG_SET;

  append_link(ph, &pal->al_link, pal);

  /* SUCCESS */

  return(0);
  }  /* END encode_state */
Example #8
0
int
status_job(job *pjob, struct batch_request *preq, svrattrl *pal, pbs_list_head *pstathd, int *bad)
{
	struct brp_status *pstat;
	time_t tm;
	long oldtime = 0;
	int old_elig_flags = 0;
	int old_atyp_flags = 0;

	/* see if the client is authorized to status this job */

	if (! server.sv_attr[(int)SRV_ATR_query_others].at_val.at_long)
		if (svr_authorize_jobreq(preq, pjob))
			return (PBSE_PERM);

	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) {
		/* for Array Job, if array_indices_remaining is modified */
		/* then need to recalculate the string value	     */
		char *pnewstr;
		attribute *premain;

		premain = &pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining];
		if (premain->at_flags & ATR_VFLAG_MODCACHE) {
			pnewstr = cvt_range(pjob->ji_ajtrk, JOB_STATE_QUEUED);
			if (pnewstr == NULL)
				pnewstr = "-";
			job_attr_def[JOB_ATR_array_indices_remaining].at_free(premain);
			job_attr_def[JOB_ATR_array_indices_remaining].at_decode(premain, 0, 0, pnewstr);
			/* also update value of attribute "array_state_count" */
			update_subjob_state_ct(pjob);
		}
	}

	/* calc eligible time on the fly and return, don't save. */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long != 0) {
		if (pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) {
			time(&tm);
			oldtime = pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long;
			pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long += ((long)tm - pjob->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long);
			pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;

			/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
			/* 	 not correctly check ATR_VFLAG_SET */
		}
	} else {
		/* eligible_time_enable is off so,				       */
		/* clear set flag so that eligible_time and accrue type dont show */
		old_elig_flags = pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= (ATR_VFLAG_MODCACHE);

		old_atyp_flags = pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags |= (ATR_VFLAG_MODCACHE);

		/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
		/*		not correctly check ATR_VFLAG_SET */
	}

	/* allocate reply structure and fill in header portion */

	pstat = (struct brp_status *)malloc(sizeof(struct brp_status));
	if (pstat == (struct brp_status *)0)
		return (PBSE_SYSTEM);
	CLEAR_LINK(pstat->brp_stlink);
	pstat->brp_objtype = MGR_OBJ_JOB;
	(void)strcpy(pstat->brp_objname, pjob->ji_qs.ji_jobid);
	CLEAR_HEAD(pstat->brp_attr);
	append_link(pstathd, &pstat->brp_stlink, pstat);

	/* add attributes to the status reply */

	*bad = 0;
	if (status_attrib(pal, job_attr_def, pjob->ji_wattr, JOB_ATR_LAST,
		preq->rq_perm, &pstat->brp_attr, bad))
		return (PBSE_NOATTR);

	/* reset eligible time, it was calctd on the fly, real calctn only when accrue_type changes */

	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long != 0) {
		if (pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) {
			pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long = oldtime;
			pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;

			/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
			/*	 not correctly check ATR_VFLAG_SET */
		}
	} else {
		/* reset the set flags */
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags = old_elig_flags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags = old_atyp_flags;
	}

	return (0);
}
Example #9
0
/**
 * @brief
 * 		status_subjob - status a single subjob (of an Array Job)
 *		Works by statusing the parrent unless subjob is actually running.
 *
 * @param[in,out]	pjob	-	ptr to parent Array
 * @param[in]	preq	-	request structure
 * @param[in]	pal	-	specific attributes to status
 * @param[in]	subj	-	if not = -1 then include subjob [n]
 * @param[in,out]	pstathd	-	RETURN: head of list to append status to
 * @param[out]	bad	-	RETURN: index of first bad attribute
 *
 * @return	int
 * @retval	0	: success
 * @retval	PBSE_PERM	: client is not authorized to status the job
 * @retval	PBSE_SYSTEM	: memory allocation error
 * @retval	PBSE_IVALREQ	: something wrong with the flags
 */
int
status_subjob(job *pjob, struct batch_request *preq, svrattrl *pal, int subj, pbs_list_head *pstathd, int *bad)
{
	int		   limit = (int)JOB_ATR_LAST;
	struct brp_status *pstat;
	job		  *psubjob;	/* ptr to job to status */
	char		   realstate;
	int		   rc = 0;
	int		   oldeligflags = 0;
	int		   oldatypflags = 0;
	int 		   subjob_state = -1;
	char 		   *old_subjob_comment = NULL;

	/* see if the client is authorized to status this job */

	if (! server.sv_attr[(int)SRV_ATR_query_others].at_val.at_long)
		if (svr_authorize_jobreq(preq, pjob))
			return (PBSE_PERM);

	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0)
		return PBSE_IVALREQ;

	/* if subjob is running, use real job structure */

	if (get_subjob_state(pjob, subj) == JOB_STATE_RUNNING) {
		psubjob = find_job(mk_subjob_id(pjob, subj));
		if (psubjob)
			status_job(psubjob, preq, pal, pstathd, bad);
		return 0;
	}

	/* otherwise we fake it with info from the parent      */
	/* allocate reply structure and fill in header portion */


	/* for the general case, we don't want to include the parent's */
	/* array related attrbutes as they belong only to the Array    */
	if (pal == NULL)
		limit = JOB_ATR_array;
	pstat = (struct brp_status *)malloc(sizeof(struct brp_status));
	if (pstat == (struct brp_status *)0)
		return (PBSE_SYSTEM);
	CLEAR_LINK(pstat->brp_stlink);
	pstat->brp_objtype = MGR_OBJ_JOB;
	(void)strcpy(pstat->brp_objname, mk_subjob_id(pjob, subj));
	CLEAR_HEAD(pstat->brp_attr);
	append_link(pstathd, &pstat->brp_stlink, pstat);

	/* add attributes to the status reply */

	*bad = 0;

	/*
	 * fake the job state and comment by setting the parent job's state
	 * and comment to that of the subjob
	 */
	subjob_state = get_subjob_state(pjob, subj);
	realstate = pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char;
	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = statechars[subjob_state];
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	if (subjob_state == JOB_STATE_EXPIRED || subjob_state == JOB_STATE_FINISHED) {
		if (pjob->ji_ajtrk->tkm_tbl[subj].trk_substate == JOB_SUBSTATE_FINISHED) {
			if (pjob->ji_wattr[(int)JOB_ATR_Comment].at_flags & ATR_VFLAG_SET) {
				old_subjob_comment = strdup(pjob->ji_wattr[(int)JOB_ATR_Comment].at_val.at_str);
				if (old_subjob_comment == (char *)0)
					return (PBSE_SYSTEM);
			}
			if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
				(char *)0, (char *)0, "Subjob finished") == PBSE_SYSTEM) {
				free(old_subjob_comment);
				return (PBSE_SYSTEM);
			}
		} else if (pjob->ji_ajtrk->tkm_tbl[subj].trk_substate == JOB_SUBSTATE_FAILED) {
			if (pjob->ji_wattr[(int)JOB_ATR_Comment].at_flags & ATR_VFLAG_SET) {
				old_subjob_comment = strdup(pjob->ji_wattr[(int)JOB_ATR_Comment].at_val.at_str);
				if (old_subjob_comment == (char *)0)
					return (PBSE_SYSTEM);
			}
			if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
				(char *)0, (char *)0, "Subjob failed") == PBSE_SYSTEM) {
				free(old_subjob_comment);
				return (PBSE_SYSTEM);
			}
		} else if (pjob->ji_ajtrk->tkm_tbl[subj].trk_substate == JOB_SUBSTATE_TERMINATED) {
			if (pjob->ji_wattr[(int)JOB_ATR_Comment].at_flags & ATR_VFLAG_SET) {
				old_subjob_comment = strdup(pjob->ji_wattr[(int)JOB_ATR_Comment].at_val.at_str);
				if (old_subjob_comment == (char *)0)
					return (PBSE_SYSTEM);
			}
			if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
				(char *)0, (char *)0, "Subjob terminated") == PBSE_SYSTEM) {
				free(old_subjob_comment);
				return (PBSE_SYSTEM);
			}
		}
	}

	/* when eligible_time_enable is off,				      */
	/* clear the set flag so that eligible_time and accrue_type dont show */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 0) {
		oldeligflags = pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;

		oldatypflags = pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags |= ATR_VFLAG_MODCACHE;

		/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
		/* 	 not correctly check ATR_VFLAG_SET */
	}

	if (status_attrib(pal, job_attr_def, pjob->ji_wattr, limit,
		preq->rq_perm, &pstat->brp_attr, bad))
		rc =  PBSE_NOATTR;

	/* Set the parent state back to what it really is */

	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = realstate;
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	/* Set the parent comment back to what it really is */
	if (old_subjob_comment != NULL) {
		if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
			(char *)0, (char *)0, old_subjob_comment) == PBSE_SYSTEM) {
			free(old_subjob_comment);
			return (PBSE_SYSTEM);
		}

		free(old_subjob_comment);
	}

	/* reset the flags */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 0) {
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags = oldeligflags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags = oldatypflags;
	}

	return (rc);
}
Example #10
0
job *job_recov(

  char *filename) /* I */   /* pathname to job save file */

  {
  int  fds;
  job *pj;
  char *pn;
  char  namebuf[MAXPATHLEN];
  int    qs_upgrade;
#ifndef PBS_MOM
  char   parent_id[PBS_MAXSVRJOBID + 1];
  job_array *pa;
#endif

  qs_upgrade = FALSE;

  pj = job_alloc(); /* allocate & initialize job structure space */

  if (pj == NULL)
    {
    /* FAILURE - cannot alloc memory */

    return(NULL);
    }

  strcpy(namebuf, path_jobs); /* job directory path */

  strcat(namebuf, filename);

  fds = open(namebuf, O_RDONLY, 0);

  if (fds < 0)
    {
    sprintf(log_buffer, "unable to open %s",
            namebuf);

    log_err(errno, "job_recov", log_buffer);

    free((char *)pj);

    /* FAILURE - cannot open job file */

    return(NULL);
    }

  /* read in job quick save sub-structure */

  if (read(fds, (char *)&pj->ji_qs, quicksize) != (ssize_t)quicksize &&
      pj->ji_qs.qs_version == PBS_QS_VERSION)
    {
    sprintf(log_buffer, "Unable to read %s",
            namebuf);

    log_err(errno, "job_recov", log_buffer);

    free((char *)pj);

    close(fds);

    return(NULL);
    }

  /* is ji_qs the version we expect? */

  if (pj->ji_qs.qs_version != PBS_QS_VERSION)
    {
    /* ji_qs is older version */
    sprintf(log_buffer,
            "%s appears to be from an old version. Attempting to convert.\n",
            namebuf);
    log_err(-1, "job_recov", log_buffer);

    if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0)
      {
      sprintf(log_buffer, "unable to upgrade %s\n", namebuf);

      log_err(-1, "job_recov", log_buffer);

      free((char *)pj);

      close(fds);

      return(NULL);
      }

    qs_upgrade = TRUE;
    }  /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */

  /* Does file name match the internal name? */
  /* This detects ghost files */

  pn = strrchr(namebuf, (int)'/') + 1;

  if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0)
    {
    /* mismatch, discard job */

    sprintf(log_buffer, "Job Id %s does not match file name for %s",
            pj->ji_qs.ji_jobid,
            namebuf);

    log_err(-1, "job_recov", log_buffer);

    free((char *)pj);

    close(fds);

    return(NULL);
    }

  /* read in working attributes */

  if (recov_attr(
        fds,
        pj,
        job_attr_def,
        pj->ji_wattr,
        (int)JOB_ATR_LAST,
        (int)JOB_ATR_UNKN,
        TRUE) != 0)
    {
    sprintf(log_buffer, "unable to recover %s (file is likely corrupted)",
            namebuf);

    log_err(-1, "job_recov", log_buffer);

    job_free(pj);

    close(fds);

    return(NULL);
    }

#ifdef PBS_MOM
  /* read in tm sockets and ips */

  if (recov_tmsock(fds, pj) != 0)
    {
    sprintf(log_buffer, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)",
            namebuf);

    log_err(-1, "job_recov", log_buffer);
    }

  if (recov_roottask(fds, pj) != 0)
    {
    sprintf(log_buffer, "warning: root task not recovered from %s (written by an older pbs_mom?)",
            namebuf);

    log_err(-1, "job_recov", log_buffer);
    }

  if (recov_jobflags(fds, pj) != 0)
    {
    sprintf(log_buffer, "warning: job flags not recovered from %s (written by an older pbs_mom?)", namebuf);

    log_err(-1, "job_recov", log_buffer);
    }

#else /* PBS_MOM */

  if (pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET)
    {
    /* job is part of an array.  We need to put a link back to the server
    job array struct for this array. We also have to link this job into
    the linked list of jobs belonging to the array. */

    array_get_parent_id(pj->ji_qs.ji_jobid, parent_id);
    pa = get_array(parent_id);

    if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0)
      {
      pj->ji_isparent = TRUE;
      }
    else
      {
      if (pa == NULL)
        {
        /* couldn't find array struct, it must not have been recovered,
        treat job as indepentent job?  perhaps we should delete the job
        XXX_JOB_ARRAY: should I unset this?*/
        pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags &= ~ATR_VFLAG_SET;
        }
      else
        {
        CLEAR_LINK(pj->ji_arrayjobs);
        append_link(&pa->array_alljobs, &pj->ji_arrayjobs, (void*)pj);
        pj->ji_arraystruct = pa;
        pa->jobs_recovered++;
        }
      }
    }

#endif

  close(fds);

  /* all done recovering the job */

  if (qs_upgrade == TRUE)
    {
    job_save(pj, SAVEJOB_FULL);
    }

  return(pj);
  }  /* END job_recov() */
Example #11
0
int encode_time(

  attribute *attr,   /* ptr to attribute (value in attr->at_val.at_long) */
  tlist_head *phead,  /* head of attrlist list (optional) */
  char      *atname, /* attribute name */
  char      *rsname, /* resource name (optional) */
  int        mode)   /* encode mode (not used) */

  {
  size_t  ct;
  char   cvnbuf[CVNBUFSZ];
  int    hr;
  int   min;
  long   n;
  svrattrl *pal;
  int   sec;
  char  *pv;

  if (attr == NULL)
    {
    /* FAILURE */

    return(-1);
    }

  if (!(attr->at_flags & ATR_VFLAG_SET))
    {
    return(0);
    }

  n   = attr->at_val.at_long;

  hr  = n / 3600;
  n   = n % 3600;
  min = n / 60;
  n   = n % 60;
  sec = n;

  pv = cvnbuf;

  sprintf(pv, "%02d:%02d:%02d",
          hr, min, sec);

  pv += strlen(pv);

  ct = strlen(cvnbuf) + 1;

  if (phead != NULL)
    {
    pal = attrlist_create(atname, rsname, ct);

    if (pal == NULL)
      {
      /* FAILURE */

      return(-1);
      }

    memcpy(pal->al_value, cvnbuf, ct);

    pal->al_flags = attr->at_flags;

    append_link(phead, &pal->al_link, pal);
    }
  else
    {
    strcpy(atname, cvnbuf);
    }

  /* SUCCESS */

  return(1);
  }
Example #12
0
/**
 * attempt_delete()
 * deletes a job differently depending on the job's state
 *
 * @return TRUE if the job was deleted, FALSE if skipped
 * @param pjob - a pointer to the job being handled
 */
int attempt_delete(

  void *j) /* I */

  {
  int skipped = FALSE;
  struct work_task *pwtold;
  struct work_task *pwtnew;
  job *pjob;

  /* job considered deleted if null */
  if (j == NULL)
    return(TRUE);

  pjob = (job *)j;

  if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
    {
    /*
     * Find pid of router from existing work task entry,
     * then establish another work task on same child.
     * Next, signal the router and wait for its completion;
     */
    
    pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask);
    
    while (pwtold != NULL)
      {
      if ((pwtold->wt_type == WORK_Deferred_Child) ||
          (pwtold->wt_type == WORK_Deferred_Cmp))
        {
        kill((pid_t)pwtold->wt_event, SIGTERM);
        
        pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT;
        }
      
      pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj);
      }

    skipped = TRUE;
    
    return(!skipped);
    }  /* END if (pjob->ji_qs.ji_state == JOB_SUBSTATE_TRANSIT) */

  else if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN)
    {
    /* we'll wait for the mom to get this job, then delete it */
    skipped = TRUE;
    }  /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */

  else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /* set up nanny */
    
    if (!has_job_delete_nanny(pjob))
      {
      apply_job_delete_nanny(pjob, time_now + 60);
      
      /* need to issue a signal to the mom, but we don't want to sent an ack to the
       * client when the mom replies */
      issue_signal(pjob, "SIGTERM", post_delete, NULL);
      }

    if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
      {
      /* job has restart file at mom, change restart comment if failed */
      change_restart_comment_if_needed(pjob);
      }
    
    return(!skipped);
    }  /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, change restart comment if failed */    
    change_restart_comment_if_needed(pjob);
    
    /* job has restart file at mom, do end job processing */
    svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

    pjob->ji_momhandle = -1;
    
    /* force new connection */
    pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);
    
    if (pwtnew)
      {
      append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
      }
   
    }
  else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
    {
    /* job has staged-in file, should remove them */
    
    remove_stagein(pjob);
    
    job_abt(&pjob, NULL);
    }
  else
    {
    /*
     * the job is not transitting (though it may have been) and
     * is not running, so put in into a complete state.
     */

    struct work_task *ptask;
    struct pbs_queue *pque;
    int  KeepSeconds = 0;

    svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE);
    
    if ((pque = pjob->ji_qhdr) && (pque != NULL))
      {
      pque->qu_numcompleted++;
      }
    
    KeepSeconds = attr_ifelse_long(
        &pque->qu_attr[QE_ATR_KeepCompleted],
        &server.sv_attr[SRV_ATR_KeepCompleted],
        0);
    ptask = set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, pjob);
    
    if (ptask != NULL)
      {
      append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask);
      }
    }

  return(!skipped);
  } /* END attempt_delete() */
Example #13
0
void array_delete_wt(struct work_task *ptask)
  {

  struct batch_request *preq;
  job_array *pa;
  /*struct work_task *pnew_task;*/

  struct work_task *pwtnew;

  int i;

  static int last_check = 0;
  static char *last_id = NULL;

  preq = ptask->wt_parm1;

  pa = get_array(preq->rq_ind.rq_delete.rq_objname);

  if (pa == NULL)
    {
    /* jobs must have exited already */
    reply_ack(preq);
    last_check = 0;
    free(last_id);
    last_id = NULL;
    return;
    }

  if (last_id == NULL)
    {
    last_id = strdup(preq->rq_ind.rq_delete.rq_objname);
    last_check = time_now;
    }
  else if (strcmp(last_id, preq->rq_ind.rq_delete.rq_objname) != 0)
    {
    last_check = time_now;
    free(last_id);
    last_id = strdup(preq->rq_ind.rq_delete.rq_objname);
    }
  else if (time_now - last_check > 10)
    {
    int num_jobs;
    int num_prerun;
    job *pjob;

    num_jobs = 0;
    num_prerun = 0;

    for (i = 0; i < pa->ai_qs.array_size; i++)
      {
      if (pa->jobs[i] == NULL)
        continue;

      pjob = (job *)pa->jobs[i];

      num_jobs++;

      if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN)
        {
        num_prerun++;
        /* mom still hasn't gotten job?? delete anyway */

        if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
          {
          /* job has restart file at mom, do end job processing */

          change_restart_comment_if_needed(pjob);

          svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

          pjob->ji_momhandle = -1;

          /* force new connection */

          pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

          if (pwtnew)
            {
            append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
            }

          }
        else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
          {
          /* job has staged-in file, should remove them */

          remove_stagein(pjob);

          job_abt(&pjob, NULL);
          }
        else
          {
          job_abt(&pjob, NULL);
          }

        }

      }

    if (num_jobs == num_prerun)
      {
      reply_ack(preq);
      free(last_id);
      last_id = NULL;
      return;
      }

    }



  req_deletearray(preq);


  }
int encode_arst(

  pbs_attribute  *attr,   /* I ptr to pbs_attribute to encode */
  tlist_head     *phead,  /* O ptr to head of attrlist list */
  char           *atname, /* I pbs_attribute name */
  char           *rsname, /* I resource name or NULL (optional) */
  int             mode,   /* I encode mode */
  int             perm)   /* only used for resources */

  {
  char     *end;
  int       i;
  int       j;
  svrattrl *pal;
  char     *pc;
  char     *pfrom;
  char      separator;

  if (attr == NULL)
    {
    /* FAILURE - invalid parameters */

    return(-2);
    }

  if (((attr->at_flags & ATR_VFLAG_SET) == 0) ||
      !attr->at_val.at_arst ||
      !attr->at_val.at_arst->as_usedptr)
    {
    /* SUCCESS - empty list */

    return(0); /* no values */
    }

  i = (int)(attr->at_val.at_arst->as_next - attr->at_val.at_arst->as_buf);

  if (mode == ATR_ENCODE_SAVE)
    {
    separator = '\n'; /* new-line for encode_acl  */

    /* allow one extra byte for final new-line */

    j = i + 1;
    }
  else
    {
    separator = ','; /* normally a comma is the separator */

    j = i;
    }

  /* how many back-slashes are required */

  for (pc = attr->at_val.at_arst->as_buf;pc < attr->at_val.at_arst->as_next;++pc)
    {
    if ((*pc == '"') || (*pc == '\'') || (*pc == ',') || (*pc == '\\') || (*pc == '\n'))
      ++j;
    }

  pal = attrlist_create(atname, rsname, j + 1);

  if (pal == NULL)
    {
    return(-1);
    }

  pal->al_flags = attr->at_flags;

  pc    = pal->al_value;
  pfrom = attr->at_val.at_arst->as_buf;

  /* replace nulls between sub-strings with separater characters */
  /* escape any embedded special character */

  end = attr->at_val.at_arst->as_next;

  while (pfrom < end)
    {
    switch (*pfrom)
      {
      case '\0':

        *pc = separator;

        break;

      case '"':
      case '\'':
      case ',':
      case '\\':
      case '\n':

        *pc++ = '\\';

      default:

        *pc = *pfrom;

        break;
      }

    pc++;

    pfrom++;
    }

  /* convert the last null to separator only if going to new-lines */

  if (mode == ATR_ENCODE_SAVE)
    *pc = '\0'; /* insure string terminator */
  else
    *(pc - 1) = '\0';

  append_link(phead, &pal->al_link, pal);

  return(1);
  }  /* END encode_arst() */
Example #15
0
/**
 * @brief
 * 	main - the initialization and main loop of pbs_daemon
 */
int
main(int argc, char *argv[])
{
	char		jobfile[MAXPATHLEN+1];
	char		jobfile_full[MAXPATHLEN+1];
	pbs_net_t	hostaddr = 0;
	int			port = -1;
	int			move_type = -1;
	pbs_list_head	attrl;
	enum conn_type  cntype = ToServerDIS;
	int		con = -1;
	char		*destin;
	int			encode_type;
	int			i;
	job			*jobp;
	char		 job_id[PBS_MAXSVRJOBID+1];
	attribute	*pattr;
	struct attropl  *pqjatr;      /* list (single) of attropl for quejob */
	char		 script_name[MAXPATHLEN+1];
	int			in_server = -1;
	char		*param_name, *param_val;
	char		buf[4096];
	struct  hostent *hp;
	struct in_addr  addr;
	char            *credbuf = NULL;
	size_t		credlen = 0;
	int 		prot = PROT_TCP;
	/*the real deal or output version and exit?*/

	execution_mode(argc, argv);

	/* If we are not run with real and effective uid of 0, forget it */

	pbs_loadconf(0);

	if (!isAdminPrivilege(getlogin())) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		exit(SEND_JOB_FATAL);
	}

	/* initialize the pointers in the resource_def array */
	for (i = 0; i < (svr_resc_size - 1); ++i)
		svr_resc_def[i].rs_next = &svr_resc_def[i+1];
	/* last entry is left with null pointer */
	/* set single threaded mode */
	pbs_client_thread_set_single_threaded_mode();
	/* disable attribute verification */
	set_no_attribute_verification();

	/* initialize the thread context */
	if (pbs_client_thread_init_thread_context() != 0) {
		fprintf(stderr, "%s: Unable to initialize thread context\n",
			argv[0]);
		exit(SEND_JOB_FATAL);
	}

	if(set_msgdaemonname("PBS_send_job")) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	winsock_init();

	connection_init();

	while (fgets(buf, sizeof(buf), stdin) != NULL) {
		buf[strlen(buf)-1] = '\0';	/* gets rid of newline */

		param_name = buf;
		param_val = strchr(buf, '=');
		if (param_val) {
			*param_val = '\0';
			param_val++;
		} else {	/* bad param_val -- skipping */
			break;
		}

		if (strcmp(param_name, "jobfile") == 0) {
			jobfile[0] = '\0';
			strncpy(jobfile, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "destaddr") == 0) {
			hostaddr = atol(param_val);
		} else if (strcmp(param_name, "destport") == 0) {
			port = atoi(param_val);
		} else if (strcmp(param_name, "move_type") == 0) {
			move_type = atoi(param_val);
		} else if (strcmp(param_name, "in_server") == 0) {
			in_server = atoi(param_val);
		} else if (strcmp(param_name, "server_name") == 0) {
			server_name[0] = '\0';
			strncpy(server_name, param_val, PBS_MAXSERVERNAME);
		} else if (strcmp(param_name, "server_host") == 0) {
			server_host[0] = '\0';
			strncpy(server_host, param_val, (sizeof(server_host) - 1));
		} else if (strcmp(param_name, "server_addr") == 0) {
			pbs_server_addr = atol(param_val);
		} else if (strcmp(param_name, "server_port") == 0) {
			pbs_server_port_dis = atoi(param_val);
		} else if (strcmp(param_name, "log_file") == 0) {
			log_file = strdup(param_val);
		} else if (strcmp(param_name, "path_log") == 0) {
			path_log[0] = '\0';
			strncpy(path_log, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "path_jobs") == 0) {
			path_jobs = strdup(param_val);
		} else if (strcmp(param_name, "path_spool") == 0) {
			path_spool = strdup(param_val);
		} else if (strcmp(param_name, "path_rescdef") == 0) {
			path_rescdef = strdup(param_val);
		} else if (strcmp(param_name, "path_users") == 0) {
			path_users = strdup(param_val);
		} else if (strcmp(param_name, "path_hooks_workdir") == 0) {
			path_hooks_workdir = strdup(param_val);
			if (path_hooks_workdir == NULL)
				exit(SEND_JOB_FATAL);
		} else if (strcmp(param_name, "svr_history_enable") == 0) {
			svr_history_enable = atol(param_val);
		} else if (strcmp(param_name, "svr_history_duration") == 0) {
			svr_history_duration = atol(param_val);
		} else if (strcmp(param_name, "single_signon_password_enable") == 0) {
			if (decode_b(&server.sv_attr[(int)SRV_ATR_ssignon_enable],
				NULL, NULL, param_val) != 0) {
				fprintf(stderr, "%s: failed to set ssignon_password_enable\n", argv[0]);
				exit(SEND_JOB_FATAL);
			}
		} else if (strcmp(param_name, "script_name") == 0) {
			strncpy(script_name, param_val, MAXPATHLEN + 1);
		} else
			break;
	}

	time(&time_now);

	(void)log_open_main(log_file, path_log, 1); /* silent open */

	if (setup_resc(1) == -1) {
		/* log_buffer set in setup_resc */
		log_err(-1, "pbsd_send_job(setup_resc)", log_buffer);
		return (-1);
	}

	if( strlen(jobfile) == 0 || hostaddr == 0 || port == 0 ||  move_type == -1 || \
			in_server == -1 || strlen(server_name) == 0 || strlen(server_host) == 0 || \
			pbs_server_addr == 0 || pbs_server_port_dis == 0 || \
			strlen(path_log) == 0 || path_jobs == NULL || \
			path_spool == NULL || path_users == NULL ) {
		log_err(-1, "pbs_send_job", "error on one of the parameters");
		log_close(0);	/* silent close */
		exit(SEND_JOB_FATAL);
	}

	CLEAR_HEAD(task_list_immed);
	CLEAR_HEAD(task_list_timed);
	CLEAR_HEAD(task_list_event);
	CLEAR_HEAD(svr_queues);
	CLEAR_HEAD(svr_alljobs);
	CLEAR_HEAD(svr_newjobs);
	CLEAR_HEAD(svr_allresvs);
	CLEAR_HEAD(svr_newresvs);
	CLEAR_HEAD(svr_deferred_req);
	CLEAR_HEAD(svr_unlicensedjobs);

	strcpy(jobfile_full, path_jobs);
	strcat(jobfile_full, jobfile);

	if (chk_save_file(jobfile_full) != 0) {
		sprintf(log_buffer, "Error opening jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	if ((jobp=job_recov_fs(jobfile, RECOV_SUBJOB)) == NULL) {
		sprintf(log_buffer, "Failed to recreate job in jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	/* now delete the temp job file that was created by job_save_fs in server code
	 * jobs are in database now, no need to keep in filesystem
	 */
	unlink(jobfile_full);

	if (in_server)
		append_link(&svr_alljobs, &jobp->ji_alljobs, jobp);


	/* select attributes/resources to send based on move type */

	if (move_type == MOVE_TYPE_Exec) {
		resc_access_perm = ATR_DFLAG_MOM;
		encode_type = ATR_ENCODE_MOM;
		cntype = ToServerDIS;
	} else {
		resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR |
			ATR_DFLAG_MGWR | ATR_DFLAG_SvRD;
		encode_type = ATR_ENCODE_SVR;
		svr_dequejob(jobp);
	}

	CLEAR_HEAD(attrl);
	pattr = jobp->ji_wattr;
	for (i=0; i < (int)JOB_ATR_LAST; i++) {
		if ((job_attr_def+i)->at_flags & resc_access_perm) {
			(void)(job_attr_def+i)->at_encode(pattr+i, &attrl,
				(job_attr_def+i)->at_name, NULL,
				encode_type, NULL);
		}
	}
	attrl_fixlink(&attrl);

	/* script name is passed from parent */

	/* get host name */
	pbs_loadconf(0);
	addr.s_addr = htonl(hostaddr);
	hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET);
	if (hp == NULL) {
		sprintf(log_buffer, "%s: h_errno=%d",
			inet_ntoa(addr), h_errno);
		log_err(-1, __func__, log_buffer);
	}
	else {
		/* read any credential file */
		(void)get_credential(hp->h_name, jobp,  PBS_GC_BATREQ,
			&credbuf, &credlen);
	}
	/* save the job id for when after we purge the job */

	(void)strcpy(job_id, jobp->ji_qs.ji_jobid);

	con = -1;

	DIS_tcparray_init();

	for (i=0; i<RETRY; i++) {

		pbs_errno = 0;
		/* connect to receiving server with retries */

		if (i > 0) {	/* recycle after an error */
			if (con >= 0)
				svr_disconnect(con);
			if (should_retry_route(pbs_errno) == -1) {
				goto fatal_exit;	/* fatal error, don't retry */
			}
			sleep(1<<i);
		}
		if ((con = svr_connect(hostaddr, port, 0, cntype, prot)) ==
			PBS_NET_RC_FATAL) {
			(void)sprintf(log_buffer, "send_job failed to %lx port %d",
				hostaddr, port);
			log_err(pbs_errno, __func__, log_buffer);
			goto fatal_exit;
		} else if (con == PBS_NET_RC_RETRY) {
			pbs_errno = WSAECONNREFUSED;	/* should retry */
			continue;
		}
		/*
		 * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
		 * we are recovering after being down or a late failure, we
		 * just want to send the "read-to-commit/commit"
		 */


		if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) {

			if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) {
				jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;
			}

			pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;
			destin = jobp->ji_qs.ji_destin;

			if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL, prot, NULL)== 0) {
				if (pbs_errno == PBSE_JOBEXIST && move_type == MOVE_TYPE_Exec) {
					/* already running, mark it so */
					log_event(PBSEVENT_ERROR,
						PBS_EVENTCLASS_JOB, LOG_INFO,
						jobp->ji_qs.ji_jobid, "Mom reports job already running");
					goto ok_exit;

				} else if ((pbs_errno == PBSE_HOOKERROR) ||
					(pbs_errno == PBSE_HOOK_REJECT)  ||
					(pbs_errno == PBSE_HOOK_REJECT_RERUNJOB)  ||
					(pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) {
					char		name_buf[MAXPATHLEN+1];
					int		rfd;
					int		len;
					char		*reject_msg;
					int		err;

					err = pbs_errno;

					reject_msg = pbs_geterrmsg(con);
					(void)snprintf(log_buffer, sizeof(log_buffer),
						"send of job to %s failed error = %d reject_msg=%s",
						destin, err,
						reject_msg?reject_msg:"");
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						log_buffer);

					(void)strcpy(name_buf, path_hooks_workdir);
					(void)strcat(name_buf, jobp->ji_qs.ji_jobid);
					(void)strcat(name_buf, HOOK_REJECT_SUFFIX);

					if ((reject_msg != NULL) &&
						(reject_msg[0] != '\0')) {

						if ((rfd = open(name_buf,
							O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) {
							snprintf(log_buffer,
								sizeof(log_buffer),
								"open of reject file %s failed: errno %d",
								name_buf, errno);
							log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
						} else {
							secure_file(name_buf, "Administrators",
								READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
							setmode(rfd, O_BINARY);
							len = strlen(reject_msg)+1;
							/* write also trailing null char */
							if (write(rfd, reject_msg, len) != len) {
								snprintf(log_buffer,
									sizeof(log_buffer),
									"write to file %s incomplete: errno %d", name_buf, errno);
								log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
							}
							close(rfd);
						}
					}

					if (err == PBSE_HOOKERROR)
						exit(SEND_JOB_HOOKERR);
					if (err == PBSE_HOOK_REJECT)
						exit(SEND_JOB_HOOK_REJECT);
					if (err == PBSE_HOOK_REJECT_RERUNJOB)
						exit(SEND_JOB_HOOK_REJECT_RERUNJOB);
					if (err == PBSE_HOOK_REJECT_DELETEJOB)
						exit(SEND_JOB_HOOK_REJECT_DELETEJOB);
				} else {
					(void)sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno);
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
					continue;
				}
			}

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				if (PBSD_jscript(con, script_name, prot, NULL) != 0)
					continue;
			}

			if (credlen > 0) {
				int     ret;

				ret = PBSD_jcred(con,
					jobp->ji_extended.ji_ext.ji_credtype,
					credbuf, credlen, prot, NULL);
				if ((ret == 0) || (i == (RETRY - 1)))
					free(credbuf);	/* free credbuf if credbuf is sent successfully OR */
				/* at the end of all retry attempts */
				if (ret != 0)
					continue;
			}

			if ((move_type == MOVE_TYPE_Exec) &&
				(jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
				(hostaddr !=  pbs_server_addr)) {
				/* send files created on prior run */
				if ((move_job_file(con, jobp, StdOut, prot) != 0) ||
					(move_job_file(con, jobp, StdErr, prot) != 0) ||
					(move_job_file(con, jobp, Chkpt, prot) != 0))
					continue;
			}

			jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;
		}

		if (PBSD_rdytocmt(con, job_id, prot, NULL) != 0)
			continue;

		if (PBSD_commit(con, job_id, prot, NULL) != 0)
			goto fatal_exit;
		goto ok_exit;	/* This child process is all done */
	}
	if (con >= 0)
		svr_disconnect(con);
	/*
	 * If connection is actively refused by the execution node(or mother superior) OR
	 * the execution node(or mother superior) is rejecting request with error
	 * PBSE_BADHOST(failing to authorize server host), the node should be marked down.
	 */
	if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == WSAECONNREFUSED || pbs_errno == PBSE_BADHOST)) {
		i = SEND_JOB_NODEDW;
	} else if (should_retry_route(pbs_errno) == -1) {
		i = SEND_JOB_FATAL;
	} else {
		i = SEND_JOB_RETRY;
	}
	(void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno);
	log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE,
		jobp->ji_qs.ji_jobid, log_buffer);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(i);

fatal_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_FATAL);

ok_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_OK);
}
Example #16
0
static void
svrcached(attribute *pat, pbs_list_head *phead, attribute_def *pdef)
{
	svrattrl *working = NULL;
	svrattrl *wcopy;
	svrattrl *encoded;

	if (resc_access_perm & PRIV_READ)
		encoded = pat->at_priv_encoded;
	else
		encoded = pat->at_user_encoded;

	if (pat->at_flags & ATR_VFLAG_MODCACHE)
		/* free old cache value if the value has changed */
		free_svrcache(pat);

	if ((encoded == NULL) || (pat->at_flags & ATR_VFLAG_MODCACHE)) {
		if (pat->at_flags & ATR_VFLAG_SET) {
			/* encode and cache new svrattrl structure */
			(void)pdef->at_encode(pat, phead, pdef->at_name,
				(char *)0, ATR_ENCODE_CLIENT, &working);
			if (resc_access_perm & PRIV_READ)
				pat->at_priv_encoded = working;
			else
				pat->at_user_encoded = working;

			pat->at_flags &= ~ATR_VFLAG_MODCACHE;
			while (working) {
				working->al_refct++;	/* incr ref count */
				working = working->al_sister;
			}
		}
	} else {
		/* can use the existing cached svrattrl struture */

		working = encoded;
		if (working->al_refct < 2) {
			while (working) {
				CLEAR_LINK(working->al_link);
				append_link(phead, &working->al_link, working);
				working->al_refct++;	/* incr ref count */
				working = working->al_sister;
			}
		} else {
			/*
			 * already linked in, must make a copy to link
			 * NOTE: the copy points to the original's data
			 * so it should be freed by itself, hence the
			 * ref count is set to 1 and the sisters are not
			 * linked in
			 */
			while (working) {
				wcopy = malloc(sizeof(struct svrattrl));
				if (wcopy) {
					*wcopy = *working;
					working = working->al_sister;
					CLEAR_LINK(wcopy->al_link);
					append_link(phead, &wcopy->al_link, wcopy);
					wcopy->al_refct = 1;
					wcopy->al_sister = NULL;
				}
			}
		}
	}
}
Example #17
0
void req_quejob(

  struct batch_request *preq) /* ptr to the decoded request   */

  {
  char  *id = "req_quejob";

  char   basename[PBS_JOBBASE + 1];
  int    created_here = 0;
  int    index;
  char  *jid;
  attribute_def *pdef;
  job   *pj;
  svrattrl *psatl;
  int    rc;
  int    sock = preq->rq_conn;

  int    IsCheckpoint = 0;

  /* set basic (user) level access permission */

  resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_Creat;

  if (PBSNodeCheckProlog)
    {
    check_state(1);

    mom_server_all_update_stat();

    if (internal_state & INUSE_DOWN)
      {
      req_reject(PBSE_MOMREJECT,0,preq,NULL,NULL);

      return;
      }
    }

  if (preq->rq_fromsvr)
    {
    /* from another server - accept the extra attributes */

    resc_access_perm |= ATR_DFLAG_MGWR | ATR_DFLAG_SvWR | ATR_DFLAG_MOM;

    jid = preq->rq_ind.rq_queuejob.rq_jid;
    }
  else
    {
    /* request must be from server */

    log_err(errno, id, "request not from server");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, "request not received from server");

    return;
    }

  /* does job already exist, check both old and new jobs */

  if ((pj = find_job(jid)) == NULL)
    {
    pj = (job *)GET_NEXT(svr_newjobs);

    while (pj != NULL)
      {
      if (!strcmp(pj->ji_qs.ji_jobid, jid))
        break;

      pj = (job *)GET_NEXT(pj->ji_alljobs);
      }
    }

  /*
   * New job ...
   *
   * for MOM - rather than make up a hashname, we use the name sent
   * to us by the server as an attribute.
   */

  psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_queuejob.rq_attr);

  while (psatl != NULL)
    {
    if (!strcmp(psatl->al_name,ATTR_hashname))
      {
      strcpy(basename,psatl->al_value);

      break;
      }

    psatl = (svrattrl *)GET_NEXT(psatl->al_link);
    }

  if (pj != NULL)
    {
    /* newly queued job already exists */

    if (pj->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING)
      {
      /* FAILURE - job exists and is running */

      log_err(errno,id,"cannot queue new job, job exists and is running");

      req_reject(PBSE_JOBEXIST,0,preq,NULL,"job is running");

      return;
      }

    /* if checkpointed, then keep old and skip rest of process */

    if (pj->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE)
      {
      IsCheckpoint = 1;
      }  /* END if (pj->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) */
    else
      {
      /* unlink job from svr_alljobs since it will be placed on newjobs */

      delete_link(&pj->ji_alljobs);
      }
    }  /* END if (pj != NULL) */
  else
    {
    /* if not already here, allocate job struct */

    if ((pj = job_alloc()) == NULL)
      {
      /* FAILURE */

      req_reject(PBSE_SYSTEM, 0, preq, NULL, "cannot allocate new job structure");

      return;
      }
    }    /* END else (pj != NULL) */

  if (IsCheckpoint == 0)
    {
    strcpy(pj->ji_qs.ji_jobid,jid);

    strcpy(pj->ji_qs.ji_fileprefix,basename);

    pj->ji_modified       = 1;

    pj->ji_qs.ji_svrflags = created_here;

    pj->ji_qs.ji_un_type  = JOB_UNION_TYPE_NEW;
    }

  /* decode attributes from request into job structure */

  psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_queuejob.rq_attr);

  while (psatl != NULL)
    {
    if (IsCheckpoint == 1)
      {
      if (strcmp(psatl->al_name,ATTR_checkpoint_name) &&
          strcmp(psatl->al_name,ATTR_v))
        {
        psatl = (svrattrl *)GET_NEXT(psatl->al_link);

        continue;
        }
      }

    /* identify the attribute by name */

    index = find_attr(job_attr_def,psatl->al_name,JOB_ATR_LAST);

    if (index < 0)
      {
      /* FAILURE */

      /* didn`t recognize the name */

      job_purge(pj);   /* CRI - 12/20/2004 */

      reply_badattr(PBSE_NOATTR,1,psatl,preq);

      return;
      }

    pdef = &job_attr_def[index];

    /* Is attribute not writeable by manager or by a server? */

    if ((pdef->at_flags & resc_access_perm) == 0)
      {
      /* FAILURE */

      job_purge(pj);

      reply_badattr(PBSE_ATTRRO,1,psatl,preq);

      return;
      }

    /* decode attribute */

    if (!strcmp(psatl->al_name,ATTR_v))
      {
      rc = decode_arst_merge(
             &pj->ji_wattr[index],
             psatl->al_name,
             psatl->al_resc,
             psatl->al_value);
      }
    else
      {
      rc = pdef->at_decode(
             &pj->ji_wattr[index],
             psatl->al_name,
             psatl->al_resc,
             psatl->al_value);
      }

    if (rc != 0)
      {
      /* FAILURE */

      /* all errors are fatal for MOM */

      job_purge(pj);

      reply_badattr(rc,1,psatl,preq);

      return;
      }

    if (psatl->al_op == DFLT)
      {
      if (psatl->al_resc)
        {
        resource     *presc;
        resource_def *prdef;

        prdef = find_resc_def(svr_resc_def,psatl->al_resc,svr_resc_size);

        if (prdef == NULL)
          {
          job_purge(pj);

          reply_badattr(rc,1,psatl, preq);

          return;
          }

        presc = find_resc_entry(&pj->ji_wattr[index],prdef);

        if (presc != NULL)
          presc->rs_value.at_flags |= ATR_VFLAG_DEFLT;
        }
      else
        {
        pj->ji_wattr[index].at_flags |= ATR_VFLAG_DEFLT;
        }
      }    /* END if (psatl->al_op == DFLT) */

    psatl = (svrattrl *)GET_NEXT(psatl->al_link);
    }      /* END while (psatl != NULL) */

  if (IsCheckpoint == 1)
    {
    pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSIN;

    if (reply_jobid(preq,pj->ji_qs.ji_jobid,BATCH_REPLY_CHOICE_Queue) == 0)
      {
      delete_link(&pj->ji_alljobs);

      append_link(&svr_newjobs,&pj->ji_alljobs,pj);

      pj->ji_qs.ji_un_type = JOB_UNION_TYPE_NEW;
      pj->ji_qs.ji_un.ji_newt.ji_fromsock = sock;
      pj->ji_qs.ji_un.ji_newt.ji_fromaddr = get_connectaddr(sock);
      pj->ji_qs.ji_un.ji_newt.ji_scriptsz = 0;

      /* Per Eric R., req_mvjobfile was giving error in open_std_file, 
         showed up as fishy error message */

      if (pj->ji_grpcache != NULL)
        {
        free(pj->ji_grpcache);
        pj->ji_grpcache = NULL;
        }
      }
    else
      {
      close_conn(sock);
      }

    /* SUCCESS */

    return;
    }

  /* set remaining job structure elements */

  pj->ji_qs.ji_state =    JOB_STATE_TRANSIT;

  pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSIN;

  pj->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long = (long)time_now;

  pj->ji_wattr[(int)JOB_ATR_mtime].at_flags |= ATR_VFLAG_SET;

  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_NEW;

  pj->ji_qs.ji_un.ji_newt.ji_fromsock = sock;

  pj->ji_qs.ji_un.ji_newt.ji_fromaddr = get_connectaddr(sock);

  pj->ji_qs.ji_un.ji_newt.ji_scriptsz = 0;

  /* acknowledge the request with the job id */

  if (reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Queue) != 0)
    {
    /* reply failed, purge the job and close the connection */

    close_conn(sock);

    job_purge(pj);

    return;
    }

  /* link job into server's new jobs list request  */

  append_link(&svr_newjobs, &pj->ji_alljobs, pj);

  return;
  }  /* END req_quejob() */
Example #18
0
int send_job(

  job       *jobp,
  pbs_net_t  hostaddr, /* host address, host byte order */
  int        port, /* service port, host byte order */
  int        move_type, /* move, route, or execute */
  void (*post_func)(struct work_task *),     /* after move */
  void      *data)  /* ptr to optional batch_request to be put */
                    /* in the work task structure */

  {
  tlist_head  attrl;
  enum conn_type cntype = ToServerDIS;
  int    con;
  char  *destin = jobp->ji_qs.ji_destin;
  int    encode_type;
  int    i;
  int    NumRetries;

  char  *id = "send_job";

  attribute *pattr;

  pid_t  pid;

  struct attropl *pqjatr;      /* list (single) of attropl for quejob */
  char  *safail = "sigaction failed\n";
  char  *spfail = "sigprocmask failed\n";
  char   script_name[MAXPATHLEN + 1];
  sigset_t  child_set, all_set;

  struct  sigaction child_action;

  struct work_task *ptask;

  mbool_t        Timeout = FALSE;

  char          *pc;

  sigemptyset(&child_set);
  sigaddset(&child_set, SIGCHLD);
  sigfillset(&all_set);

  /* block SIGCHLD until work task is established */

  if (sigprocmask(SIG_BLOCK, &child_set, NULL) == -1)
    {
    log_err(errno,id,spfail);

    pbs_errno = PBSE_SYSTEM;

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      "cannot set signal mask");

    return(-1);
    }

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer,"about to send job - type=%d",
      move_type);
 
    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      "forking in send_job");
    }

  pid = fork();

  if (pid == -1)
    {
    /* error on fork */

    log_err(errno, id, "fork failed\n");

    if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
      log_err(errno, id, spfail);

    pbs_errno = PBSE_SYSTEM;

    return(-1);
    }

  if (pid != 0)
    {
    /* The parent (main server) */

    /* create task to monitor job startup */

    /* CRI:   need way to report to scheduler job is starting, not started */

    ptask = set_task(WORK_Deferred_Child, pid, post_func, jobp);

    if (ptask == NULL)
      {
      log_err(errno, id, msg_err_malloc);

      return(-1);
      }

    ptask->wt_parm2 = data;

    append_link(
      &((job *)jobp)->ji_svrtask,
      &ptask->wt_linkobj,
      ptask);

    /* now can unblock SIGCHLD */

    if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
      log_err(errno, id, spfail);

    if (LOGLEVEL >= 1)
      {
      extern long   DispatchTime[];
      extern job   *DispatchJob[];
      extern char  *DispatchNode[];

      extern time_t time_now;

      struct pbsnode *NP;

      /* record job dispatch time */

      int jindex;

      for (jindex = 0;jindex < 20;jindex++)
        {
        if (DispatchJob[jindex] == NULL)
          {
          DispatchTime[jindex] = time_now;

          DispatchJob[jindex] = jobp;

          if ((NP = PGetNodeFromAddr(hostaddr)) != NULL)
            DispatchNode[jindex] = NP->nd_name;
          else
            DispatchNode[jindex] = NULL;

          break;
          }
        }
      }

    /* SUCCESS */

    return(2);
    }  /* END if (pid != 0) */

  /*
   * the child process
   *
   * set up signal catcher for error return
   */

  rpp_terminate();

  child_action.sa_handler = net_move_die;

  sigfillset(&child_action.sa_mask);

  child_action.sa_flags = 0;

  if (sigaction(SIGHUP, &child_action, NULL))
    log_err(errno, id, safail);

  if (sigaction(SIGINT, &child_action, NULL))
    log_err(errno, id, safail);

  if (sigaction(SIGQUIT, &child_action, NULL))
    log_err(errno, id, safail);

  /* signal handling is set, now unblock */

  if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1)
    log_err(errno, id, spfail);

  /* encode job attributes to be moved */

  CLEAR_HEAD(attrl);

  /* select attributes/resources to send based on move type */

  if (move_type == MOVE_TYPE_Exec)
    {
    /* moving job to MOM - ie job start */

    resc_access_perm = ATR_DFLAG_MOM;
    encode_type = ATR_ENCODE_MOM;
    cntype = ToServerDIS;
    }
  else
    {
    /* moving job to alternate server? */

    resc_access_perm =
      ATR_DFLAG_USWR |
      ATR_DFLAG_OPWR |
      ATR_DFLAG_MGWR |
      ATR_DFLAG_SvRD;

    encode_type = ATR_ENCODE_SVR;

    /* clear default resource settings */

    svr_dequejob(jobp);
    }

  pattr = jobp->ji_wattr;

  for (i = 0;i < JOB_ATR_LAST;i++)
    {
    if (((job_attr_def + i)->at_flags & resc_access_perm) ||
      ((strncmp((job_attr_def + i)->at_name,"session_id",10) == 0) &&
      (jobp->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET)))
      {
      (job_attr_def + i)->at_encode(
        pattr + i,
        &attrl,
        (job_attr_def + i)->at_name,
        NULL,
        encode_type);
      }
    }    /* END for (i) */

  attrl_fixlink(&attrl);

  /* put together the job script file name */

  strcpy(script_name, path_jobs);

  if (jobp->ji_wattr[JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET)
    {
    strcat(script_name, jobp->ji_arraystruct->ai_qs.fileprefix);
    }
  else
    {
    strcat(script_name, jobp->ji_qs.ji_fileprefix);
    }

  strcat(script_name, JOB_SCRIPT_SUFFIX);

 
  pbs_errno = 0;
  con = -1;

  for (NumRetries = 0;NumRetries < RETRY;NumRetries++)
    {
    int rc;

    /* connect to receiving server with retries */

    if (NumRetries > 0)
      {
      /* recycle after an error */

      if (con >= 0)
        svr_disconnect(con);

      /* check pbs_errno from previous attempt */

      if (should_retry_route(pbs_errno) == -1)
        {
        sprintf(log_buffer, "child failed in previous commit request for job %s",
                jobp->ji_qs.ji_jobid);

        log_err(pbs_errno, id, log_buffer);

        exit(1); /* fatal error, don't retry */
        }

      sleep(1 << NumRetries);
      }

    /* NOTE:  on node hangs, svr_connect is successful */

    if ((con = svr_connect(hostaddr, port, 0, cntype)) == PBS_NET_RC_FATAL)
      {
      sprintf(log_buffer, "send_job failed to %lx port %d",
        hostaddr,
        port);

      log_err(pbs_errno, id, log_buffer);

      exit(1);
      }

    if (con == PBS_NET_RC_RETRY)
      {
      pbs_errno = 0; /* should retry */

      continue;
      }

    /*
     * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
     * we are recovering after being down or a late failure, we
     * just want to send the "ready-to-commit/commit"
     */

    if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM)
      {
      if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT)
        {
        jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;

        job_save(jobp, SAVEJOB_QUICK, 0);
        }

      pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;

      if ((pc = PBSD_queuejob(
                  con,
                  jobp->ji_qs.ji_jobid,
                  destin,
                  pqjatr,
                  NULL)) == NULL)
        {
        if ((pbs_errno == PBSE_EXPIRED) || (pbs_errno == PBSE_READ_REPLY_TIMEOUT))
          {
          /* queue job timeout based on pbs_tcp_timeout */

          Timeout = TRUE;
          }

        if ((pbs_errno == PBSE_JOBEXIST) && (move_type == MOVE_TYPE_Exec))
          {
          /* already running, mark it so */

          log_event(
            PBSEVENT_ERROR,
            PBS_EVENTCLASS_JOB,
            jobp->ji_qs.ji_jobid,
            "MOM reports job already running");

          exit(0);
          }

        sprintf(log_buffer, "send of job to %s failed error = %d",
          destin,
          pbs_errno);

        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          jobp->ji_qs.ji_jobid,
          log_buffer);

        continue;
        }  /* END if ((pc = PBSD_queuejob() == NULL) */

      free(pc);

      if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT)
        {
        if (PBSD_jscript(con, script_name, jobp->ji_qs.ji_jobid) != 0)
          continue;
        }

      /* XXX may need to change the logic below, if we are sending the job to
         a mom on the same host and the mom and server are not sharing the same
         spool directory, then we still need to move the file */

      if ((move_type == MOVE_TYPE_Exec) &&
          (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
          (hostaddr != pbs_server_addr))
        {
        /* send files created on prior run */

        if ((move_job_file(con,jobp,StdOut) != 0) ||
            (move_job_file(con,jobp,StdErr) != 0) ||
            (move_job_file(con,jobp,Checkpoint) != 0))
          {
          continue;
          }
        }

      /* ignore signals */

      if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");

      jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;

      job_save(jobp, SAVEJOB_QUICK, 0);
      }
    else
      {
      /* ignore signals */

      if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");
      }

    if (PBSD_rdytocmt(con, jobp->ji_qs.ji_jobid) != 0)
      {
      if (sigprocmask(SIG_UNBLOCK, &all_set, NULL) == -1)
        log_err(errno, id, "sigprocmask\n");

      continue;
      }

   
    if ((rc = PBSD_commit(con, jobp->ji_qs.ji_jobid)) != 0)
      {
      int errno2;

      /* NOTE:  errno is modified by log_err */

      errno2 = errno;

      sprintf(log_buffer, "send_job commit failed, rc=%d (%s)",
              rc,
              (connection[con].ch_errtxt != NULL) ? connection[con].ch_errtxt : "N/A");

      log_ext(errno2, id, log_buffer, LOG_WARNING);

      /* if failure occurs, pbs_mom should purge job and pbs_server should set *
         job state to idle w/error msg */

      if (errno2 == EINPROGRESS)
        {
        /* request is still being processed */

        /* increase tcp_timeout in qmgr? */

        Timeout = TRUE;

        /* do we need a continue here? */

        sprintf(log_buffer, "child commit request timed-out for job %s, increase tcp_timeout?",
                jobp->ji_qs.ji_jobid);

        log_ext(errno2, id, log_buffer, LOG_WARNING);

        /* don't retry on timeout--break out and report error! */

        break;
        }
      else
        {
        sprintf(log_buffer, "child failed in commit request for job %s",
                jobp->ji_qs.ji_jobid);

        log_ext(errno2, id, log_buffer, LOG_CRIT);

        /* FAILURE */

        exit(1);
        }
      }    /* END if ((rc = PBSD_commit(con,jobp->ji_qs.ji_jobid)) != 0) */

    svr_disconnect(con);

    /* child process is done */

    /* SUCCESS */

    exit(0);
    }  /* END for (NumRetries) */

  if (con >= 0)
    svr_disconnect(con);

  if (Timeout == TRUE)
    {
    /* 10 indicates that job migrate timed out, server will mark node down *
          and abort the job - see post_sendmom() */

    sprintf(log_buffer, "child timed-out attempting to start job %s",
            jobp->ji_qs.ji_jobid);

    log_ext(pbs_errno, id, log_buffer, LOG_WARNING);

    exit(10);
    }

  if (should_retry_route(pbs_errno) == -1)
    {
    sprintf(log_buffer, "child failed and will not retry job %s",
      jobp->ji_qs.ji_jobid);

    log_err(pbs_errno, id, log_buffer);

    exit(1);
    }

  exit(2);

  /*NOTREACHED*/

  return(0);
  }  /* END send_job() */
Example #19
0
void
req_register(struct batch_request *preq)
{
	int		   made;
	attribute	  *pattr;
	struct depend	  *pdep;
	struct depend_job *pdj;
	job		  *pjob;
	char		  *ps;
	struct work_task  *ptask;
	int		   rc = 0;
	int		   revtype;
	int		   type;
	int		   savetype = SAVEJOB_FULL;

	/*  make sure request is from a server */

	if (!preq->rq_fromsvr) {
#ifdef NAS /* localmod 109 */
		sprintf(log_buffer, "Dependency request not from server");
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			preq->rq_ind.rq_register.rq_parent, log_buffer);
#endif /* localmod 109 */
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* find the "parent" job specified in the request */

	if ((pjob = find_job(preq->rq_ind.rq_register.rq_parent)) == NULL) {

		/*
		 * job not found... if server is initializing, it may not
		 * yet recovered, that is not an error.
		 */

		if (server.sv_attr[(int)SRV_ATR_State].at_val.at_long != SV_STATE_INIT) {
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				preq->rq_ind.rq_register.rq_parent,
				msg_unkjobid);
			req_reject(PBSE_UNKJOBID, 0, preq);
		} else {
			reply_ack(preq);
		}
		return;
	}

	pattr = &pjob->ji_wattr[(int)JOB_ATR_depend];
	type = preq->rq_ind.rq_register.rq_dependtype;
	pjob->ji_modified = 1;

	/* more of the server:port fix kludge */

	ps = strchr(preq->rq_ind.rq_register.rq_child, (int)'@');
	if (ps != NULL) {
		(void)strcpy(preq->rq_ind.rq_register.rq_svr, ps+1);
		*ps = '\0';
	} else {
		(void)strcpy(preq->rq_ind.rq_register.rq_svr, preq->rq_host);
	}

	if (pjob->ji_qs.ji_state == JOB_STATE_MOVED) {
		snprintf(log_buffer, sizeof(log_buffer), "Parent %s%s", msg_movejob, pjob->ji_qs.ji_destin);
		log_event(PBSEVENT_DEBUG|PBSEVENT_SYSTEM|PBSEVENT_ERROR,
			PBS_EVENTCLASS_REQUEST, LOG_INFO,
			preq->rq_ind.rq_register.rq_child, log_buffer);
		req_reject(PBSE_JOB_MOVED, 0, preq);
		return;
	}
	switch (preq->rq_ind.rq_register.rq_op) {

			/*
			 * Register a dependency
			 */

		case JOB_DEPEND_OP_REGISTER:
			switch (type) {

				case JOB_DEPEND_TYPE_AFTERSTART:
					if (pjob->ji_qs.ji_substate >= JOB_SUBSTATE_RUNNING) {
						/* job already running, setup task to send	*/
						/* release back to child and continue with	*/
						/* registration process 			*/
						ptask = set_task(WORK_Immed, 0, post_run_depend,
							(void *)pjob);
						if (ptask)
							append_link(&pjob->ji_svrtask,
								&ptask->wt_linkobj, ptask);
					}
					/* fall through to complete registration */
				case JOB_DEPEND_TYPE_AFTERANY:
				case JOB_DEPEND_TYPE_AFTEROK:
				case JOB_DEPEND_TYPE_AFTERNOTOK:
					rc = register_dep(pattr, preq, type, &made);
					break;

				case JOB_DEPEND_TYPE_BEFORESTART:
				case JOB_DEPEND_TYPE_BEFOREANY:
				case JOB_DEPEND_TYPE_BEFOREOK:
				case JOB_DEPEND_TYPE_BEFORENOTOK:

					/*
					 * Check job owner for permission, use the real
					 * job owner, not the sending server's name.
					 */

					(void)strcpy(preq->rq_user,
						preq->rq_ind.rq_register.rq_owner);
					if (svr_chk_owner(preq, pjob)) {
						rc = PBSE_PERM;		/* not same user */
					} else {
						/* ok owner, see if job has "on" */
						pdep = find_depend(JOB_DEPEND_TYPE_ON, pattr);
						if (pdep == 0) {
							/* on "on", see if child already registered */
							revtype = type ^ (JOB_DEPEND_TYPE_BEFORESTART -
								JOB_DEPEND_TYPE_AFTERSTART);
							pdep = find_depend(revtype, pattr);
							if (pdep == 0) {
								/* no "on" and no prior - return error */
								rc = PBSE_BADDEPEND;
							} else {
								pdj = find_dependjob(pdep,
									preq->rq_ind.rq_register.rq_child);
								if (pdj) {
									/* has prior register, update it */
									(void)strcpy(pdj->dc_svr,
										preq->rq_ind.rq_register.rq_svr);
								}
							}
						} else if ((rc=register_dep(pattr, preq, type, &made)) == 0) {
							if (made) {	/* first time registered */
								if (--pdep->dp_numexp <= 0)
									del_depend(pdep);
							}
						}
					}
					break;

				default:
#ifdef NAS /* localmod 109 */
					sprintf(log_buffer, "Unknown dep. op: %d", preq->rq_ind.rq_register.rq_op);
					log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
						preq->rq_ind.rq_register.rq_parent, log_buffer);
#endif /* localmod 109 */
					rc = PBSE_IVALREQ;
					break;
			}
			break;

			/*
			 * Release a dependency so job might run
			 */

		case JOB_DEPEND_OP_RELEASE:
			switch (type) {

				case JOB_DEPEND_TYPE_BEFORESTART:
				case JOB_DEPEND_TYPE_BEFOREANY:
				case JOB_DEPEND_TYPE_BEFOREOK:
				case JOB_DEPEND_TYPE_BEFORENOTOK:

					/* predecessor sent release-reduce "on", */
					/* see if this job can now run 		 */

					type ^= (JOB_DEPEND_TYPE_BEFORESTART -
						JOB_DEPEND_TYPE_AFTERSTART);
					if ((pdep = find_depend(type, pattr)) != NULL) {
						pdj = find_dependjob(pdep,
							preq->rq_ind.rq_register.rq_child);
						if (pdj) {
							del_depend_job(pdj);
							pattr->at_flags |= ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
							savetype = SAVEJOB_FULLFORCE;
							(void)sprintf(log_buffer, msg_registerrel,
								preq->rq_ind.rq_register.rq_child);
							log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
								LOG_INFO,
								pjob->ji_qs.ji_jobid, log_buffer);

							if (GET_NEXT(pdep->dp_jobs) == 0) {
								/* no more dependencies of this type */
								del_depend(pdep);
								set_depend_hold(pjob, pattr);
							}
							break;
						}
#ifdef NAS /* localmod 109 */
						sprintf(log_buffer, "Dep.rls. job not found: %d/%s", type, preq->rq_ind.rq_register.rq_child);
					} else {
						sprintf(log_buffer, "Dep.rls. type not found: %d", type);
#endif /* localmod 109 */
					}
#ifdef NAS /* localmod 109 */
					log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
						preq->rq_ind.rq_register.rq_parent, log_buffer);
#endif /* localmod 109 */
					rc = PBSE_IVALREQ;
					break;

			}

			break;

		case JOB_DEPEND_OP_READY:
			rc = PBSE_NOSYNCMSTR;
			break;

		case JOB_DEPEND_OP_DELETE:
			(void)sprintf(log_buffer, msg_registerdel,
				preq->rq_ind.rq_register.rq_child);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				pjob->ji_qs.ji_jobid, log_buffer);
			job_abt(pjob, log_buffer);
			break;

		case JOB_DEPEND_OP_UNREG:
			unregister_dep(pattr, preq);
			set_depend_hold(pjob, pattr);
			break;


		default:
			sprintf(log_buffer, msg_illregister,
				preq->rq_ind.rq_register.rq_parent);
			log_event(PBSEVENT_DEBUG|PBSEVENT_SYSTEM|PBSEVENT_ERROR,
				PBS_EVENTCLASS_REQUEST, LOG_INFO,
				preq->rq_host, log_buffer);
			rc = PBSE_IVALREQ;
			break;;
	}

	if (rc) {
		pjob->ji_modified = 0;
		req_reject(rc, 0, preq);
	} else {
		/* If this is an array job, forcibly save it to ensure
		 * dependencies are recorded.
		 */
		if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob)
			savetype = SAVEJOB_FULLFORCE;
		if (pjob->ji_modified)
			(void)job_save(pjob, savetype);
		reply_ack(preq);
	}
	return;
}
Example #20
0
int reply_send(

  struct batch_request *request)  /* I (freed) */

  {
  int      rc = 0;
  int      sfds = request->rq_conn;  /* socket */

#ifndef PBS_MOM
  static char    *id = "reply_send";

  struct work_task *ptask;
#endif /* PBS_MOM */

  /* determine where the reply should go, remote or local */

  if (sfds == PBS_LOCAL_CONNECTION)
    {

#ifndef PBS_MOM

    /*
     * reply stays local, find work task and move it to
     * the immediate list for dispatching.
     */

    ptask = (struct work_task *)GET_NEXT(task_list_event);

    while (ptask != NULL)
      {
      if ((ptask->wt_type == WORK_Deferred_Local) &&
          (ptask->wt_parm1 == (void *)request))
        {
        delete_link(&ptask->wt_linkall);

        append_link(&task_list_immed, &ptask->wt_linkall, ptask);

        return(0);
        }

      ptask = (struct work_task *)GET_NEXT(ptask->wt_linkall);
      }

    /* should have found a task and didn't */

    log_err(-1, id, "did not find work task for local request");

#endif /* PBS_MOM */

    rc = PBSE_SYSTEM;
    }
  else if (sfds >= 0)
    {
    /* Otherwise, the reply is to be sent to a remote client */

#ifndef PBS_MOM
    if (request->rq_noreply != TRUE)
      {
#endif
      rc = dis_reply_write(sfds, &request->rq_reply);
#ifndef PBS_MOM
      if (LOGLEVEL >= 7)
        {
        sprintf(log_buffer, "Reply sent for request type %s on socket %d",
          reqtype_to_txt(request->rq_type),
          sfds);

        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }
#ifndef PBS_MOM
      }

#endif
#endif
    }

#ifndef PBS_MOM
  if ((request->rq_type != PBS_BATCH_AsyModifyJob) || (request->rq_noreply == TRUE))
    {
#endif
    free_br(request);
#ifndef PBS_MOM
    }

#endif

  return(rc);
  }  /* END reply_send() */
int encode_jobs(

  pbs_attribute  *pattr, /*struct pbs_attribute being encoded  */
  tlist_head     *ph,    /*head of a  list of "svrattrl" structs 
                           which are to be returned*/
  char           *aname, /*pbs_attribute's name    */
  char           *rname, /*resource's name (null if none)  */
  int             mode,  /*mode code, unused here   */
  int             perm)  /* only used for resources */

  {
  svrattrl       *pal;

  struct jobinfo *jip;
  struct pbsnode *pnode;
  struct pbssubn *psubn;
  int             i;
  int             jobcnt;  /*number of jobs using the node     */
  int             strsize; /*computed string size      */
  char           *job_str; /*holds comma separated list of jobs*/
  int             job_str_len = 0;

  if (pattr == NULL)
    {
    return (-1);
    }

  if (!(pattr->at_flags & ATR_VFLAG_SET) || !pattr->at_val.at_jinfo)
    {
    return(0);                  /* nothing to report back */
    }

  /* cnt number of jobs and estimate size of string buffer required */

  jobcnt = 0;

  strsize = 1;        /*allow for terminating null char*/

  pnode = pattr->at_val.at_jinfo;

  for (psubn = pnode->nd_psn;psubn != NULL;psubn = psubn->next)
    {
    for (jip = psubn->jobs;jip != NULL;jip = jip->next)
      {
      jobcnt++;

      strsize += strlen(jip->jobid) + PCONST_ENCOVERHEAD;
      }
    }    /* END for (psubn) */

  if (jobcnt == 0)
    {
    /* no jobs currently on this node */

    return(0);
    }

  if ((job_str = (char *)calloc(1, strsize+1)) == NULL)
    {
    return -(PBSE_SYSTEM);
    }

  i = 0;

  for (psubn = pnode->nd_psn;psubn != NULL;psubn = psubn->next)
    {
    for (jip = psubn->jobs;jip != NULL;jip = jip->next)
      {
      if (i != 0)
        strcat(job_str, ", ");
      else
        i++;

      sprintf(job_str + strlen(job_str), "%d/%s",
        psubn->index, jip->jobid);
      }
    }    /* END for (psubn) */
  job_str_len = strlen(job_str);

  pal = attrlist_create(aname, rname, job_str_len+1);

  if (pal == NULL)
    {
    free(job_str);

    return -(PBSE_SYSTEM);
    }

  strncpy(pal->al_value, job_str, job_str_len);

  pal->al_flags = ATR_VFLAG_SET;

  free(job_str);

  append_link(ph, &pal->al_link, pal);

  return(0);                  /* success */
  }  /* END encode_jobs() */
int encode_time(

  pbs_attribute  *attr,   /* ptr to pbs_attribute (value in attr->at_val.at_long) */
  tlist_head     *phead,  /* head of attrlist list (optional) */
  const char    *atname, /* pbs_attribute name */
  const char    *rsname, /* resource name (optional) */
  int             mode,   /* encode mode (not used) */
  int             perm)  /* only used for resources */

  {
  size_t  ct;
  char   cvnbuf[ENCODE_TIME_SIZE];
  int    hr;
  int   min;
  long   n;
  svrattrl *pal;
  int   sec;
  char  *pv;

  if ((attr == NULL)||(phead == NULL))
    {
    /* FAILURE */

    return(-1);
    }

  if (!(attr->at_flags & ATR_VFLAG_SET))
    {
    return(0);
    }

  n   = attr->at_val.at_long;

  hr  = n / 3600;
  n   = n % 3600;
  min = n / 60;
  n   = n % 60;
  sec = n;


  sprintf(cvnbuf, "%02d:%02d:%02d",
          hr, min, sec);

  pv = cvnbuf;

  pv += strlen(pv);

  ct = strlen(cvnbuf);

  pal = attrlist_create(atname, rsname, ct+1);

  if (pal == NULL)
    {
    /* FAILURE */

    return(-1);
    }

  memcpy(pal->al_value, cvnbuf, ct);

  pal->al_flags = attr->at_flags;

  append_link(phead, &pal->al_link, pal);
  /* SUCCESS */

  return(1);
  }
Example #23
0
int status_node(

  struct pbsnode       *pnode,    /* ptr to node receiving status query */
  struct batch_request *preq,
  int                  *bad,      /* O */
  tlist_head           *pstathd)  /* head of list to append status to  */

  {
  int                rc = 0;

  struct brp_status *pstat;
  svrattrl          *pal;

  if ((preq->rq_perm & ATR_DFLAG_RDACC) == 0)
    {
    return(PBSE_PERM);
    }

  /* allocate status sub-structure and fill in header portion */

  pstat = (struct brp_status *)calloc(1, sizeof(struct brp_status));

  if (pstat == NULL)
    {
    return(PBSE_SYSTEM);
    }

  pstat->brp_objtype = MGR_OBJ_NODE;

  strncpy(pstat->brp_objname, pnode->nd_name, sizeof(pstat->brp_objname)-1);

  CLEAR_LINK(pstat->brp_stlink);
  CLEAR_HEAD(pstat->brp_attr);

  /*add this new brp_status structure to the list hanging off*/
  /*the request's reply substructure                         */

  append_link(pstathd, &pstat->brp_stlink, pstat);

  /*point to the list of node-attributes about which we want status*/
  /*hang that status information from the brp_attr field for this  */
  /*brp_status structure                                           */

  *bad = 0;                                    /*global variable*/

  if (preq->rq_ind.rq_status.rq_attr.ll_struct != NULL)
    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  else
    pal = NULL;

  rc = status_nodeattrib(
         pal,
         node_attr_def,
         pnode,
         ND_ATR_LAST,
         preq->rq_perm,
         &pstat->brp_attr,
         bad);

  return(rc);
  }  /* END status_node() */
Example #24
0
/* array_recov reads in  an array struct saved to disk and inserts it into
   the servers list of arrays */
int array_recov(

  char *path, 
  job_array **new_pa)

  {
  extern tlist_head svr_jobarrays;
  job_array *pa;
  array_request_node *rn;
  int fd;
  int old_version;
  int num_tokens;
  int i;
  int len;
  int rc;

  old_version = ARRAY_QS_STRUCT_VERSION;

  /* allocate the storage for the struct */
  pa = (job_array*)calloc(1,sizeof(job_array));

  if (pa == NULL)
  {
    return PBSE_SYSTEM;
  }

  /* initialize the linked list nodes */
  CLEAR_LINK(pa->all_arrays);

  CLEAR_HEAD(pa->request_tokens);

  fd = open(path, O_RDONLY, 0);

  if ( array_259_upgrade )
    {
    rc = read_and_convert_259_array(fd, pa, path);
    if(rc != PBSE_NONE)
      {
      free(pa);
      close(fd);
      return rc;
      }
    }
  else
    {

    /* read the file into the struct previously allocated.
     */

    len = read(fd, &(pa->ai_qs), sizeof(pa->ai_qs));
    if ((len < 0) || ((len < (int)sizeof(pa->ai_qs)) && (pa->ai_qs.struct_version == ARRAY_QS_STRUCT_VERSION)))
      {
      sprintf(log_buffer, "error reading %s", path);
      log_err(errno, "array_recov", log_buffer);
      free(pa);
      close(fd);
      return PBSE_SYSTEM;
      }

    if (pa->ai_qs.struct_version != ARRAY_QS_STRUCT_VERSION)
      {
      rc = array_upgrade(pa, fd, pa->ai_qs.struct_version, &old_version);
      if(rc)
        {
        sprintf(log_buffer, "Cannot upgrade array version %d to %d", pa->ai_qs.struct_version, ARRAY_QS_STRUCT_VERSION);
        log_err(errno, "array_recov", log_buffer);
        free(pa);
        close(fd);
        return rc;
        }
      }
    }

  pa->jobs = malloc(sizeof(job *) * pa->ai_qs.array_size);
  memset(pa->jobs,0,sizeof(job *) * pa->ai_qs.array_size);

  /* check to see if there is any additional info saved in the array file */
  /* check if there are any array request tokens that haven't been fully
     processed */

  if (old_version > 1)
  {
    if (read(fd, &num_tokens, sizeof(int)) != sizeof(int))
    {
      sprintf(log_buffer, "error reading token count from %s", path);
      log_err(errno, "array_recov", log_buffer);

      free(pa);
      close(fd);
      return PBSE_SYSTEM;
    }

    for (i = 0; i < num_tokens; i++)
    {
      rn = (array_request_node*)malloc(sizeof(array_request_node));

      if (read(fd, rn, sizeof(array_request_node)) != sizeof(array_request_node))
      {

        sprintf(log_buffer, "error reading array_request_node from %s", path);
        log_err(errno, "array_recov", log_buffer);

        free(rn);

        for (rn = (array_request_node*)GET_NEXT(pa->request_tokens);
            rn != NULL;
            rn = (array_request_node*)GET_NEXT(pa->request_tokens))
        {
          delete_link(&rn->request_tokens_link);
          free(rn);
        }

        free(pa);

        close(fd);
        return PBSE_SYSTEM;
      }

      CLEAR_LINK(rn->request_tokens_link);

      append_link(&pa->request_tokens, &rn->request_tokens_link, (void*)rn);

    }

  }

  close(fd);

  CLEAR_HEAD(pa->ai_qs.deps);

  if (old_version != ARRAY_QS_STRUCT_VERSION)
  {
    /* resave the array struct if the version on disk is older than the current */
    array_save(pa);
  }

  /* link the struct into the servers list of job arrays */
  append_link(&svr_jobarrays, &pa->all_arrays, (void*)pa);

  *new_pa = pa;

  return PBSE_NONE;

  }
Example #25
0
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  svrattrl              *pal;
  job                   *pjob = NULL;

  struct batch_request  *preq;
  struct batch_reply    *preply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type;
  pbs_queue             *pque = NULL;
  int                    exec_only = 0;

  int                    bad = 0;
  long                   DTime;  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  static svrattrl       *dpal = NULL;
  int                    job_array_index = 0;
  job_array             *pa = NULL;
  char                   log_buf[LOCAL_LOG_BUF_SIZE];
  int                    iter;
  time_t                 time_now = time(NULL);
  long                   poll_jobs = 0;
  char                   job_id[PBS_MAXSVRJOBID+1];
  int                    job_substate = -1;
  time_t                 job_momstattime = -1;

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  if (dpal == NULL)
    {
    /* build 'delta' pbs_attribute list */

    svrattrl *tpal;

    tlist_head dalist;

    int aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    pa = get_array(preq->rq_ind.rq_status.rq_id);

    if (pa == NULL)
      {
      req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
      return;
      }
    }

  iter = -1;

  get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);
  if (!poll_jobs)
    {
    /* polljobs not set - indicates we may need to obtain fresh data from
       MOM */

    if (cntl->sc_jobid[0] == '\0')
      pjob = NULL;
    else
      pjob = svr_find_job(cntl->sc_jobid, FALSE);

    while (1)
      {
      if (pjob == NULL)
        {
        /* start from the first job */

        if (type == tjstJob)
          {
          pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);
          }
        else if (type == tjstQueue)
          {
          pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
          }
        else if (type == tjstArray)
          {
          job_array_index = 0;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
                break;
                }
              }

            job_array_index++;
            }
          }
        else
          {
          pjob = next_job(&alljobs,&iter);
          }

        }    /* END if (pjob == NULL) */
      else
        {
        strcpy(job_id, pjob->ji_qs.ji_jobid);
        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);

        if (type == tjstJob)
          break;

        if (type == tjstQueue)
          pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
        else if (type == tjstArray)
          {
          pjob = NULL;
          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (++job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
                break;
                }
              }
            }
          }
        else
          pjob = next_job(&alljobs,&iter);
          
        }

      if (pjob == NULL)
        break;

      strcpy(job_id, pjob->ji_qs.ji_jobid);
      job_substate = pjob->ji_qs.ji_substate;
      job_momstattime = pjob->ji_momstat;
      strcpy(cntl->sc_jobid, job_id);
      unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL);
      pjob = NULL;

      /* PBS_RESTAT_JOB defaults to 30 seconds */
      if ((job_substate == JOB_SUBSTATE_RUNNING) &&
          ((time_now - job_momstattime) > JobStatRate))
        {
        /* go to MOM for status */
        if ((rc = stat_to_mom(job_id, cntl)) == PBSE_MEM_MALLOC)
          break;

        if (rc != 0)
          {
          pjob = svr_find_job(job_id, FALSE);

          rc = 0;

          continue;
          }
        
        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        return; /* will pick up after mom replies */
        }
      }    /* END while(1) */

    if (rc != 0)
      {
      if (pa != NULL)
        unlock_ai_mutex(pa, __func__, "2", LOGLEVEL);

      reply_free(preply);

      req_reject(rc, 0, preq, NULL, "cannot get update from mom");

      return;
      }
    }    /* END if (!server.sv_attr[SRV_ATR_PollJobs].at_val.at_long) */

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if ((type == tjstSummarizeArraysQueue) || 
      (type == tjstSummarizeArraysServer))
    {
    /* No array can be owned for these options */
    update_array_statuses();
    }

  if (type == tjstJob)
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

  else if (type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs,&iter);

  else if (type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter);

  else if (type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary,&iter);

  else if (type == tjstArray)
    {
    job_array_index = -1;
    pjob = NULL;
    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs,&iter);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT:  { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      ptr += strlen("delta:");

      DTime = strtol(ptr, NULL, 10);
      }
    }


  if ((type == tjstTruncatedServer) || 
      (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;
    int  iter = -1;

    /* loop through all queues */
    while ((pque = next_queue(&svr_queues,&iter)) != NULL)
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */
        unlock_queue(pque, __func__, "ignore queue", LOGLEVEL);
        continue;
        }

      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf,"giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */
      if (pjob != NULL)
        unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);

      iter = -1;

      while ((pjob = next_job(pque->qu_jobs,&iter)) != NULL)
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */
          unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);

          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          if (pa != NULL)
            {
            unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
            }
          unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL);
          unlock_queue(pque, __func__, "perm", LOGLEVEL);
          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);
        }    /* END foreach (pjob from pque) */

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf,"sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
        }
    
      unlock_queue(pque, __func__, "end while", LOGLEVEL);
      }      /* END for (pque) */
      
    if (pa != NULL)
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    reply_send_svr(preq);

    return;
    }        /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      if (cntl->sc_pque != NULL)
        {
        if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
          goto nextjob;
        }
      else
        {
        if (pa != NULL)
          pthread_mutex_unlock(pa->ai_mutex);
        pque = get_jobs_queue(&pjob);
        if (pa != NULL)
          pthread_mutex_lock(pa->ai_mutex);

        if ((pjob == NULL) ||
            (pque == NULL))
          goto nextjob;
        
        if (pque->qu_qs.qu_type != QTYPE_Execution)
          {
          unlock_queue(pque, __func__, "not exec", LOGLEVEL);
        
          goto nextjob;
          }

        unlock_queue(pque, __func__, "exec", LOGLEVEL);
        }
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && 
        (rc != PBSE_PERM))
      {
      if (pa != NULL)
        {
        unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
        }
      unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL);

      req_reject(rc, bad, preq, NULL, NULL);

      return;
      }

    /* get next job */

nextjob:

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL);

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs,&iter);
    else if (type == tjstSummarizeArraysQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter);
    else if (type == tjstSummarizeArraysServer)
      pjob = next_job(&array_summary,&iter);
    else if (type == tjstArray)
      {
      pjob = NULL;
      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size)
        {
        if (pa->job_ids[job_array_index] != NULL)
          {
          if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
            {
            break;
            }
          }
        }
      }
    else
      pjob = next_job(&alljobs,&iter);

    rc = 0;
    }  /* END while (pjob != NULL) */

  if (pa != NULL)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }
 
  reply_send_svr(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Example #26
0
int setup_array_struct(job *pjob)
  {
  job_array *pa;

  /* struct work_task *wt; */
  array_request_node *rn;
  int bad_token_count;
  int array_size;
  int rc;

  /* setup a link to this job array in the servers all_arrays list */
  pa = (job_array *)calloc(1,sizeof(job_array));

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
  
  pa->template_job = pjob;

  /*pa->ai_qs.array_size  = pjob->ji_wattr[(int)JOB_ATR_job_array_size].at_val.at_long;*/

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);
  strncpy(pa->ai_qs.owner, pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str, PBS_MAXUSER + PBS_MAXSERVERNAME + 2);
  strncpy(pa->ai_qs.submit_host, get_variable(pjob, pbs_o_host), PBS_MAXSERVERNAME);

  pa->ai_qs.num_cloned = 0;
  CLEAR_LINK(pa->all_arrays);
  CLEAR_HEAD(pa->request_tokens);
  append_link(&svr_jobarrays, &pa->all_arrays, (void*)pa);

 if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    job_purge(pjob);


    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
        "cannot save job");
      }

    return 1;
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    array_delete(pa);

    snprintf(log_buffer,sizeof(log_buffer),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      server.sv_attr[SRV_ATR_MaxSlotLimit].at_val.at_long);
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      pa->ai_qs.parent_id,
      log_buffer);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;
  
  bad_token_count =

    parse_array_request(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
                        &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;
  while (rn != NULL) 
    {
    if (rn->end > array_size)
      array_size = rn->end;
    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */
  array_size++; 

  if (server.sv_attr[SRV_ATR_MaxArraySize].at_flags & ATR_VFLAG_SET)
    {
    int max_array_size = server.sv_attr[SRV_ATR_MaxArraySize].at_val.at_long;
    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array */
  pa->jobs = malloc(array_size * sizeof(job *));
  memset(pa->jobs,0,array_size * sizeof(job *));

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);
    return 2;
    }

  return 0;

  }
Example #27
0
int copy_batchrequest(
    
  struct batch_request **newreq,
  struct batch_request  *preq,
  int                    type,
  int                    jobid)

  {
  struct batch_request *request;
  svrattrl             *pal = NULL;
  svrattrl             *newpal = NULL;
  tlist_head           *phead = NULL;
  char                 *ptr1;
  char                 *ptr2;
  char                  newjobname[PBS_MAXSVRJOBID+1];
  
  request = alloc_br(type);
  if (request)
    {
    request->rq_type = preq->rq_type;
    request->rq_perm = preq->rq_perm;
    request->rq_fromsvr = preq->rq_fromsvr;
    request->rq_conn = preq->rq_conn;
    request->rq_orgconn = preq->rq_orgconn;
    request->rq_extsz = preq->rq_extsz;
    request->rq_time = preq->rq_time;
    strcpy(request->rq_user, preq->rq_user);
    strcpy(request->rq_host, preq->rq_host);
    request->rq_reply.brp_choice = preq->rq_reply.brp_choice;
    request->rq_noreply = preq->rq_noreply;
    /* we need to copy rq_extend if there is any data */
    if (preq->rq_extend)
      {
      request->rq_extend = (char *)calloc(1, strlen(preq->rq_extend) + 1);
      if (request->rq_extend == NULL)
        {
        free_br(request);
        return(PBSE_SYSTEM);
        }
      strcpy(request->rq_extend, preq->rq_extend);
      }
    /* remember the batch_request we copied */
    request->rq_extra = (void *)preq;
    
    switch(preq->rq_type)
      {
      /* This function was created for a modify arracy request (PBS_BATCH_ModifyJob)
         the preq->rq_ind structure was allocated in dis_request_read. If other
         BATCH types are needed refer to that function to see how the rq_ind structure
         was allocated and then copy it here. */
      case PBS_BATCH_DeleteJob:
        
      case PBS_BATCH_HoldJob:
        
      case PBS_BATCH_CheckpointJob:
        
      case PBS_BATCH_ModifyJob:
        
      case PBS_BATCH_AsyModifyJob:
        /* based on how decode_DIS_Manage allocates data */
        CLEAR_HEAD(request->rq_ind.rq_manager.rq_attr);
        
        phead = &request->rq_ind.rq_manager.rq_attr;
        request->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
        request->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
        /* If this is a job array it is possible we only have the array name
           and not the individual job. We need to find out what we have and
           modify the name if needed */
        ptr1 = strstr(preq->rq_ind.rq_manager.rq_objname, "[]");
        if ((ptr1) && (jobid != -1))
          {
          ptr1++;
          strcpy(newjobname, preq->rq_ind.rq_manager.rq_objname);
          ptr2 = strstr(newjobname, "[]");
          ptr2++;
          *ptr2 = 0;
          sprintf(request->rq_ind.rq_manager.rq_objname,"%s%d%s", 
            newjobname,
            jobid,
            ptr1);
          }
        else
          strcpy(request->rq_ind.rq_manager.rq_objname, preq->rq_ind.rq_manager.rq_objname);
        
        /* copy the attribute list */
        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_manager.rq_attr);
        while(pal != NULL)
          {
          newpal = (svrattrl *)calloc(1, pal->al_tsize + 1);
          if (!newpal)
            {
            free_br(request);
            return(PBSE_SYSTEM);
            }
          CLEAR_LINK(newpal->al_link);
          
          newpal->al_atopl.next = 0;
          newpal->al_tsize = pal->al_tsize + 1;
          newpal->al_nameln = pal->al_nameln;
          newpal->al_flags  = pal->al_flags;
          newpal->al_atopl.name = (char *)newpal + sizeof(svrattrl);
          strcpy(newpal->al_atopl.name, pal->al_atopl.name);
          newpal->al_nameln = pal->al_nameln;
          newpal->al_atopl.resource = newpal->al_atopl.name + newpal->al_nameln;
          if (pal->al_atopl.resource != NULL)
            strcpy(newpal->al_atopl.resource, pal->al_atopl.resource);
          newpal->al_rescln = pal->al_rescln;
          newpal->al_atopl.value = newpal->al_atopl.name + newpal->al_nameln + newpal->al_rescln;
          strcpy(newpal->al_atopl.value, pal->al_atopl.value);
          newpal->al_valln = pal->al_valln;
          newpal->al_atopl.op = pal->al_atopl.op;
          
          pal = (struct svrattrl *)GET_NEXT(pal->al_link);
          
          }
        
        break;

      case PBS_BATCH_SignalJob:

        strcpy(request->rq_ind.rq_signal.rq_jid, preq->rq_ind.rq_signal.rq_jid);
        strcpy(request->rq_ind.rq_signal.rq_signame, preq->rq_ind.rq_signal.rq_signame);
        request->rq_extra = strdup(preq->rq_extra);

        break;

      case PBS_BATCH_MessJob:

        strcpy(request->rq_ind.rq_message.rq_jid, preq->rq_ind.rq_message.rq_jid);
        request->rq_ind.rq_message.rq_file = preq->rq_ind.rq_message.rq_file;
        strcpy(request->rq_ind.rq_message.rq_text, preq->rq_ind.rq_message.rq_text);

        break;
        
      default:

        break;
        
      }

    if ((phead != NULL) &&
        (newpal != NULL))
      append_link(phead, &newpal->al_link, newpal);
    
    *newreq = request;
    return(0);
    
    }
  else
    return(PBSE_SYSTEM);
  }
Example #28
0
static int parse_array_request(char *request, tlist_head *tl)
  {
  char *temp_str;
  int num_tokens;
  char **tokens;
  int i;
  int j;
  int num_elements;
  int start;
  int end;
  int num_bad_tokens;
  int searching;
  array_request_node *rn;
  array_request_node *rn2;

  temp_str = strdup(request);
  num_tokens = array_request_token_count(request);
  num_bad_tokens = 0;

  tokens = (char**)malloc(sizeof(char*) * num_tokens);
  j = num_tokens - 1;
  /* start from back and scan backwards setting pointers to tokens and changing ',' to '\0' */

  for (i = strlen(temp_str) - 1; i >= 0; i--)
    {

    if (temp_str[i] == ',')
      {
      tokens[j--] = &temp_str[i+1];
      temp_str[i] = '\0';
      }
    else if (i == 0)
      {
      tokens[0] = temp_str;
      }
    }


  for (i = 0; i < num_tokens; i++)
    {
    num_elements = array_request_parse_token(tokens[i], &start, &end);

    if (num_elements == 0)
      {
      num_bad_tokens++;
      }
    else
      {
      rn = (array_request_node*)malloc(sizeof(array_request_node));
      rn->start = start;
      rn->end = end;
      CLEAR_LINK(rn->request_tokens_link);

      rn2 = GET_NEXT(*tl);
      searching = TRUE;

      while (searching)
        {

        if (rn2 == NULL)
          {
          append_link(tl, &rn->request_tokens_link, (void*)rn);
          searching = FALSE;
          }
        else if (rn->start < rn2->start)
          {
          insert_link(&rn2->request_tokens_link, &rn->request_tokens_link, (void*)rn,
                      LINK_INSET_BEFORE);
          searching = FALSE;
          }
        else
          {
          rn2 = GET_NEXT(rn2->request_tokens_link);
          }

        }

      rn2 = GET_PRIOR(rn->request_tokens_link);

      if (rn2 != NULL && rn2->end >= rn->start)
        {
        num_bad_tokens++;
        }

      rn2 = GET_NEXT(rn->request_tokens_link);

      if (rn2 != NULL && rn2->start <= rn->end)
        {
        num_bad_tokens++;
        }

      }
    }

  free(tokens);

  free(temp_str);

  return num_bad_tokens;
  }
Example #29
0
int status_job(

  job           *pjob, /* ptr to job to status */
  batch_request *preq,
  svrattrl      *pal, /* specific attributes to status */
  tlist_head    *pstathd, /* RETURN: head of list to append status to */
  bool           condensed,
  int           *bad) /* RETURN: index of first bad pbs_attribute */

  {
  struct brp_status *pstat;
  int                IsOwner = 0;
  long               query_others = 0;
  long               condensed_timeout = JOB_CONDENSED_TIMEOUT;

  /* Make sure procct is removed from the job 
     resource attributes */
  remove_procct(pjob);

  /* see if the client is authorized to status this job */
  if (svr_authorize_jobreq(preq, pjob) == 0)
    IsOwner = 1;

  get_svr_attr_l(SRV_ATR_query_others, &query_others);
  if (!query_others)
    {
    if (IsOwner == 0)
      {
      return(PBSE_PERM);
      }
    }
  
  get_svr_attr_l(SRV_ATR_job_full_report_time, &condensed_timeout);

  // if the job has been modified within the timeout, send the full output
  if ((condensed == true) &&
      (time(NULL) < pjob->ji_mod_time + condensed_timeout))
    condensed = false;

  /* allocate reply structure and fill in header portion */
  if ((pstat = (struct brp_status *)calloc(1, sizeof(struct brp_status))) == NULL)
    {
    return(PBSE_SYSTEM);
    }

  CLEAR_LINK(pstat->brp_stlink);

  pstat->brp_objtype = MGR_OBJ_JOB;

  strcpy(pstat->brp_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(pstat->brp_attr);

  append_link(pstathd, &pstat->brp_stlink, pstat);

  /* add attributes to the status reply */

  *bad = 0;

  if (status_attrib(
        pal,
        job_attr_def,
        pjob->ji_wattr,
        JOB_ATR_LAST,
        preq->rq_perm,
        &pstat->brp_attr,
        condensed,
        bad,
        IsOwner))
    {
    return(PBSE_NOATTR);
    }

  return (0);
  }  /* END status_job() */
Example #30
0
int status_job(

  job      *pjob, /* ptr to job to status */
  struct batch_request *preq,
  svrattrl   *pal, /* specific attributes to status */
  tlist_head *pstathd, /* RETURN: head of list to append status to */
  int        *bad) /* RETURN: index of first bad attribute */

  {

  struct brp_status *pstat;

  int IsOwner = 0;

  /* see if the client is authorized to status this job */

  if (svr_authorize_jobreq(preq, pjob) == 0)
    IsOwner = 1;

  if (!server.sv_attr[SRV_ATR_query_others].at_val.at_long)
    {
    if (IsOwner == 0)
      {
      return(PBSE_PERM);
      }
    }

  /* allocate reply structure and fill in header portion */

  pstat = (struct brp_status *)malloc(sizeof(struct brp_status));

  if (pstat == NULL)
    {
    return(PBSE_SYSTEM);
    }

  memset(pstat, 0, sizeof(struct brp_status));

  CLEAR_LINK(pstat->brp_stlink);

  pstat->brp_objtype = MGR_OBJ_JOB;

  strcpy(pstat->brp_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(pstat->brp_attr);

  append_link(pstathd, &pstat->brp_stlink, pstat);

  /* add attributes to the status reply */

  *bad = 0;

  if (status_attrib(
        pal,
        job_attr_def,
        pjob->ji_wattr,
        JOB_ATR_LAST,
        preq->rq_perm,
        &pstat->brp_attr,
        bad,
        IsOwner))
    {
    return(PBSE_NOATTR);
    }

  return (0);
  }  /* END status_job() */