Esempio n. 1
0
/**
 * @brief
 *		pbsTcl_Init	- Application initialization hook for the Tcl shell.
 *
 *		Runs Tcl_Init() and, when the build defines TCLX to a non-zero
 *		value, Tclx_Init() as well.  After that it calls fullresp(0) and
 *		add_cmds(interp), and finally points the global Tcl variable
 *		"tcl_rcFileName" at "~/.tclshrc".
 *
 * @param[in,out]	interp	-	Interpreter being set up.
 *
 * @return	int
 * @retval	TCL_OK	: all initialization steps were carried out.
 * @retval	TCL_ERROR	: Tcl_Init() or Tclx_Init() returned TCL_ERROR.
 */
int
pbsTcl_Init(Tcl_Interp *interp)
{
	int	status;

	status = Tcl_Init(interp);
#if	TCLX
	/* Only try the extension once the core has come up. */
	if (status != TCL_ERROR)
		status = Tclx_Init(interp);
#endif
	if (status == TCL_ERROR)
		return TCL_ERROR;

	fullresp(0);
	add_cmds(interp);

	Tcl_SetVar(interp, "tcl_rcFileName", "~/.tclshrc", TCL_GLOBAL_ONLY);

	return TCL_OK;
}
Esempio n. 2
0
/*
 * pbsTcl_Init - application initialization hook handed to Tcl_Main().
 *
 * Initializes the core interpreter, optionally the TclX extension (when
 * TCLX is non-zero), calls fullresp(0) for C builds, calls add_cmds() and
 * sets the global Tcl variable "tcl_rcFileName" to "~/.tclshrc".
 *
 * Returns TCL_OK, or TCL_ERROR when Tcl_Init()/TclX init returns TCL_ERROR.
 */
int pbsTcl_Init(

  Tcl_Interp *interp)  /* Interpreter for application. */

  {
  int rc;

  rc = Tcl_Init(interp);

  if (rc == TCL_ERROR)
    return TCL_ERROR;

#if     TCLX

  /* The extension entry point is spelled differently for Tcl < 8 with
   * a minor version below 5. */
#if     TCL_MINOR_VERSION < 5  && TCL_MAJOR_VERSION < 8
  rc = TclX_Init(interp);
#else
  rc = Tclx_Init(interp);
#endif

  if (rc == TCL_ERROR)
    {
    return TCL_ERROR;
    }

#endif  /* TCLX */

#ifndef __cplusplus
  /* Skipped when compiled as C++. */
  fullresp(0);
#endif

  add_cmds(interp);

  Tcl_SetVar(interp, "tcl_rcFileName", "~/.tclshrc", TCL_GLOBAL_ONLY);

  return TCL_OK;
  }

/*
 * main - program entry point.
 *
 * Sets chk_file_sec_stderr to 1 and then hands the command line to
 * Tcl_Main(), with pbsTcl_Init() as the application init procedure.
 */
int main(

  int   argc,
  char *argv[])

  {
  chk_file_sec_stderr = 1;

  Tcl_Main(argc, argv, pbsTcl_Init);

  return(0);
  } /* END main() */
Esempio n. 3
0
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 * When no cached entry exists, the host's resource monitor is queried for
 * "loadave", "availmem", "physmem" and "ncpus" (plus "availmask" when built
 * with NODEMASK, and the HPM control query when schd_MANAGE_HPM is set).
 * The connection is guarded by a SIGALRM of GETRSRCS_CONNECT_TIME seconds;
 * any alarm that was pending on entry is re-armed before returning.
 *
 * exechost - name of the host to look up / query.
 *
 * Returns a pointer to the cached or newly appended Resources entry, or
 * NULL if memory could not be allocated or any reply was missing or
 * unparsable.
 */
Resources *
schd_get_resources(char *exechost)
{
    char   *id = "schd_get_resources";
    Resources *rptr, *new_rsrcs;
    int     rm;

    char   *response = NULL;
    int     badreply   = 0;
    int     cpus_avail = 0;
    size_t  pmem_avail = 0;

    char    hpm_ctl[64];

    struct sigaction act, oact;
    int     alarm_armed = 0; /* Set once our handler and alarm are in place. */

    unsigned int remain; /* Time remaining in any old alarm(). */
    time_t then;  /* When this alarm() was started. */
    time_t elapsed; /* Seconds spent since 'then'. */

#ifdef NODEMASK
    Bitfield cpy;
    int     i, j;
#endif /* NODEMASK */

    /*
     * Check for a local copy of the resources being available already.
     * If so, just return a reference to that Resources structure.
     */

    for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
    {
        if (strcmp(rptr->exechost, exechost) == 0)
            return (rptr);
    }

    schd_timestamp("get_rsrcs");

    /*
     * No cached resource information for 'exechost'.  Need to query the
     * host for its information.
     */

    if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
        (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        return (NULL); /* Can't get the information - nowhere to store it. */
    }

    memset((void *)new_rsrcs, 0, sizeof(Resources));

    act.sa_flags = 0;
    act.sa_handler = connect_interrupt;
    sigemptyset(&act.sa_mask);
    remain = 0;
    then = 0;

    /*
     * Set the alarm, and maintain some idea of how long was left on any
     * previously set alarm.
     */

    if (sigaction(SIGALRM, &act, &oact) == 0)
    {
        remain = alarm(GETRSRCS_CONNECT_TIME);
        then = time(NULL);
        alarm_armed = 1;
    }

    if ((rm = openrm(exechost, 0)) == -1)
    {
        (void)sprintf(log_buffer,
                      "Unable to contact resmom@%s (%d)", exechost, pbs_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

        badreply = 1;
        goto bail;
    }

    /*
     * Turn off full response.  Responses will be received in the order in
     * which they are sent.
     */
    fullresp(0);

    /* Build a list of all the resources about which we want information. */

    addreq(rm, "loadave");

    addreq(rm, "availmem");

    addreq(rm, "physmem");

    addreq(rm, "ncpus");

#ifdef NODEMASK
    addreq(rm, "availmask");

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_QUERY_STR);
        addreq(rm, hpm_ctl);
    }

    /*
     * Get the values back from the resource monitor, and round up.
     *
     * Every reply is released as soon as it has been consumed and the
     * pointer is cleared, so that the common cleanup at 'bail' never frees
     * the same buffer a second time.
     */

    /* Receive LOADAVE response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->loadave = atof(response) * schd_FAKE_MACH_MULT;
        free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive AVAILMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        new_rsrcs->freemem = schd_val2byte(response);
        new_rsrcs->freemem *= schd_FAKE_MACH_MULT;
        free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive PHYSMEM response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        pmem_avail = schd_val2byte(response);
        pmem_avail *= schd_FAKE_MACH_MULT;
        free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

    /* Receive NCPUS response from resource monitor. */
    response = getreq(rm);

    if (response != NULL)
    {
        cpus_avail = atoi(response) * schd_FAKE_MACH_MULT;
        free(response);
        response = NULL;
    }
    else
    {
        (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }

#ifdef NODEMASK
    /* Receive available nodes from resource monitor. */
    response = getreq(rm);

    if (response == NULL)
    {
        (void)sprintf(log_buffer, "bad return from getreq(availmask), %d, %d",
                      pbs_errno, errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        badreply = 1;
        goto bail;
    }
    else
    {
        /* Try the "bits" form first, then fall back to the string form. */
        if (schd_bits2mask(response, &new_rsrcs->availmask) != 0)
        {
            if (schd_str2mask(response, &new_rsrcs->availmask) != 0)
            {
                (void)sprintf(log_buffer, "can't parse availmask '%s'", response);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
                badreply = 1;
                goto bail; /* 'response' is released at bail. */
            }
        }

        free(response);
        response = NULL;
    }

#endif /* NODEMASK */

    if (schd_MANAGE_HPM)
    {
        /* Receive HPM_CTL response from resource monitor. */
        response = getreq(rm);

        if (response != NULL)
        {
            if (strcmp(response, HPM_CTL_USERMODE_STR) == 0)
                new_rsrcs->flags |= RSRCS_FLAGS_HPM_USER;
            else if (strcmp(response, HPM_CTL_GLOBALMODE_STR) == 0)
                new_rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER;
            else
            {
                (void)sprintf(log_buffer, "bad response '%s' for '%s@%s'",
                              response, hpm_ctl, exechost);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                           log_buffer);
                badreply = 1;
                goto bail;
            }
        }
        else
        {
            (void)sprintf(log_buffer, "bad return from getreq(%s), %d, %d",
                          hpm_ctl, pbs_errno, errno);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
            badreply = 1;
            goto bail;
        }
    }

    /*
     * NOTE: any reply still held in 'response' is free()'d in bail.
     */

bail:
    if (response != NULL)
    {
        free(response);
        response = NULL;
    }

    /* Disconnect from the resource monitor. */
    if (rm >= 0)  /* resmom handle "0" is valid in RPP. */
        closerm(rm);

    /*
     * And unset the alarm and handler -- but only if they were actually
     * installed above; otherwise 'oact' was never filled in and any alarm
     * still pending belongs to the caller.
     */
    if (alarm_armed)
    {
        alarm(0);

        sigaction(SIGALRM, &oact, NULL);
    }

    /* Reset the old alarm, taking into account how much time has passed. */
    if (remain)
    {
        elapsed = time(NULL) - then;

        if (elapsed < 0)
            elapsed = 0;

        DBPRT(("%s: old alarm had %u secs remaining, %ld elapsed, ", id,
               remain, (long)elapsed));

        /*
         * How much time remains even after the time spent above?  'remain'
         * is unsigned, so compare before subtracting instead of letting it
         * wrap around.  If the previous alarm would already have expired,
         * schedule an alarm call in 1 second (close enough, hopefully).
         */
        if ((time_t)remain > elapsed)
            remain -= (unsigned int)elapsed;
        else
            remain = 1;

        DBPRT(("reset to %u secs\n", remain));

        alarm(remain);
    }

    /*
     * Verify all the data came back as expected; if not, abort this
     * iteration of the scheduler.
     */
    if (badreply)
    {
        (void)sprintf(log_buffer,
                      "Got bad info from mom@%s - aborting sched run", exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    /* Make a copy of the hostname for the resources struct. */
    new_rsrcs->exechost = schd_strdup(exechost);

    if (new_rsrcs->exechost == NULL)
    {
        (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                      exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(new_rsrcs);
        return (NULL);
    }

    new_rsrcs->nodes_total = NODES_REQD(cpus_avail, pmem_avail);

#ifdef NODEMASK
    /* Copy the availmask schd_FAKE_MACH_MULT times to match avail cpus. */
    BITFIELD_CPY(&cpy, &(new_rsrcs->availmask));

    for (i = 2; i <= schd_FAKE_MACH_MULT; i++)
    {
        for (j = 0; j < (cpus_avail / schd_FAKE_MACH_MULT / 2); j++)
            BITFIELD_SHIFTL(&cpy);

        BITFIELD_SETM(&(new_rsrcs->availmask), &cpy);
    }

#endif /* NODEMASK */

    if (schd_RsrcsList == NULL)
    {
        schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
    else
    {
        for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
            /* Find the last element in the list. */ ;

        rptr->next = new_rsrcs;
    }

    /* Next pointer for the tail of the list points to nothing. */
    new_rsrcs->next = NULL;

    return (new_rsrcs);
}
Esempio n. 4
0
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 * When no cached entry exists, the host's resource monitor is asked for
 * "mppe_app" (stored, scaled, as nodes_total) and "mppe_avail" (used to
 * derive nodes_alloc).  The connection is guarded by a SIGALRM of
 * GETRSRCS_CONNECT_TIME seconds; any alarm that was pending on entry is
 * re-armed before returning.
 *
 * exechost - name of the host to look up / query.
 *
 * Returns a pointer to the cached or newly appended Resources entry, or
 * NULL if memory could not be allocated or a reply was missing.
 */
Resources *
schd_get_resources(char *exechost)
  {
  char   *id = "schd_get_resources";
  Resources *rptr, *new_rsrcs;
  int     rm;

  char   *response = NULL;
  int     badreply   = 0;
  int     cpus_avail = 0;
  int     cpus_tot   = 0;

  struct sigaction act, oact;
  int     alarm_armed = 0; /* Set once our handler and alarm are in place. */

  unsigned int remain; /* Time remaining in any old alarm(). */
  time_t then;  /* When this alarm() was started. */
  time_t elapsed; /* Seconds spent since 'then'. */

  /*
   * Check for a local copy of the resources being available already.
   * If so, just return a reference to that Resources structure.
   */

  for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
    {
    if (strcmp(rptr->exechost, exechost) == 0)
      return (rptr);
    }

  schd_timestamp("get_rsrcs");

  /*
   * No cached resource information for 'exechost'.  Need to query the
   * host for its information.
   */

  if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
    (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (NULL); /* Can't get the information - nowhere to store it. */
    }

  memset((void *)new_rsrcs, 0, sizeof(Resources));

  act.sa_flags = 0;
  act.sa_handler = connect_interrupt;
  sigemptyset(&act.sa_mask);
  remain = 0;
  then = 0;

  /*
   * Set the alarm, and maintain some idea of how long was left on any
   * previously set alarm.
   */

  if (sigaction(SIGALRM, &act, &oact) == 0)
    {
    remain = alarm(GETRSRCS_CONNECT_TIME);
    then = time(NULL);
    alarm_armed = 1;
    }

  if ((rm = openrm(exechost, 0)) == -1)
    {
    (void)sprintf(log_buffer,
                  "Unable to contact resmom@%s (%d)", exechost, pbs_errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    badreply = 1;
    goto bail;
    }

  /*
   * Turn off full response.  Responses will be received in the order in
   * which they are sent.
   */
  fullresp(0);

  /* Build a list of all the resources about which we want information. */

  addreq(rm, "mppe_app");

  addreq(rm, "mppe_avail");

  /*
   * Get the values back from the resource monitor.  Each reply is released
   * once it has been converted; previously both replies were leaked.
   */

  /* Receive MPPE_APP response from resource monitor. */
  /* returns the total number of Application PEs configured */
  response = getreq(rm);

  if (response != NULL)
    {
    cpus_tot = atoi(response) * schd_FAKE_MACH_MULT;
    free(response);
    response = NULL;
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(mppe_app), %d, %d",
                  pbs_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive MPPE_AVAIL response from resource monitor. */
  /* returns the largest contiguous block of APP PEs */
  response = getreq(rm);

  if (response != NULL)
    {
    cpus_avail = atoi(response) * schd_FAKE_MACH_MULT;
    free(response);
    response = NULL;
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(mppe_avail), %d, %d",
                  pbs_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  new_rsrcs->freemem = MB_PER_NODE * schd_FAKE_MACH_MULT;

bail:
  /*
   * Disconnect from the resource monitor.  openrm() signals failure with
   * -1 (see the check above), so every other value -- including 0 -- is a
   * handle that has to be closed.
   */

  if (rm >= 0)
    closerm(rm);

  /*
   * And unset the alarm and handler -- but only if they were actually
   * installed above; otherwise 'oact' was never filled in and any alarm
   * still pending belongs to the caller.
   */
  if (alarm_armed)
    {
    alarm(0);

    sigaction(SIGALRM, &oact, NULL);
    }

  /* Reset the old alarm, taking into account how much time has passed. */
  if (remain)
    {
    elapsed = time(NULL) - then;

    if (elapsed < 0)
      elapsed = 0;

    DBPRT(("%s: old alarm had %u secs remaining, %ld elapsed, ", id,
           remain, (long)elapsed));

    /*
     * How much time remains even after the time spent above?  'remain' is
     * unsigned, so compare before subtracting instead of letting it wrap
     * around.  If the previous alarm would already have expired, schedule
     * an alarm call in 1 second (close enough, hopefully).
     */

    if ((time_t)remain > elapsed)
      remain -= (unsigned int)elapsed;
    else
      remain = 1;

    DBPRT(("reset to %u secs\n", remain));

    alarm(remain);
    }

  /*
   * Verify all the data came back as expected; if not, abort this
   * iteration of the scheduler.
   */
  if (badreply)
    {
    (void)sprintf(log_buffer,
                  "Got bad info from mom@%s - aborting sched run", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    free(new_rsrcs);
    return (NULL);
    }

  /* Make a copy of the hostname for the resources struct. */
  new_rsrcs->exechost = schd_strdup(exechost);

  if (new_rsrcs->exechost == NULL)
    {
    (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                  exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    free(new_rsrcs);
    return (NULL);
    }

  new_rsrcs->nodes_total = cpus_tot;

  new_rsrcs->nodes_alloc = cpus_tot - cpus_avail;

  if (schd_RsrcsList == NULL)
    {
    schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
  else
    {
    for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
      /* Find the last element in the list. */ ;

    rptr->next = new_rsrcs;
    }

  /* Next pointer for the tail of the list points to nothing. */
  new_rsrcs->next = NULL;

  return (new_rsrcs);
  }
Esempio n. 5
0
void
start_tcl(void)
{
    char *id = "start_tcl";
    char buf[BUFSIZ];
    int fd;
    int tot, len;

    interp = Tcl_CreateInterp();

    if (Tcl_Init(interp) == TCL_ERROR)
    {
        sprintf(log_buffer, "Tcl_Init error: %s",
                Tcl_GetStringResult(interp));
        log_err(-1, id, log_buffer);
        die(0);
    }

#if TCLX
#if TCL_MINOR_VERSION < 5  && TCL_MAJOR_VERSION < 8
    if (TclX_Init(interp) == TCL_ERROR)
    {
#else

    if (Tclx_Init(interp) == TCL_ERROR)
    {
#endif
        sprintf(log_buffer, "Tclx_Init error: %s",
                Tcl_GetStringResult(interp));
        log_err(-1, id, log_buffer);
        die(0);
    }

#endif
    add_cmds(interp);

    if (initfil)
    {
        int  code;

        code = Tcl_EvalFile(interp, initfil);

        if (code != TCL_OK)
        {
            char *trace;

            trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

            if (trace == NULL)
                trace = (char *)Tcl_GetStringResult(interp);

            fprintf(stderr, "%s: TCL error @ line %d: %s\n",
                    initfil, interp->errorLine, trace);

            sprintf(log_buffer, "%s: TCL error @ line %d: %s",
                    initfil, interp->errorLine,
                    Tcl_GetStringResult(interp));

            log_err(-1, id, log_buffer);

            die(0);
        }

        sprintf(log_buffer, "init file %s", initfil);

        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
    }

    if ((fd = open(bodyfil, O_RDONLY)) == -1)
    {
        log_err(errno, id, bodyfil);
        die(0);
    }

    sprintf(log_buffer, "body file: %s", bodyfil);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    if (body)
        free(body);

    if ((body = malloc(BUFSIZ)) == NULL)
    {
        log_err(errno, id, "malloc");
        die(0);
    }

    for (tot = 0; (len = read(fd, buf, sizeof(buf))) > 0; tot += len)
    {
        if ((body = realloc(body, tot + len + 1)) == NULL)
        {
            log_err(errno, id, "realloc");
            die(0);
        }

        memcpy(&body[tot], buf, len);
    }

    if (len == -1)
    {
        log_err(errno, id, bodyfil);
        die(0);
    }

    body[tot] = '\0';

    close(fd);

#if TCL_MAJOR_VERSION >= 8

    if (body_obj == NULL)
    {
        body_obj = Tcl_NewStringObj(body, tot);
        Tcl_IncrRefCount(body_obj);
    }
    else
    {
        Tcl_SetStringObj(body_obj, body, tot);
    }

#endif
}

/*
 * addclient - append the address of host 'name' to the okclients[] list.
 *
 * The name is resolved with gethostbyname().  Only an address whose family
 * is AF_INET and whose length matches struct in_addr is accepted; anything
 * else would overrun the local in_addr when copied.  Once numclients has
 * reached START_CLIENTS the array is grown by one slot per call.
 *
 * Returns 0 on success, -1 if the host cannot be resolved, has no usable
 * IPv4 address, or the list cannot be enlarged.
 */
int
addclient(char *name)
{
    static char id[] = "addclient";

    struct hostent  *host;

    struct  in_addr saddr;

    if ((host = gethostbyname(name)) == NULL)
    {
        sprintf(log_buffer, "host %s not found", name);
        log_err(-1, id, log_buffer);
        return -1;
    }

    /* Refuse anything that does not fit exactly into a struct in_addr. */
    if (host->h_addrtype != AF_INET ||
        host->h_length != (int)sizeof(saddr))
    {
        sprintf(log_buffer, "host %s has no IPv4 address", name);
        log_err(-1, id, log_buffer);
        return -1;
    }

    if (numclients >= START_CLIENTS)
    {
        pbs_net_t *newclients;

        /* Keep the old array if realloc() fails. */
        newclients = realloc(okclients,
                             sizeof(pbs_net_t) * (numclients + 1));

        if (newclients == NULL)
            return -1;

        okclients = newclients;
    }

    memcpy((char *)&saddr, host->h_addr, sizeof(saddr));

    okclients[numclients++] = saddr.s_addr;
    return 0;
}

/*
 * read_config - read and process the configuration file (see -c option)
 *
 * Currently, the only statement is $clienthost to specify which systems
 * can contact the scheduler.
 *
 * Lines starting with '#' and empty lines are skipped.  A line starting
 * with '$' names a statement from the special[] table; the next blank- or
 * tab-separated word is handed to that statement's handler.  Any other
 * line is an error.
 *
 * Returns 0 when the whole file was processed, -1 on the first problem:
 * failed security check, unopenable file, unknown statement, missing
 * value, handler failure or invalid line.  The file is closed on every
 * path once it has been opened.
 */
#define CONF_LINE_LEN 120

static
int
read_config(char *file)
{
    static char *id = "read_config";
    FILE *conf;
    int i;
    int rc = 0;
    size_t len;
    char line[CONF_LINE_LEN];
    char *token;

    struct specialconfig
    {
        char *name;
        int (*handler)();
    } special[] =

    {
        {"clienthost", addclient },
        { NULL,  NULL }
    };


#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)

    if (chk_file_sec(file, 0, 0, S_IWGRP | S_IWOTH, 1, 0))
        return (-1);

#endif

    if ((conf = fopen(file, "r")) == NULL)
    {
        log_err(errno, id, "cannot open config file");
        return (-1);
    }

    while (fgets(line, CONF_LINE_LEN, conf))
    {

        if ((line[0] == '#') || (line[0] == '\n'))
            continue;  /* ignore comment & null line */

        if (line[0] != '$')
        {
            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER,
                       msg_daemonname,
                       "invalid line in config file");
            rc = -1;
            break;
        }

        /* special: "$name value" */

        /*
         * line[0] is '$', so strtok() always finds a token here; the
         * fallback only keeps 'token + 1' below inside a valid string.
         */
        if ((token = strtok(line, " \t")) == NULL)
            token = "$";

        for (i = 0; special[i].name; i++)
        {
            if (strcmp(token + 1, special[i].name) == 0)
                break;
        }

        if (special[i].name == NULL)
        {
            sprintf(log_buffer, "config name %s not known",
                    token);
            log_record(PBSEVENT_ERROR,
                       PBS_EVENTCLASS_SERVER,
                       msg_daemonname, log_buffer);
            rc = -1;
            break;
        }

        token = strtok(NULL, " \t");

        /* Drop the newline that fgets() leaves at the end of the value. */
        if (token != NULL)
        {
            len = strlen(token);

            if ((len > 0) && (token[len - 1] == '\n'))
                token[len - 1] = '\0';
        }

        /* A statement without a value used to dereference NULL. */
        if ((token == NULL) || (*token == '\0'))
        {
            sprintf(log_buffer, "config name %s has no value",
                    special[i].name);
            log_record(PBSEVENT_ERROR,
                       PBS_EVENTCLASS_SERVER,
                       msg_daemonname, log_buffer);
            rc = -1;
            break;
        }

        if (special[i].handler(token))
        {
            rc = -1;
            break;
        }
    }

    fclose(conf);

    return (rc);
}

/*
 * restart - throw away the current Tcl interpreter and start over.
 *
 * sig - non-zero when invoked for a signal; the log is then closed and
 *       reopened as well.  Zero when invoked for a restart command.
 *
 * After logging the reason, the interpreter is deleted, the configuration
 * file (if one was given) is read again -- die(0) is called if that fails
 * -- and start_tcl() builds a new interpreter.
 */
void
restart(sig)
int sig;
{
    char    *id = "restart";

    if (sig == 0)
    {
        sprintf(log_buffer, "restart command");
    }
    else
    {
        sprintf(log_buffer, "restart on signal %d", sig);

        /* Cycle the log file before the restart record is written. */
        log_close(1);
        log_open(logfile, path_log);
    }

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    Tcl_DeleteInterp(interp);

    if (configfile && (read_config(configfile) != 0))
        die(0);

    start_tcl();
}

void
badconn(msg)
char *msg;
{
    static char id[] = "badconn";

    struct in_addr addr;
    char  buf[5*sizeof(addr) + 100];

    struct hostent *phe;

    addr = saddr.sin_addr;
    phe = gethostbyaddr((void *) & addr, sizeof(addr), AF_INET);

    if (phe == NULL)
    {
        char hold[6];
        int i;
        union
        {

            struct in_addr aa;
            u_char  bb[sizeof(addr)];
        } uu;

        uu.aa = addr;
        sprintf(buf, "%u", uu.bb[0]);

        for (i = 1; i < (int)sizeof(addr); i++)
        {
            sprintf(hold, ".%u", uu.bb[i]);
            strcat(buf, hold);
        }
    }
    else
    {
        strncpy(buf, phe->h_name, sizeof(buf));
        buf[sizeof(buf)-1] = '\0';
    }

    sprintf(log_buffer, "%s on port %u %s", buf, ntohs(saddr.sin_port), msg);

    log_err(-1, id, log_buffer);
    return;
}

/*
 * server_command - accept one connection on server_sock and read the
 * command word sent over it.
 *
 * The peer must connect from a port below IPPORT_RESERVED and from an
 * address listed in okclients[]; otherwise the connection is logged via
 * badconn() and closed.  On acceptance the socket is registered with
 * socket_to_conn() (result stored in the global 'connector') and a 4-byte
 * command is read with get_4byte().
 *
 * Returns the command read, or SCH_ERROR on any failure.
 */
unsigned int
server_command()
{
    static char id[] = "server_command";
    int  new_socket;
    int  i;
    torque_socklen_t slen;
    unsigned int  cmd;
    pbs_net_t addr;

    slen = sizeof(saddr);
    new_socket = accept(server_sock,
                        (struct sockaddr *) & saddr, &slen);

    if (new_socket == -1)
    {
        log_err(errno, id, "accept");
        return SCH_ERROR;
    }

    if (ntohs(saddr.sin_port) >= IPPORT_RESERVED)
    {
        badconn("non-reserved port");
        close(new_socket);
        return SCH_ERROR;
    }

    addr = (pbs_net_t)saddr.sin_addr.s_addr;

    for (i = 0; i < numclients; i++)
    {
        if (addr == okclients[i])
            break;
    }

    if (i == numclients)
    {
        badconn("unauthorized host");
        close(new_socket);
        return SCH_ERROR;
    }

    if ((connector = socket_to_conn(new_socket)) < 0)
    {
        log_err(errno, id, "socket_to_conn");
        /* No connection entry took the socket over, so close it here. */
        close(new_socket);
        return SCH_ERROR;
    }

    if (get_4byte(new_socket, &cmd) != 1)
    {
        /*
         * NOTE(review): the socket now belongs to 'connector'; presumably
         * the caller disconnects it after SCH_ERROR -- confirm, otherwise
         * it leaks here as well.
         */
        log_err(errno, id, "get4bytes");
        return SCH_ERROR;
    }

    return cmd;
}


/*
 * lock_out - lock out other daemons from this directory.
 *
 * fds - descriptor of the lock file.
 * op  - F_WRLCK to take the lock or F_UNLCK to release it.
 *
 * Applies 'op' to the whole file with a non-blocking fcntl(F_SETLK).  If
 * that fails the problem is logged, echoed to stderr and the process
 * exits with status 1.
 */

static void lock_out(int fds, int op)
{

    struct flock lk;

    lk.l_type   = op;
    lk.l_whence = SEEK_SET;
    lk.l_start  = 0;
    lk.l_len    = 0; /* whole file */

    if (fcntl(fds, F_SETLK, &lk) < 0)
    {
        (void)strcpy(log_buffer, "pbs_sched: another scheduler running\n");
        log_err(errno, msg_daemonname, log_buffer);
        /* Never hand a data buffer to fprintf() as the format string. */
        fputs(log_buffer, stderr);
        exit(1);
    }
}

int main(argc, argv)
int  argc;
char *argv[];
{
    char  *id = "main";
    int  code;

    struct hostent *hp;
    int  go, c, errflg = 0;
    int  lockfds;
    int  t = 1;
    char  *ptr;
    pid_t  pid;
    char  *cp, host[100];
    char  *homedir = PBS_SERVER_HOME;
    unsigned int port;
    char  path_priv[_POSIX_PATH_MAX];
    char  *dbfile = "sched_out";
    int  alarm_time = 180;

    struct sigaction act;
    caddr_t  curr_brk = 0, next_brk;
    extern char *optarg;
    extern int optind, opterr;
    fd_set  fdset;

#ifndef DEBUG
    if (IamRoot() == 0)
    {
        return (1);
    }
#endif /* DEBUG */

    glob_argv = argv;

    if ((cp = strrchr(argv[0], '/')) == NULL)
        cp = argv[0];
    else
        cp++;

    msg_daemonname = strdup(cp);

    port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp",
                       PBS_SCHEDULER_SERVICE_PORT);

    while ((c = getopt(argc, argv, "L:S:d:i:b:t:p:a:vc:")) != EOF)
    {
        switch (c)
        {

        case 'L':
            logfile = optarg;
            break;

        case 'S':
            port = (unsigned int)atoi(optarg);

            if (port == 0)
            {
                fprintf(stderr,
                        "%s: illegal port\n", optarg);
                errflg = 1;
            }

            break;

        case 'd':
            homedir = optarg;
            break;

        case 'i':  /* initialize */
            initfil = optarg;
            break;

        case 'b':
            bodyfil = optarg;
            break;

        case 't':
            termfil = optarg;
            break;

        case 'p':
            dbfile = optarg;
            break;

        case 'a':
            alarm_time = strtol(optarg, &ptr, 10);

            if (alarm_time <= 0 || *ptr != '\0')
            {
                fprintf(stderr,
                        "%s: bad alarm time\n", optarg);
                errflg = 1;
            }

            break;

        case 'c':
            configfile = optarg;
            break;

        case 'v':
            verbose = 1;
            break;

        case '?':
            errflg = 1;
            break;
        }
    }

    if (errflg || optind != argc)
    {
        static char *options[] =
        {
            "[-L logfile]",
            "[-S port]",
            "[-d home]",
            "[-i init]",
            "[-b body]",
            "[-t term]",
            "[-p output]",
            "[-a alarm]",
            "[-c configfile]",
            "[-v]",
            NULL
        };
        int i;

        fprintf(stderr, "usage: %s\n", argv[0]);

        for (i = 0; options[i]; i++)
            fprintf(stderr, "\t%s\n", options[i]);

        exit(1);
    }

    /* Save the original working directory for "restart" */
    if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL)
    {
        fprintf(stderr, "cannot get current working directory\n");
        exit(1);
    }

    (void)sprintf(path_priv, "%s/sched_priv", homedir);
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
    c  = chk_file_sec(path_priv, 1, 0, S_IWGRP | S_IWOTH, 1, 0);
    c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, 0);

    if (c != 0) exit(1);

#endif  /* not DEBUG and not NO_SECURITY_CHECK */
    if (chdir(path_priv) == -1)
    {
        perror(path_priv);
        exit(1);
    }

    (void)sprintf(path_log, "%s/sched_logs", homedir);
    (void)strcpy(pbs_current_user, "Scheduler");

    /* The following is code to reduce security risks                */
    /* start out with standard umask, system resource limit infinite */

    umask(022);

    if (setup_env(PBS_ENVIRON) == -1)
        exit(1);

    c = getgid();

    (void)setgroups(1, (gid_t *)&c); /* secure suppl. group ids */

    c = sysconf(_SC_OPEN_MAX);

    while (--c > 2)
        (void)close(c); /* close any file desc left open by parent */

#ifndef DEBUG
#ifdef _CRAY
    (void)limit(C_JOB,      0, L_CPROC, 0);

    (void)limit(C_JOB,      0, L_CPU,   0);

    (void)limit(C_JOBPROCS, 0, L_CPU,   0);

    (void)limit(C_PROC,     0, L_FD,  255);

    (void)limit(C_JOB,      0, L_FSBLK, 0);

    (void)limit(C_JOBPROCS, 0, L_FSBLK, 0);

    (void)limit(C_JOB,      0, L_MEM  , 0);

    (void)limit(C_JOBPROCS, 0, L_MEM  , 0);

#else /* not  _CRAY */
    {

        struct rlimit rlimit;

        rlimit.rlim_cur = RLIM_INFINITY;
        rlimit.rlim_max = RLIM_INFINITY;
        (void)setrlimit(RLIMIT_CPU,   &rlimit);
        (void)setrlimit(RLIMIT_FSIZE, &rlimit);
        (void)setrlimit(RLIMIT_DATA,  &rlimit);
        (void)setrlimit(RLIMIT_STACK, &rlimit);
#ifdef  RLIMIT_RSS
        (void)setrlimit(RLIMIT_RSS  , &rlimit);
#endif  /* RLIMIT_RSS */
#ifdef  RLIMIT_VMEM
        (void)setrlimit(RLIMIT_VMEM  , &rlimit);
#endif  /* RLIMIT_VMEM */
    }
#endif /* not _CRAY */

#if !defined(NO_SECURITY_CHECK)
    c = 0;

    if (initfil)
    {
        if (*initfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, initfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(initfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (bodyfil)
    {
        if (*bodyfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, bodyfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(bodyfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (termfil)
    {
        if (*termfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, termfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(termfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (c) exit(1);

#endif /* not NO_SECURITY_CHECK */
#endif /* not DEBUG */

    if (log_open(logfile, path_log) == -1)
    {
        fprintf(stderr, "%s: logfile could not be opened\n", argv[0]);
        exit(1);
    }

    if (gethostname(host, sizeof(host)) == -1)
    {
        char *prob = "gethostname";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if ((hp = gethostbyname(host)) == NULL)
    {
        char *prob = "gethostbyname";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
    {
        char *prob = "socket";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR,
                   (char *)&t, sizeof(t)) == -1)
    {
        char *prob = "setsockopt";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    saddr.sin_family = AF_INET;

    saddr.sin_port = htons((unsigned short)port);
    memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);

    if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
        char *prob = "bind";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if (listen(server_sock, 5) < 0)
    {
        char *prob = "listen";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t));

    addclient("localhost");   /* who has permission to call MOM */
    addclient(host);

    if (configfile)
    {
        if (read_config(configfile) != 0)
            die(0);
    }

    lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644);

    if (lockfds < 0)
    {
        char *prob = "lock file";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    lock_out(lockfds, F_WRLCK);

#ifndef DEBUG
    lock_out(lockfds, F_UNLCK);

    if ((pid = fork()) == -1)       /* error on fork */
    {
        char *prob = "fork";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }
    else if (pid > 0)               /* parent exits */
        exit(0);

    if ((pid = setsid()) == -1)
    {
        log_err(errno, id, "setsid");
        die(0);
    }

    lock_out(lockfds, F_WRLCK);

    freopen(dbfile, "a", stdout);
    setvbuf(stdout, NULL, _IOLBF, 0);
    dup2(fileno(stdout), fileno(stderr));
#else
    pid = getpid();
    setvbuf(stdout, NULL, _IOLBF, 0);
    setvbuf(stderr, NULL, _IOLBF, 0);
#endif
    freopen("/dev/null", "r", stdin);

    /* write schedulers pid into lockfile */
    (void)sprintf(log_buffer, "%d\n", pid);
    (void)write(lockfds, log_buffer, strlen(log_buffer) + 1);

#if (PLOCK_DAEMONS & 2)
    (void)plock(PROCLOCK); /* lock daemon into memory */
#endif

    sprintf(log_buffer, "%s startup pid %d", argv[0], pid);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    sprintf(log_buffer, "%s using TCL %s (%s)", argv[0],
            TCL_VERSION, TCL_PATCH_LEVEL);
    fprintf(stderr, "%s\n", log_buffer);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    fullresp(0);
    sigemptyset(&allsigs);
    act.sa_flags = 0;
    sigaddset(&allsigs, SIGHUP);    /* remember to block these */
    sigaddset(&allsigs, SIGINT);    /* during critical sections */
    sigaddset(&allsigs, SIGTERM);   /* so we don't get confused */
    act.sa_mask = allsigs;

    act.sa_handler = restart;       /* do a restart on SIGHUP */
    sigaction(SIGHUP, &act, NULL);

    act.sa_handler = toolong; /* handle an alarm call */
    sigaction(SIGALRM, &act, NULL);

    act.sa_handler = die;           /* bite the biscuit for all following */
    sigaction(SIGINT, &act, NULL);
    sigaction(SIGTERM, &act, NULL);

    start_tcl();

    FD_ZERO(&fdset);

    for (go = 1; go;)
    {
        unsigned int cmd;


        FD_SET(server_sock, &fdset);

        if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1)
        {
            if (errno != EINTR)
                log_err(errno, id, "select");

            continue;
        }

        if (!FD_ISSET(server_sock, &fdset))
            continue;

        cmd = server_command();

        if (cmd == (unsigned)SCH_ERROR || cmd == (unsigned)SCH_SCHEDULE_NULL)
            continue;

        if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1)
            log_err(errno, id, "sigprocmaskSIG_BLOCK)");

        if (verbose)
        {
            sprintf(log_buffer, "command %d", cmd);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                       id, log_buffer);
        }

        switch (cmd)
        {

        case SCH_SCHEDULE_NEW:

        case SCH_SCHEDULE_TERM:

        case SCH_SCHEDULE_TIME:

        case SCH_SCHEDULE_RECYC:

        case SCH_SCHEDULE_CMD:

        case SCH_SCHEDULE_FIRST:
            alarm(alarm_time);

#if TCL_MAJOR_VERSION >= 8
            /* execute compiled body code for TCL-8 */
            code = Tcl_EvalObj(interp, body_obj);
#else
            code = Tcl_Eval(interp, body);
#endif
            alarm(0);

            switch (code)
            {

            case TCL_OK:

            case TCL_RETURN:
                break;

            default:
            {

                char *trace;
                char  codename[20];

                switch (code)
                {

                case TCL_BREAK:
                    strcpy(codename, "break");
                    break;

                case TCL_CONTINUE:
                    strcpy(codename, "continue");
                    break;

                default:
                    strcpy(codename, "<unknown>");
                    break;
                }

                trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

                if (trace == NULL)
                    trace = (char *)Tcl_GetStringResult(interp);

                fprintf(stderr, "%s: TCL interpreter return code %d (%s) @ line %d: %s\n",
                        bodyfil, code, codename,
                        interp->errorLine, trace);

                sprintf(log_buffer,
                        "%s: TCL error @ line %d: %s",
                        bodyfil, interp->errorLine,
                        Tcl_GetStringResult(interp));

                log_err(-1, id, log_buffer);

                die(0);
            }
            }

            break;

        case SCH_CONFIGURE:

        case SCH_RULESET:
            restart(0);
            break;

        case SCH_QUIT:
            go = 0;
            break;

        default:
            log_err(-1, id, "unknown command");
            break;
        }

        if (connector >= 0 && server_disconnect(connector))
        {
            log_err(errno, id, "server_disconnect");
            die(0);
        }

        connector = -1;

        if (verbose)
        {
            next_brk = (caddr_t)sbrk(0);

            if (next_brk > curr_brk)
            {
                sprintf(log_buffer, "brk point %p", next_brk);
                log_record(PBSEVENT_SYSTEM,
                           PBS_EVENTCLASS_SERVER,
                           id, log_buffer);
                curr_brk = next_brk;
            }
        }

        if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1)
            log_err(errno, id, "sigprocmask(SIG_SETMASK)");
    }

    if (termfil)
    {
        code = Tcl_EvalFile(interp, termfil);

        if (code != TCL_OK)
        {
            char *trace;

            trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

            if (trace == NULL)
                trace = (char *)Tcl_GetStringResult(interp);

            fprintf(stderr, "%s: TCL error @ line %d: %s\n",
                    termfil, interp->errorLine, trace);

            sprintf(log_buffer, "%s: TCL error @ line %d: %s",
                    termfil, interp->errorLine,
                    Tcl_GetStringResult(interp));

            log_err(-1, id, log_buffer);

            die(0);
        }

        sprintf(log_buffer, "term file: %s", termfil);

        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
    }

    sprintf(log_buffer, "%s normal finish pid %d", argv[0], pid);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)close(server_sock);
    exit(0);
}
Esempio n. 6
0
/*
 * Find an entry for the resources for the requested host in the list of
 * existing resources, or create a new one for that host and return it.
 *
 * If 'exechost' is already present in schd_RsrcsList the cached entry is
 * returned.  Otherwise the resource monitor on that host is queried for
 * loadave, availmem, physmem, ncpus, tmpdir and arch, and a new entry is
 * appended to the tail of schd_RsrcsList.
 *
 * The query is guarded by a SIGALRM of GETRSRCS_CONNECT_TIME seconds.  Any
 * alarm that was pending on entry is re-armed before returning, shortened
 * by the time spent in this function (but never to less than 1 second).
 *
 * Returns the Resources entry, or NULL if memory could not be allocated,
 * the resource monitor could not be contacted, any reply was missing, or
 * the host name could not be copied.
 */
Resources *
schd_get_resources(char *exechost)
  {
  char *id = "schd_get_resources";
  Resources *rptr, *new_rsrcs;
  int  rm;
  char *response = NULL;
  int  badreply   = 0;
  int  local_errno = 0;
  int  handler_set = 0; /* Non-zero once our SIGALRM handler is installed. */

  struct sigaction act, oact;
  unsigned int remain;  /* Time remaining in any old alarm(). */
  time_t  then;  /* When this alarm() was started. */

  /*
   * Check for a local copy of the resources being available already.
   * If so, just return a reference to that Resources structure.
   */

  if (schd_RsrcsList != NULL)
    {
    for (rptr = schd_RsrcsList; rptr != NULL; rptr = rptr->next)
      if (strcmp(rptr->exechost, exechost) == 0)
        return (rptr);
    }

  schd_timestamp("get_rsrcs");

  /*
   * No cached resource information for 'exechost'.  Need to query the
   * host for its information.
   */

  if ((new_rsrcs = (Resources *)malloc(sizeof(Resources))) == NULL)
    {
    (void)sprintf(log_buffer, "Unable to alloc space for Resources.");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    return (NULL); /* Can't get the information - nowhere to store it. */
    }

  memset((void *)new_rsrcs, 0, sizeof(Resources));

  act.sa_flags = 0;
  act.sa_handler = connect_interrupt;
  sigemptyset(&act.sa_mask);
  remain = 0;
  then = 0;

  /*
   * Set the alarm, and maintain some idea of how long was left on any
   * previously set alarm.  If the handler cannot be installed, the
   * caller's alarm and handler are left completely untouched.
   */

  if (sigaction(SIGALRM, &act, &oact) == 0)
    {
    handler_set = 1;
    remain = alarm(GETRSRCS_CONNECT_TIME);
    then = time(NULL);
    }

  if ((rm = openrm(exechost, 0)) == -1)
    {
    (void)sprintf(log_buffer,
                  "Unable to contact resmom@%s ", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    badreply = 1;
    goto bail;
    }

  /*
   * Turn off full response.  Responses will be received in the order in
   * which they are sent.
   */
  fullresp(0);

  /* Build a list of all the resources about which we want information. */

  addreq(rm, "loadave");

  addreq(rm, "availmem");

  addreq(rm, "physmem");

  addreq(rm, "ncpus");

  addreq(rm, "tmpdir");

  addreq(rm, "arch");

  /*
   * Get the values back from the resource monitor.  Replies are consumed
   * in the same order the requests were queued above; the first missing
   * reply aborts the whole query.
   */

  /* Receive LOADAVE response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->loadave = atof(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(loadave), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive AVAILMEM response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->freemem = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(freemem), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive PHYSMEM response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->mem_total = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(realmem), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive NCPUS response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->ncpus_total = atoi(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(ncpus), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive TMPDIR response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    new_rsrcs->tmpdir = schd_val2byte(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(tmpdir), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

  /* Receive ARCH response from resource monitor. */
  response = getreq_err(&local_errno, rm);

  if (response != NULL)
    {
    /*
     * NOTE(review): the schd_strdup() result is stored unchecked, and
     * 'arch' is not released on the exechost-copy failure path below --
     * confirm ownership rules for schd_strdup() before changing that.
     */
    new_rsrcs->arch = schd_strdup(response);
    (void)free(response);
    }
  else
    {
    (void)sprintf(log_buffer, "bad return from getreq(arch), %d, %d",
                  local_errno, errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    badreply = 1;
    goto bail;
    }

bail:

  /* Disconnect from the resource monitor. */

  if (rm >= 0)  /* resmom handle "0" is valid in RPP. */
    closerm(rm);

  /*
   * Only undo what was actually done: if our handler was never installed
   * then 'oact' holds no valid data and no alarm of ours is pending.
   */
  if (handler_set)
    {
    /* And unset the alarm and handler. */
    alarm(0);

    sigaction(SIGALRM, &oact, NULL);

    /* Reset the old alarm, taking into account how much time has passed. */
    if (remain)
      {
      time_t       now = time(NULL);
      unsigned int elapsed = (now > then) ? (unsigned int)(now - then) : 0;

      DBPRT(("%s: old alarm had %u secs remaining, %u elapsed, ", id,
             remain, elapsed));

      /*
       * How much time remains even after the time spent above?  'remain'
       * is unsigned, so compare before subtracting rather than letting
       * it wrap around.  If the previous alarm would already have
       * expired, schedule it in 1 second (close enough, hopefully).
       */
      remain = (elapsed < remain) ? (remain - elapsed) : 1;

      DBPRT(("reset to %u secs\n", remain));

      alarm(remain);
      }
    }

  /*
   * Verify all the data came back as expected; if not, abort this
   * iteration of the scheduler.
   */

  if (badreply)
    {
    (void)sprintf(log_buffer,
                  "Got bad info from mom@%s - skipping this node", exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    free(new_rsrcs);
    return (NULL);
    }

  /* Make a copy of the hostname for the resources struct. */
  new_rsrcs->exechost = schd_strdup(exechost);

  if (new_rsrcs->exechost == NULL)
    {
    (void)sprintf(log_buffer, "Unable to copy exechost %s to rsrcs",
                  exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    free(new_rsrcs);
    return (NULL);
    }

  if (schd_RsrcsList == NULL)
    {
    schd_RsrcsList  = new_rsrcs; /* Start the list. */
    }
  else
    {
    for (rptr = schd_RsrcsList; rptr->next != NULL; rptr = rptr->next)
      /* Find the last element in the list. */ ;

    rptr->next = new_rsrcs;
    }

  /* Next pointer for the tail of the list points to nothing. */
  new_rsrcs->next = NULL;

  return (new_rsrcs);
  }
Esempio n. 7
0
/*
 * Scheduler daemon entry point.
 *
 * Parses the command line, performs the security and environment set-up,
 * opens the log, binds the listening socket, takes the "sched.lock" lock
 * file, installs the signal handlers, runs schedinit(), optionally
 * daemonizes (unless PBSDEBUG is set in the environment), and then loops
 * on select() handing each server command to schedule() until schedule()
 * returns non-zero.
 *
 * Options:
 *   -L logfile      log file name
 *   -S port         scheduler service port (1..65535)
 *   -R port         resource monitor port  (1..65535)
 *   -d homedir      server home directory (default PBS_SERVER_HOME)
 *   -p dbfile       file that receives stdout/stderr once daemonized
 *   -c configfile   configuration file passed to read_config()
 *   -a seconds      alarm time for one scheduling cycle (> 0)
 *   --version       print the package version and exit
 *
 * Exits with status 0 on a normal finish and 1 on a start-up failure.
 */
int main(

  int   argc,
  char *argv[])

  {
  char  *id = "main";

  struct hostent *hp;
  int  go, c, errflg = 0;
  int  val;                     /* signed scratch value for numeric options */
  const int max_port = 65535;   /* largest value that fits in sin_port */
  int  lockfds;
  int  t = 1;
  pid_t  pid;
  gid_t  gid;
  char  host[100];
  char  *homedir = PBS_SERVER_HOME;
  unsigned int port;
  char  *dbfile = "sched_out";

  struct sigaction act;
  sigset_t oldsigs;
  caddr_t curr_brk = 0;
  caddr_t next_brk;
  extern char *optarg;
  extern int optind, opterr;
  extern int rpp_fd;
  fd_set fdset;

  int  schedinit(int argc, char **argv);
  int  schedule(int com, int connector);

  glob_argv = argv;
  alarm_time = 180;

  /* The following is code to reduce security risks                */
  /* move this to a place where nss_ldap doesn't hold a socket yet */

  c = sysconf(_SC_OPEN_MAX);

  while (--c > 2)
    (void)close(c); /* close any file desc left open by parent */

  port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp",
                     PBS_SCHEDULER_SERVICE_PORT);

  pbs_rm_port = get_svrport(PBS_MANAGER_SERVICE_NAME, "tcp",
                            PBS_MANAGER_SERVICE_PORT);

  strcpy(pbs_current_user, "Scheduler");

  msg_daemonname = strdup("pbs_sched");

  opterr = 0;

  while ((c = getopt(argc, argv, "L:S:R:d:p:c:a:-:")) != EOF)
    {
    switch (c)
      {

      case '-':

        /* only "--version" is recognised; never hand a NULL to strcmp() */
        if ((optarg != NULL) && (strcmp(optarg, "version") == 0))
          {
          fprintf(stderr, "version: %s\n", PACKAGE_VERSION);
          exit(0);
          }

        /* missing, empty, or unknown long option */
        errflg = 1;

        break;

      case 'L':
        logfile = optarg;
        break;

      case 'S':
        /* parse as signed so negative and oversized ports are rejected */
        val = atoi(optarg);

        if ((val <= 0) || (val > max_port))
          {
          fprintf(stderr,
                  "%s: illegal port\n", optarg);
          errflg = 1;
          }
        else
          {
          port = (unsigned int)val;
          }

        break;

      case 'R':
        val = atoi(optarg);

        if ((val <= 0) || (val > max_port))
          {
          (void)fprintf(stderr, "%s: bad -R %s\n",
                        argv[0], optarg);
          return 1;
          }

        pbs_rm_port = val;

        break;

      case 'd':
        homedir = optarg;
        break;

      case 'p':
        dbfile = optarg;
        break;

      case 'c':
        configfile = optarg;
        break;

      case 'a':
        val = atoi(optarg);

        if (val <= 0)
          {
          fprintf(stderr,
                  "%s: bad alarm time\n", optarg);
          errflg = 1;
          }
        else
          {
          alarm_time = val;
          }

        break;

      case '?':
        errflg = 1;
        break;
      }
    }

  if (errflg)
    {
    fprintf(stderr, "usage: %s %s\n", argv[0], usage);
    exit(1);
    }

#ifndef DEBUG
  if (IamRoot() == 0)
    {
    return (1);
    }
#endif        /* DEBUG */

  /* Save the original working directory for "restart" */
  if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL)
    {
    fprintf(stderr, "cannot get current working directory\n");
    exit(1);
    }

  /* log_buffer temporarily holds the sched_priv path until after chdir() */
  (void)sprintf(log_buffer, "%s/sched_priv", homedir);
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
  c  = chk_file_sec(log_buffer, 1, 0, S_IWGRP | S_IWOTH, 1, NULL);
  c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, NULL);

  if (c != 0) exit(1);

#endif  /* not DEBUG and not NO_SECURITY_CHECK */
  if (chdir(log_buffer) == -1)
    {
    perror("chdir");
    exit(1);
    }

  (void)sprintf(path_log,   "%s/sched_logs", homedir);
  (void)sprintf(path_acct,   "%s/%s", log_buffer, PBS_ACCT);


  /* The following is code to reduce security risks                */
  /* start out with standard umask, system resource limit infinite */

  umask(022);

  if (setup_env(PBS_ENVIRON) == -1)
    exit(1);

  /* use a real gid_t: an int is not guaranteed to match its layout */
  gid = getgid();

  (void)setgroups(1, &gid); /* secure suppl. groups */

#ifndef DEBUG
#ifdef _CRAY
  (void)limit(C_JOB,      0, L_CPROC, 0);

  (void)limit(C_JOB,      0, L_CPU,   0);

  (void)limit(C_JOBPROCS, 0, L_CPU,   0);

  (void)limit(C_PROC,     0, L_FD,  255);

  (void)limit(C_JOB,      0, L_FSBLK, 0);

  (void)limit(C_JOBPROCS, 0, L_FSBLK, 0);

  (void)limit(C_JOB,      0, L_MEM  , 0);

  (void)limit(C_JOBPROCS, 0, L_MEM  , 0);

#else /* not  _CRAY */
    {

    struct rlimit rlimit;

    rlimit.rlim_cur = RLIM_INFINITY;
    rlimit.rlim_max = RLIM_INFINITY;
    (void)setrlimit(RLIMIT_CPU,   &rlimit);
    (void)setrlimit(RLIMIT_FSIZE, &rlimit);
    (void)setrlimit(RLIMIT_DATA,  &rlimit);
    (void)setrlimit(RLIMIT_STACK, &rlimit);
#ifdef  RLIMIT_RSS
    (void)setrlimit(RLIMIT_RSS  , &rlimit);
#endif  /* RLIMIT_RSS */
#ifdef  RLIMIT_VMEM
    (void)setrlimit(RLIMIT_VMEM  , &rlimit);
#endif  /* RLIMIT_VMEM */
    }
#endif /* not _CRAY */
#endif /* DEBUG */

  if (log_open(logfile, path_log) == -1)
    {
    fprintf(stderr, "%s: logfile could not be opened\n", argv[0]);
    exit(1);
    }

  if (gethostname(host, sizeof(host)) == -1)
    {
    log_err(errno, id, "gethostname");
    die(0);
    }

  if ((hp = gethostbyname(host)) == NULL)
    {
    log_err(errno, id, "gethostbyname");
    die(0);
    }

  if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
    {
    log_err(errno, id, "socket");
    die(0);
    }

  if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR,
                 (char *)&t, sizeof(t)) == -1)
    {
    log_err(errno, id, "setsockopt");
    die(0);
    }

  saddr.sin_family = AF_INET;

  saddr.sin_port = htons((unsigned short)port);
  memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);

  if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
    log_err(errno, id, "bind");
    die(0);
    }

  if (listen(server_sock, 5) < 0)
    {
    log_err(errno, id, "listen");
    die(0);
    }

  okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t));

  if (okclients == NULL)
    {
    /* addclient() below stores into this table */
    log_err(errno, id, "calloc");
    die(0);
    }

  addclient("localhost");   /* who has permission to call MOM */
  addclient(host);

  if (configfile)
    {
    if (read_config(configfile) != 0)
      die(0);
    }

  lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644);

  if (lockfds < 0)
    {
    log_err(errno, id, "open lock file");
    exit(1);
    }

  lock_out(lockfds, F_WRLCK);

  fullresp(0);

  if (sigemptyset(&allsigs) == -1)
    {
    perror("sigemptyset");
    exit(1);
    }

  if (sigprocmask(SIG_SETMASK, &allsigs, NULL) == -1)   /* unblock */
    {
    perror("sigprocmask");
    exit(1);
    }

  act.sa_flags = 0;

  sigaddset(&allsigs, SIGHUP);    /* remember to block these */
  sigaddset(&allsigs, SIGINT);    /* during critical sections */
  sigaddset(&allsigs, SIGTERM);   /* so we don't get confused */
  act.sa_mask = allsigs;

  act.sa_handler = restart;       /* do a restart on SIGHUP */
  sigaction(SIGHUP, &act, NULL);

  act.sa_handler = toolong; /* handle an alarm call */
  sigaction(SIGALRM, &act, NULL);

  act.sa_handler = die;           /* bite the biscuit for all following */
  sigaction(SIGINT, &act, NULL);
  sigaction(SIGTERM, &act, NULL);

  /*
   * Catch these signals to ensure we core dump even if
   * our rlimit for core dumps is set to 0 initially.
   *
   * Chris Samuel - VPAC
   * [email protected] - 29th July 2003
   *
   * Now conditional on the PBSCOREDUMP environment variable
   */

  if (getenv("PBSCOREDUMP"))
    {
    act.sa_handler = catch_abort;   /* make sure we core dump */

    sigaction(SIGSEGV, &act, NULL);
    sigaction(SIGBUS, &act, NULL);
    sigaction(SIGFPE, &act, NULL);
    sigaction(SIGILL, &act, NULL);
    sigaction(SIGTRAP, &act, NULL);
    sigaction(SIGSYS, &act, NULL);
    }

  /*
   *  Local initialization stuff
   */

  if (schedinit(argc, argv))
    {
    (void) sprintf(log_buffer,
                   "local initialization failed, terminating");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    exit(1);
    }

  if (getenv("PBSDEBUG") == NULL)
    {
    /* release the lock across fork(); it is re-taken below */
    lock_out(lockfds, F_UNLCK);

#ifdef DISABLE_DAEMONS
    pid = getpid();
#else
    if ((pid = fork()) == -1)
      {
      /* error on fork */
      perror("fork");

      exit(1);
      }
    else
    if (pid > 0)               /* parent exits */
      {
      exit(0);
      }

    if ((pid = setsid()) == -1)
      {
      perror("setsid");

      exit(1);
      }
#endif  /*  DISABLE_DAEMONS  */

    lock_out(lockfds, F_WRLCK);

    if (freopen(dbfile, "a", stdout) == NULL)
      {
      /* report the file that actually failed (the -p output file) */
      perror(dbfile);

      exit(1);
      }


    setvbuf(stdout, NULL, _IOLBF, 0);

    dup2(fileno(stdout), fileno(stderr));
    }
  else
    {
    setvbuf(stdout, NULL, _IOLBF, 0);
    setvbuf(stderr, NULL, _IOLBF, 0);

    pid = getpid();
    }

  if (freopen("/dev/null", "r", stdin) == NULL)
    {
    perror("opening /dev/null");

    exit(1);
    }

  /* write scheduler's pid into lockfile */

  (void)sprintf(log_buffer, "%ld\n", (long)pid);

  if (write(lockfds, log_buffer, strlen(log_buffer) + 1) != (ssize_t)(strlen(log_buffer) + 1))
    {
    perror("writing to lockfile");

    exit(1);
    }

#if (PLOCK_DAEMONS & 2)
  (void)plock(PROCLOCK); /* lock daemon into memory */

#endif

  sprintf(log_buffer, "%s startup pid %ld", argv[0], (long)pid);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  for (go = 1;go;)
    {
    int cmd;

    /*
     * select() overwrites the set on return, so rebuild it from scratch
     * on every pass instead of accumulating stale descriptors.
     */
    FD_ZERO(&fdset);

    if (rpp_fd != -1)
      FD_SET(rpp_fd, &fdset);

    FD_SET(server_sock, &fdset);

    if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1)
      {
      if (errno != EINTR)
        {
        log_err(errno, id, "select");
        die(0);
        }

      continue;
      }

    if (rpp_fd != -1 && FD_ISSET(rpp_fd, &fdset))
      {
      if (rpp_io() == -1)
        log_err(errno, id, "rpp_io");
      }

    if (!FD_ISSET(server_sock, &fdset))
      continue;

    cmd = server_command();

    /* hold off SIGHUP/SIGINT/SIGTERM for the duration of the cycle */
    if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1)
      log_err(errno, id, "sigprocmaskSIG_BLOCK)");

    alarm(alarm_time);

    if (schedule(cmd, connector)) /* magic happens here */
      go = 0;

    alarm(0);

    if (connector >= 0 && server_disconnect(connector))
      {
      log_err(errno, id, "server_disconnect");
      die(0);
      }

    /* forget the handle so a stale one is never disconnected twice */
    connector = -1;

    next_brk = (caddr_t)sbrk(0);

    if (next_brk > curr_brk)
      {
      sprintf(log_buffer, "brk point %ld", (long)next_brk);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER,
                 id, log_buffer);
      curr_brk = next_brk;
      }

    if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1)
      log_err(errno, id, "sigprocmask(SIG_SETMASK)");
    }

  sprintf(log_buffer, "%s normal finish pid %ld",

          argv[0],
          (long)pid);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  close(server_sock);

  exit(0);
  }  /* END main() */