Пример #1
0
void
clusNameReq(XDR *xdrs, struct sockaddr_in *from, struct LSFHeader *reqHdr)
{
    XDR    xdrs2;
    static char buf[MSGSIZE];
    enum limReplyCode limReplyCode;
    char *sp;
    struct LSFHeader replyHdr;

    memset(&buf, 0, sizeof(buf));

    initLSFHeader_(&replyHdr);

    limReplyCode = LIME_NO_ERR;
    replyHdr.opCode  = (short) limReplyCode;
    replyHdr.refCode = reqHdr->refCode;

    /* send back my cluster name
     */
    sp = myClusterPtr->clName;

    xdrmem_create(&xdrs2, buf, MSGSIZE, XDR_ENCODE);

    if (!xdr_LSFHeader(&xdrs2, &replyHdr)
        || !xdr_string(&xdrs2, &sp, MAXLSFNAMELEN)) {
        ls_syslog(LOG_ERR, "\
%s: failed decoding message from %s", __func__, sockAdd2Str_(from));
        xdr_destroy(&xdrs2);
        return;
    }
Пример #2
0
/* lsb_getlimits()
 */
struct resLimitReply *
lsb_getlimits()
{
    XDR xdrs;
    struct LSFHeader hdr;
    char *reply;
    int cc;
    char buf[sizeof(struct LSFHeader)];
    struct resLimitReply *limitReply;

    initLSFHeader_(&hdr);
    hdr.opCode = BATCH_RESLIMIT_INFO;

    xdrmem_create(&xdrs, buf, sizeof(struct LSFHeader), XDR_ENCODE);

    if (! xdr_LSFHeader(&xdrs, &hdr)) {
        lsberrno = LSBE_XDR;
        xdr_destroy(&xdrs);
        return NULL;
    }

    reply = NULL;
    cc = callmbd(NULL,
                 buf,
                 XDR_GETPOS(&xdrs),
                 &reply,
                 &hdr,
                 NULL,
                 NULL,
                 NULL);
    if (cc < 0) {
        xdr_destroy(&xdrs);
        lsberrno = LSBE_PROTOCOL;
        return NULL;
    }
    xdr_destroy(&xdrs);

    if (hdr.opCode != LSBE_NO_ERROR) {
        FREEUP(reply);
        lsberrno = hdr.opCode;
        return NULL;
    }

    xdrmem_create(&xdrs, reply, XDR_DECODE_SIZE_(cc), XDR_DECODE);
    limitReply = calloc(1, sizeof(struct resLimitReply));
    if(!xdr_resLimitReply(&xdrs, limitReply, &hdr)) {
        lsberrno = LSBE_XDR;
        xdr_destroy(&xdrs);
        if (cc) {
            FREEUP(reply);
            FREEUP(limitReply);
        }
        return NULL;
    }

    xdr_destroy(&xdrs);
    if (cc)
        FREEUP(reply);
    return limitReply;
}
Пример #3
0
int
lsb_reconfig (int configFlag)
{
  mbdReqType mbdReqtype;
  XDR xdrs;
  char request_buf[MSGSIZE];
  char *reply_buf;
  int cc;
  struct LSFHeader hdr;
  struct lsfAuth auth;
  int tmp;

  mbdReqtype = BATCH_RECONFIG;

  if (authTicketTokens_ (&auth, NULL) == -1)
    return (-1);

  xdrmem_create (&xdrs, request_buf, MSGSIZE, XDR_ENCODE);

  initLSFHeader_ (&hdr);
  hdr.opCode = mbdReqtype;
  tmp = (short) configFlag;
  hdr.reserved = tmp;

  if (!xdr_encodeMsg (&xdrs, NULL, &hdr, NULL, 0, &auth))
    {
      lsberrno = LSBE_XDR;
      return (-1);
    }

  if ((cc = callmbd (NULL, request_buf, XDR_GETPOS (&xdrs), &reply_buf,
		     &hdr, NULL, NULL, NULL)) == -1)
    {
      xdr_destroy (&xdrs);
      return (-1);
    }
  xdr_destroy (&xdrs);
  if (cc)
    free (reply_buf);

  lsberrno = hdr.opCode;
  if (lsberrno == LSBE_NO_ERROR)
    return (0);
  else
    return (-1);
}
Пример #4
0
void
do_reboot(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_reboot()";
    char               reply_buf[MSGSIZE / 8];
    XDR                xdrs2;
    sbdReplyType       reply;
    struct LSFHeader   replyHdr;

    if (logclass & LC_TRACE)
        ls_syslog(LOG_DEBUG, "%s: Entering this routine...", fname);

    reply = ERR_NO_ERROR;
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE / 8, XDR_ENCODE);

    initLSFHeader_(&replyHdr);
    if (reqHdr->opCode == CMD_SBD_REBOOT)
        replyHdr.opCode = LSBE_NO_ERROR;
    else
        replyHdr.opCode = reply;

    if (!xdr_encodeMsg(&xdrs2, (char *) 0, &replyHdr, 0, 0, NULL)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_encodeMsg");
        xdr_destroy(&xdrs2);
        relife();
        return;
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL_M, fname, "chanWrite_");
        xdr_destroy(&xdrs2);
        relife();
        return;
    }
    ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5828,
                                        "Slave batch daemon reboot command received")); /* catgets 5828 */
    relife();
    ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5829,
                                     "Unable to relife during rebooting: %m")); /* catgets 5829 */
    xdr_destroy(&xdrs2);

}
Пример #5
0
void
do_shutdown(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_shutdown()";
    char               reply_buf[MSGSIZE / 8];
    XDR                xdrs2;
    sbdReplyType       reply;
    struct LSFHeader   replyHdr;

    reply = ERR_NO_ERROR;

    xdrmem_create(&xdrs2, reply_buf, MSGSIZE / 8, XDR_ENCODE);

    initLSFHeader_(&replyHdr);
    if (reqHdr->opCode == CMD_SBD_SHUTDOWN)
        replyHdr.opCode = LSBE_NO_ERROR;
    else
        replyHdr.opCode = reply;

    if (!xdr_encodeMsg(&xdrs2, (char *) 0, &replyHdr, 0, 0, NULL)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_encodeMsg");
        xdr_destroy(&xdrs2);
        relife();
        return;
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5835,
                                         "%s: Sending shutdown reply to master failed: %m"), /* catgets 5835 */
                  fname);
        xdr_destroy(&xdrs2);
        relife();
        return;
    }
    xdr_destroy(&xdrs2);
    ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5836,
                                        "Slave batch daemon shutdown command received")); /* catgets 5836 */
    die(SLAVE_SHUTDOWN);

}
Пример #6
0
int lsSendMsg_ (int s, int opCode, int hdrLength, char *data, char *reqBuf,
		int reqLen, bool_t (*xdrFunc)(), int (*writeFunc)(),
		struct lsfAuth *auth)
{
    struct LSFHeader hdr;
    XDR xdrs;

    initLSFHeader_(&hdr);
    hdr.opCode = opCode;
    hdr.refCode = currentSN;

    if (!data)
	hdr.length = hdrLength;

    xdrmem_create(&xdrs, reqBuf, reqLen, XDR_ENCODE);

    if (!xdr_encodeMsg(&xdrs,
                       data,
                       &hdr,
                       xdrFunc,
		       0,
                       auth)) {
	xdr_destroy(&xdrs);
	lserrno = LSE_BAD_XDR;
	return(-1);
    }

    if ((*writeFunc)(s, (char *)reqBuf, XDR_GETPOS(&xdrs)) !=
	XDR_GETPOS(&xdrs)) {
        xdr_destroy(&xdrs);
	lserrno = LSE_MSG_SYS;
        return (-2);
    }

    xdr_destroy(&xdrs);

    return (0);
}
Пример #7
0
void
do_sbdDebug(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char       fname[] = "do_sbdDebug()";
    struct debugReq    debugReq;
    char               reply_buf[MSGSIZE / 8];
    XDR                xdrs2;
    sbdReplyType       reply;
    struct LSFHeader   replyHdr;

    if (!xdr_debugReq(xdrs, &debugReq, reqHdr)) {
        reply = LSBE_XDR;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_debugReq");
    }
    else
        reply = ctrlSbdDebug(&debugReq);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE / 8, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (!xdr_encodeMsg(&xdrs2, (char *) 0, &replyHdr, 0, 0, NULL)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_encodeMsg");
        xdr_destroy(&xdrs2);
        return;
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5833,
                                         "%s: Sending  reply to master failed : %m"), /* catgets 5833 */
                  fname);
        xdr_destroy(&xdrs2);
        return;
    }
    xdr_destroy(&xdrs2);
    return;

}
Пример #8
0
struct queueInfoEnt *
lsb_queueinfo (char **queues,
               int *numQueues,
               char *hosts,
               char *users,
               int options)
{
    mbdReqType mbdReqtype;
    static struct infoReq queueInfoReq;
    static struct queueInfoReply reply;
    static struct queueInfoEnt **qInfo = NULL;
    struct queueInfoEnt **qTmp;
    XDR xdrs;
    XDR xdrs2;
    char *request_buf;
    char *reply_buf;
    int cc;
    int i;
    static struct LSFHeader hdr;
    char *clusterName = NULL;

    if (qInfo != NULL) {
        for (i = 0; i < reply.numQueues; i++) {
            xdr_lsffree(xdr_queueInfoEnt,
                        (char*)qInfo[i],
                        &hdr);
        }
    }

    if (numQueues == NULL) {
        lsberrno = LSBE_BAD_ARG;
        return NULL;
    }
    if ((queues == NULL && *numQueues > 1) || (*numQueues < 0)) {
        lsberrno = LSBE_BAD_ARG;
        return NULL;
    }

    queueInfoReq.options = 0;

    if (queueInfoReq.names) {
        FREEUP (queueInfoReq.names);
    }

    if (numQueues == NULL || *numQueues == 0)
        queueInfoReq.options |= ALL_QUEUE;
    else if (queues == NULL && *numQueues == 1)
        queueInfoReq.options |= DFT_QUEUE;

    if ((queueInfoReq.options & ALL_QUEUE)
        || (queueInfoReq.options & DFT_QUEUE)) {
        if ((queueInfoReq.names = malloc(3 * sizeof(char *))) == NULL) {
            lsberrno = LSBE_NO_MEM;
            return NULL;
        }
        queueInfoReq.names[0] = "";
        queueInfoReq.numNames = 1;
        cc = 1;
    } else {

        if ((queueInfoReq.names = calloc(*numQueues + 2,
                                         sizeof(char*))) == NULL) {
            lsberrno = LSBE_NO_MEM;
            return NULL;
        }

        queueInfoReq.numNames = *numQueues;
        for (i = 0; i < *numQueues; i++) {

            if (queues[i] && strlen(queues[i]) + 1 < MAXHOSTNAMELEN) {
                queueInfoReq.names[i] = queues[i];
            } else {
                free (queueInfoReq.names);
                queueInfoReq.names = NULL;
                lsberrno = LSBE_BAD_QUEUE;
                *numQueues = i;
                return NULL;
            }
        }
        cc = queueInfoReq.numNames;
    }

    if (users != NULL) {
        if (strlen(users) + 1 < MAX_LSB_NAME_LEN) {
            queueInfoReq.options |= CHECK_USER;
            queueInfoReq.names[cc] = users;
            cc++;
        } else {
            lsberrno = LSBE_BAD_USER;
            *numQueues = 0;
            return NULL;
        }
    }

    if (hosts != NULL) {
        if (ls_isclustername(hosts) <= 0) {
            if (strlen (hosts) + 1 < MAXHOSTNAMELEN) {
                queueInfoReq.options |= CHECK_HOST;
                queueInfoReq.names[cc] = hosts;
                cc++;
            } else {
                lsberrno = LSBE_BAD_HOST;
                *numQueues = 0;
                return NULL;
            }
        } else
            clusterName = hosts;
    }

    queueInfoReq.resReq = "";
    mbdReqtype = BATCH_QUE_INFO;

    cc = sizeof(struct infoReq) + cc * MAXHOSTNAMELEN + cc + 128;
    if ((request_buf = malloc (cc)) == NULL) {
        lsberrno = LSBE_NO_MEM;
        return NULL;
    }

    xdrmem_create(&xdrs, request_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&hdr);
    hdr.opCode = mbdReqtype;

    if (!xdr_encodeMsg(&xdrs,
                       (char *)&queueInfoReq,
                       &hdr,
                       xdr_infoReq,
                       0,
                       NULL)) {
        lsberrno = LSBE_XDR;
        xdr_destroy(&xdrs);
        free (request_buf);
        return NULL;
    }

    if ((cc = callmbd(clusterName,
                      request_buf,
                      XDR_GETPOS(&xdrs),
                      &reply_buf,
                      &hdr,
                      NULL,
                      NULL,
                      NULL)) == -1) {
        xdr_destroy(&xdrs);
        free (request_buf);
        return NULL;
    }

    xdr_destroy(&xdrs);
    free (request_buf);

    lsberrno = hdr.opCode;
    if (lsberrno == LSBE_NO_ERROR
        || lsberrno == LSBE_BAD_QUEUE) {

        xdrmem_create(&xdrs2, reply_buf, XDR_DECODE_SIZE_(cc), XDR_DECODE);

        if (!xdr_queueInfoReply(&xdrs2, &reply, &hdr)) {
            lsberrno = LSBE_XDR;
            xdr_destroy(&xdrs2);
            if (cc)
                free(reply_buf);
            *numQueues = 0;
            return NULL;
        }

        xdr_destroy(&xdrs2);

        if (cc)
            free(reply_buf);

        if (lsberrno == LSBE_BAD_QUEUE) {
            *numQueues = reply.badQueue;
            return NULL;
        }

        if ((qTmp = myrealloc(qInfo,
                              reply.numQueues
                              * sizeof(struct queueInfoEnt *))) == NULL) {
            lsberrno = LSBE_NO_MEM;
            return NULL;
        }

        qInfo = qTmp;
        for (i = 0; i < reply.numQueues; i++)
            qInfo[i] = &(reply.queues[i]);

        *numQueues = reply.numQueues;
        return qInfo[0];
    }

    if (cc)
        free(reply_buf);

    *numQueues = 0;

    return NULL;
}
Пример #9
0
int
callLim_(enum limReqCode reqCode,
         void *dsend,
         bool_t (*xdr_sfunc)(),
         void *drecv,
         bool_t (*xdr_rfunc)(),
         char *host,
         int options,
         struct LSFHeader *hdr)
{
    struct LSFHeader reqHdr;
    struct LSFHeader replyHdr;
    XDR    xdrs;
    char   sbuf[8*MSGSIZE];
    char   rbuf[MAXMSGLEN];
    char   *repBuf;
    enum limReplyCode limReplyCode;
    static char first = TRUE;
    int reqLen;

    masterLimDown = FALSE;
    if (first) {

        if (initLimSock_() < 0)
            return(-1);
        first = FALSE;

        if (genParams_[LSF_API_CONNTIMEOUT].paramValue) {
            conntimeout_ = atoi(genParams_[LSF_API_CONNTIMEOUT].paramValue);
            if (conntimeout_ <= 0)
                conntimeout_ =  CONNECT_TIMEOUT;
        }

        if (genParams_[LSF_API_RECVTIMEOUT].paramValue) {
            recvtimeout_ = atoi(genParams_[LSF_API_RECVTIMEOUT].paramValue);
            if (recvtimeout_ <= 0)
                recvtimeout_ = RECV_TIMEOUT;
        }
    }

    initLSFHeader_(&reqHdr);
    reqHdr.opCode = reqCode;

    reqHdr.refCode  = getRefNum_();
    reqHdr.version = OPENLAVA_VERSION;

    xdrmem_create(&xdrs, sbuf, 8*MSGSIZE, XDR_ENCODE);
    if (!xdr_encodeMsg(&xdrs, dsend, &reqHdr, xdr_sfunc, 0, NULL)) {
        xdr_destroy(&xdrs);
        lserrno = LSE_BAD_XDR;
        return -1;
    }

    reqLen = XDR_GETPOS(&xdrs);
    xdr_destroy(&xdrs);
    if (options & _USE_TCP_) {
        if (callLimTcp_(sbuf, &repBuf, reqLen, &replyHdr, options) < 0)
            return -1;
        if (replyHdr.length != 0)
            xdrmem_create(&xdrs, repBuf, XDR_DECODE_SIZE_(replyHdr.length),
                          XDR_DECODE);
        else
            xdrmem_create(&xdrs, rbuf, MAXMSGLEN, XDR_DECODE);
    } else {
        if (callLimUdp_(sbuf, rbuf, reqLen, &reqHdr, host, options) < 0)
            return -1;
        if (options & _NON_BLOCK_)
            return 0;
        xdrmem_create(&xdrs, rbuf, MAXMSGLEN, XDR_DECODE);
    }

    if (!(options & _USE_TCP_)) {
        if (!xdr_LSFHeader(&xdrs, &replyHdr)) {
            xdr_destroy(&xdrs);
            lserrno = LSE_BAD_XDR;
            return -1;
        }
    }

    limReplyCode = replyHdr.opCode;

    lsf_lim_version = (int)replyHdr.version;

    switch (limReplyCode) {
        case LIME_NO_ERR:
            if (drecv != NULL) {
                if (! (*xdr_rfunc) (&xdrs, drecv, &replyHdr)) {
                    xdr_destroy(&xdrs);
                    if (options & _USE_TCP_)
                        FREEUP(repBuf);
                    lserrno = LSE_BAD_XDR;
                    return -1;
                }
            }
            xdr_destroy(&xdrs);
            if (options & _USE_TCP_)
                FREEUP(repBuf);
            if (hdr != NULL)
                *hdr = replyHdr;
            return (0);

        default:
            xdr_destroy(&xdrs);
            if (options & _USE_TCP_)
                FREEUP(repBuf);
            err_return_(limReplyCode);
            return (-1);
    }

}
Пример #10
0
void
clusInfoReq(XDR *xdrs, struct sockaddr_in *from, struct LSFHeader *reqHdr)
{
    static char fname[] = "clusInfoReq()";
    XDR    xdrs2;
    char buf[MSGSIZE*2];
    enum limReplyCode limReplyCode;
    struct LSFHeader replyHdr;
    struct clusterInfoReply clusterInfoReply;
    struct clusterInfoReq clusterInfoReq;

    limReplyCode = LIME_NO_ERR;
    clusterInfoReply.clusterMatrix = NULL;

    memset(&clusterInfoReq, 0, sizeof(clusterInfoReq));
    if (!xdr_clusterInfoReq(xdrs, &clusterInfoReq, reqHdr)) {
        ls_syslog(LOG_WARNING, I18N_FUNC_FAIL, fname, "xdr_clusterInfoReq");
        limReplyCode = LIME_BAD_DATA;
        goto Reply1;
    }

    if (!masterMe && clusterInfoReq.options == FROM_MASTER){
        wrongMaster(from, buf, reqHdr, -1);
        if (clusterInfoReq.resReq)
            FREEUP(clusterInfoReq.resReq);
        return;
    }

    clusterInfoReply.shortLsInfo = getCShortInfo (reqHdr);

    clusterInfoReply.nClus = 1;
    clusterInfoReply.clusterMatrix = malloc(sizeof (struct shortCInfo));
    if (clusterInfoReply.clusterMatrix == NULL) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
        limReplyCode = LIME_NO_MEM;
        goto Reply;
    }

    {
        strcpy(clusterInfoReply.clusterMatrix[0].clName,
               myClusterPtr->clName);
        if ((myClusterPtr->status & CLUST_INFO_AVAIL) &&
            (masterMe || (!masterMe && myClusterPtr->masterPtr != NULL))) {
            clusterInfoReply.clusterMatrix[0].status = CLUST_STAT_OK;
            strcpy(clusterInfoReply.clusterMatrix[0].masterName,
                   myClusterPtr->masterPtr->hostName);

            strcpy(clusterInfoReply.clusterMatrix[0].managerName,
                   myClusterPtr->managerName);

            clusterInfoReply.clusterMatrix[0].managerId
                = myClusterPtr->managerId;
            clusterInfoReply.clusterMatrix[0].numServers
                = myClusterPtr->numHosts;
            clusterInfoReply.clusterMatrix[0].numClients
                = myClusterPtr->numClients;
            clusterInfoReply.clusterMatrix[0].resClass
                = myClusterPtr->resClass;
            clusterInfoReply.clusterMatrix[0].typeClass
                = myClusterPtr->typeClass;
            clusterInfoReply.clusterMatrix[0].modelClass
                = myClusterPtr->modelClass;
            clusterInfoReply.clusterMatrix[0].numIndx
                = myClusterPtr->numIndx;
            clusterInfoReply.clusterMatrix[0].numUsrIndx
                = myClusterPtr->numUsrIndx;
            clusterInfoReply.clusterMatrix[0].usrIndxClass
                = myClusterPtr->usrIndxClass;
            clusterInfoReply.clusterMatrix[0].nAdmins
                = myClusterPtr->nAdmins;
            clusterInfoReply.clusterMatrix[0].adminIds
                = myClusterPtr->adminIds;
            clusterInfoReply.clusterMatrix[0].admins
                = myClusterPtr->admins;
            if (reqHdr->version < 4) {
                clusterInfoReply.clusterMatrix[0].nRes = 0;
            } else {
                clusterInfoReply.clusterMatrix[0].nRes
                    = allInfo.nRes;
                clusterInfoReply.clusterMatrix[0].resBitMaps
                    = myClusterPtr->resBitMaps;
            }
            clusterInfoReply.clusterMatrix[0].nTypes = allInfo.nTypes;
            clusterInfoReply.clusterMatrix[0].hostTypeBitMaps =
                myClusterPtr->hostTypeBitMaps;
            clusterInfoReply.clusterMatrix[0].nModels = allInfo.nModels;
            clusterInfoReply.clusterMatrix[0].hostModelBitMaps =
                myClusterPtr->hostModelBitMaps;

            if (logclass & (LC_TRACE | LC_COMM))
                ls_syslog(LOG_DEBUG1, "clusterInfo:clusterInfoReply.clusterMatrix[%d].nRes=%d, name=%s", 0, clusterInfoReply.clusterMatrix[0].nRes, clusterInfoReply.clusterMatrix[0].masterName);
        } else {
            clusterInfoReply.clusterMatrix[0].status = CLUST_STAT_UNAVAIL;
            clusterInfoReply.clusterMatrix[0].masterName[0] = '\0';
            clusterInfoReply.clusterMatrix[0].managerName[0] = '\0';
            clusterInfoReply.clusterMatrix[0].managerId  = 0;
            clusterInfoReply.clusterMatrix[0].numServers = 0;
            clusterInfoReply.clusterMatrix[0].numClients = 0;
            clusterInfoReply.clusterMatrix[0].resClass  = 0;
            clusterInfoReply.clusterMatrix[0].typeClass = 0;
            clusterInfoReply.clusterMatrix[0].modelClass  = 0;
            clusterInfoReply.clusterMatrix[0].numIndx =0;
            clusterInfoReply.clusterMatrix[0].numUsrIndx  = 0;
            clusterInfoReply.clusterMatrix[0].usrIndxClass  = 0;
            clusterInfoReply.clusterMatrix[0].nAdmins =   0;
            clusterInfoReply.clusterMatrix[0].nRes =   0;
            clusterInfoReply.clusterMatrix[0].nTypes = 0;
            clusterInfoReply.clusterMatrix[0].nModels = 0;
        }
    }

Reply:
    free(clusterInfoReq.resReq);

Reply1:
    initLSFHeader_(&replyHdr);
    replyHdr.opCode  = (short) limReplyCode;
    replyHdr.refCode = reqHdr->refCode;

    xdrmem_create(&xdrs2, buf, MSGSIZE*2, XDR_ENCODE);
    if (!xdr_LSFHeader(&xdrs2, &replyHdr)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_LSFHeader");
        if (clusterInfoReply.clusterMatrix != NULL)
            free(clusterInfoReply.clusterMatrix);
        xdr_destroy(&xdrs2);
        return;
    }

    if (limReplyCode == LIME_NO_ERR) {
        if (!xdr_clusterInfoReply(&xdrs2, &clusterInfoReply, &replyHdr)) {
            ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL, fname, "xdr_clusterInfoReply");
            xdr_destroy(&xdrs2);
            if (clusterInfoReply.clusterMatrix != NULL)
                free(clusterInfoReply.clusterMatrix);
            return;
        }
    }

    if (chanSendDgram_(limSock, buf, XDR_GETPOS(&xdrs2), from) < 0) {
        ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "chanSendDgram_",
                  sockAdd2Str_(from));
        xdr_destroy(&xdrs2);
        if (clusterInfoReply.clusterMatrix != NULL)
            free(clusterInfoReply.clusterMatrix);
        return;
    }

    if (clusterInfoReply.clusterMatrix != NULL)
        free(clusterInfoReply.clusterMatrix);

    xdr_destroy(&xdrs2);
    return;

}
Пример #11
0
int
lsb_runjob(struct runJobRequest* runJobRequest) 
{
    XDR                   xdrs;
    struct LSFHeader      lsfHeader;
    struct lsfAuth        auth;
    mbdReqType            mbdReqType;
    char                  request_buf[MSGSIZE/2];
    char*                 reply_buf;
    int                   retVal;
    int                   cc;

    
    if (runJobRequest == NULL 
	|| runJobRequest->numHosts == 0 
	|| runJobRequest->hostname == NULL 
	|| runJobRequest->jobId < 0 
	|| (   runJobRequest->options != 0 
	    && ! (runJobRequest->options & 
		  (RUNJOB_OPT_NORMAL | RUNJOB_OPT_NOSTOP))))
	
    {
	lsberrno = LSBE_BAD_ARG;
	return(-1);
    }

    
    if (!( runJobRequest->options & (RUNJOB_OPT_NORMAL | RUNJOB_OPT_NOSTOP))) {
	runJobRequest->options |= RUNJOB_OPT_NORMAL;
    } 

    
    if (authTicketTokens_(&auth, NULL) == -1) {
	lsberrno = LSBE_LSBLIB;
	return (-1);
    }

    
    mbdReqType = BATCH_JOB_FORCE;

    
    xdrmem_create(&xdrs,
		  request_buf,
		  MSGSIZE/2,
		  XDR_ENCODE);

    initLSFHeader_(&lsfHeader);

    lsfHeader.opCode = mbdReqType;

    if (!xdr_encodeMsg(&xdrs,
		       (char *)runJobRequest,
		       &lsfHeader,
		       xdr_runJobReq,
		       0,
		       &auth)) {
	lsberrno = LSBE_XDR;
	xdr_destroy(&xdrs);
	return(-1);
    }


    
    if ((cc = callmbd(NULL, 
		      request_buf, 
		      XDR_GETPOS(&xdrs), 
		      &reply_buf, 
		      &lsfHeader, 
		      NULL, 
		      NULL, 
		      NULL)) == -1) {
	xdr_destroy(&xdrs);
	return(-1);
    }

    
    xdr_destroy(&xdrs);

    
    lsberrno = lsfHeader.opCode;

    if (lsberrno == LSBE_NO_ERROR)
	retVal = 0;
    else
	retVal = -1;

    return(retVal);

}
Пример #12
0
static int
signalJob_(int sigValue, LS_LONG_INT jobId, time_t period, int options)
{
    struct signalReq signalReq;
    char request_buf[MSGSIZE];
    char *reply_buf;
    XDR xdrs;
    mbdReqType mbdReqtype;
    int cc;
    struct LSFHeader hdr;
    struct lsfAuth auth;

    signalReq.jobId = jobId;

    if (authTicketTokens_(&auth, NULL) == -1)
        return -1;

    signalReq.sigValue = sigValue;
    signalReq.chkPeriod = period;
    signalReq.actFlags = options;

    signalReq.sigValue = sig_encode(signalReq.sigValue);

    mbdReqtype = BATCH_JOB_SIG;
    xdrmem_create(&xdrs, request_buf, MSGSIZE, XDR_ENCODE);

    initLSFHeader_(&hdr);
    hdr.opCode = mbdReqtype;
    if (!xdr_encodeMsg(&xdrs,
                       (char *)&signalReq,
                       &hdr,
                       xdr_signalReq,
                       0,
                       &auth)) {
        lsberrno = LSBE_XDR;
        xdr_destroy(&xdrs);
        return -1;
    }

    if ((cc = callmbd(NULL,
                      request_buf,
                      XDR_GETPOS(&xdrs),
                      &reply_buf,
                      &hdr,
                      NULL,
                      NULL,
                      NULL)) < 0) {
        xdr_destroy(&xdrs);
        return -1;
    }

    xdr_destroy(&xdrs);

    if (cc)
        free(reply_buf);

    lsberrno = hdr.opCode;
    if (lsberrno == LSBE_NO_ERROR || lsberrno == LSBE_JOB_DEP)
        return 0;

    return -1 ;
}
Пример #13
0
struct userInfoEnt *
lsb_userinfo (char **users, int *numUsers)
{
  mbdReqType mbdReqtype;
  XDR xdrs;
  struct LSFHeader hdr;
  char *reply_buf;
  char *request_buf;
  struct userInfoReply userInfoReply, *reply;
  static struct infoReq userInfoReq;
  int i, cc = 0, numReq = -1;
  char lsfUserName[MAXLINELEN];

  if (numUsers)
    {
      numReq = *numUsers;
      *numUsers = 0;
    }
  if (numReq < 0)
    {
      lsberrno = LSBE_BAD_ARG;
      return (NULL);
    }

  if (userInfoReq.names)
    free (userInfoReq.names);

  if (numUsers == NULL || numReq == 0)
    {
      userInfoReq.numNames = 0;
      if ((userInfoReq.names = (char **) malloc (sizeof (char *))) == NULL)
	{
	  lsberrno = LSBE_NO_MEM;
	  return (NULL);
	}
      userInfoReq.names[0] = "";
      cc = 1;
    }
  else if (numReq == 1 && users == NULL)
    {
      if (getLSFUser_ (lsfUserName, MAXLINELEN) != 0)
	{
	  return (NULL);
	}
      userInfoReq.numNames = 1;
      if ((userInfoReq.names = (char **) malloc (sizeof (char *))) == NULL)
	{
	  lsberrno = LSBE_NO_MEM;
	  return (NULL);
	}
      userInfoReq.names[0] = lsfUserName;
      cc = 1;
    }
  else
    {
      if ((userInfoReq.names = (char **) calloc
	   (numReq, sizeof (char *))) == NULL)
	{
	  lsberrno = LSBE_NO_MEM;
	  return (NULL);
	}
      userInfoReq.numNames = numReq;
      for (i = 0; i < numReq; i++)
	{
	  if (users[i] && strlen (users[i]) + 1 < MAXHOSTNAMELEN)
	    userInfoReq.names[i] = users[i];
	  else
	    {
	      free (userInfoReq.names);
	      lsberrno = LSBE_BAD_USER;
	      *numUsers = i;
	      return (NULL);
	    }
	}
      cc = numReq;
    }
  userInfoReq.resReq = "";


  mbdReqtype = BATCH_USER_INFO;
  cc = sizeof (struct infoReq) + cc * MAXHOSTNAMELEN + cc + 100;
  if ((request_buf = malloc (cc)) == NULL)
    {
      lsberrno = LSBE_NO_MEM;
      return (NULL);
    }
  xdrmem_create (&xdrs, request_buf, cc, XDR_ENCODE);

  initLSFHeader_ (&hdr);
  hdr.opCode = mbdReqtype;
  if (!xdr_encodeMsg (&xdrs, (char *) &userInfoReq, &hdr, xdr_infoReq,
		      0, NULL))
    {
      xdr_destroy (&xdrs);
      free (request_buf);
      lsberrno = LSBE_XDR;
      return (NULL);
    }


  if ((cc = callmbd (NULL, request_buf, XDR_GETPOS (&xdrs),
		     &reply_buf, &hdr, NULL, NULL, NULL)) == -1)
    {
      xdr_destroy (&xdrs);
      free (request_buf);
      return (NULL);
    }
  xdr_destroy (&xdrs);
  free (request_buf);

  lsberrno = hdr.opCode;
  if (lsberrno == LSBE_NO_ERROR || lsberrno == LSBE_BAD_USER)
    {
      xdrmem_create (&xdrs, reply_buf, XDR_DECODE_SIZE_ (cc), XDR_DECODE);
      reply = &userInfoReply;
      if (!xdr_userInfoReply (&xdrs, reply, &hdr))
	{
	  lsberrno = LSBE_XDR;
	  xdr_destroy (&xdrs);
	  if (cc)
	    free (reply_buf);
	  return (NULL);
	}
      xdr_destroy (&xdrs);
      if (cc)
	free (reply_buf);
      if (lsberrno == LSBE_BAD_USER)
	{
	  *numUsers = reply->badUser;
	  return (NULL);
	}
      *numUsers = reply->numUsers;
      return (reply->users);
    }

  if (cc)
    free (reply_buf);
  return (NULL);

}
Пример #14
0
int
status_job (mbdReqType reqType,
	    struct jobCard *jp, int newStatus, sbdReplyType err)
{
  static char fname[] = "status_job()";
  static int seq = 1;
  static char lastHost[MAXHOSTNAMELEN];
  int reply;
  char *request_buf;
  char *reply_buf = NULL;
  XDR xdrs;
  struct LSFHeader hdr;
  int cc;
  struct statusReq statusReq;
  int flags;
  int i;
  int len;
  struct lsfAuth *auth = NULL;

  if ((logclass & LC_TRACE) && (logclass & LC_SIGNAL))
    ls_syslog (LOG_DEBUG, "%s: Entering ... regType %d jobId %s",
	       fname, reqType, lsb_jobid2str (jp->jobSpecs.jobId));

  if (newStatus == JOB_STAT_EXIT)
    {
      jp->userJobSucc = FALSE;
    }

  if (MASK_STATUS (newStatus) == JOB_STAT_DONE)
    {
      jp->userJobSucc = TRUE;
    }

  if (IS_POST_FINISH (newStatus))
    {
      if (jp->userJobSucc != TRUE)
	{
	  return 0;
	}
    }

  if (masterHost == NULL)
    return -1;

  if (jp->notReported < 0)
    {
      jp->notReported = -INFINIT_INT;
      return (0);
    }

  statusReq.jobId = jp->jobSpecs.jobId;
  statusReq.actPid = jp->jobSpecs.actPid;
  statusReq.jobPid = jp->jobSpecs.jobPid;
  statusReq.jobPGid = jp->jobSpecs.jobPGid;
  statusReq.newStatus = newStatus;
  statusReq.reason = jp->jobSpecs.reasons;
  statusReq.subreasons = jp->jobSpecs.subreasons;
  statusReq.sbdReply = err;
  statusReq.lsfRusage = jp->lsfRusage;
  statusReq.execUid = jp->jobSpecs.execUid;
  statusReq.numExecHosts = 0;
  statusReq.execHosts = NULL;
  statusReq.exitStatus = jp->w_status;
  statusReq.execCwd = jp->jobSpecs.execCwd;
  statusReq.execHome = jp->jobSpecs.execHome;
  statusReq.execUsername = jp->execUsername;
  statusReq.queuePostCmd = "";
  statusReq.queuePreCmd = "";
  statusReq.msgId = jp->delieveredMsgId;

  if (IS_FINISH (newStatus))
    {
      if (jp->maxRusage.mem > jp->runRusage.mem)
	jp->runRusage.mem = jp->maxRusage.mem;
      if (jp->maxRusage.swap > jp->runRusage.swap)
	jp->runRusage.swap = jp->maxRusage.swap;
      if (jp->maxRusage.stime > jp->runRusage.stime)
	jp->runRusage.stime = jp->maxRusage.stime;
      if (jp->maxRusage.utime > jp->runRusage.utime)
	jp->runRusage.utime = jp->maxRusage.utime;
    }
  statusReq.runRusage.mem = jp->runRusage.mem;
  statusReq.runRusage.swap = jp->runRusage.swap;
  statusReq.runRusage.utime = jp->runRusage.utime;
  statusReq.runRusage.stime = jp->runRusage.stime;
  statusReq.runRusage.npids = jp->runRusage.npids;
  statusReq.runRusage.pidInfo = jp->runRusage.pidInfo;
  statusReq.runRusage.npgids = jp->runRusage.npgids;
  statusReq.runRusage.pgid = jp->runRusage.pgid;
  statusReq.actStatus = jp->actStatus;
  statusReq.sigValue = jp->jobSpecs.actValue;
  statusReq.seq = seq;
  seq++;
  if (seq >= MAX_SEQ_NUM)
    seq = 1;

  len = 1024 + ALIGNWORD_ (sizeof (struct statusReq));

  len += ALIGNWORD_ (strlen (statusReq.execHome)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execCwd)) + 4 +
    ALIGNWORD_ (strlen (statusReq.execUsername)) + 4;

  for (i = 0; i < statusReq.runRusage.npids; i++)
    len += ALIGNWORD_ (sizeof (struct pidInfo)) + 4;

  for (i = 0; i < statusReq.runRusage.npgids; i++)
    len += ALIGNWORD_ (sizeof (int)) + 4;

  if (logclass & (LC_TRACE | LC_COMM))
    ls_syslog (LOG_DEBUG, "%s: The length of the job message is: <%d>", fname,
	       len);

  if ((request_buf = malloc (len)) == NULL)
    {
      ls_syslog (LOG_ERR, I18N_FUNC_FAIL_M, fname, "malloc");
      return (-1);
    }

  xdrmem_create (&xdrs, request_buf, len, XDR_ENCODE);
  initLSFHeader_ (&hdr);
  hdr.opCode = reqType;

  if (!xdr_encodeMsg (&xdrs, (char *) &statusReq, &hdr, xdr_statusReq, 0,
		      auth))
    {
      ls_syslog (LOG_ERR, I18N_JOB_FAIL_S_M,
		 fname, lsb_jobid2str (jp->jobSpecs.jobId), "xdr_statusReq");
      lsb_merr2 (I18N_FUNC_FAIL, fname, "xdr_statusReq");
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      relife ();
    }

  flags = CALL_SERVER_NO_HANDSHAKE;
  if (statusChan >= 0)
    flags |= CALL_SERVER_USE_SOCKET;

  if (reqType == BATCH_RUSAGE_JOB)
    flags |= CALL_SERVER_NO_WAIT_REPLY;

  if (logclass & LC_COMM)
    ls_syslog (LOG_DEBUG1, "%s: before call_server statusChan=%d flags=%d",
	       fname, statusChan, flags);

  cc = call_server (masterHost,
		    mbd_port,
		    request_buf,
		    XDR_GETPOS (&xdrs),
		    &reply_buf,
		    &hdr,
		    connTimeout, readTimeout, &statusChan, NULL, NULL, flags);
  if (cc < 0)
    {
      statusChan = -1;
      if (!equalHost_ (masterHost, lastHost))
	{
	  if (errno != EINTR)
	    ls_syslog (LOG_DEBUG,
		       "%s: Failed to reach mbatchd on host <%s> for job <%s>: %s",
		       fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId),
		       lsb_sysmsg ());
	  strcpy (lastHost, masterHost);
	}
      xdr_destroy (&xdrs);
      FREEUP (request_buf);
      failcnt++;
      return (-1);
    }
  else if (cc == 0)
    {

    }

  failcnt = 0;
  lastHost[0] = '\0';
  xdr_destroy (&xdrs);
  FREEUP (request_buf);

  if (cc)
    free (reply_buf);

  if (flags & CALL_SERVER_NO_WAIT_REPLY)
    {

      struct timeval timeval;

      timeval.tv_sec = 0;
      timeval.tv_usec = 0;

      if (rd_select_ (chanSock_ (statusChan), &timeval) == 0)
	{
	  jp->needReportRU = FALSE;
	  jp->lastStatusMbdTime = now;
	  return 0;
	}

      CLOSECD (statusChan);

      if (logclass & LC_COMM)
	ls_syslog (LOG_DEBUG1,
		   "%s: Job <%s> rd_select() failed, assume connection broken",
		   fname, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
  reply = hdr.opCode;
  switch (reply)
    {
    case LSBE_NO_ERROR:
    case LSBE_LOCK_JOB:
      jp->needReportRU = FALSE;
      jp->lastStatusMbdTime = now;
      if (reply == LSBE_LOCK_JOB)
	{
	  if (IS_SUSP (jp->jobSpecs.jStatus))
	    jp->jobSpecs.reasons |= SUSP_MBD_LOCK;
	  else
	    ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5204, "%s: Job <%s> is in status <%x> and mbatchd wants to lock it, ignored."),	/* catgets 5204 */
		       fname,
		       lsb_jobid2str (jp->jobSpecs.jobId),
		       jp->jobSpecs.jStatus);
	}
      return (0);
    case LSBE_NO_JOB:
      if (!IS_POST_FINISH (jp->jobSpecs.jStatus))
	{
	  ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5205, "%s: Job <%s> is forgotten by mbatchd on host <%s>, ignored."), fname, lsb_jobid2str (jp->jobSpecs.jobId), masterHost);	/* catgets 5205 */
	}

      jp->notReported = -INFINIT_INT;
      return (0);
    case LSBE_STOP_JOB:
      if (jobsig (jp, SIGSTOP, TRUE) < 0)
	SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_EXIT);
      else
	{
	  SET_STATE (jp->jobSpecs.jStatus, JOB_STAT_USUSP);
	  jp->jobSpecs.reasons |= SUSP_USER_STOP;
	}
      return (-1);
    case LSBE_SBATCHD:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5206, "%s: mbatchd on host <%s> doesn't think I'm configured as a batch server when I report the status for job <%s>"),	/* catgets 5206 */
		 fname, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    default:
      ls_syslog (LOG_ERR, _i18n_msg_get (ls_catd, NL_SETN, 5207, "%s: Illegal reply code <%d> from mbatchd on host <%s> for job <%s>"),	/* catgets 5207 */
		 fname,
		 reply, masterHost, lsb_jobid2str (jp->jobSpecs.jobId));
      return (-1);
    }
}
Пример #15
0
struct jRusage *getJInfo_(int npgid, int *pgid, int options, int cpgid)
{
    static char fname[] = "lib.pim.c/getJInfo_()";
    struct jRusage *jru;

    static char pfile[MAXFILENAMELEN];
    char *myHost;
    static struct sockaddr_in pimAddr;
    struct LSFHeader sendHdr, recvHdr, hdrBuf;
    int s, cc;
    struct timeval timeOut;
    static time_t lastTime = 0, lastUpdateNow = 0;
    time_t now;
    static time_t pimSleepTime = PIM_SLEEP_TIME;
    static bool_t periodicUpdateOnly = FALSE;
    
    now = time(0);

    if (logclass & LC_PIM)
       ls_syslog(LOG_DEBUG3,"now = %ld, lastTime = %ld, sleepTime = %ld",
	   now, lastUpdateNow, pimSleepTime);
    argOptions = options;

    if (lastTime == 0) {
	struct hostent *hp;
        struct config_param *plp;

        for (plp = pimParams; plp->paramName != NULL; plp++) {
             if (plp->paramValue != NULL) {
                 FREEUP (plp->paramValue);
             }
        }
 	
	if (initenv_(pimParams, NULL) < 0) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG, "%s: initenv_() failed: %M", fname);
	    return NULL;
	}

	if ((myHost = ls_getmyhostname()) == NULL) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG,
			  "%s: ls_getmyhostname() failed: %m", fname);
	    return (NULL);
	}
	
	if (pimParams[LSF_PIM_INFODIR].paramValue)
	    sprintf(pfile, "%s/pim.info.%s",
		    pimParams[LSF_PIM_INFODIR].paramValue, myHost);
	else {
	    if (pimParams[LSF_LIM_DEBUG].paramValue) {
		if (pimParams[LSF_LOGDIR].paramValue)
		    sprintf(pfile, "%s/pim.info.%s",
			    pimParams[LSF_LOGDIR].paramValue, myHost);
		else
		    sprintf(pfile, "/tmp/pim.info.%s.%d", myHost, (int)getuid());
	    } else {
		sprintf(pfile, "/tmp/pim.info.%s", myHost);
	    }
	}
	
	if (pimParams[LSF_PIM_SLEEPTIME].paramValue) {
	    if ((pimSleepTime =
		 atoi(pimParams[LSF_PIM_SLEEPTIME].paramValue)) < 0) {
		if (logclass & LC_PIM)
		    ls_syslog(LOG_DEBUG, "LSF_PIM_SLEEPTIME value <%s> must be a positive integer, defaulting to %d", pimParams[LSF_PIM_SLEEPTIME].paramValue, PIM_SLEEP_TIME);
		pimSleepTime = PIM_SLEEP_TIME;
	    }
	}

        if (pimParams[LSF_PIM_SLEEPTIME_UPDATE].paramValue != NULL
            && strcasecmp(pimParams[LSF_PIM_SLEEPTIME_UPDATE].paramValue, "y") == 0) { 
            periodicUpdateOnly = TRUE;
            if (logclass & LC_PIM)
                ls_syslog(LOG_DEBUG, "%s: Only to call pim each PIM_SLEEP_TIME interval", fname);
        }

	if ((hp = Gethostbyname_(myHost)) == NULL) {
	    return NULL;
	}

	memset((char *) &pimAddr, 0, sizeof(pimAddr));
	memcpy((char *) &pimAddr.sin_addr, (char *) hp->h_addr,
	       (int)hp->h_length);
	pimAddr.sin_family = AF_INET;
    }


    if (now - lastUpdateNow >= pimSleepTime || (options & PIM_API_UPDATE_NOW)) {
        if (logclass & LC_PIM)
	    ls_syslog(LOG_DEBUG,"%s: update now", fname);
	lastUpdateNow = now;	

	if ((s = TcpCreate_(FALSE, 0)) < 0) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG, "%s: tcpCreate failed: %m", fname);
	    return (NULL);
	}

	if (pimPort(&pimAddr, pfile) == -1) {
	    close(s);
	    return (NULL);
	}
	
	if (b_connect_(s, (struct sockaddr *) &pimAddr, sizeof(pimAddr), 0)
	    == -1) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG, "%s: b_connect() failed: %m", fname);
	    lserrno = LSE_CONN_SYS;
	    close(s);
	    return (NULL);
	}

	initLSFHeader_(&sendHdr);
	initLSFHeader_(&recvHdr);
    
	sendHdr.opCode = options;
	sendHdr.refCode = (short) now & 0xffff;
	sendHdr.reserved = cpgid;

	if ((cc = writeEncodeHdr_(s, &sendHdr, b_write_fix)) < 0) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG,
			  "%s: writeEncodeHdr failed cc=%d: %M", fname, cc);
	    close(s);
	    return (NULL);
	}

	timeOut.tv_sec = 10;
	timeOut.tv_usec = 0;
	if ((cc = rd_select_(s, &timeOut)) < 0) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG, "%s: rd_select_ cc=%d: %m", fname, cc);
	    close(s);
	    return (NULL);
	}

	if ((cc = lsRecvMsg_(s, (char *) &hdrBuf, sizeof(hdrBuf), &recvHdr,
			     NULL, NULL, b_read_fix)) < 0) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG, "%s: lsRecvMsg_ failed cc=%d: %M",
			  fname, cc);
	    close(s);
	    return (NULL);
	}
	close(s);

	if (recvHdr.refCode != sendHdr.refCode) {
	    if (logclass & LC_PIM)
		ls_syslog(LOG_DEBUG,
			  "%s: recv refCode=%d not equal to send refCode=%d, server is not PIM",
			  fname, (int) recvHdr.refCode, (int) sendHdr.refCode);
	    return (NULL);
	}
        if (logclass & LC_PIM)
	    ls_syslog(LOG_DEBUG,"%s updated now",fname);
	if (!readPIMFile(pfile)) {
		ls_syslog(LOG_ERR, I18N_FUNC_FAIL,  fname, "readPIMFile"); 
		return(NULL);
	}
    }

    lastTime = now;

    if ((jru = readPIMInfo(npgid, pgid)) == NULL &&
	!(options & PIM_API_UPDATE_NOW) 
	&& (periodicUpdateOnly == FALSE
	    || (periodicUpdateOnly == TRUE
		&& now - lastUpdateNow >= pimSleepTime))) {
	if (hitPGid > 0) {
	   jru = getJInfo_(npgid, pgid, options | PIM_API_UPDATE_NOW, hitPGid);
	   hitPGid = 0;
	   return jru;
	}
	else {
	   return (getJInfo_(npgid, pgid, options | PIM_API_UPDATE_NOW, cpgid));
	}
    }
    return jru;
	
	
} 
Пример #16
0
void
do_switchjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_switchjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    int                i;
    sbdReplyType       reply;
    char               *cp;
    char               *word;
    char               found = FALSE;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
        reply = ERR_BAD_REQ;
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
        goto sendReply;
    }
    for (jp = jobQueHead->back; jp != jobQueHead; jp = jp->back) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {
            found = TRUE;
            break;
        }
    }
    if (!found) {
        reply = ERR_NO_JOB;
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5807,
                                         "%s: mbatchd trying to switch a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5807 */
        goto sendReply;
    }
    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
        reply = ERR_JOB_FINISH;
        goto sendReply;
    }


    cp = jobSpecs.windows;
    freeWeek(jp->week);
    while ((word = getNextWord_(&cp)) != NULL) {
        if (addWindow(word, jp->week, "switchJob jobSpecs") < 0) {
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_M, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "addWindow", word);
            freeWeek(jp->week);
            reply = ERR_BAD_REQ;
            goto sendReply;
        }
    }
    jp->windEdge = now;


    if ((jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE)
	&& !(jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE))
	for (i = 0; i < jp->jobSpecs.numToHosts; i++)
	    if (unlockHost_(jp->jobSpecs.toHosts[i]) < 0
		&& lserrno != LSE_LIM_NLOCKED)
		ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S_MM, fname,
                          lsb_jobid2str(jp->jobSpecs.jobId), "unlockHost_", jp->jobSpecs.toHosts[i]);



    strcpy(jp->jobSpecs.queue, jobSpecs.queue);
    strcpy(jp->jobSpecs.windows, jobSpecs.windows);
    jp->jobSpecs.priority = jobSpecs.priority;
    jp->jobSpecs.nice = jobSpecs.nice;
    jp->jobSpecs.jAttrib = jobSpecs.jAttrib;

    freeThresholds (&jp->jobSpecs.thresholds);
    saveThresholds (&jp->jobSpecs, &jobSpecs.thresholds);


    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
	   (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
	   sizeof(struct lsfLimit));


    strcpy (jp->jobSpecs.requeueEValues, jobSpecs.requeueEValues);
    strcpy (jp->jobSpecs.resumeCond, jobSpecs.resumeCond);
    strcpy (jp->jobSpecs.stopCond, jobSpecs.stopCond);

    lsbFreeResVal (&jp->resumeCondVal);
    if (jobSpecs.resumeCond && jobSpecs.resumeCond[0] != '\0') {
        if ((jp->resumeCondVal = checkThresholdCond (jobSpecs.resumeCond))
            == NULL)
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
		      lsb_jobid2str(jp->jobSpecs.jobId),
		      "checkThresholdCond", jobSpecs.resumeCond);
    }

    lsbFreeResVal (&jp->stopCondVal);
    if (jobSpecs.stopCond && jobSpecs.stopCond[0] != '\0') {
        if ((jp->stopCondVal = checkThresholdCond (jobSpecs.stopCond))
            == NULL)
            ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_S, fname,
		      lsb_jobid2str(jp->jobSpecs.jobId),
		      "checkThresholdCond", jobSpecs.stopCond);
    }

    if (jobSpecs.options & SUB_LOGIN_SHELL) {
	FREEUP (jp->jobSpecs.loginShell);
	jp->jobSpecs.loginShell = safeSave (jobSpecs.loginShell);
    }

    strcpy (jp->jobSpecs.suspendActCmd, jobSpecs.suspendActCmd);
    strcpy (jp->jobSpecs.resumeActCmd, jobSpecs.resumeActCmd);
    strcpy (jp->jobSpecs.terminateActCmd, jobSpecs.terminateActCmd);

    setRunLimit (jp, FALSE);
    offList ((struct listEntry *)jp);
    inJobLink (jp);

    if (reniceJob(jp) < 0)
	ls_syslog(LOG_DEBUG, "%s: renice job <%s> failed",
		  fname, lsb_jobid2str(jp->jobSpecs.jobId));

    reply = ERR_NO_ERROR;
    jobReply.jobId = jp->jobSpecs.jobId;
    jobReply.jobPid = jp->jobSpecs.jobPid;
    jobReply.jobPGid = jp->jobSpecs.jobPGid;
    jobReply.jStatus = jp->jobSpecs.jStatus;

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR)
	replyStruct = (char *) &jobReply;
    else {
	replyStruct = (char *) 0;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
		  lsb_jobid2str(jp->jobSpecs.jobId),
		  "xdr_jobReply");
	relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
		  lsb_jobid2str(jp->jobSpecs.jobId), "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    return;

}
Пример #17
0
int 
lsb_switchjob (LS_LONG_INT jobId, char *queue)
{
    struct jobSwitchReq jobSwitchReq;
    char request_buf[MSGSIZE];
    char *reply_buf;
    XDR xdrs;
    mbdReqType mbdReqtype;
    int cc;
    struct LSFHeader hdr;
    struct lsfAuth auth;


    if (jobId <= 0 || queue == 0) {
	lsberrno = LSBE_BAD_ARG;
	return(-1);
    }
    if (queue && (strlen (queue) >= MAX_LSB_NAME_LEN - 1)) {
        lsberrno = LSBE_BAD_QUEUE;
        return(-1);
    }


    if (authTicketTokens_(&auth, NULL) == -1)
	return (-1);
    
    jobSwitchReq.jobId = jobId;
    strcpy (jobSwitchReq.queue, queue);


    
    
    mbdReqtype = BATCH_JOB_SWITCH;
    xdrmem_create(&xdrs, request_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&hdr);
    hdr.opCode = mbdReqtype;
    if (!xdr_encodeMsg(&xdrs, (char *)&jobSwitchReq, &hdr, xdr_jobSwitchReq,
		       0, &auth)) {
        lsberrno = LSBE_XDR;
        return(-1);
    }

    
    if ((cc = callmbd (NULL, request_buf, XDR_GETPOS(&xdrs), &reply_buf, 
                       &hdr, NULL, NULL, NULL)) == -1)
    {
	xdr_destroy(&xdrs);
	return (-1);
    }

    xdr_destroy(&xdrs);
    lsberrno = hdr.opCode;
    if (cc)
	free(reply_buf);

    if (lsberrno == LSBE_NO_ERROR)
        return(0);
    else
	return(-1);

} 
Пример #18
0
struct lsbSharedResourceInfo *
lsb_sharedresourceinfo(char **resources, int *numResources, char *hostName, int options)
{
    static char fname[] = "lsb_sharedresourceinfo";
    static struct lsbShareResourceInfoReply lsbResourceInfoReply;
    struct resourceInfoReq  resourceInfoReq;
    int cc = 0, i;
    char *clusterName = NULL;
    static struct LSFHeader hdr;
    char *request_buf;
    char *reply_buf;
    mbdReqType mbdReqtype;
    XDR xdrs, xdrs2;

#define FREE_MEMORY \
    { \
	free(request_buf); \
	free(resourceInfoReq.resourceNames); \
    }

#define FREE_REPLY_BUFFER \
    { \
	if (cc) \
	    free(reply_buf); \
    }


    if (logclass & (LC_TRACE))
        ls_syslog(LOG_DEBUG1, "%s: Entering this routine...", fname);

    if (lsbResourceInfoReply.numResources > 0)
	xdr_lsffree(xdr_lsbShareResourceInfoReply, (char *)&lsbResourceInfoReply, &hdr);
	
    if (numResources == NULL ||
	*numResources < 0 ||
	(resources == NULL && *numResources != 0) ||
	(resources != NULL && *numResources == 0)) {
	lsberrno = LSBE_BAD_ARG;
	return (NULL);
    }
    resourceInfoReq.options = 0;

    if (*numResources == 0) {
	
	if ((resourceInfoReq.resourceNames = 
		  (char **) malloc(sizeof (char *))) == NULL) {
             lsberrno = LSBE_NO_MEM;
             return (NULL);
	}
        resourceInfoReq.resourceNames[0] = ""; 
	resourceInfoReq.numResourceNames = 1;
        cc += 2;
    } else {
        if ((resourceInfoReq.resourceNames = 
	       (char **) malloc (*numResources * sizeof(char *))) == NULL) {
             lsberrno = LSBE_NO_MEM;
             return(NULL);
        }
        for (i = 0; i < *numResources; i++) {
            if (resources[i] && strlen (resources[i]) + 1 < MAXLSFNAMELEN) {
                resourceInfoReq.resourceNames[i] = resources[i]; 
                cc += MAXLSFNAMELEN;
            } else {
		free (resourceInfoReq.resourceNames);
		lserrno = LSBE_BAD_RESOURCE;
		return (NULL);
            }
        }
	resourceInfoReq.numResourceNames = *numResources;
    }
    if (hostName != NULL) {
        if (ls_isclustername(hostName) <= 0) {          
            if (strlen (hostName) + 1 < MAXHOSTNAMELEN) {
                resourceInfoReq.hostName = hostName;
            } else {
                lsberrno = LSBE_BAD_HOST;
                return (NULL);
            }
        } else {                      
            clusterName = hostName;
            cc += MAXHOSTNAMELEN;
        }
    } else
        resourceInfoReq.hostName = " ";

    

    mbdReqtype = BATCH_RESOURCE_INFO;
    cc = sizeof(struct resourceInfoReq) + cc + 100;
    if ((request_buf = malloc (cc)) == NULL) {
        lsberrno = LSBE_NO_MEM;
        return(NULL);
    }
    xdrmem_create(&xdrs, request_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&hdr);
    hdr.opCode = mbdReqtype;
    if (!xdr_encodeMsg(&xdrs, (char*)&resourceInfoReq, &hdr, xdr_resourceInfoReq,
                       0, NULL)) {
        lsberrno = LSBE_XDR;
        xdr_destroy(&xdrs);
        FREE_MEMORY;
        return(NULL);
    }
 
    
    if ((cc = callmbd(clusterName, request_buf, XDR_GETPOS(&xdrs), &reply_buf,
                      &hdr, NULL, NULL, NULL)) == -1)
    {
        xdr_destroy(&xdrs);
        FREE_MEMORY;
        return (NULL);
    }
    FREE_MEMORY;

    
    lsberrno = hdr.opCode;
    if (lsberrno == LSBE_NO_ERROR) {
        xdrmem_create(&xdrs2, reply_buf, XDR_DECODE_SIZE_(cc), XDR_DECODE);
        if (!xdr_lsbShareResourceInfoReply(&xdrs2, &lsbResourceInfoReply, &hdr)) {
            lsberrno = LSBE_XDR;
            xdr_destroy(&xdrs2);
            FREE_REPLY_BUFFER;
            return(NULL);
        }
        xdr_destroy(&xdrs2);
        FREE_REPLY_BUFFER;
        *numResources = lsbResourceInfoReply.numResources;
        return (lsbResourceInfoReply.resources);
    }

    FREE_REPLY_BUFFER;
    return (NULL);
} 
Пример #19
0
void
do_newjob(XDR *xdrs, int chfd, struct LSFHeader *reqHdr)
{
    static char        fname[] = "do_newjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    struct jobCard     *jp;
    sbdReplyType       reply;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
	reply = ERR_BAD_REQ;
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
	goto sendReply;
    }


    for (jp = jobQueHead->forw; (jp != jobQueHead); jp = jp->forw) {
        if (jp->jobSpecs.jobId == jobSpecs.jobId) {

	    jobReply.jobId = jp->jobSpecs.jobId;
	    jobReply.jobPid = jp->jobSpecs.jobPid;
	    jobReply.jobPGid = jp->jobSpecs.jobPGid;
	    jobReply.jStatus = jp->jobSpecs.jStatus;
	    reply = ERR_NO_ERROR;
	    goto sendReply;
	}
    }

    jp = calloc(1, sizeof(struct jobCard));
    if (jp == NULL) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jobSpecs.jobId), "calloc");
	reply = ERR_MEM;
	goto sendReply;
    }

    memcpy((char *) &jp->jobSpecs, (char *) &jobSpecs,
	   sizeof(struct jobSpecs));

    jp->jobSpecs.jStatus &= ~JOB_STAT_MIG;
    jp->jobSpecs.startTime = now;
    jp->jobSpecs.reasons = 0;
    jp->jobSpecs.subreasons = 0;
    /* Initialize the core number
     */
    jp->core_num = -1;

    if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
	if (lockHosts (jp) < 0) {
	    ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                      lsb_jobid2str(jp->jobSpecs.jobId), "lockHosts");
            unlockHosts (jp, jp->jobSpecs.numToHosts);
	    reply = ERR_LOCK_FAIL;
	    freeWeek(jp->week);
	    FREEUP(jp);
	    goto sendReply;
        }
    }
    jp->runTime = 0;
    if (initJobCard(jp, &jobSpecs, (int *)&reply) < 0) {

	if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
	    unlockHosts (jp, jp->jobSpecs.numToHosts);
	}
	FREEUP(jp);
	goto sendReply;
    }

    jp->execJobFlag = 0;

    if (jp->runTime < 0) {
        jp->runTime = 0;
    }
    jp->execGid = 0;
    jp->execUsername[0] = '\0';
    jp->jobSpecs.execUid = -1;
    jp->jobSpecs.execUsername[0] = '\0';

    if (jp->jobSpecs.jobSpoolDir[0] != '\0') {
        char *tmp;

        if ((tmp = getUnixSpoolDir (jp->jobSpecs.jobSpoolDir)) == NULL) {

            jp->jobSpecs.jobSpoolDir[0] = '\0';
        }
    }

    if ((logclass & LC_TRACE) && jp->jobSpecs.jobSpoolDir[0] != 0) {
        ls_syslog(LOG_DEBUG,
                  "%s: the SpoolDir for  job <%s>  is %s \n",
                  fname, lsb_jobid2str(jp->jobSpecs.jobId),
                  jp->jobSpecs.jobSpoolDir);
    }
    if (jp->jobSpecs.options & SUB_PRE_EXEC)
	SBD_SET_STATE(jp, (JOB_STAT_RUN | JOB_STAT_PRE_EXEC))
        else
            SBD_SET_STATE(jp, JOB_STAT_RUN);

    reply = job_exec(jp, chfd);

    if (reply != ERR_NO_ERROR) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "job_exec");
	if (jp->jobSpecs.jAttrib & Q_ATTRIB_EXCLUSIVE) {
            unlockHosts (jp, jp->jobSpecs.numToHosts);
	}
	deallocJobCard(jp);
    } else {
	jobReply.jobId = jp->jobSpecs.jobId;
	jobReply.jobPid = jp->jobSpecs.jobPid;
	jobReply.jobPGid = jp->jobSpecs.jobPGid;
	jobReply.jStatus = jp->jobSpecs.jStatus;
    }


sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    replyStruct = (reply == ERR_NO_ERROR) ? (char *) &jobReply : (char *) NULL;
    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobReply");
	lsb_merr(_i18n_msg_get(ls_catd , NL_SETN, 5804,
			       "Fatal error: xdr_jobReply() failed; sbatchd relifing")); /* catgets 5804 */
	relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
	ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5805,
					 "%s: Sending jobReply (len=%d) to master failed: %m"), /* catgets 5805 */
		  fname, XDR_GETPOS(&xdrs2));
    }

    xdr_destroy(&xdrs2);


    if (reply == ERR_NO_ERROR && !daemonParams[LSB_BSUBI_OLD].paramValue &&
	PURE_INTERACTIVE(&jp->jobSpecs)) {
  	if (status_job (BATCH_STATUS_JOB, jp, jp->jobSpecs.jStatus,
		        ERR_NO_ERROR) < 0) {
            jp->notReported++;
	}
    }

}
Пример #20
0
void
do_sigjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_sigjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSig      jobSig;
    sbdReplyType       reply;
    struct jobReply    jobReply;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp = NULL;
    char               found = FALSE;
    int                cc;
    int                sigValue;
    int                savedActReasons;
    int                savedActSubReasons;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));
    if (!xdr_jobSig(xdrs, &jobSig, reqHdr)) {
	reply = ERR_BAD_REQ;
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSig");

	goto Reply1;
    }

    jobSig.sigValue = sig_decode(jobSig.sigValue);
    sigValue = jobSig.sigValue;

    if (logclass & LC_SIGNAL)
        ls_syslog(LOG_DEBUG, "do_sigJob: sigValue =%d", sigValue);

    for (jp = jobQueHead->forw; (jp != jobQueHead); jp = jp->forw) {
        if (jp->jobSpecs.jobId != jobSig.jobId)
            continue;
        found = TRUE;
        break;
    }
    if (found == FALSE) {
        reply = ERR_NO_JOB;
        jp = NULL;
        goto Reply1;
    }

    if (jobSig.reasons & SUSP_MBD_LOCK) {

        jp->jobSpecs.reasons = jobSig.reasons;
        jp->jobSpecs.subreasons = jobSig.subReasons;
        savedActReasons = jp->actReasons;
        savedActSubReasons = jp->actSubReasons;
        jp->actReasons = jobSig.reasons;
        jp->actSubReasons = jobSig.subReasons;
    }


    if (jp->postJobStarted) {
        reply = ERR_NO_ERROR;
        goto Reply1;
    }

    if (IS_FINISH(jp->jobSpecs.jStatus)) {
        reply = ERR_NO_ERROR;
        goto Reply1;
    }

    if (jp->jobSpecs.jobPGid == -1) {
        SBD_SET_STATE(jp, JOB_STAT_EXIT);
        reply = ERR_NO_ERROR;
        goto Reply;
    }

    if (!JOB_STARTED(jp)) {
        if (isSigTerm(sigValue) == TRUE) {
            if ((cc = jobSigStart (jp, sigValue, jobSig.actFlags, jobSig.chkPeriod, NO_SIGLOG)) < 0)
           	reply = ERR_SIG_RETRY;
    	    else
            	reply = ERR_NO_ERROR;

            goto Reply;
        }

        reply = ERR_SIG_RETRY;

        if (logclass & LC_EXEC)
            ls_syslog(LOG_DEBUG, "%s: Retry signal %s for job <%s>",
                      fname, getLsbSigSymbol(sigValue),
                      lsb_jobid2str(jp->jobSpecs.jobId));
        goto Reply1;
    }

    if (IS_PEND(jp->jobSpecs.jStatus)) {
        reply = ERR_SIG_RETRY;
        goto Reply1;
    }

    if (jp->jobSpecs.actPid || (jp->jobSpecs.jStatus & JOB_STAT_MIG)) {

        if ((cc = jobSigStart(jp,
                              sigValue,
                              jobSig.actFlags,
                              jobSig.chkPeriod,
                              NO_SIGLOG)) < 0)
            reply = ERR_SIG_RETRY;
        else {
            jp->jobSpecs.jStatus &= ~JOB_STAT_MIG;
            reply = ERR_NO_ERROR;
        }
        goto Reply;
    }

    if ((cc = jobSigStart(jp,
                          sigValue,
                          jobSig.actFlags,
                          jobSig.chkPeriod,
                          NO_SIGLOG)) < 0)
        reply = ERR_SIG_RETRY;
    else
        reply = ERR_NO_ERROR;

Reply:
    sbdlog_newstatus(jp);

Reply1:

    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);

    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR) {
        jobReply.jobPid = jp->jobSpecs.jobPid;
        jobReply.actPid = jp->jobSpecs.actPid;
        jobReply.jobId = jp->jobSpecs.jobId;
        jobReply.jobPGid = jp->jobSpecs.jobPGid;
        jobReply.jStatus = jp->jobSpecs.jStatus;
        jobReply.reasons = jp->jobSpecs.reasons;
        jobReply.actStatus = jp->actStatus;
        replyStruct = (char *) &jobReply;
    } else {
        if (reply != ERR_NO_JOB)
            if  ((jp != NULL) && (jobSig.reasons & SUSP_MBD_LOCK)) {
                jp->actReasons = savedActReasons;
                jp->actSubReasons = savedActSubReasons;
            }
        replyStruct = (char *) 0;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
        ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
                  lsb_jobid2str(jp->jobSpecs.jobId), "xdr_jobReply");
        relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5821,
                                         "%s: Sending jobReply (len=%d) to master failed for job <%s>: %m"), fname, XDR_GETPOS(&xdrs2), lsb_jobid2str(jobSig.jobId)); /* catgets 5821 */
    }
    if (jp != NULL)
        jp->actStatus = ACT_NO;

    xdr_destroy(&xdrs2);

    return;
}
Пример #21
0
void
do_probe(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char         fname[] = "do_probe()";
    char                reply_buf[MSGSIZE];
    XDR                 xdrs2;
    struct LSFHeader    replyHdr;
    struct sbdPackage   sbdPackage;
    struct jobSpecs     *jobSpecs;
    int                 i;
    struct lsfAuth      *auth = NULL;

    if (reqHdr->length == 0)
        return;

    initLSFHeader_(&replyHdr);
    replyHdr.opCode = ERR_NO_ERROR;
    jobSpecs = NULL;

    if (!xdr_sbdPackage(xdrs, &sbdPackage, reqHdr)) {
        ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_sbdPackage");
        relife();
    } else {
        if (sbdPackage.numJobs) {
            jobSpecs = my_calloc(sbdPackage.numJobs,
                                 sizeof(struct jobSpecs), fname);
            for (i = 0; i < sbdPackage.numJobs; i++) {
                if (!xdr_arrayElement(xdrs, (char *) &(jobSpecs[i]),
                                      reqHdr, xdr_jobSpecs)) {
                    replyHdr.opCode = ERR_BAD_REQ;
                    ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5815,
                                                     "%s: %s(%d) failed for %d jobs"), /* catgets 5815 */
                              fname, "xdr_arrayElement", i, sbdPackage.numJobs);
                    break;
                }
                refreshJob(&(jobSpecs[i]));
                xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs[i], reqHdr);
            }
        }
    }
    if (replyHdr.opCode == ERR_NO_ERROR)
	if (!xdr_sbdPackage1(xdrs, &sbdPackage, reqHdr)) {
	    ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_sbdPackage1");
	    relife();
	}
    if (replyHdr.opCode == ERR_NO_ERROR) {
        if (myStatus & NO_LIM) {
	    replyHdr.opCode = ERR_NO_LIM;
        }
    }
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);

    if (!xdr_encodeMsg(&xdrs2, NULL, &replyHdr, NULL, 0, auth)) {
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_encodeMsg");
	relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL_M, fname, "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    if (jobSpecs != NULL)
	free(jobSpecs);


    getManagerId(&sbdPackage);

    mbdPid = sbdPackage.mbdPid;
    sbdSleepTime = sbdPackage.sbdSleepTime;
    retryIntvl = sbdPackage.retryIntvl;
    preemPeriod = sbdPackage.preemPeriod;
    pgSuspIdleT = sbdPackage.pgSuspIdleT;
    maxJobs = sbdPackage.maxJobs;
    uJobLimit = sbdPackage.uJobLimit;
    rusageUpdateRate = sbdPackage.rusageUpdateRate;
    rusageUpdatePercent = sbdPackage.rusageUpdatePercent;
    jobTerminateInterval = sbdPackage.jobTerminateInterval;


    for (i = 0; i < sbdPackage.nAdmins; i++)
	FREEUP(sbdPackage.admins[i]);
    FREEUP(sbdPackage.admins);

    return;
}
Пример #22
0
void
do_modifyjob(XDR * xdrs, int chfd, struct LSFHeader * reqHdr)
{
    static char        fname[] = "do_switchjob()";
    char               reply_buf[MSGSIZE];
    XDR                xdrs2;
    struct jobSpecs    jobSpecs;
    struct jobReply    jobReply;
    sbdReplyType       reply;
    char               found = FALSE;
    struct LSFHeader   replyHdr;
    char               *replyStruct;
    struct jobCard     *jp;
    struct lsfAuth     *auth = NULL;

    memset(&jobReply, 0, sizeof(struct jobReply));

    if (!xdr_jobSpecs(xdrs, &jobSpecs, reqHdr)) {
	reply = ERR_BAD_REQ;
	ls_syslog(LOG_ERR, I18N_FUNC_FAIL, fname, "xdr_jobSpecs");
	goto sendReply;
    }
    for (jp = jobQueHead->back; jp != jobQueHead; jp = jp->back)
	if (jp->jobSpecs.jobId == jobSpecs.jobId) {
	    found = TRUE;
	    break;
	}
    if (!found) {
	reply = ERR_NO_JOB;
	ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5808,
                                         "%s: mbatchd trying to modify a non-existent job <%s>"), fname, lsb_jobid2str(jobSpecs.jobId)); /* catgets 5808 */
	goto sendReply;
    }
    if (jp->jobSpecs.jStatus & (JOB_STAT_DONE | JOB_STAT_EXIT)) {
	reply = ERR_JOB_FINISH;
	goto sendReply;
    }
    if ((lsbJobCpuLimit != 1) &&
	((jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl
	  != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxl) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh
	  != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_maxh) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl
	  != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curl) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh
	  != jobSpecs.lsfLimits[LSF_RLIMIT_CPU].rlim_curh)
	    )) {
        ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5809, "%s, LSB_JOB_CPULIMIT is not set for the host, job <%s>, CPU limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
	memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
	       (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_CPU],
	       sizeof(struct lsfLimit));
    }
    if ((lsbJobMemLimit != 1) &&
	((jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl
	  != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxl) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh
	  != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_maxh) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl
	  != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curl) ||
	 (jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh
	  != jobSpecs.lsfLimits[LSF_RLIMIT_RSS].rlim_curh)
	    )) {
	ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd, NL_SETN, 5810, "%s, LSB_JOB_MEMLIMIT is not set for the host, job <%s>, memory limit not modified"), fname, lsb_jobid2str(jobSpecs.jobId));
    } else {
	memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
	       (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RSS],
	       sizeof(struct lsfLimit));
    }

    memcpy((char *) &jp->jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
	   (char *) &jobSpecs.lsfLimits[LSF_RLIMIT_RUN],
	   sizeof(struct lsfLimit));
    setRunLimit(jp, FALSE);
    if (strcmp(jp->jobSpecs.outFile, jobSpecs.outFile) ||
	!(strcmp(jobSpecs.outFile, "/dev/null")))
    {
	strcpy(jp->jobSpecs.outFile, jobSpecs.outFile);
	if (strcmp(jobSpecs.outFile, "/dev/null") ||
	    (jobSpecs.options & SUB_OUT_FILE)) {
	    jp->jobSpecs.options |= SUB_OUT_FILE;
	}
	else {
	    jp->jobSpecs.options &= ~SUB_OUT_FILE;
	}
    }
    if (strcmp(jp->jobSpecs.errFile, jobSpecs.errFile))
    {
	strcpy(jp->jobSpecs.errFile, jobSpecs.errFile);
	if (!strcmp(jp->jobSpecs.errFile, "/dev/null")
	    && !(jobSpecs.options & SUB_ERR_FILE)) {
            jp->jobSpecs.options &= ~SUB_ERR_FILE;
	}
    }

    if (jobSpecs.options & SUB_RERUNNABLE) {
	jp->jobSpecs.options |= SUB_RERUNNABLE;
    } else {
	jp->jobSpecs.options &= ~SUB_RERUNNABLE;
    }

sendReply:
    xdr_lsffree(xdr_jobSpecs, (char *)&jobSpecs, reqHdr);
    xdrmem_create(&xdrs2, reply_buf, MSGSIZE, XDR_ENCODE);
    initLSFHeader_(&replyHdr);
    replyHdr.opCode = reply;
    if (reply == ERR_NO_ERROR)
	replyStruct = (char *) &jobReply;
    else {
	replyStruct = (char *) 0;
    }

    if (!xdr_encodeMsg(&xdrs2, replyStruct, &replyHdr, xdr_jobReply, 0, auth)) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
		  lsb_jobid2str(jp->jobSpecs.jobId),
		  "xdr_jobReply");
	relife();
    }

    if (chanWrite_(chfd, reply_buf, XDR_GETPOS(&xdrs2)) <= 0) {
	ls_syslog(LOG_ERR, I18N_JOB_FAIL_S_M, fname,
		  lsb_jobid2str(jp->jobSpecs.jobId), "chanWrite_");
    }

    xdr_destroy(&xdrs2);

    return;

}