Exemple #1
0
struct rudp *rudpOpenBound(struct sockaddr_in *sai)
/* Open up a rudp socket bound to a particular port and address.
 * Use this rather than rudpOpen if you want to wait for
 * messages at a specific address in a server or the like. */
{
struct rudp *ru = rudpOpen();
if (ru != NULL)
    {
    if (bind(ru->socket, (struct sockaddr *)sai, sizeof(*sai)) < 0)
	{
	warn("Couldn't bind rudp socket: %s", strerror(errno));
	rudpClose(&ru);
	}
    }
return ru;
}
Exemple #2
0
void paraNodeStop(char *machineList)
/* Stop node server on all machines in list. */
{
struct lineFile *lf = lineFileOpen(machineList, FALSE);
char *row[1];

while (lineFileRow(lf, row))
    {
    struct rudp *ru = rudpMustOpen();
    char *name = row[0];
    struct paraMessage pm;
    ru->maxRetries = 6;
    printf("Telling %s to quit \n", name);
    pmInitFromName(&pm, name, paraNodePort);
    pmPrintf(&pm, "%s", "quit");
    pmSend(&pm, ru);
    rudpClose(&ru);
    }
}
void paraNode()
/* paraNode - a net server. */
{
char *line;
char *command;
struct sockaddr_in sai;

/* We have to know who we are... */
hostName = getMachine();
initRandom();
getTicksToHundreths();

/* log init */
if (optionExists("log"))
    logOpenFile("paraNode", optionVal("log", NULL));
else    
    logOpenSyslog("paraNode", optionVal("logFacility", NULL));
logSetMinPriority(optionVal("logMinPriority", "info"));
logInfo("starting paraNode on %s", hostName);

/* Make job lists. */
jobsRunning = newDlList();
jobsFinished = newDlList();

/* Set up socket and self to listen to it. */
ZeroVar(&sai);
sai.sin_family = AF_INET;
sai.sin_port = htons(paraNodePort);
sai.sin_addr.s_addr = INADDR_ANY;
mainRudp = rudpMustOpenBound(&sai);
mainRudp->maxRetries = 12;

/* Event loop. */
findNow();
for (;;)
    {
    /* Get next incoming message and optionally check to make
     * sure that it's from a host we trust, and check signature
     * on first bit of incoming data. */
    if (pmReceive(&pmIn, mainRudp))
	{
	findNow();
	if (hubName == NULL || ntohl(pmIn.ipAddress.sin_addr.s_addr) == hubIp 
		|| ntohl(pmIn.ipAddress.sin_addr.s_addr) == localIp)
	    {
	    /* Host and signature look ok,  read a string and
	     * parse out first word as command. */
	    line = pmIn.data;
	    logDebug("message from %s: \"%s\"",
                     paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr)),
                     line);
	    command = nextWord(&line);
	    if (command != NULL)
		{
		if (sameString("quit", command))
		    break;
		else if (sameString("run", command))
		    doRun(line, &pmIn.ipAddress);
		else if (sameString("jobDone", command))
		    jobDone(line);
		else if (sameString("status", command))
		    doStatus();
		else if (sameString("kill", command))
		    doKill(line);
		else if (sameString("check", command))
		    doCheck(line, &pmIn.ipAddress);
		else if (sameString("resurrect", command))
		    doResurrect(line, &pmIn.ipAddress);
		else if (sameString("listJobs", command))
		    listJobs();
		else if (sameString("fetch", command))
		    doFetch(line);
                else
                    logWarn("invalid command: \"%s\"", command);
		}
	    logDebug("done command");
	    }
	else
	    {
	    logWarn("command from unauthorized host %s",
                    paraFormatIp(ntohl(pmIn.ipAddress.sin_addr.s_addr)));
	    }
	}
    }
rudpClose(&mainRudp);
}
void execProc(char *managingHost, char *jobIdString, char *reserved,
	char *user, char *dir, char *in, char *out, char *err, long long memLimit,
	char *exe, char **params)
/* This routine is the child process of doExec.
 * It spawns a grandchild that actually does the
 * work and waits on it.  It sends message to the
 * main message loop here when done. */
{
if ((grandChildId = forkOrDie()) == 0)
    {
    char *homeDir = "";

    /* Change to given user (if root) */
    changeUid(user, &homeDir);

    /* create output files just after becoming user so that errors in the rest
     * of this proc will go to the err file and be available via para
     * problems */
    setupProcStdio(in, out, err);

    if (chdir(dir) < 0)
        errnoAbort("can't chdir to %s", dir);
    setsid();
    // setpgid(0,0);
    umask(umaskVal); 

    struct rlimit rlim;
    rlim.rlim_cur = rlim.rlim_max = memLimit;
    if(setrlimit(RLIMIT_CORE, &rlim) < 0)
    perror("setrlimit"); 


    /* Update environment. */
        {
	struct hash *hash = environToHash(environ);
	hashUpdate(hash, "JOB_ID", jobIdString);
	hashUpdate(hash, "USER", user);
	hashUpdate(hash, "HOME", homeDir);
	hashUpdate(hash, "HOST", hostName);
	hashUpdate(hash, "PARASOL", "7");
	updatePath(hash, userPath, homeDir, sysPath);
        addEnvExtras(hash);
	environ = hashToEnviron(hash);
	freeHashAndVals(&hash);
	}

    randomSleep();	/* Sleep a random bit before executing this thing
                         * to help spread out i/o when a big batch of jobs
			 * hit idle cluster */
    execvp(exe, params);
    errnoAbort("execvp'ing %s", exe);
    }
else
    {
    /* Wait on executed job and send jobID and status back to 
     * main process. */
    int status = -1;
    int cid;
    struct paraMessage pm;
    struct rudp *ru = NULL;
    struct tms tms;
    unsigned long uTime = 0;
    unsigned long sTime = 0;

    if (grandChildId >= 0)
	{
	signal(SIGTERM, termHandler);
	cid = waitpid(grandChildId, &status, 0);
        if (cid < 0)
            errnoAbort("wait on grandchild failed");
	times(&tms);
	uTime = ticksToHundreths*tms.tms_cutime;
	sTime = ticksToHundreths*tms.tms_cstime;
	}
    ru = rudpOpen();
    if (ru != NULL)
	{
	ru->maxRetries = 20;
	pmInit(&pm, localIp, paraNodePort);
	pmPrintf(&pm, "jobDone %s %s %d %lu %lu", managingHost, 
	    jobIdString, status, uTime, sTime);
	pmSend(&pm, ru);
	rudpClose(&ru);
	}
    }
}