Beispiel #1
0
/*
 * MigPdev_End --
 *
 *	Tear down the pdev: hand `pdev` to Pdev_Close, clear the pointer,
 *	and remove the file named by `pdevName`.
 *
 *	A failed unlink is only reported (via SYSLOG2 at LOG_WARNING);
 *	nothing is returned to the caller.  When migPdev_Debug is above 2
 *	a trace line is written to stderr first.
 */
void
MigPdev_End()
{
    if (migPdev_Debug > 2) {
	PRINT_PID;
	fprintf(stderr, "MigPdev_End -\n");
    }

    Pdev_Close(pdev);
    pdev = NULL;

    /* Nothing more to do when the file was removed successfully. */
    if (unlink(pdevName) != -1) {
	return;
    }

    {
	SYSLOG2(LOG_WARNING, "couldn't remove %s: %s\n",
	       pdevName, strerror(errno));
    }
}
Beispiel #2
0
/*
 * get_ps_data --
 *
 *	Rebuild the cached process table.  On return the global `pstable`
 *	points to a freshly allocated array of `pstable_lines` entries,
 *	sorted with pscomp, or is PS_NULL (with `pstable_lines` == 0) when
 *	the table could not be built.
 *
 *	The existing table is kept untouched when it is no older than
 *	`cache_lifetime` (measured as cache_now - ps_cache_time).
 *
 *	Each entry of the directory named by `procdir` whose name does not
 *	start with '.' is opened and queried with the PIOCPSINFO ioctl into
 *	the global `info`; entries that cannot be opened or queried, or
 *	whose uid has no user name, are skipped.
 */
void
get_ps_data(void)
{
    ps_ldata_t *ps_last = PS_LNULL;
    ps_ldata_t *ps_head = PS_LNULL;
    ps_ldata_t *psp;
    ps_data_t  *pstp;
    static char *usrname;
    int i = 0;
    DIR *dirp;
    struct dirent *dentp;
    char pname[MAXNAMELEN];
    int pdlen;
    char *gettty();

    if (pstable != PS_NULL) {  /* Don't run ps unless we need to */
        if ((cache_now - ps_cache_time) <= cache_lifetime)
            return;
        free(pstable);
        /*
         * Clear the pointer right away: the early returns below would
         * otherwise leave a dangling pointer that the next call could
         * hand out as valid cache or free a second time.
         */
        pstable = PS_NULL;
    }

    pstable_lines = 0;
    ps_cache_time = cache_now;
    /*
     * readata() reads from the psfile; when it reports failure,
     * call_ftw_for_dev() and wrdata() are run instead (presumably to
     * regenerate that data and write it back -- confirm against their
     * definitions).
     */
    if (!readata()) {        /* get data from psfile */
        call_ftw_for_dev();
        wrdata();
    }

    /*
     * Determine which processes to print info about by searching
     * the /proc directory and looking at each process.
     */
    if ((dirp = opendir(procdir)) == NULL) {
        (void) SYSLOG0("Cannot open PROC directory\n");
        return;
    }

    /* Build "<procdir>/" once; each entry name is appended after it. */
    (void) strcpy(pname, procdir);
    pdlen = strlen(pname);
    pname[pdlen++] = '/';

    /* for each active process --- */
    while ((dentp = readdir(dirp)) != NULL) {
        int procfd;

        if (dentp->d_name[0] == '.')                /* skip . and .. */
                continue;
        /* Skip names that would not fit in pname with their terminator. */
        if ((size_t)pdlen + strlen(dentp->d_name) >= sizeof (pname))
                continue;
        (void) strcpy(pname + pdlen, dentp->d_name);
retry:
        if ((procfd = open(pname, O_RDONLY)) == -1)
                continue;

        /*
         * Get the info structure for the process and close quickly.
         */
        if (ioctl(procfd, PIOCPSINFO, (char *)&info) == -1) {
            /* close() may change errno, so remember the ioctl's error. */
            int saverr = errno;

            (void) close(procfd);
            if (saverr == EAGAIN)
                goto retry;
            /* ENOENT is not logged; every other error is. */
            if (saverr != ENOENT)
                (void) SYSLOG2("PIOCPSINFO on %s: %s\n",
                               pname, strerror(saverr));
            continue;
        }
        (void) close(procfd);
        /* Out of memory: stop scanning and publish what we have so far. */
        if ((psp = (ps_ldata_t *)malloc(sizeof (ps_ldata_t))) == PS_LNULL)
            break;
        memset((char *)psp, 0, sizeof (ps_ldata_t));
        psp->pdata.uid = info.pr_uid;
        psp->pdata.pid = info.pr_pid;
        psp->pdata.ppid = info.pr_ppid;
        psp->pdata.sz = info.pr_size;
        if (info.pr_wchan)
            sprintf(psp->pdata.wchan, "%9x", info.pr_wchan);
        else
            strcpy(psp->pdata.wchan, "         ");
        memset(&psp->pdata.stat[0], 0, STAT_SZ+1);
        if (info.pr_sname)
            psp->pdata.stat[0] = info.pr_sname;
        i = 0;
        strcpy(psp->pdata.tty, (char *)gettty(&i));
        psp->pdata.cpu = info.pr_time.tv_sec;
        strcpy(psp->pdata.cmd, info.pr_fname);

        /* Processes whose uid has no user name are dropped. */
        if ((usrname = (get_usr_name(psp->pdata.uid))) != NULL)
            strncpy(psp->pdata.usrname, usrname, USRNM_SZ);
        else {
            free(psp);
            continue;
        }

        /* strncpy does not terminate when the name fills the field. */
        psp->pdata.usrname[USRNM_SZ] = '\0';
        pstable_lines++;
        /* Append to the singly linked list headed by ps_head. */
        if (ps_last == PS_LNULL)
            ps_head = psp;
        else
            ps_last->link = psp;
        ps_last = psp;
    }

    (void) closedir(dirp);
    if ((pstable = (ps_data_t *)malloc(pstable_lines
                    * sizeof (ps_data_t))) == PS_NULL) {
        /* Keep the count consistent with the (absent) table. */
        pstable_lines = 0;
        clean_ps(ps_head);
        return;
    }
    /* Flatten the list into the array, then release the list. */
    for (pstp = pstable, psp = ps_head; psp != PS_LNULL;
                                    pstp++, psp = psp->link) {
        memcpy((char *)pstp, (char *)&(psp->pdata), sizeof (ps_data_t));
    }
    clean_ps(ps_head);
    qsort(pstable, pstable_lines, sizeof (ps_data_t), (int (*)())pscomp);
}
Beispiel #3
0
void
Migd_GatherLoad()
{
    int oldAllow;
    int	oldInput;
    int	oldForeign;
    static int iteration = 0;
    int numWritten;
    int error;
    int status;
    
    
    oldAllow = curVecPtr->allowMigration;
    oldInput = curVecPtr->noInput;
    oldForeign = curVecPtr->foreignProcs;
    if (migd_Debug > 2) {
	fprintf(stderr, "Migd_GatherLoad - time %d, oldAllow %d, oldInput %d\n",
	       time((int *) NULL), oldAllow, oldInput);
    }
    GetStats(curVecPtr->lengths, &curVecPtr->noInput,
	     &curVecPtr->foreignProcs);
    curVecPtr->timestamp = time((time_t *)0);
    ExamineLoads(curVecPtr);

    if ((oldInput > migd_Parms.noInput) &&
	(curVecPtr->noInput < migd_Parms.noInput) &&
	!ignoreInput && !migd_NeverEvict && !refuseMigration) {
	Migd_Evict(TRUE);
    }

    /*
     * Send the new load vector to the global daemon periodically,
     * or if our migration status changes, or if the number of
     * foreign processes goes from zero to non-zero or vice-versa.
     * This way the global daemon can track things like the last use
     * of a machine by a process that won't release the host when it
     * finishes.
     */
    if (iteration == 0 || (oldAllow != curVecPtr->allowMigration) ||
	(oldForeign > 0 && curVecPtr->foreignProcs == 0) ||
	(oldForeign == 0 && curVecPtr->foreignProcs > 0)) {
	if (migd_Debug > 2) {
	    fprintf(stderr,
		   "Notifying global server, iteration %d, oldAllow %d, newAllow %d, oldForeign %d, newForeign %d.\n",
		   iteration, oldAllow, curVecPtr->allowMigration,
		   oldForeign, curVecPtr->foreignProcs);
	}
	iteration = 0;

	/*
	 * Get the kernel's variable determining whether to refuse
	 * migrations.  We keep rechecking periodically in case it changes.
	 */

	status = Sys_Stats(SYS_PROC_MIGRATION, SYS_PROC_MIG_GET_STATE,
			   (Address) &migd_Parms.criteria);
	if (status != SUCCESS) {
	    SYSLOG1(LOG_ERR, "Error in Sys_Stats getting migration state: %s.\n",
		    Stat_GetMsg(status));
	    exit(Compat_MapCode(status));
	}
	ParseMigStatus();

	if (curVecPtr->lengths[1] >= 1.0)  {
	    struct timeval tv;
	    struct timeval curTime;
	    
	    /*
	     * The 5-minute load average is over 1.  This could
	     * happen if there is a long-running process but it
	     * also seems to happen without anything running.
	     * Sleep a short period of time to try to
	     * keep from being in lock-step with someone else.  There's
	     * nothing too magical about the number except that it's
	     * intended to be something that other processes are unlikely
	     * to sleep for.  
	     */
	    tv.tv_sec = 0;
	    tv.tv_usec = ((random() % 999) + 1) * 1000;
;
	    if (migd_Debug > 2) {
		if (gettimeofday(&curTime,
				 (struct timezone *) NULL) < 0) {
		    perror("Error in gettimeofday");
		    exit(1);
		}
		fprintf(stderr,
			"Sleeping %d usec to avoid lock step, time %d.%d.\n",
			tv.tv_usec, curTime.tv_sec, curTime.tv_usec);
	    }
	    if (select(0, (int *) NULL, (int *) NULL, (int *) NULL,
			  &tv) < 0) {
		if (migd_Debug > 2) {
		    perror("select");
		}
	    }
	    if (migd_Debug > 2) {
		if (gettimeofday(&curTime,
				 (struct timezone *) NULL) < 0) {
		    perror("Error in gettimeofday");
		    exit(1);
		}
		fprintf(stderr, "Time is now %d.%d.\n", curTime.tv_sec,
			curTime.tv_usec);
	    }
	}
	if (migd_Debug > 3) {
	    fprintf(stderr, "Writing vector to global daemon.\n");
	}

	/*
	 * OK, here's the tricky part.  We don't want our write to wait
	 * indefinitely, so we set an alarm.  But just waking up won't
	 * cause Fs_Write to return an error, so we have to longjmp.
	 * So we set the signal handler, set the timer, and setjmp, then
	 * after the write we reverse the process.
	 */
	if (setjmp(writejmp)) {
	    numWritten = -1;
	    errno = EIO;
	} else {
	    if ((int) signal(SIGALRM, WriteAlarm) < 0) {
		syslog(LOG_ERR, "Error setting signal handler: %s.\n",
		       strerror(errno));
		exit(1);
	    }
	    if (setitimer(ITIMER_REAL, &timeOutTimer,
			  (struct itimerval *) NULL) == -1) {
		syslog(LOG_ERR, "Error setting interval timer: %s.\n",
		       strerror(errno));
		exit(1);
	    }
	    numWritten = write(migdGlobalDesc, (char *) curVecPtr,
			       sizeof(Mig_LoadVector));
	}
	error = errno;
	if (setitimer(ITIMER_REAL, &noTimer,
		      (struct itimerval *) NULL) == -1) {
	    syslog(LOG_ERR, "Error disabling interval timer: %s.\n",
		   strerror(errno));
	    exit(1);
	}
	(void) signal(SIGALRM, SIG_IGN);
	errno = error;

	/*
	 * Now we're back to where we would be if all we'd done was
	 * write(), with errno and numWritten set to appropriate values.
	 */

	if (migd_Debug > 3) {
	    fprintf(stderr, "Write returned value %d.\n", numWritten);
	}
	if (numWritten < 0) {
	    if (migd_Debug > 0) {
		fprintf(stderr, "Error %d writing to global daemon: %s.\n",
		       error, strerror(error));
	    }
	    close(migdGlobalDesc);
	    if (migd_Quit || ContactGlobal() < 0) {
		fprintf(stderr, "Exiting.\n");
		exit(1);
	    }
	} else if (numWritten != sizeof(Mig_LoadVector)) {
	    SYSLOG2(LOG_WARNING, "short write to global daemon of %d/%d bytes.\n",
		   numWritten, sizeof(Mig_LoadVector));
	}
	iteration = 0;

	/*
	 * Check on currentInfo.state in case we have to reconnect to the
	 * global daemon or a user process reads the Mig_Info struct from
	 * us.
	 */
	if (curVecPtr->allowMigration &&
	    currentInfo.state == MIG_HOST_ACTIVE) {
	    currentInfo.state = MIG_HOST_IDLE;
	} else if (!curVecPtr->allowMigration &&
		   currentInfo.state == MIG_HOST_IDLE) {
	    currentInfo.state = refuseMigration ?
		MIG_HOST_REFUSES : MIG_HOST_ACTIVE;
	}

	if (CheckMessages() >= 0) {
	    if (migd_Debug > 0) {
		fprintf(stderr,
			"This host is being reclaimed by order of global migration daemon.\n");
	    }
	    Migd_Evict(FALSE);
	}

    }

    iteration = (iteration + 1) % writeRate;
}
Beispiel #4
0
/*
 * Migd_Init --
 *
 *	One-time start-up of the per-host migration daemon state:
 *	installs the SIGCHLD handler, fills in `currentInfo` and its load
 *	vector, derives `writeRate`, contacts the global daemon, opens the
 *	master pdev, prepares the write-timeout timers, and registers
 *	Migd_GatherLoad to run every migd_LoadInterval seconds.
 *
 * Results:
 *	0 on success.  -1 if the global daemon cannot be contacted or the
 *	pdev cannot be opened; errno is left as set by the failing call.
 */
int
Migd_Init()
{
    int realErrno;
    char fileName[FS_MAX_NAME_LENGTH];
    int status;
    struct stat atts;
    Time	period;

    if (migd_Debug > 0) {
	fprintf(stderr, "Migd_Init -\n");
    }

    signal(SIGCHLD, Reap);

    ParseMigStatus();

    /*
     * Set up the initial Mig_Info structure, and variables we're going
     * to access periodically.  By default, we're active when we
     * start up, but later there's a check for idleTime being less
     * than the threshold if it's also equal to the time since we booted.
     */
    curVecPtr = &currentInfo.loadVec;
    bzero((char *) &currentInfo, sizeof(Mig_Info));
    currentInfo.hostID = migd_HostID;
    currentInfo.migVersion = migd_Parms.version;
    currentInfo.maxProcs = 1;	/* XXX number of processors */
    curVecPtr->allowMigration = migd_AlwaysAccept;
    currentInfo.state = refuseMigration ?  MIG_HOST_REFUSES : MIG_HOST_ACTIVE;

    curVecPtr->timestamp = time((time_t *)0);

    /*
     * Get bootstamp.  Note, this should be a kernel call instead of a
     * check on the file, but this will do for the "moment".
     */
    (void) sprintf(fileName, "/hosts/%s/boottime", migd_HostName);

    status = stat(fileName, &atts);
    if (status == 0) {
	currentInfo.bootTime = atts.st_mtime;
    } else {
	SYSLOG2(LOG_ERR, "Error getting boot time from %s: %s\n", fileName,
	       strerror(errno));
	currentInfo.bootTime = 0;
    }


    /*
     * Check to see if the global daemon is prohibited.  The mere
     * existence of the marker file is what disables it.
     */
    if (!migd_NeverRunGlobal) {
	(void) sprintf(fileName,
		       "/hosts/%s/global-migd-prohibited", migd_HostName);
	status = stat(fileName, &atts);
	if (status == 0) {
	    syslog(LOG_INFO, "We will not run the global master.\n");
	    migd_NeverRunGlobal = 1;
	}
    }


    /*
     * Number of load-gathering rounds between periodic writes.
     * Migd_GatherLoad reduces its counter modulo this value, so it must
     * never be zero: guard the division against a zero load interval and
     * clamp the result to at least 1 (write on every round) when the
     * write interval is shorter than the load interval.
     */
    if (migd_LoadInterval != 0) {
	writeRate = migd_WriteInterval / migd_LoadInterval;
    } else {
	writeRate = 1;
    }
    if (writeRate < 1) {
	writeRate = 1;
    }

    GetStats(curVecPtr->lengths, &curVecPtr->noInput,
	     &curVecPtr->foreignProcs);
    curVecPtr->timestamp = time((time_t *)0);

    if (ContactGlobal() < 0) {
	/* fprintf may change errno; restore it for the caller. */
	realErrno = errno;
	fprintf(stderr, "Migd_Init - Unable to contact global daemon: %s\n",
	       strerror(realErrno));
	errno = realErrno;
	return(-1);
    }
    if (MigPdev_OpenMaster() < 0) {
	realErrno = errno;
	fprintf(stderr, "Migd_Init - Unable to initialize pdev %s: %s\n",
	       migd_LocalPdevName, strerror(realErrno));
	errno = realErrno;
	return(-1);
    }

    syslog(LOG_INFO, "running.");
    period.seconds = migd_LoadInterval;
    period.microseconds = 0;

    /*
     * Set up timeout for writes: timeOutTimer is a one-shot timer of
     * WRITE_TIMEOUT seconds, and noTimer is all zeroes, which is what
     * setitimer uses to disable a timer.
     */
    timeOutTimer.it_interval.tv_sec = 0;
    timeOutTimer.it_interval.tv_usec = 0;
    timeOutTimer.it_value.tv_sec = WRITE_TIMEOUT;
    timeOutTimer.it_value.tv_usec = 0;
    noTimer.it_interval.tv_sec = 0;
    noTimer.it_interval.tv_usec = 0;
    noTimer.it_value.tv_sec = 0;
    noTimer.it_value.tv_usec = 0;


    migd_TimeoutToken = Fs_TimeoutHandlerCreate(period, TRUE, Migd_GatherLoad,
						(ClientData) NULL);


    if (migd_Debug > 0) {
	fprintf(stderr, "Migd_Init - returning 0\n");
    }

    return(0);
}