Пример #1
0
/*
 * call_ftw_for_dev() resets the device count and walks the /dev tree with
 * ftw(), which hands every pathname (together with its status buffer) to
 * gdev().
 *
 * The function returns only when ftw() reports success (0).  Any other
 * result is logged and the process exits with status 1.
 */
static void
call_ftw_for_dev(void)
{
    int gdev();
    int walk_status;

    /* Start from an empty table; gdev() is handed every entry under /dev. */
    ndev = 0;
    walk_status = ftw("/dev", gdev, 17);

    if (walk_status == 0) {
        /* successful return, devl populated */
        return;
    }

    if (walk_status == 1) {
        SYSLOG0(" ftw() encountered problem\n");
    } else if (walk_status == -1) {
        SYSLOG1(" ftw() failed, %s\n", strerror(errno));
    } else {
        SYSLOG1(" ftw() unexpected return, rcode=%d\n", walk_status);
    }

    exit(1);
}
Пример #2
0
/*
 * readata() tries to load the cached /dev table from psfile into the
 * globals ndev (entry count) and devl (entry array).
 *
 * The cache is considered stale when it cannot be fstat()ed, is empty,
 * /dev cannot be stat()ed, or the cache's modification time is not newer
 * than both the mtime and ctime of /dev.  A stale cache is rejected unless
 * rd_only is set, in which case the old data is believed anyway.
 *
 * Returns 1 when devl/ndev were loaded from the cache, 0 when the cache is
 * unusable and the caller has to rebuild the table.  Exits with status 1
 * if memory for the table cannot be allocated.
 */
int
readata()
{
    struct stat sbuf1, sbuf2;
    int fd;

    if ((fd = open(psfile, O_RDONLY)) == -1)
        return (0);

    if (fstat(fd, &sbuf1) < 0 || sbuf1.st_size == 0 ||
        stat("/dev", &sbuf2) == -1 || sbuf1.st_mtime <= sbuf2.st_mtime ||
        sbuf1.st_mtime <= sbuf2.st_ctime) {

        if (!rd_only) {                /* if read-only, believe old data */
            (void) close(fd);
            return (0);
        }
    }

    /* Read /dev data from psfile. */
    if (read_tmp_file(fd, (char *) &ndev, sizeof (ndev)) == 0)  {
        (void) close(fd);
        return (0);
    }

    /*
     * The count comes straight from a file on disk, so do not trust it.
     * A non-positive count, or one whose byte size would overflow size_t,
     * means the cache is empty or corrupt: report "no usable data" so the
     * caller rebuilds the table, rather than requesting a bogus (or
     * zero-sized) allocation and exiting.
     */
    if (ndev <= 0 || (size_t)ndev > (size_t)-1 / sizeof (*devl)) {
        ndev = 0;
        (void) close(fd);
        return (0);
    }

    /* free(NULL) is a no-op, so no guard is needed. */
    free(devl);

    if ((devl = (struct devl *)malloc(ndev * sizeof (*devl))) == NULL) {
        SYSLOG1("malloc() for device table failed, %s\n", strerror(errno));
        exit(1);
    }
    if (read_tmp_file(fd, (char *)devl, ndev * sizeof (*devl)) == 0)  {
        (void) close(fd);
        return (0);
    }

    (void) close(fd);
    return (1);
}
Пример #3
0
Файл: migd.c Проект: npe9/sprite
/*
 * Migd_GatherLoad --
 *
 *	Refresh the current load vector (curVecPtr) and, when warranted,
 *	send it to the global daemon.
 *
 *	In order, the routine:
 *	  1. Remembers the previous allowMigration, noInput and foreignProcs
 *	     values, refills the vector with GetStats(), timestamps it and
 *	     passes it to ExamineLoads().
 *	  2. Calls Migd_Evict(TRUE) when noInput went from above
 *	     migd_Parms.noInput to below it and none of ignoreInput,
 *	     migd_NeverEvict or refuseMigration is set.
 *	  3. When the iteration counter is 0, allowMigration changed, or
 *	     foreignProcs moved between zero and non-zero: re-reads the
 *	     kernel's migration state, optionally sleeps a random sub-second
 *	     interval, writes the vector to migdGlobalDesc under an
 *	     alarm-based timeout, tries ContactGlobal() if the write failed,
 *	     updates currentInfo.state and calls CheckMessages().
 *	  4. Advances the iteration counter modulo writeRate.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	May evict foreign processes via Migd_Evict().  Exits the process if
 *	Sys_Stats(), signal() or setitimer() fails, if gettimeofday() fails
 *	while debugging output is enabled, or if a failed write cannot be
 *	recovered (migd_Quit set or ContactGlobal() fails).
 */
void
Migd_GatherLoad()
{
    int oldAllow;
    int	oldInput;
    int	oldForeign;
    static int iteration = 0;	/* Persists across calls; 0 forces a send. */
    int numWritten;
    int error;
    int status;
    
    
    /* Snapshot the old values so changes can be detected below. */
    oldAllow = curVecPtr->allowMigration;
    oldInput = curVecPtr->noInput;
    oldForeign = curVecPtr->foreignProcs;
    if (migd_Debug > 2) {
	/*
	 * NOTE(review): the result of time() is printed with %d; this
	 * assumes time_t has the size of int -- confirm for the target.
	 */
	fprintf(stderr, "Migd_GatherLoad - time %d, oldAllow %d, oldInput %d\n",
	       time((int *) NULL), oldAllow, oldInput);
    }
    GetStats(curVecPtr->lengths, &curVecPtr->noInput,
	     &curVecPtr->foreignProcs);
    curVecPtr->timestamp = time((time_t *)0);
    ExamineLoads(curVecPtr);

    /*
     * Evict only on the transition of noInput from above the configured
     * threshold to below it, and only if eviction has not been disabled.
     */
    if ((oldInput > migd_Parms.noInput) &&
	(curVecPtr->noInput < migd_Parms.noInput) &&
	!ignoreInput && !migd_NeverEvict && !refuseMigration) {
	Migd_Evict(TRUE);
    }

    /*
     * Send the new load vector to the global daemon periodically,
     * or if our migration status changes, or if the number of
     * foreign processes goes from zero to non-zero or vice-versa.
     * This way the global daemon can track things like the last use
     * of a machine by a process that won't release the host when it
     * finishes.
     */
    if (iteration == 0 || (oldAllow != curVecPtr->allowMigration) ||
	(oldForeign > 0 && curVecPtr->foreignProcs == 0) ||
	(oldForeign == 0 && curVecPtr->foreignProcs > 0)) {
	if (migd_Debug > 2) {
	    fprintf(stderr,
		   "Notifying global server, iteration %d, oldAllow %d, newAllow %d, oldForeign %d, newForeign %d.\n",
		   iteration, oldAllow, curVecPtr->allowMigration,
		   oldForeign, curVecPtr->foreignProcs);
	}
	/* Restart the periodic count; it is reset again after the write. */
	iteration = 0;

	/*
	 * Get the kernel's variable determining whether to refuse
	 * migrations.  We keep rechecking periodically in case it changes.
	 */

	status = Sys_Stats(SYS_PROC_MIGRATION, SYS_PROC_MIG_GET_STATE,
			   (Address) &migd_Parms.criteria);
	if (status != SUCCESS) {
	    SYSLOG1(LOG_ERR, "Error in Sys_Stats getting migration state: %s.\n",
		    Stat_GetMsg(status));
	    exit(Compat_MapCode(status));
	}
	ParseMigStatus();

	if (curVecPtr->lengths[1] >= 1.0)  {
	    struct timeval tv;
	    struct timeval curTime;
	    
	    /*
	     * The 5-minute load average is over 1.  This could
	     * happen if there is a long-running process but it
	     * also seems to happen without anything running.
	     * Sleep a short period of time to try to
	     * keep from being in lock-step with someone else.  There's
	     * nothing too magical about the number except that it's
	     * intended to be something that other processes are unlikely
	     * to sleep for.
	     *
	     * The delay is a whole number of milliseconds, 1 to 999,
	     * expressed in microseconds.
	     */
	    tv.tv_sec = 0;
	    tv.tv_usec = ((random() % 999) + 1) * 1000;
	    /* NOTE(review): the lone ';' below is an empty statement. */
;
	    if (migd_Debug > 2) {
		if (gettimeofday(&curTime,
				 (struct timezone *) NULL) < 0) {
		    perror("Error in gettimeofday");
		    exit(1);
		}
		/*
		 * NOTE(review): tv_sec/tv_usec are printed with %d;
		 * confirm their types are int-sized on the target.
		 */
		fprintf(stderr,
			"Sleeping %d usec to avoid lock step, time %d.%d.\n",
			tv.tv_usec, curTime.tv_sec, curTime.tv_usec);
	    }
	    /*
	     * select() with no descriptor sets is used purely as a
	     * sub-second sleep; a failure is only reported when debugging.
	     */
	    if (select(0, (int *) NULL, (int *) NULL, (int *) NULL,
			  &tv) < 0) {
		if (migd_Debug > 2) {
		    perror("select");
		}
	    }
	    if (migd_Debug > 2) {
		if (gettimeofday(&curTime,
				 (struct timezone *) NULL) < 0) {
		    perror("Error in gettimeofday");
		    exit(1);
		}
		fprintf(stderr, "Time is now %d.%d.\n", curTime.tv_sec,
			curTime.tv_usec);
	    }
	}
	if (migd_Debug > 3) {
	    fprintf(stderr, "Writing vector to global daemon.\n");
	}

	/*
	 * OK, here's the tricky part.  We don't want our write to wait
	 * indefinitely, so we set an alarm.  But just waking up won't
	 * cause Fs_Write to return an error, so we have to longjmp.
	 * So we set the signal handler, set the timer, and setjmp, then
	 * after the write we reverse the process.
	 *
	 * The non-zero setjmp() return is presumably reached when
	 * WriteAlarm() longjmp()s to writejmp -- WriteAlarm is not visible
	 * here, confirm.  That path is reported as a failed write with
	 * errno EIO.
	 */
	if (setjmp(writejmp)) {
	    numWritten = -1;
	    errno = EIO;
	} else {
	    /*
	     * NOTE(review): the handler pointer returned by signal() is
	     * cast to int and tested for < 0 instead of being compared
	     * with SIG_ERR; confirm this is safe where pointers are wider
	     * than int.
	     */
	    if ((int) signal(SIGALRM, WriteAlarm) < 0) {
		syslog(LOG_ERR, "Error setting signal handler: %s.\n",
		       strerror(errno));
		exit(1);
	    }
	    if (setitimer(ITIMER_REAL, &timeOutTimer,
			  (struct itimerval *) NULL) == -1) {
		syslog(LOG_ERR, "Error setting interval timer: %s.\n",
		       strerror(errno));
		exit(1);
	    }
	    numWritten = write(migdGlobalDesc, (char *) curVecPtr,
			       sizeof(Mig_LoadVector));
	}
	/*
	 * Preserve the write's errno across the timer and handler teardown,
	 * which may overwrite it, then restore it.
	 */
	error = errno;
	if (setitimer(ITIMER_REAL, &noTimer,
		      (struct itimerval *) NULL) == -1) {
	    syslog(LOG_ERR, "Error disabling interval timer: %s.\n",
		   strerror(errno));
	    exit(1);
	}
	(void) signal(SIGALRM, SIG_IGN);
	errno = error;

	/*
	 * Now we're back to where we would be if all we'd done was
	 * write(), with errno and numWritten set to appropriate values.
	 */

	if (migd_Debug > 3) {
	    fprintf(stderr, "Write returned value %d.\n", numWritten);
	}
	if (numWritten < 0) {
	    if (migd_Debug > 0) {
		fprintf(stderr, "Error %d writing to global daemon: %s.\n",
		       error, strerror(error));
	    }
	    /*
	     * Drop the connection and try to re-establish it unless we are
	     * quitting.  NOTE(review): ContactGlobal() also closes
	     * migdGlobalDesc when it is >= 0, so the descriptor is closed
	     * twice on this path -- confirm that is harmless.
	     */
	    close(migdGlobalDesc);
	    if (migd_Quit || ContactGlobal() < 0) {
		fprintf(stderr, "Exiting.\n");
		exit(1);
	    }
	} else if (numWritten != sizeof(Mig_LoadVector)) {
	    /*
	     * A short write is only logged; it is not retried.
	     * NOTE(review): sizeof() is passed for a %d conversion.
	     */
	    SYSLOG2(LOG_WARNING, "short write to global daemon of %d/%d bytes.\n",
		   numWritten, sizeof(Mig_LoadVector));
	}
	iteration = 0;

	/*
	 * Check on currentInfo.state in case we have to reconnect to the
	 * global daemon or a user process reads the Mig_Info struct from
	 * us.
	 */
	if (curVecPtr->allowMigration &&
	    currentInfo.state == MIG_HOST_ACTIVE) {
	    currentInfo.state = MIG_HOST_IDLE;
	} else if (!curVecPtr->allowMigration &&
		   currentInfo.state == MIG_HOST_IDLE) {
	    currentInfo.state = refuseMigration ?
		MIG_HOST_REFUSES : MIG_HOST_ACTIVE;
	}

	/*
	 * A non-negative result from CheckMessages() is treated as an
	 * order from the global daemon to reclaim this host.
	 */
	if (CheckMessages() >= 0) {
	    if (migd_Debug > 0) {
		fprintf(stderr,
			"This host is being reclaimed by order of global migration daemon.\n");
	    }
	    Migd_Evict(FALSE);
	}

    }

    /* Count calls; the vector is sent again when this wraps to 0. */
    iteration = (iteration + 1) % writeRate;
}
Пример #4
0
Файл: migd.c Проект: npe9/sprite
/*
 * ContactGlobal --
 *
 *	Open the global daemon's pseudo-device (migd_GlobalPdevName) and
 *	announce this daemon to it with the IOC_MIG_DAEMON ioctl, retrying
 *	up to MAX_GLOBAL_CONTACTS times unless migd_Quit is set.  When the
 *	open fails with ENOENT, EIO or EINVAL and migd_NeverRunGlobal is not
 *	set, CreateGlobal() is called before retrying.
 *
 * Results:
 *	0 on success, with migdGlobalDesc holding the open descriptor.
 *	-1 on failure, with errno describing the last error and
 *	migdGlobalDesc negative (it never names a closed descriptor).
 *
 * Side effects:
 *	Closes any previously open migdGlobalDesc and destroys its event
 *	handler.  Seeds the random number generator on the first call.
 *	May sleep between attempts and may unlink the pdev on the
 *	next-to-last attempt.
 */
static int
ContactGlobal()
{
    int sleepTime;
    int status;
    int retries;
    int ioctlRetries;
    int realErrno;
    int success = 0;
    static int firstContact = 1; /* First time we are trying to reach the
				    global daemon? */
    time_t t;			 /* ctime() requires a time_t, not an int. */

    t = time((time_t *) NULL);
    if (migd_Debug > 1) {
	fprintf(stderr, "ContactGlobal - %s\n", ctime(&t));
    }

    if (firstContact) {
	/*
	 * First time we've been called. Set up seed for random
	 * numbers.
	 */
	srandom(getpid());
	firstContact = 0;
    }

    /*
     * Clean up any old descriptor, and forget its number so that a later
     * call can never destroy the handler for, or close, a descriptor that
     * has since been reused for something else.
     */
    if (migdGlobalDesc >= 0) {
	Fs_EventHandlerDestroy(migdGlobalDesc);
	(void) close(migdGlobalDesc);
	migdGlobalDesc = -1;
    }

    sleepTime = (random() & 07) + 1;
    for (retries = 1;
	 retries <= MAX_GLOBAL_CONTACTS && !migd_Quit && !success;
	 retries++) {
	migdGlobalDesc = open(migd_GlobalPdevName, O_RDWR, 0);
	if (migdGlobalDesc < 0) {
	    /*
	     * Back off (doubling the delay each time) and try once more
	     * before looking at the error.
	     */
	    if (migd_Debug > 2) {
		fprintf(stderr, "ContactGlobal - sleeping %d seconds\n",
		       sleepTime);
	    }
	    sleep(sleepTime);
	    sleepTime *= 2;
	    migdGlobalDesc = open(migd_GlobalPdevName, O_RDWR, 0);
	}
	if (migdGlobalDesc < 0) {
	    /*
	     * Capture errno immediately: the debugging output below may
	     * overwrite it before we get to examine it.
	     */
	    realErrno = errno;
	    if (migd_Debug > 0) {
		fprintf(stderr,
		       "ContactGlobal: couldn't open %s: %s\n",
		       migd_GlobalPdevName, strerror(realErrno));
	    }
	    /*
	     * If errno is ENOENT, there is not currently a master, anywhere.
	     * (When the master exits it removes the pdev.)  EIO
	     * may mean the daemon crashed.  EINVAL may mean the daemon's host
	     * crashed.  We special case EIO due to a race condition
	     * between recovery and starting daemons.
	     */
	    if (realErrno == ENOENT || realErrno == EIO ||
		realErrno == EINVAL) {
		if (retries == MAX_GLOBAL_CONTACTS - 1 &&
		    realErrno != ENOENT) {
		    /*
		     * We're getting desperate here.  We can't open
		     * the file, but we should be able to.  Remove
		     * the pdev and try one last time to create the
		     * master, since it may be that the host running
		     * the master has crashed and the name server
		     * is continually returning a bad status to us.
		     * We risk clobbering someone else who has successfully
		     * opened the pdev just before us, but there's a small
		     * window of vulnerability and by this time we're sleeping
		     * a long time.
		     */
		    (void) unlink(migd_GlobalPdevName);
		}
		if (!migd_NeverRunGlobal) {
		    if (CreateGlobal() < 0) {
			return(-1);
		    }
		}
		/*
		 * Go to start of for loop, trying to open pdev.
		 */
		continue;
	    } else {
		fprintf(stderr, "Migd_Init - Unable to contact master of global pdev: %s\n",
		       strerror(realErrno));
		errno = realErrno;
		return(-1);
	    }
	} else {
	    /*
	     * We've successfully opened the pdev.
	     * Try to tell the global master that we're a daemon.  It may say
	     * DEV_BUSY, which means that there is already a daemon.  In that
	     * case, it tells the other daemon to go away, and we will keep
	     * trying.  In some cases we may get an error doing the ioctl,
	     * such as a stale handle, in which case we close the file and
	     * go to the top again.
	     */
	    for (ioctlRetries = 1; ioctlRetries <= MAX_GLOBAL_CONTACTS;
		 ioctlRetries++) {
		status = Fs_IOControl(migdGlobalDesc, IOC_MIG_DAEMON, sizeof(Mig_Info),
				      (char *) &currentInfo,
				      0, (char *) NULL);
		if (status == DEV_BUSY) {
		    if (migd_Debug > 0) {
			fprintf(stderr, "ContactGlobal - ioctl returned busy.\n");
		    }
		    /* Random delay that grows with each busy reply. */
		    sleepTime = ((random() & 07) + 1) * ioctlRetries;
		    if (migd_Debug > 2) {
			fprintf(stderr, "ContactGlobal - sleeping %d seconds\n",
			       sleepTime);
		    }
		    sleep(sleepTime);
		} else {
		    /*
		     * An error we can't deal with, or SUCCESS.
		     */
		    break;
		}
	    }
	    if (status != SUCCESS) {
		SYSLOG1(LOG_ERR,
		       "ContactGlobal: warning: error during ioctl to global master: %s\n",
		       Stat_GetMsg(status));
		/*
		 * Drop the descriptor and mark it invalid, then set errno
		 * last so that neither the logging nor close() can disturb
		 * the value reported to the caller.
		 */
		(void) close(migdGlobalDesc);
		migdGlobalDesc = -1;
		errno = Compat_MapCode(status);
	    } else {
		/*
		 * We did it!  Leave the retry loop.
		 */
		success = 1;
		break;
	    }
	}
    }
    if (!success) {
	realErrno = errno;
	SYSLOG0(LOG_ERR, "unable to contact master; giving up.\n");
	errno = realErrno;
	return(-1);
    }

#ifdef FAST_SELECT
    Fs_EventHandlerCreate(migdGlobalDesc, FS_READ|FS_EXCEPTION,
			  HandleException, (ClientData) NULL);
#endif /* FAST_SELECT */

    if (migd_Debug > 1) {
	fprintf(stderr, "ContactGlobal - completed successfully\n");
    }

    return (0);
}