Beispiel #1
0
/******************************************************************************************
* @brief	sendAlarm
*
* purpose:	send a trap and log the process information
*
******************************************************************************************/
void ServerMonitor::sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue)
{
	ServerMonitor serverMonitor;
	Oam oam;

	//Log this event 
	LoggingID lid(SERVER_MONITOR_LOG_ID);
	MessageLog ml(lid);
	Message msg;
	Message::Args args;
	args.add(alarmItem);
	args.add(", sensor value out-of-range: ");
	args.add(sensorValue);

	// get current server name
	string moduleName;
	oamModuleInfo_t st;
	try {
		st = oam.getModuleInfo();
		moduleName = boost::get<0>(st);
	}
	catch (...) {
		moduleName = "Unknown Server";
	}

	// check if there is an active alarm above the reporting theshold 
	// that needs to be cleared
	serverMonitor.checkAlarm(alarmItem, alarmID);

	// check if Alarm is already active, don't resend
	if ( !( oam.checkActiveAlarm(alarmID, moduleName, alarmItem)) ) {

		SNMPManager alarmMgr;
		// send alarm
		alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action);

		args.add(", Alarm set: ");
		args.add(alarmID);
	}

	// output log
	msg.format(args);
	ml.logWarningMessage(msg);

	return;
}
Beispiel #2
0
void hardwareMonitor(int IPMI_SUPPORT)
{
	ServerMonitor serverMonitor;
	string data[10];
	string SensorName;
	float SensorValue;
    string Units;
	string SensorStatus;
	float lowFatal;
	float lowCritical;
	float lowWarning;
	float highWarning;
	float highCritical;
	float highFatal;
	char *p;

	if( IPMI_SUPPORT == 0) {
		int returnCode = system("ipmitool sensor list > /tmp/harwareMonitor.txt");
		if (returnCode) {
			// System error, Log this event 
			LoggingID lid(SERVER_MONITOR_LOG_ID);
			MessageLog ml(lid);
			Message msg;
			Message::Args args;
			args.add("Error running ipmitool sensor list!!!");
			msg.format(args);
			ml.logWarningMessage(msg);
			while(TRUE)
				sleep(10000);
		}
	}
	else
	{
		while(TRUE)
			sleep(10000);
	}

	// register for Heartbeat monitoring
/*	try {
		ProcHeartbeat procheartbeat;
		procheartbeat.registerHeartbeat(HW_HEARTBEAT_ID);
	}
	catch (exception& ex)
	{
		string error = ex.what();
		LoggingID lid(SERVER_MONITOR_LOG_ID);
		MessageLog ml(lid);
		Message msg;
		Message::Args args;
		args.add("EXCEPTION ERROR on registerHeartbeat: ");
		args.add(error);
		msg.format(args);
		ml.logErrorMessage(msg);
	}
	catch(...)
	{
		LoggingID lid(SERVER_MONITOR_LOG_ID);
		MessageLog ml(lid);
		Message msg;
		Message::Args args;
		args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!");
		msg.format(args);
		ml.logErrorMessage(msg);
	}
*/
	// loop forever reading the hardware status
	while(TRUE)
	{
		// parse output file
	
		ifstream File ("/tmp/harwareMonitor.txt");
		if (!File){
			// System error, Log this event 
			LoggingID lid(SERVER_MONITOR_LOG_ID);
			MessageLog ml(lid);
			Message msg;
			Message::Args args;
			args.add("Error opening /tmp/harwareMonitor.txt!!!");
			msg.format(args);
			ml.logWarningMessage(msg);
			sleep(300);
			continue;
		}
		
		char line[200];
		while (File.getline(line, 200))
		{
			// parse the line
			int f = 0;
			p = strtok(line,"|");
			while (p) 
			{
				data[f]=p;
				data[f] = serverMonitor.StripWhitespace(data[f]);
				p = strtok (NULL, "|");
				f++;
			}
	
			if( f == 0 )
				// nothing on this line, skip
				continue;
	
			SensorName = data[0];
			SensorValue = atof(data[1].c_str());
			Units = data[2];
			SensorStatus = data[3];
			lowFatal = atof(data[4].c_str());
			lowCritical = atof(data[5].c_str());
			lowWarning = atof(data[6].c_str());
			highWarning = atof(data[7].c_str());
			highCritical = atof(data[8].c_str());
			highFatal = atof(data[9].c_str());

			// check status and issue apporiate alarm if needed
			if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") ) {
				// Status error, check for warning or critical levels

				if ( SensorValue >= highFatal ) {
					// issue critical alarm and send message to shutdown Server
					serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue);
					serverMonitor.sendMsgShutdownServer();
				}
				else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) )
					// issue major alarm
					serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue);

				else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) )
					// issue minor alarm
					serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue);

				else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) )
					// issue minor alarm
					serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue);

				else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) )
					// issue major alarm
					serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue);

				else if ( SensorValue <= lowFatal ) {
					// issue critical alarm and send message to shutdown Server
					serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue);
					serverMonitor.sendMsgShutdownServer();
				}
				else
					// check if there are any active alarms that needs to be cleared
					serverMonitor.checkAlarm(SensorName);
			}
			else
				// check if there are any active alarms that needs to be cleared
				serverMonitor.checkAlarm(SensorName);

		} //end of parsing file while
		
		File.close();

		// send heartbeat message
/*		try {
			ProcHeartbeat procheartbeat;
			procheartbeat.sendHeartbeat(HW_HEARTBEAT_ID);

			LoggingID lid(SERVER_MONITOR_LOG_ID);
			MessageLog ml(lid);
			Message msg;
			Message::Args args;
			args.add("Sent Heartbeat Msg");
			msg.format(args);
			ml.logDebugMessage(msg);
		}
		catch (exception& ex)
		{
			string error = ex.what();
			if ( error.find("Disabled") == string::npos ) {
				LoggingID lid(SERVER_MONITOR_LOG_ID);
				MessageLog ml(lid);
				Message msg;
				Message::Args args;
				args.add("EXCEPTION ERROR on sendHeartbeat: ");
				args.add(error);
				msg.format(args);
				ml.logErrorMessage(msg);
			}
		}
		catch(...)
		{
			LoggingID lid(SERVER_MONITOR_LOG_ID);
			MessageLog ml(lid);
			Message msg;
			Message::Args args;
			args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!");
			msg.format(args);
			ml.logErrorMessage(msg);
		}
*/
		// sleep
		sleep(MONITOR_PERIOD);
	} //end of forever while loop
}