Пример #1
0
/******************************************************************************************
* @brief	sendAlarm
*
* purpose:	Log an out-of-range sensor reading and, unless the same alarm is already
*			active for this module, send the alarm report through the SNMP manager.
*
* @param alarmItem		item the alarm is reported against
* @param alarmID		alarm to raise
* @param action			action value passed unchanged to SNMPManager::sendAlarmReport
* @param sensorValue	sensor reading that is included in the log message
*
******************************************************************************************/
void ServerMonitor::sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue)
{
	ServerMonitor monitor;
	Oam oam;

	// Start building the log entry; it is written whether or not an alarm is sent.
	LoggingID loggingId(SERVER_MONITOR_LOG_ID);
	MessageLog logger(loggingId);
	Message logMessage;
	Message::Args logArgs;
	logArgs.add(alarmItem);
	logArgs.add(", sensor value out-of-range: ");
	logArgs.add(sensorValue);

	// Look up the local module name; any failure falls back to a placeholder.
	string moduleName;
	try {
		const oamModuleInfo_t moduleInfo = oam.getModuleInfo();
		moduleName = boost::get<0>(moduleInfo);
	}
	catch (...) {
		moduleName = "Unknown Server";
	}

	// Give checkAlarm() the chance to clear an active alarm that sits above
	// the reporting threshold (behaviour of checkAlarm is defined elsewhere).
	monitor.checkAlarm(alarmItem, alarmID);

	// Only send the alarm when it is not already active for this module.
	const bool alreadyActive = oam.checkActiveAlarm(alarmID, moduleName, alarmItem);
	if ( !alreadyActive ) {
		SNMPManager alarmMgr;
		alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action);

		logArgs.add(", Alarm set: ");
		logArgs.add(alarmID);
	}

	// Emit the warning-level log entry.
	logMessage.format(logArgs);
	logger.logWarningMessage(logMessage);
}
Пример #2
0
/******************************************************************************************
* @brief	checkDiskAlarm
*
* purpose:	Clear the active disk-usage alarms on alarmItem that are more severe than
*			the level given in alarmID.
*
* @param alarmItem	item whose disk alarms are examined
* @param alarmID	current level: NO_ALARM clears HIGH, MED and LOW;
*					DISK_USAGE_LOW clears HIGH and MED; DISK_USAGE_MED clears HIGH;
*					any other value clears nothing
*
******************************************************************************************/
void ServerMonitor::checkDiskAlarm(string alarmItem, ALARMS alarmID)
{
	Oam oam;
	// Not referenced below; retained because the effect of constructing it is
	// not visible from here -- TODO confirm it can be removed.
	ServerMonitor serverMonitor;

	// Look up the local module name; any failure falls back to a placeholder.
	string serverName;
	try {
		const oamModuleInfo_t moduleInfo = oam.getModuleInfo();
		serverName = boost::get<0>(moduleInfo);
	}
	catch (...) {
		serverName = "Unknown Server";
	}

	// Decide which severities have to be cleared. Each case deliberately falls
	// through to the next one, so a lower current level clears more alarms.
	bool clearHigh = false;
	bool clearMed = false;
	bool clearLow = false;

	switch (alarmID) {
		case NO_ALARM:
			clearLow = true;
			// fall through
		case DISK_USAGE_LOW:
			clearMed = true;
			// fall through
		case DISK_USAGE_MED:
			clearHigh = true;
			break;
		default:
			// none to clear
			break;
	}

	// Clear in the order HIGH, MED, LOW, and only alarms that are actually active.
	if ( clearHigh && oam.checkActiveAlarm(DISK_USAGE_HIGH, serverName, alarmItem) )
		clearAlarm(alarmItem, DISK_USAGE_HIGH);

	if ( clearMed && oam.checkActiveAlarm(DISK_USAGE_MED, serverName, alarmItem) )
		clearAlarm(alarmItem, DISK_USAGE_MED);

	if ( clearLow && oam.checkActiveAlarm(DISK_USAGE_LOW, serverName, alarmItem) )
		clearAlarm(alarmItem, DISK_USAGE_LOW);
}
Пример #3
0
/******************************************************************************************
* @brief	sendResourceAlarm
*
* purpose:	Send a resource-usage alarm and log it, unless the alarm is already active.
*
* @param alarmItem	resource the alarm is reported against
* @param alarmID	alarm to raise
* @param action		action value passed unchanged to SNMPManager::sendAlarmReport
* @param usage		usage percentage that is included in the log message
*
* @return	true when the alarm was sent and logged, false when it was already active
*
******************************************************************************************/
bool ServerMonitor::sendResourceAlarm(string alarmItem, ALARMS alarmID, int action, int usage)
{
	ServerMonitor monitor;
	Oam oam;

	// Start building the log entry; it is only written when an alarm is sent.
	LoggingID loggingId(SERVER_MONITOR_LOG_ID);
	MessageLog logger(loggingId);
	Message logMessage;
	Message::Args logArgs;
	logArgs.add(alarmItem);
	logArgs.add(" usage at percentage of ");
	logArgs.add(usage);

	// Look up the local module name; any failure falls back to a placeholder.
	string moduleName;
	try {
		const oamModuleInfo_t moduleInfo = oam.getModuleInfo();
		moduleName = boost::get<0>(moduleInfo);
	}
	catch (...) {
		moduleName = "Unknown Server";
	}

	// Let the matching check routine clear an active alarm above the reporting
	// threshold. Items with any other name skip this step.
	if (alarmItem == "CPU")
		monitor.checkCPUAlarm(alarmItem, alarmID);
	else if (alarmItem == "Local Disk" || alarmItem == "External")
		monitor.checkDiskAlarm(alarmItem, alarmID);
	else if (alarmItem == "Local Memory")
		monitor.checkMemoryAlarm(alarmItem, alarmID);
	else if (alarmItem == "Local Swap")
		monitor.checkSwapAlarm(alarmItem, alarmID);

	// Items under "<installDir>/data" are dbroots: for those, an alarm already
	// raised by any module ("*") suppresses a resend. Everything else is only
	// checked against this module.
	const bool isDbrootItem = ( alarmItem.find(startup::StartUp::installDir() + "/data") == 0 );
	const string alarmScope = isDbrootItem ? string("*") : moduleName;

	// Already active: don't resend.
	if ( oam.checkActiveAlarm(alarmID, alarmScope, alarmItem) )
		return false;

	// Send the alarm and record it in the log.
	SNMPManager alarmMgr;
	alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action);

	logArgs.add(", Alarm set: ");
	logArgs.add(alarmID);
	logMessage.format(logArgs);
	logger.logInfoMessage(logMessage);
	return true;
}
Пример #4
0
/*****************************************************************************************
* @brief	diskMonitor Thread
*
* purpose:	Get current Local and External disk usage and report alarms
*
* Runs an endless loop (the function never returns normally). Each pass:
*   1. waits until the system operational state is ACTIVE,
*   2. reloads the disk thresholds and the list of file systems to monitor,
*   3. measures usage of every local file system with statvfs() and, when
*      diskSpaceCheck is 0, raises or clears the local disk alarms,
*   4. measures the dbroot file systems when storage is external or Gluster is
*      configured on a pm module and raises or clears those alarms,
*   5. on pm modules, verifies access to every dbroot via its OAMdbrootCheck
*      file (plus the EC2 volume status when "Cloud" is configured) and sends
*      DBROOT_DOWN / DBROOT_UP notifications on state changes,
*   6. optionally runs the Gluster status check,
*   7. sleeps MONITOR_PERIOD/6 seconds.
*
* Usage records are appended to `sdl`, which is declared outside this function.
*
*****************************************************************************************/
void diskMonitor()
{
	ServerMonitor serverMonitor;
	Oam oam;
	SystemConfig systemConfig;
	ModuleTypeConfig moduleTypeConfig;
	typedef std::vector<std::string> LocalFileSystems;
	LocalFileSystems lfs;
	struct statvfs buf; 

	// set defaults
	int localDiskCritical = 90,
		localDiskMajor = 80,
		localDiskMinor = 70,
		ExternalDiskCritical = 90,
		ExternalDiskMajor = 80,
		ExternalDiskMinor = 70;

	// get module types
	string moduleType;
	int moduleID=-1;
	string moduleName;
	oamModuleInfo_t t;
	try {
		t = oam.getModuleInfo();
		moduleType = boost::get<1>(t);
		moduleID = boost::get<2>(t);
		moduleName = boost::get<0>(t);
	}
	catch (exception& e) {
		// best effort: on failure the module type/name stay empty and the
		// pm-specific checks below are skipped
	}

	bool Externalflag = false;

	//check for external disk
	DBrootList dbrootList;
	if (moduleType == "pm") {
		systemStorageInfo_t t;
		t = oam.getStorageConfig();
		if ( boost::get<0>(t) == "external")
			Externalflag = true;

		// get dbroot list and storage type from config file
		DBRootConfigList dbrootConfigList;
		oam.getPmDbrootConfig(moduleID, dbrootConfigList);
	
		DBRootConfigList::iterator pt = dbrootConfigList.begin();
		for( ; pt != dbrootConfigList.end() ; pt++)
		{
			int dbrootID = *pt;
	
			string dbroot = "DBRoot" + oam.itoa(dbrootID);
	
			string dbootdir;
			try{
				oam.getSystemConfig(dbroot, dbootdir);
			}
			catch(...) {}
	
			// no directory configured for this dbroot, nothing to monitor
			if ( dbootdir.empty() )
				continue;
	
			DBrootData dbrootData;
			dbrootData.dbrootDir = dbootdir;
			dbrootData.downFlag = false;
	
			dbrootList.push_back(dbrootData);
		}
	}

	string cloud = oam::UnassignedName;
	try {
		oam.getSystemConfig( "Cloud", cloud);
	}
	catch(...) {
		cloud = oam::UnassignedName;
	}

	//get Gluster Config setting
	string GlusterConfig = "n";
	try {
		oam.getSystemConfig( "GlusterConfig", GlusterConfig);
	}
	catch(...)
	{
		GlusterConfig = "n";
	}

	// pass counter: local disk alarms are only evaluated on passes where it is 0
	int diskSpaceCheck = 0;

	while(true)
	{
		SystemStatus systemstatus;
		try {
			oam.getSystemStatus(systemstatus);
		}
		catch (exception& ex)
		{}
		
		if (systemstatus.SystemOpState != oam::ACTIVE ) {
			sleep(5);
			continue;
		}

		// Get Local/External Disk Mount points to monitor and associated thresholds

		// Start every pass with an empty list. Clearing only at the end of the
		// loop left stale entries behind whenever one of the early 'continue'
		// paths below was taken, so file systems were monitored more than once.
		lfs.clear();
		
		try {
			oam.getSystemConfig (moduleTypeConfig);
			localDiskCritical = moduleTypeConfig.ModuleDiskCriticalThreshold; 
			localDiskMajor = moduleTypeConfig.ModuleDiskMajorThreshold; 
			localDiskMinor = moduleTypeConfig.ModuleDiskMinorThreshold;

			DiskMonitorFileSystems::iterator p = moduleTypeConfig.FileSystems.begin();
			for( ; p != moduleTypeConfig.FileSystems.end() ; p++)
			{
				string fs = *p;
				lfs.push_back(fs);

				if (DISK_DEBUG) {
					//Log this event 
					LoggingID lid(SERVER_MONITOR_LOG_ID);
					MessageLog ml(lid);
					Message msg;
					Message::Args args;
					args.add("Local Config File System to monitor =");
					args.add(fs);
					msg.format(args);
					ml.logDebugMessage(msg);
				}
			}

		} catch (...)
		{
			sleep(5);
			continue;
		}

		// get External info
		try
		{
			oam.getSystemConfig(systemConfig);

		} catch (...)
		{
			sleep(5);
			continue;
		}

		if (Externalflag) {
			// get External info
			try
			{
				ExternalDiskCritical = systemConfig.ExternalCriticalThreshold;
				ExternalDiskMajor = systemConfig.ExternalMajorThreshold;
				ExternalDiskMinor = systemConfig.ExternalMinorThreshold;

			} catch (...)
			{
				sleep(5);
				continue;
			}
		}

		//check for local file systems
		LocalFileSystems::iterator p = lfs.begin();
		while(p != lfs.end())
		{
			string deviceName = *p;
			++p;
			string fileName;
			// check local
			if ( deviceName == "/") {
				fileName = deviceName + "usr/local/Calpont/releasenum";
			}
			else
			{
				fileName = deviceName + "/000.dir";
			}

			// NOTE: despite their names these two hold byte counts
			// (block count multiplied by block size), not block counts.
			uint64_t totalBlocks;
			uint64_t usedBlocks;

			if (!statvfs(fileName.c_str(), &buf)) {

				uint64_t blksize, blocks, freeblks, free; 

				// NOTE(review): POSIX defines f_blocks/f_bfree in units of
				// f_frsize, not f_bsize. The percentage below is unaffected,
				// the byte totals may be -- confirm before changing.
				blksize = buf.f_bsize; 
				blocks = buf.f_blocks; 
				freeblks = buf.f_bfree; 

				totalBlocks = blocks * blksize;
				free = freeblks * blksize; 
				usedBlocks = totalBlocks - free; 
			}
			else
				continue;

			int64_t diskUsage = 0;
			// totalBlocks / 100 is the divisor below, so anything under 100
			// bytes (not just 0) has to be treated as "no capacity" to avoid
			// an integer division by zero.
			if ( totalBlocks / 100 == 0 ) {
				diskUsage = 0;
	
				//Log this event 
				LoggingID lid(SERVER_MONITOR_LOG_ID);
				MessageLog ml(lid);
				Message msg;
				Message::Args args;
				args.add("Total Disk Usage is set to 0");
				msg.format(args);
				ml.logWarningMessage(msg);
			}
			else
				// integer percentage; the +1 means the result is never 0
				diskUsage =  (usedBlocks / (totalBlocks / 100)) + 1;

			SMSystemDisk sd;
			sd.deviceName = deviceName;
			sd.usedPercent = diskUsage;
			sd.totalBlocks = totalBlocks;
			sd.usedBlocks = usedBlocks;
			sdl.push_back(sd);

			if (DISK_DEBUG)
				cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl;
	
			// local alarms are only raised/cleared on passes where the counter is 0
			if ( diskSpaceCheck == 0 )
			{
				// a threshold of 0 (or less) disables that severity
				if (diskUsage >= localDiskCritical && localDiskCritical > 0 ) {
					//adjust if over 100%
					if ( diskUsage > 100 )
						diskUsage = 100;
					if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, (int) diskUsage) )
					{
						LoggingID lid(SERVER_MONITOR_LOG_ID);
						MessageLog ml(lid);
						Message msg;
						Message::Args args;
						args.add("Local Disk above Critical Disk threshold with a percentage of ");
						args.add((int) diskUsage);
						msg.format(args);
						ml.logInfoMessage(msg);
					}
				}
				else if (diskUsage >= localDiskMajor && localDiskMajor > 0 ) {
					if (serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, (int) diskUsage))
					{
						LoggingID lid(SERVER_MONITOR_LOG_ID);
						MessageLog ml(lid);
						Message msg;
						Message::Args args;
						args.add("Local Disk above Major Disk threshold with a percentage of ");
						args.add((int) diskUsage);
						msg.format(args);
						ml.logInfoMessage(msg);
					}
				}
				else if (diskUsage >= localDiskMinor && localDiskMinor > 0 ) {
					if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, (int) diskUsage))
					{
						LoggingID lid(SERVER_MONITOR_LOG_ID);
						MessageLog ml(lid);
						Message msg;
						Message::Args args;
						args.add("Local Disk above Minor Disk threshold with a percentage of ");
						args.add((int) diskUsage);
						msg.format(args);
						ml.logInfoMessage(msg);
					}
				}
				else
					// below every threshold: clear whatever is still active
					serverMonitor.checkDiskAlarm(deviceName);
			}
	
			//check for external file systems/devices
			// NOTE(review): this block sits inside the local-file-system loop, so
			// it runs once per local file system (and not at all when the list is
			// empty or statvfs failed above), and it is not gated by
			// diskSpaceCheck. Kept as is -- confirm whether that is intended.
			if (Externalflag ||
				(!Externalflag && GlusterConfig == "y" && moduleType == "pm") ){
				try
				{
					DBRootConfigList dbrootConfigList;
					oam.getPmDbrootConfig(moduleID, dbrootConfigList);
	
					DBRootConfigList::iterator pt = dbrootConfigList.begin();
					for( ; pt != dbrootConfigList.end() ; pt++)
					{
						int dbroot = *pt;
						// NOTE(review): assumes dbroot IDs are 1-based and within
						// the bounds of systemConfig.DBRoot -- not checked here.
						string deviceName = systemConfig.DBRoot[dbroot-1];
						string fileName = deviceName + "/000.dir";
			
						if (DISK_DEBUG) {
							//Log this event 
							LoggingID lid(SERVER_MONITOR_LOG_ID);
							MessageLog ml(lid);
							Message msg;
							Message::Args args;
							args.add("DBRoots monitoring");
							args.add(dbroot);
							args.add(" ,file system =" );
							args.add(fileName);
							msg.format(args);
							ml.logDebugMessage(msg);
						}
	
						// byte counts, see the note in the local check above
						uint64_t totalBlocks;
						uint64_t usedBlocks;
			
						if (!statvfs(fileName.c_str(), &buf)) {
			
							uint64_t blksize, blocks, freeblks, free; 
			
							blksize = buf.f_bsize; 
							blocks = buf.f_blocks; 
							freeblks = buf.f_bfree; 
			
							totalBlocks = blocks * blksize;
							free = freeblks * blksize; 
							usedBlocks = totalBlocks - free; 
						}
						else
						{
							// not reachable: still record the device, with zeroed figures
							SMSystemDisk sd;
							sd.deviceName = deviceName;
							sd.usedPercent = 0;
							sd.totalBlocks = 0;
							sd.usedBlocks = 0;
							sdl.push_back(sd);
							continue;
						}
			
						int diskUsage = 0;
						// same guard as for local disks: avoid dividing by
						// (totalBlocks / 100) when that is 0
						if ( totalBlocks / 100 == 0 ) {
							diskUsage = 0;
				
							//Log this event 
							LoggingID lid(SERVER_MONITOR_LOG_ID);
							MessageLog ml(lid);
							Message msg;
							Message::Args args;
							args.add("Total Disk Usage is set to 0");
							msg.format(args);
							ml.logWarningMessage(msg);
						}
						else
							diskUsage =  (usedBlocks / (totalBlocks / 100)) + 1;
			
						SMSystemDisk sd;
						sd.deviceName = deviceName;
						sd.usedPercent = diskUsage;
						sd.totalBlocks = totalBlocks;
						sd.usedBlocks = usedBlocks;
						sdl.push_back(sd);
		
						if (DISK_DEBUG)
							cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl;
			
						if (diskUsage >= ExternalDiskCritical && ExternalDiskCritical > 0 ) {
							//adjust if over 100%
							if ( diskUsage > 100 )
								diskUsage = 100;
							if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, diskUsage))
							{
								LoggingID lid(SERVER_MONITOR_LOG_ID);
								MessageLog ml(lid);
								Message msg;
								Message::Args args;
								args.add("Disk usage for");
								args.add(deviceName);
								args.add(" above Critical Disk threshold with a percentage of ");
								args.add((int) diskUsage);
								msg.format(args);
								ml.logInfoMessage(msg);
							}
						}
						else if (diskUsage >= ExternalDiskMajor && ExternalDiskMajor > 0 ) {
							if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, diskUsage))
							{
								LoggingID lid(SERVER_MONITOR_LOG_ID);
								MessageLog ml(lid);
								Message msg;
								Message::Args args;
								args.add("Disk usage for");
								args.add(deviceName);
								args.add(" above Major Disk threshold with a percentage of ");
								args.add((int) diskUsage);
								msg.format(args);
								ml.logInfoMessage(msg);
							}
						}
						else if (diskUsage >= ExternalDiskMinor && ExternalDiskMinor > 0 ) {
							if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, diskUsage))
							{
								LoggingID lid(SERVER_MONITOR_LOG_ID);
								MessageLog ml(lid);
								Message msg;
								Message::Args args;
								args.add("Disk usage for");
								args.add(deviceName);
								args.add(" above Minor Disk threshold with a percentage of ");
								args.add((int) diskUsage);
								msg.format(args);
								ml.logInfoMessage(msg);
							}
						}
						else
							serverMonitor.checkDiskAlarm(deviceName);
					}
				}
				catch (exception& e)
				{
					cout << endl << "**** getPmDbrootConfig Failed :  " << e.what() << endl;
				}
			}
		}

		//check OAM dbroot test flag to validate dbroot exist if on pm
		if ( moduleName.find("pm") != string::npos ) {
			//check OAM dbroot test flag to validate dbroot exist
			if ( dbrootList.size() != 0 ) {
				DBrootList::iterator p = dbrootList.begin();
				while ( p != dbrootList.end() )
				{
					//get dbroot directory
					string dbrootDir = (*p).dbrootDir;
					string dbrootName;
					string dbrootID;

					//get dbroot name: the last path component.
					// Search from the end of the string; the former start
					// position of 80 picked the wrong '/' for longer paths.
					string::size_type pos = dbrootDir.rfind('/');
					if (pos != string::npos)
						dbrootName = dbrootDir.substr(pos+1);

					//get ID: whatever follows the first 4 characters of the name
					// (presumably names look like "dataN" -- TODO confirm).
					// Guarded because substr(4) throws std::out_of_range on a
					// shorter name, e.g. when no '/' was found above.
					if ( dbrootName.size() > 4 )
						dbrootID = dbrootName.substr(4);
			
					string fileName = dbrootDir + "/OAMdbrootCheck";
					// retry in case we hit the remount window
					for ( int retry = 0 ; ; retry++ )
					{
						bool fail = false;
						//first test, check if OAMdbrootCheck exists
						ifstream file (fileName.c_str());
						if (!file)
							fail = true;
						else
						{	//second test for amazon, check volume status
							if ( cloud != oam::UnassignedName ) {
								string volumeNameID = "PMVolumeName" + dbrootID;
								string volumeName = oam::UnassignedName;
								try {
									oam.getSystemConfig( volumeNameID, volumeName);
								}
								catch(...)
								{}
							
								// no volume configured for this dbroot: file test is enough
								if ( volumeName.empty() || volumeName == oam::UnassignedName )
									fail = false;
								else
								{
									string status = oam.getEC2VolumeStatus(volumeName);
									if ( status == "attached" )
										fail = false;
									else
									{
										fail = true;
										LoggingID lid(SERVER_MONITOR_LOG_ID);
										MessageLog ml(lid);
										Message msg;
										Message::Args args;
										args.add("dbroot monitoring: Volume not attached");
										args.add(volumeName);
										args.add("/");
										args.add(dbrootName);
										msg.format(args);
										ml.logCriticalMessage(msg);
									}
								}
							}
							else
								fail = false;
						}

						if (fail) {
							//double check system status before reporting any error BUG 5078
							SystemStatus systemstatus;
							try {
								oam.getSystemStatus(systemstatus);
							}
							catch (exception& ex)
							{}
							
							if (systemstatus.SystemOpState != oam::ACTIVE ) {
								break;
							}

							if ( retry < 10 ) {
								sleep(3);
								continue;
							}

							// Retries exhausted. Report the outage once, on the
							// transition to "down" ...
							if ( !(*p).downFlag ) {
								LoggingID lid(SERVER_MONITOR_LOG_ID);
								MessageLog ml(lid);
								Message msg;
								Message::Args args;
								args.add("dbroot monitoring: Lost access to ");
								args.add(dbrootDir);
								msg.format(args);
								ml.logCriticalMessage(msg);

								oam.sendDeviceNotification(dbrootName, DBROOT_DOWN, moduleName);
								(*p).downFlag = true;

								try{
									oam.setDbrootStatus(dbrootID, oam::AUTO_OFFLINE);
								}
								catch (exception& ex)
								{}
							}

							// ... and always leave the retry loop. Breaking only
							// inside the branch above made a dbroot that was
							// already flagged down spin here forever without
							// sleeping; it is re-tested on the next monitor pass.
							break;
						}
						else
						{
							// access works: report recovery if it was down before
							if ( (*p).downFlag ) {
								LoggingID lid(SERVER_MONITOR_LOG_ID);
								MessageLog ml(lid);
								Message msg;
								Message::Args args;
								args.add("dbroot monitoring: Access back to ");
								args.add(dbrootDir);
								msg.format(args);
								ml.logInfoMessage(msg);
		
								oam.sendDeviceNotification(dbrootName, DBROOT_UP, moduleName);
								(*p).downFlag = false;

								try{
									oam.setDbrootStatus(dbrootID, oam::ACTIVE);
								}
								catch (exception& ex)
								{}
							}
							file.close();
							break;
						}
					}
					p++;
				}
			}
		}

		//do Gluster status check, if configured
		if ( GlusterConfig == "y")
		{
			bool pass = true;
			string errmsg = "unknown";
			try {
				string arg1 = "";
				string arg2 = "";
				int ret = oam.glusterctl(oam::GLUSTER_STATUS, arg1, arg2, errmsg);
				if ( ret != 0 )
				{
					cerr << "FAILURE: Status check error: " + errmsg << endl;
					pass = false;
				}
			}
			catch (exception& e)
			{
				cerr << endl << "**** glusterctl API exception:  " << e.what() << endl;
				cerr << "FAILURE: Status check error" << endl;
				pass = false;
			}
			catch (...)
			{
				cerr << endl << "**** glusterctl API exception: UNKNOWN" << endl;
				cerr << "FAILURE: Status check error" << endl;
				pass = false;
			}

			if ( !pass )
			{ // issue log and alarm
				LoggingID lid(SERVER_MONITOR_LOG_ID);
				MessageLog ml(lid);
				Message msg;
				Message::Args args;
				args.add("Gluster Status check failure error msg: ");
				args.add(errmsg);
				msg.format(args);
				ml.logWarningMessage(msg);
				serverMonitor.sendResourceAlarm(errmsg, GLUSTER_DISK_FAILURE, SET, 0);
			}
		}

		// sleep between passes (10 seconds per the original comment, which
		// implies MONITOR_PERIOD is 60 -- defined elsewhere)
		sleep(MONITOR_PERIOD/6);

		//check disk space every 60th pass (about 10 minutes per the original comment)
		diskSpaceCheck++;
		if ( diskSpaceCheck >= 60 )
			diskSpaceCheck = 0;

		// Drop the usage records only after the sleep, so they stay in sdl
		// for the whole wait; sdl is declared outside this function.
		sdl.clear();

	} // end of while loop
}
Пример #5
0
/*****************************************************************************************
* @brief	setSNMPModuleName API
*
* purpose:	Set SNMP Module name in the snmpdx.conf file
*
*****************************************************************************************/
void SNMPManager::setSNMPModuleName ()
{
	// get current Module name
	Oam oam;
	string ModuleName;
	oamModuleInfo_t st;
	try {
		st = oam.getModuleInfo();
		ModuleName = boost::get<0>(st);
	}
	catch (...) {
		ModuleName = "Unknown Report Module";
	}

	string agentName = SUB_AGENT;
	string fileName;
	makeFileName (agentName, fileName);
	vector <string> lines;

	ifstream oldFile (fileName.c_str());
	if (!oldFile) throw runtime_error ("No configuration file found");
	
	char line[200];
	string buf;
	string newLine;
	string newLine1;
	string delimiters = " ";
	while (oldFile.getline(line, 200))
	{
		buf = line;
		string::size_type pos = buf.find("ModuleNameStub",0);
		if (pos != string::npos)
		{
	        newLine = buf.substr(0, pos);
    	    newLine.append(ModuleName);

			string::size_type pos1 = buf.find("|",pos);
			if (pos1 != string::npos)
			{
	        	newLine1 = buf.substr(pos1, 200);
    	    	newLine.append(newLine1);
			}
		buf = newLine;
		}
		// output to temp file
		lines.push_back(buf);
	}
	
	oldFile.close();
	unlink (fileName.c_str());
   	ofstream newFile (fileName.c_str());	
	
	// create new file
	int fd = open(fileName.c_str(), O_RDWR|O_CREAT, 0666);
	
	// Aquire an exclusive lock
   	if (flock(fd,LOCK_EX) == -1) {
    	throw runtime_error ("Lock SNMP configuration file error");
   	}

	copy(lines.begin(), lines.end(), ostream_iterator<string>(newFile, "\n"));
	newFile.close();
	
	// Release lock
	if (flock(fd,LOCK_UN) == -1)
	{
    	throw runtime_error ("Release lock SNMP configuration file error");		
	}
	close(fd);
}
Пример #6
0
int main(int argc, char** argv)
{
    int c;
    string pname(argv[0]);
    bool vflg = false;
    bool dflg = false;
    bool xflg = false;
    string configFile;

    opterr = 0;

    while ((c = getopt(argc, argv, "c:vdxh")) != EOF)
        switch (c)
        {
        case 'v':
            vflg = true;
            break;
        case 'd':
            dflg = true;
            break;
        case 'c':
            configFile = optarg;
            break;
        case 'x':
            xflg = true;
            break;
        case 'h':
        case '?':
        default:
            usage(pname);
            return (c == 'h' ? 0 : 1);
            break;
        }

    if ((argc - optind) < 3)
    {
        usage(pname);
        return 1;
    }

#ifdef COMMUNITY_KEYRANGE
    //No OAM in CE...
    dflg = true;
#endif

    Oam oam;
    oamModuleInfo_t t;
    bool parentOAMModuleFlag = true;
    string parentOAMModule = " ";
    int serverInstallType = oam::INSTALL_COMBINE_DM_UM_PM;

    //get local module info; validate running on Active Parent OAM Module
    try {
        t = oam.getModuleInfo();
        parentOAMModuleFlag = boost::get<4>(t);
        parentOAMModule = boost::get<3>(t);
        serverInstallType = boost::get<5>(t);
    }
    catch (exception&) {
        parentOAMModuleFlag = true;
    }

    if (!dflg && !parentOAMModuleFlag)
    {
        cerr << "Exiting, setConfig can only be run on the Active "
             "OAM Parent Module '" << parentOAMModule << "'" << endl;
        return 2;
    }

    Config* cf;
    if (configFile.length() > 0)
        cf = Config::makeConfig(configFile);
    else
        cf = Config::makeConfig();

    if (vflg)
        cout << "Using config file: " << cf->configFile() << endl;

    if (xflg)
        cf->delConfig(argv[optind + 0], argv[optind + 1]);
    else
        cf->setConfig(argv[optind + 0], argv[optind + 1], argv[optind + 2]);
    cf->write();

    if (dflg || serverInstallType == oam::INSTALL_COMBINE_DM_UM_PM)
        return 0;

    //get number of pms
    string count = cf->getConfig("PrimitiveServers", "Count");

    try {
        oam.distributeConfigFile();
        //sleep to give time for change to be distributed
        sleep(atoi(count.c_str()));
    }
    catch (...) {
        return 1;
    }

    return 0;
}
int main(int argc, char *argv[])
{
    Oam oam;
	string installDir(startup::StartUp::installDir());

	Config* sysConfig = Config::makeConfig();
	string SystemSection = "SystemConfig";
	string InstallSection = "Installation";

	bool HARDWARE = false;
	bool SOFTWARE = false;
	bool CONFIG = false;
	bool DBMS = false;
	bool RESOURCE = false;
	bool LOG = false;
	bool BULKLOG = false;
	bool HADOOP = false;

	//get current time and date
	time_t now;
	now = time(NULL);
	struct tm tm;
	localtime_r(&now, &tm);
	char timestamp[200];
	strftime (timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm);
	currentDate = timestamp;

	char helpArg[3] = "-h";

	// Get System Name
	try{
		oam.getSystemConfig("SystemName", systemName);
	}
	catch(...)
	{ 
		systemName = "unassigned";
	}

	// get Local Module Name and Server Install Indicator
	string singleServerInstall;

	oamModuleInfo_t st;
	try {
		st = oam.getModuleInfo();
		localModule = boost::get<0>(st);
	}
	catch (...) {
		cout << endl << "**** Failed : Failed to read Local Module Name" << endl;
		exit(-1);
	}

	try{
		oam.getSystemConfig("SingleServerInstall", singleServerInstall);
	}
	catch(...)
	{ 
		singleServerInstall = "y";
	}

    if (argc == 1) {
		argv[1] = &helpArg[0];
		argc = 2;
	}

	string DataFilePlugin;
	try{
		DataFilePlugin = sysConfig->getConfig(SystemSection, "DataFilePlugin");
	}
	catch(...)
	{
		cout << "ERROR: Problem accessing InfiniDB configuration file" << endl;
		exit(-1);
	}
 
   for( int i = 1; i < argc; i++ )
   {
		if( string("-h") == argv[i] ) {
			cout << endl;
			cout << "'calpontSupport' generates a Set of System Support Report Files in a tar file" << endl;
			cout << "called calpontSupportReport.'system-name'.tar.gz in the local directory." << endl;
			cout << "It should be run on the server with the DBRM front-end." << endl;
			cout << "Check the Admin Guide for additional information." << endl;
			cout << endl;
			cout << "Usage: calpontSupport [-h][-a][-hw][-s][-c][-db][-r][-l][-bl][-lc][-p 'root-password'][-mp 'mysql-root-password'][-de]";
			// if hdfs set up print the hadoop option
			if (!DataFilePlugin.empty())
				cout << "[-hd]";
			cout << endl;
			cout << "			-h  help" << endl;
			cout << "			-a  Output all Reports (excluding Bulk Logs Reports)" << endl;
			cout << "			-hw Output Hardware Reports only" << endl;
			cout << "			-s  Output Software Reports only" << endl;
			cout << "			-c  Output Configuration/Status Reports only" << endl;
			cout << "			-db Output DBMS Reports only" << endl;
			cout << "			-r  Output Resource Reports only" << endl;
			cout << "			-l  Output Calpont Log/Alarms Reports only" << endl;
			cout << "			-bl Output Calpont Bulk Log Reports only" << endl;
			cout << "			-lc Output Reports for Local Server only" << endl;
			cout << "			-p  password (multi-server systems), root-password or 'ssh' to use 'ssh keys'" << endl;
			cout << "			-mp mysql root user password" << endl;
			cout << "			-de Debug Flag" << endl;
			// if hdfs set up print the hadoop option
			if (!DataFilePlugin.empty())
			cout << "			-hd Output hadoop reports only" << endl;
			
			exit (0);
		}
		else
		{
			if( string("-a") == argv[i] ) {
				HARDWARE = true;
				SOFTWARE = true;
				CONFIG = true;
				DBMS = true;
				RESOURCE = true;
				LOG = true;
				HADOOP = (DataFilePlugin.empty()? false : true);
			}
			else if( string("-hw") == argv[i] )
				HARDWARE = true;
			else if( string("-s") == argv[i] )
				SOFTWARE = true;
			else if( string("-c") == argv[i] )
				CONFIG = true;
			else if( string("-db") == argv[i] )
				DBMS = true;
			else if( string("-r") == argv[i] )
				RESOURCE = true;
			else if( string("-l") == argv[i] )
				LOG = true;
			else if( string("-bl") == argv[i] )
				BULKLOG = true;
			else if( string("-lc") == argv[i] )
				LOCAL = true;
			else if( string("-p") == argv[i] ) {
				i++;
				if ( argc == i ) {
					cout << "ERROR: missing root password argument" << endl;
					exit(-1);
				}
				rootPassword = argv[i];
				//add single quote for special characters
				if ( rootPassword != "ssh" )
				{
					rootPassword = "******" + rootPassword + "'";
				}
			}
			else if( string("-mp") == argv[i] ) {
				i++;
				if ( argc == i ) {
					cout << "ERROR: missing mysql root user password argument" << endl;
					exit(-1);
				}
				mysqlpw = argv[i];
				mysqlpw = "'" + mysqlpw + "'";
			}
			else if( string("-de") == argv[i] )
				debug_flag = "1";
			else if ( string("-hd") == argv[i] )
			{
				HADOOP = (DataFilePlugin.empty()? false : true);
			}
			else
			{
				cout << "Invalid Option of '" << argv[i] << "', run with '-h' for help" << endl;
				exit (1);
			}
		}
	}

	//default to -a if nothing is set
	if ( !HARDWARE && !SOFTWARE && !CONFIG && !DBMS && !RESOURCE && !LOG && !BULKLOG && !HADOOP)
	{
		HARDWARE = true;
		SOFTWARE = true;
		CONFIG = true;
		DBMS = true;
		RESOURCE = true;
		LOG = true;
		HADOOP = (DataFilePlugin.empty()? false : true);
	}

	//get Parent OAM Module Name and setup of it's Custom OS files
	string PrimaryUMModuleName;
	try{
		PrimaryUMModuleName = sysConfig->getConfig(SystemSection, "PrimaryUMModuleName");
	}
	catch(...)
	{
		cout << "ERROR: Problem getting Parent OAM Module Name" << endl;
		exit(-1);
	}

	if ( PrimaryUMModuleName == "unassigned" )
		PrimaryUMModuleName = localModule;

	if ( (localModule != PrimaryUMModuleName) && DBMS )
	{
		char* pcommand = 0;
		char *p;
		string argument = "n";
	
		while(true)
		{
			cout << endl << "You selected to get the DBMS data." << endl;
			cout << "You need to run the calpontSupport command on module '" << PrimaryUMModuleName << "' to get that information." << endl;
			cout << "Or you can proceed on to get all data except the DBMS." << endl;

			pcommand = readline("           Do you want to proceed: (y or n) [n]: ");
	
			if (pcommand && *pcommand) {
				p = strtok(pcommand," ");
				argument = p;
				free(pcommand);
				pcommand = 0;
			}
	
			if (pcommand) {
				free(pcommand);
				pcommand = 0;
			}
	
			if( argument == "y") {
				cout << endl;
				break;
			}
			else if( argument == "n")
				exit (1);
		}
	}

	//get number of worker-nodes, will tell us if a single server system
	//get Parent OAM Module Name and setup of it's Custom OS files
	try{
		string NumWorkers = sysConfig->getConfig("DBRM_Controller", "NumWorkers");
		if ( NumWorkers == "1" )
			singleServerInstall = "y";
	}
	catch(...)
	{}

	if ( singleServerInstall == "n" && !LOCAL)
		if ( HARDWARE || SOFTWARE || CONFIG || RESOURCE || LOG || HADOOP )
			if ( rootPassword.empty() ) {
				cout << "ERROR: Multi-Module System, Password Argument required or use '-lc' option, check help for more information" << endl;
				exit(-1);
			}

	//get Parent OAM Module Name and setup of it's Custom OS files
	//string parentOAMModuleName;
	ChildModule parentOAMModule;
	try{
		parentOAMModule.moduleName = sysConfig->getConfig(SystemSection, "ParentOAMModuleName");
	}
	catch(...)
	{
		cout << "ERROR: Problem getting Parent OAM Module Name" << endl;
		exit(-1);
	}

	//Get list of configured system modules
	SystemModuleTypeConfig sysModuleTypeConfig;

	try{
		oam.getSystemConfig(sysModuleTypeConfig);
	}
	catch(...)
	{
		cout << "ERROR: Problem reading the Calpont System Configuration file" << endl;
		exit(-1);
	}

	string ModuleSection = "SystemModuleConfig";

	for ( unsigned int i = 0 ; i < sysModuleTypeConfig.moduletypeconfig.size(); i++)
	{
		string moduleType = sysModuleTypeConfig.moduletypeconfig[i].ModuleType;
		int moduleCount = sysModuleTypeConfig.moduletypeconfig[i].ModuleCount;

		if ( moduleCount == 0 )
			//no modules equipped for this Module Type, skip
			continue;

		//get IP addresses and Host Names
		DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin();
		for( ; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end() ; listPT++)
		{
			string moduleName = (*listPT).DeviceName;
			HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin();
			string moduleIPAddr = (*pt1).IPAddr;
			string moduleHostName = (*pt1).HostName;

			if ( moduleName == localModule) {
				localModuleHostName = moduleHostName;
			}

			//save Child modules
			if ( moduleName != localModule && moduleType != "xm") {
				childmodule.moduleName = moduleName;
				childmodule.moduleIP = moduleIPAddr;
				childmodule.hostName = moduleHostName;
				childmodulelist.push_back(childmodule);
			}
			
			if (moduleName == parentOAMModule.moduleName)
			{
				parentOAMModule.moduleIP = moduleIPAddr;
				parentOAMModule.hostName = moduleHostName;
				parentOAMModule.moduleName = moduleName;
			}
		}
	} //end of i for loop

	// create a clean Calpont Support Report
	system("rm -f *_configReport.txt");
	system("rm -f *_dbmsReport.txt");
	system("rm -f *_hardwareReport.txt");
	system("rm -f *_logReport.txt");
	system("rm -f *_bulklogReport.txt");
	system("rm -f *_resourceReport.txt");
	system("rm -f *_softwareReport.txt");
	system("rm -f hadoopReport.txt");

	//
	// Software
	//

	if ( SOFTWARE ) {
		string reportType = "software";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// Configuration
	//

	if ( CONFIG ) {
		string reportType = "config";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// Alarms and Calpont Logs
	//

	if ( LOG ) {
		string reportType = "log";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// Bulk Logs
	//

	if ( BULKLOG ) {
		string reportType = "bulklog";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// Hardware
	//

	if ( HARDWARE ) {
		string reportType = "hardware";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// Resources
	//

	if ( RESOURCE ) {
		string reportType = "resource";
		pthread_t reportthread;
		int status = pthread_create (&reportthread, NULL, (void*(*)(void*)) &reportThread, &reportType);

		if ( status != 0 ) {
			cout <<  "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status);
		}

		sleep(5);
	}

	//
	// DBMS
	//

	if ( DBMS ) {
		system("rm -f calpontSupportReport.txt;touch calpontSupportReport.txt");
		title();

		system("echo '=======================================================================' >> calpontSupportReport.txt");
		system("echo '=                    DBMS Report                                      =' >> calpontSupportReport.txt");
		system("echo '=======================================================================' >> calpontSupportReport.txt");

		// run DBMS report on local server
		cout << "Get dbms report data for " << localModule << endl;

		bool FAILED = false;
		if ( localModule != PrimaryUMModuleName )
		{
			cout << "     FAILED: run calpontSupport on '" << PrimaryUMModuleName << "' to get the dbrm report" << endl;
			FAILED = true;
		}
		else
		{
			// check if mysql is supported and get info
			string calpontMysql = installDir + "/mysql/bin/mysql --defaults-file=" + installDir + "/mysql/my.cnf -u root ";
			string cmd = calpontMysql + " -e 'status' > /tmp/idbmysql.log 2>&1";
			system(cmd.c_str());

			//check for mysql password set
			string pwprompt = " ";
	
			if (oam.checkLogStatus("/tmp/idbmysql.log", "ERROR 1045") ) {
				cout << "NOTE: MySQL root user password is set" << endl;
				//needs a password, was password entered on command line
				if ( mysqlpw == " " )
				{	//go check my.cnf
					string file = installDir + "/mysql/my.cnf";
					ifstream oldFile (file.c_str());
					
					vector <string> lines;
					char line[200];
					string buf;
					while (oldFile.getline(line, 200))
					{
						buf = line;
						string::size_type pos = buf.find("password",0);
						if (pos != string::npos) {
							string::size_type pos1 = buf.find("=",0);
							if (pos1 != string::npos) {
								pos = buf.find("#",0);
								if (pos == string::npos) {
									//password arg in my.cnf, go get password
									cout << "NOTE: Using password from my.cnf" << endl;
									mysqlpw = buf.substr(pos1+1,80);
									cout << mysqlpw << endl;
									break;
								}
							}
						}
					}

					oldFile.close();

					if ( mysqlpw == " " )
					{
						cout << "NOTE: No password provide on command line or found uncommented in my.cnf" << endl;
						cout << endl;
						string prompt = " *** Enter MySQL password > ";
						mysqlpw = getpass(prompt.c_str());
					}
				}

				//check for mysql password set
				pwprompt = "--password="******" -e 'status' > /tmp/idbmysql.log 2>&1";
				system(cmd.c_str());

				if (oam.checkLogStatus("/tmp/idbmysql.log", "ERROR 1045") ) {
					cout << "FAILED: Failed login using MySQL root user password '" << mysqlpw << "'" << endl;
					FAILED = true;
				}
			}

			if (!FAILED)
			{	
				// check if mysql is supported and get info
				string calpontMysql = installDir + "/mysql/bin/mysql --defaults-file=" + installDir + "/mysql/my.cnf -u root " + pwprompt;
				string cmd = calpontMysql + " -V > /dev/null 2>&1";
				int ret = system(cmd.c_str());
				if ( WEXITSTATUS(ret) == 0) {
					// run DBMS report info
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS InfiniDB Mysql Version ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " -e status ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'status' >> calpontSupportReport.txt";
					system(cmd.c_str());
			
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql Calpont System Column  ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " -e desc calpontsys.syscolumn ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'desc calpontsys.syscolumn;' >> calpontSupportReport.txt";
					system(cmd.c_str());
			
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql Calpont System Table  ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " -e desc calpontsys.systable ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'desc calpontsys.systable;' >> calpontSupportReport.txt";
					system(cmd.c_str());
			
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql Calpont System Catalog Data ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " calpontsys < " + installDir + "/mysql/dumpcat_mysql.sql ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " calpontsys < " + installDir + "/mysql/dumpcat_mysql.sql >> calpontSupportReport.txt";
					system(cmd.c_str());
			
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql Calpont System Table Data ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " -e select * from calpontsys.systable ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'select * from calpontsys.systable;' >> calpontSupportReport.txt";
					system(cmd.c_str());
			
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql Calpont Usernames ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " -e show databases ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'show databases;' >> calpontSupportReport.txt";
					system(cmd.c_str());
		
					system("echo ' ' >> calpontSupportReport.txt");
					system("echo '******************** DBMS Mysql InfiniDB variables ********************' >> calpontSupportReport.txt");
					system("echo ' ' >> calpontSupportReport.txt");
					cmd = "echo '################# " + calpontMysql + " show variables ################# ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = "echo ' ' >> calpontSupportReport.txt";
					system(cmd.c_str());
					cmd = calpontMysql + " -e 'show variables;' >> calpontSupportReport.txt";
					system(cmd.c_str());
				}
			}
		}

		system("echo ' ' >> calpontSupportReport.txt");
		system("echo '******************** Database Size Report ********************' >> calpontSupportReport.txt");
		system("echo ' ' >> calpontSupportReport.txt");
		string cmd = "echo '################# /bin/databaseSizeReport ################# ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = "echo ' ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = installDir + "/bin/databaseSizeReport >> calpontSupportReport.txt";
		system(cmd.c_str());

		system("echo ' ' >> calpontSupportReport.txt");
		system("echo '******************** DBMS Mysql InfiniDB config file ********************' >> calpontSupportReport.txt");
		system("echo ' ' >> calpontSupportReport.txt");
		cmd = "echo '################# cat /mysql/my.cnf ################# ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = "echo ' ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = "cat " + installDir + "/mysql/my.cnf 2>/dev/null >> calpontSupportReport.txt";
		system(cmd.c_str());

		system("echo ' ' >> calpontSupportReport.txt");
		system("echo '******************** Active Queries ********************' >> calpontSupportReport.txt");
		system("echo ' ' >> calpontSupportReport.txt");
		cmd = "echo '################# mcsadmin getActiveSqlStatement ################# ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = "echo ' ' >> calpontSupportReport.txt";
		system(cmd.c_str());
		cmd = installDir + "/bin/mcsadmin getActiveSqlStatement >> calpontSupportReport.txt";
		system(cmd.c_str());

		cmd = "cat calpontSupportReport.txt > " + localModule + "_dbmsReport.txt";
		system(cmd.c_str());
	}
	
	//
	// HADOOP
	//

	if (HADOOP)
	{
		if (LOCAL || childmodulelist.empty())
		{
			cout << "Get hadoop report data" << endl;
			string cmd = installDir + "/bin/hadoopReport.sh " + localModule + " " + installDir + "\n";
			cmd += " mv -f /tmp/hadoopReport.txt .";
			FILE* pipe = popen(cmd.c_str(), "r");
			if (!pipe) 
			{
				cout << "Failed to get a pipe for hadoop health check commands" << endl;
				exit(-1);
			}
			pclose(pipe);
		}
		else
		{
			// only get hadoop report from parentOAMModule, because it has a consistent view.
			parentmodulelist.push_back(parentOAMModule);
			threadInfo_t *st = new threadInfo_t;
			ChildModuleList::iterator iter = parentmodulelist.begin();
			*st = boost::make_tuple(iter, "hadoop");

			pthread_t hdthread;
			int status = pthread_create (&hdthread, NULL, (void*(*)(void*)) &childReportThread, st);
			if ( status != 0 ) 
			{
				cout <<  "ERROR: childreportthread: pthread_create failed, return status = " + oam.itoa(status) << endl;
			}
		}
	}
	
	//wait for all threads to complete
	sleep(5);
	int wait = 0;
	while (true)
	{
//cout << "check " << runningThreads << endl;
		if (runningThreads < 1)
			break;

		sleep(2);
		wait++;
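		// intended to give it 60 minutes to complete
		// NOTE(review): wait is incremented once per 2-second sleep, so 3600 * 5
		// iterations is roughly 10 hours, and the loop does not break on timeout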
		if ( wait >= 3600 * 5)
		{
			cout << "Timed out (60 minutes) waiting for Requests to complete" << endl;
		}
	}

	system("rm -f calpontSupportReport.txt");

	system("unix2dos *Report.txt > /dev/null 2>&1");
	system("rm -rf calpontSupportReport;mkdir calpontSupportReport;mv *Report.txt calpontSupportReport/. > /dev/null 2>&1;mv *Report.tar.gz calpontSupportReport/. > /dev/null 2>&1");
	string cmd = "tar -zcf calpontSupportReport." + systemName + ".tar.gz calpontSupportReport/*";
	system(cmd.c_str());


	cout << endl << "Calpont Support Script Successfully completed, files located in calpontSupportReport." + systemName + ".tar.gz" << endl;
}
Пример #8
0
void procmonMonitor()
{
	ServerMonitor serverMonitor;
	Oam oam;

	//wait before monitoring is started
	sleep(60);

	// get current server name
	string moduleName;
	oamModuleInfo_t st;
	try {
		st = oam.getModuleInfo();
		moduleName = boost::get<0>(st);
	}
	catch (...) {
		// Critical error, Log this event and exit
		LoggingID lid(SERVER_MONITOR_LOG_ID);
		MessageLog ml(lid);
		Message msg;
		Message::Args args;
		args.add("Failed to read local module Info");
		msg.format(args);
		ml.logCriticalMessage(msg);
		exit(-1);
	}

	string msgPort = moduleName + "_ProcessMonitor";

	int heartbeatCount = 0;

	// loop forever monitoring Local Process Monitor
	while(true)
	{

		ByteStream msg;
		ByteStream::byte requestID = LOCALHEARTBEAT;
	
		msg << requestID;
	
		try
		{
			MessageQueueClient mqRequest(msgPort);
			mqRequest.write(msg);
		
			// wait 10 seconds for response
			ByteStream::byte returnACK;
			ByteStream::byte returnRequestID;
			ByteStream::byte requestStatus;
			ByteStream receivedMSG;
		
			struct timespec ts = { 10, 0 };
			try {
				receivedMSG = mqRequest.read(&ts);
	
				if (receivedMSG.length() > 0) {
					receivedMSG >> returnACK;
					receivedMSG >> returnRequestID;
					receivedMSG >> requestStatus;
			
					if ( returnACK == oam::ACK &&  returnRequestID == requestID) {
						// ACK for this request
						heartbeatCount = 0;
					}
				}
				else
				{
					LoggingID lid(SERVER_MONITOR_LOG_ID);
					MessageLog ml(lid);
					Message msg;
					Message::Args args;
					args.add("procmonMonitor: ProcMon Msg timeout!!!");
					msg.format(args);
					ml.logWarningMessage(msg);

					heartbeatCount++;

					if ( heartbeatCount > 2 ) {
						//Process Monitor not responding, restart it
						system("pkill ProcMon");
					LoggingID lid(SERVER_MONITOR_LOG_ID);
					MessageLog ml(lid);
					Message msg;
					Message::Args args;
					args.add("procmonMonitor: Restarting ProcMon");
					msg.format(args);
					ml.logWarningMessage(msg);

						sleep(60);
						heartbeatCount = 0;
					}
				}
		
				mqRequest.shutdown();
	
			}
			catch (SocketClosed &ex) {
				string error = ex.what();

				LoggingID lid(SERVER_MONITOR_LOG_ID);
				MessageLog ml(lid);
				Message msg;
				Message::Args args;
				args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: " + error);
				msg.format(args);
				ml.logErrorMessage(msg);
			}
			catch (...) {
				LoggingID lid(SERVER_MONITOR_LOG_ID);
				MessageLog ml(lid);
				Message msg;
				Message::Args args;
				args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: Caught unknown exception");
				msg.format(args);
				ml.logErrorMessage(msg);
			}
		}
		catch (exception& ex)