/**
 * Fetch the TupleInfo registered for a column key.
 *
 * @param columnKey key looked up in jobInfo.keyInfo->tupleInfoMap; on failure it is
 *                  also used to index jobInfo.keyInfo->tupleKeyVec for the diagnostic
 * @param jobInfo   job context supplying the key maps, the logger and session/txn ids
 * @return a copy of the mapped TupleInfo
 * @throws runtime_error when the key has no entry, or its entry has dtype BIT
 */
TupleInfo getTupleInfo(uint32_t columnKey, const JobInfo& jobInfo)
{
    TupleInfoMap::const_iterator entry = jobInfo.keyInfo->tupleInfoMap.find(columnKey);

    // Success path first: an entry exists and its dtype is not BIT.
    if (entry != jobInfo.keyInfo->tupleInfoMap.end() &&
        entry->second.dtype != CalpontSystemCatalog::BIT)
    {
        return entry->second;
    }

    // Failure path: build "(fId,fTable[,fView])", echo it to stderr, log it, then throw.
    ostringstream diag;
    diag << "TupleInfo for (" << jobInfo.keyInfo->tupleKeyVec[columnKey].fId
         << "," << jobInfo.keyInfo->tupleKeyVec[columnKey].fTable;

    if (jobInfo.keyInfo->tupleKeyVec[columnKey].fView.length() > 0)
        diag << "," << jobInfo.keyInfo->tupleKeyVec[columnKey].fView;

    diag << ") could not be found." << endl;
    cerr << diag.str();

    Message::Args logArgs;
    logArgs.add(diag.str());
    jobInfo.logger->logMessage(LOG_TYPE_DEBUG, LogMakeJobList, logArgs,
                               LoggingID(5, jobInfo.sessionId, jobInfo.txnId, 0));

    throw runtime_error("column's tuple info could not be found");
}
/**
 * Build the text for error id `eid`, passing `s` as the single format argument.
 *
 * @param eid error id handed to lookupError()
 * @param s   value added to the argument list given to format()
 * @return the looked-up message after format() has been applied to it
 */
string ERYDBErrorInfo::errorMsg(const unsigned eid, const string& s)
{
    Message::Args substitutions;
    substitutions.add(s);

    string text = lookupError(eid);
    format(text, substitutions);
    return text;
}
/******************************************************************************************
* @brief	clearAlarm
*
* purpose:	report a CLEAR for a previously raised alarm and log a warning about it
*
******************************************************************************************/
void ServerMonitor::clearAlarm(string alarmItem, ALARMS alarmID)
{
    // Send the CLEAR report for this item/alarm pair.
    SNMPManager snmp;
    snmp.sendAlarmReport(alarmItem.c_str(), alarmID, CLEAR);

    // Record the event in the server-monitor log.
    LoggingID logId(SERVER_MONITOR_LOG_ID);
    MessageLog logger(logId);
    Message logMsg;
    Message::Args logArgs;

    logArgs.add(alarmItem);
    logArgs.add(" alarm #");
    logArgs.add(alarmID);
    logArgs.add("cleared");

    logMsg.format(logArgs);
    logger.logWarningMessage(logMsg);
}
/******************************************************************************************
* @brief	sendAlarm
*
* purpose:	log an out-of-range sensor value and, unless the alarm is already
*		active for this module, send the alarm report
*
******************************************************************************************/
void ServerMonitor::sendAlarm(string alarmItem, ALARMS alarmID, int action, float sensorValue)
{
    ServerMonitor monitor;
    Oam oam;

    // Start the log entry; the "Alarm set" suffix is appended only if a report is sent.
    LoggingID logId(SERVER_MONITOR_LOG_ID);
    MessageLog logger(logId);
    Message logMsg;
    Message::Args logArgs;

    logArgs.add(alarmItem);
    logArgs.add(", sensor value out-of-range: ");
    logArgs.add(sensorValue);

    // Resolve the name of the module we are running on.
    string moduleName;

    try
    {
        oamModuleInfo_t info = oam.getModuleInfo();
        moduleName = boost::get<0>(info);
    }
    catch (...)
    {
        moduleName = "Unknown Server";
    }

    // Give checkAlarm() a chance to clear an active alarm above the reporting
    // threshold before a new one is raised.
    monitor.checkAlarm(alarmItem, alarmID);

    // Do not resend an alarm that is already active.
    const bool alreadyActive = oam.checkActiveAlarm(alarmID, moduleName, alarmItem);

    if (!alreadyActive)
    {
        SNMPManager snmp;
        snmp.sendAlarmReport(alarmItem.c_str(), alarmID, action);

        logArgs.add(", Alarm set: ");
        logArgs.add(alarmID);
    }

    // The warning is logged whether or not a report was sent.
    logMsg.format(logArgs);
    logger.logWarningMessage(logMsg);
}
int main(int argc, char** argv) { int c; opterr = 0; bool cflg = true; bool wflg = false; unsigned subsysID = 8; //oamcpp while ((c = getopt(argc, argv, "s:cwih")) != EOF) switch (c) { case 'c': cflg = true; wflg = false; break; case 'w': cflg = false; wflg = true; break; case 'i': cflg = false; wflg = false; break; case 's': subsysID = strtoul(optarg, 0, 0); break; case 'h': case '?': default: usage(); return (c == 'h' ? 0 : 1); break; } if ((argc - optind) < 1) { usage(); return 1; } Message::MessageID mid = strtoul(argv[optind++], 0, 0);; Message::Args args; for (int i = optind; i < argc; i++) args.add(argv[optind++]); LoggingID logInfo(subsysID); Message msg(mid); msg.format(args); MessageLog log(logInfo); if (cflg) log.logCriticalMessage(msg); else if (wflg) log.logWarningMessage(msg); else log.logInfoMessage(msg); return 0; }
void DDLIndexPopulator::logError(const string& msg, int error) { Message::Args args; Message message(9); args.add((string)__FILE__ + ": "); args.add(msg); if (error) { args.add("Error number: "); args.add(error); } message.format( args ); fResult.result = DDLPackageProcessor::CREATE_ERROR; fResult.message = message; }
void m4() { LoggingID lid1(100, 200, 300, 400); MessageLog ml1(lid1); Message::Args args; Message* m; args.add("hello"); args.add("world"); args.add(123); args.add(1234.55); for (int i = 0; i < 4; i++) { m = new Message(i); m->format(args); ml1.logDebugMessage(*m); delete m; } Config::deleteInstanceMap(); }
/**
 * Initialize this ColumnCommand from its serialized form.
 *
 * Fields are consumed from `bs` in a fixed order: a command byte (discarded),
 * the scan flag, trace flags, the filter string, four single-byte column
 * attributes (data type, width, scale, compression type), BOP, the filter
 * count, a UDF ordinal and the lastLbid vector. The remainder of the stream
 * is handed to Command::createCommand(). Finally the filter string is parsed
 * twice: once with filterCount and once with a count of 0.
 *
 * @param bs stream positioned at the start of the serialized command
 */
void ColumnCommand::createCommand(ByteStream &bs)
{
    uint8_t octet;

    bs >> octet;		// the Command byte itself; value is not used
    bs >> octet;
    _isScan = octet;
    bs >> traceFlags;
    bs >> filterString;

    // Column type attributes, one byte each.
    bs >> octet;
    colType.colDataType = static_cast<execplan::CalpontSystemCatalog::ColDataType>(octet);
    bs >> octet;
    colType.colWidth = octet;
    bs >> octet;
    colType.scale = octet;
    bs >> octet;
    colType.compressionType = octet;

    bs >> BOP;
    bs >> filterCount;

    // UDF ordinal number; a non-zero ordinal with no matching function is logged.
    bs >> octet;
    fUdfFuncPtr = ord2Fptr(octet);

    if (fUdfFuncPtr == 0 && octet != 0)
    {
        Message::Args args;
        args.add(octet);
        mlp->logMessage(logging::M0069, args);
    }

    deserializeInlineVector(bs, lastLbid);

    Command::createCommand(bs);

    parsedColumnFilter = bpp->pp.parseColumnFilter(filterString.buf(), colType.colWidth,
                         colType.colDataType, filterCount, BOP);

    /* OR hack */
    emptyFilter = bpp->pp.parseColumnFilter(filterString.buf(), colType.colWidth,
                                            colType.colDataType, 0, BOP);
}
/****************************************************************************************** * @brief sendMsgShutdownServer * * purpose: send a Message to Shutdown server * ******************************************************************************************/ void sendMsgShutdownServer() { Oam oam; //Log this event LoggingID lid; MessageLog ml(lid); Message msg; Message::Args args; args.add("Fatal Hardware Alarm detected, Server being shutdown"); msg.format(args); ml.logCriticalMessage(msg); string serverName; oamServerInfo_t st; try { st = oam.getServerInfo(); serverName = boost::get<0>(st); } catch (...) { // o well, let's take out own action if( IPMI_SUPPORT == 0) system("init 0"); } try { oam.shutdownServer(serverName, FORCEFUL, ACK_NO); } catch (exception& e) { // o well, let's take out own action if( IPMI_SUPPORT == 0) system("init 0"); } }
/****************************************************************************************** * @brief sendResourceAlarm * * purpose: send a trap and log the process information * ******************************************************************************************/ bool ServerMonitor::sendResourceAlarm(string alarmItem, ALARMS alarmID, int action, int usage) { ServerMonitor serverMonitor; Oam oam; //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add(alarmItem); args.add(" usage at percentage of "); args.add(usage); // get current module name string moduleName; oamModuleInfo_t st; try { st = oam.getModuleInfo(); moduleName = boost::get<0>(st); } catch (...) { moduleName = "Unknown Server"; } // check if there is an active alarm above the reporting theshold // that needs to be cleared if (alarmItem == "CPU") serverMonitor.checkCPUAlarm(alarmItem, alarmID); else if (alarmItem == "Local Disk" || alarmItem == "External") serverMonitor.checkDiskAlarm(alarmItem, alarmID); else if (alarmItem == "Local Memory") serverMonitor.checkMemoryAlarm(alarmItem, alarmID); else if (alarmItem == "Local Swap") serverMonitor.checkSwapAlarm(alarmItem, alarmID); // don't issue an alarm on thge dbroots is already issued by this or another server if ( alarmItem.find(startup::StartUp::installDir() + "/data") == 0 ) { // check if Alarm is already active from any module, don't resend if ( !( oam.checkActiveAlarm(alarmID, "*", alarmItem)) ) { SNMPManager alarmMgr; // send alarm alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); args.add(", Alarm set: "); args.add(alarmID); msg.format(args); ml.logInfoMessage(msg); return true; } else return false; } else { // check if Alarm is already active from this module, don't resend if ( !( oam.checkActiveAlarm(alarmID, moduleName, alarmItem)) ) { SNMPManager alarmMgr; // send alarm alarmMgr.sendAlarmReport(alarmItem.c_str(), alarmID, action); args.add(", Alarm set: "); 
args.add(alarmID); msg.format(args); ml.logInfoMessage(msg); return true; } else return false; } return true; }
int main(int argc, char* argv[]) { // get and set locale language string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); setupCwd(); WriteEngine::WriteEngineWrapper::init( WriteEngine::SUBSYSTEM_ID_DDLPROC ); #ifdef _MSC_VER // In windows, initializing the wrapper (A dll) does not set the static variables // in the main program idbdatafile::IDBPolicy::configIDBPolicy(); #endif ResourceManager rm; Dec = DistributedEngineComm::instance(rm); #ifndef _MSC_VER /* set up some signal handlers */ struct sigaction ign; memset(&ign, 0, sizeof(ign)); ign.sa_handler = added_a_pm; sigaction(SIGHUP, &ign, 0); ign.sa_handler = SIG_IGN; sigaction(SIGPIPE, &ign, 0); #endif ddlprocessor::DDLProcessor ddlprocessor(5, 10); { Oam oam; try { oam.processInitComplete("DDLProc", ACTIVE); } catch (...) { } } try { ddlprocessor.process(); } catch (std::exception& ex) { cerr << ex.what() << endl; Message::Args args; Message message(8); args.add("DDLProc failed on: "); args.add(ex.what()); message.format( args ); } catch (...) { cerr << "Caught unknown exception!" << endl; Message::Args args; Message message(8); args.add("DDLProc failed on: "); args.add("receiving DDLPackage"); message.format( args ); } return 0; }
/**
 * PrimProc entry point.
 *
 * Sets the locale, loads the configuration, derives the server's tuning
 * parameters (thread counts, block cache size, read-ahead sizes, process
 * priority, per-priority thread split, ...) from config values with built-in
 * defaults, prints the resulting settings to stdout, then constructs a
 * PrimitiveServer and calls start() on it.
 *
 * Most config values are read through toInt(); a value is generally applied
 * only when it is positive (or non-negative for the percentage settings),
 * otherwise the default declared below stays in effect.
 *
 * @return 1 - reached only when server.start() returns
 */
int main(int argc, char* argv[])
{
    // get and set locale language
    systemLang = funcexp::utf8::idb_setlocale();

    // utf8 is switched on for any locale other than en_US.UTF-8 whose name contains "UTF"
    if ( systemLang != "en_US.UTF-8" && systemLang.find("UTF") != string::npos )
        utf8 = true;

    Config* cf = Config::makeConfig();

    setupSignalHandlers();

    setupCwd(cf);

    mlp = new primitiveprocessor::Logger();

    int rc;
    rc = setupResources();

    // NOTE(review): the logging call below is commented out, so a non-zero rc
    // currently has no visible effect.
    if (rc)
    {
        Message::Args args;
        args.add(rc);
        //mlp->logMessage(logging::M0016, args);
    }

    // Defaults; most are overridden further down when the config supplies a value.
    int serverThreads = 1;
    int serverQueueSize = 10;
    int processorWeight = 8*1024;
    int processorQueueSize = 10*1024;
    int BRPBlocksPct = 70;
    uint32_t BRPBlocks = 1887437;
    int BRPThreads = 16;
    int cacheCount = 1;
    int maxBlocksPerRead = 256; // 1MB
    bool rotatingDestination = false;
    uint32_t deleteBlocks = 128;
    bool PTTrace = false;
    int temp;
    string strTemp;
    int priority = -1;
    const string primitiveServers("PrimitiveServers");
    const string jobListStr("JobList");
    const string dbbc("DBBC");
    const string ExtentMapStr("ExtentMap");
    uint64_t extentRows = 8*1024*1024;
    uint64_t MaxExtentSize = 0;
    double prefetchThreshold;
    uint64_t PMSmallSide = 67108864;
    BPPCount = 16;
    int numCores = -1;
    int configNumCores = -1;
    uint32_t highPriorityPercentage, medPriorityPercentage, lowPriorityPercentage;
    utils::CGroupConfigurator cg;

    gDebugLevel = primitiveprocessor::NONE;

    temp = toInt(cf->getConfig(primitiveServers, "ServerThreads"));

    if (temp > 0)
        serverThreads = temp;

    temp = toInt(cf->getConfig(primitiveServers, "ServerQueueSize"));

    if (temp > 0)
        serverQueueSize = temp;

    temp = toInt(cf->getConfig(primitiveServers, "ProcessorThreshold"));

    if (temp > 0)
        processorWeight = temp;

    temp = toInt(cf->getConfig(primitiveServers, "ProcessorQueueSize"));

    if (temp > 0)
        processorQueueSize = temp;

    temp = toInt(cf->getConfig(primitiveServers, "DebugLevel"));

    if (temp > 0)
        gDebugLevel = (DebugLevel)temp;

    // Priority percentages default to 0; all three being 0 selects the
    // built-in thread split further down.
    highPriorityPercentage = 0;
    temp = toInt(cf->getConfig(primitiveServers, "HighPriorityPercentage"));

    if (temp >= 0)
        highPriorityPercentage = temp;

    medPriorityPercentage = 0;
    temp = toInt(cf->getConfig(primitiveServers, "MediumPriorityPercentage"));

    if (temp >= 0)
        medPriorityPercentage = temp;

    lowPriorityPercentage = 0;
    temp = toInt(cf->getConfig(primitiveServers, "LowPriorityPercentage"));

    if (temp >= 0)
        lowPriorityPercentage = temp;

    temp = toInt(cf->getConfig(ExtentMapStr, "ExtentRows"));

    if (temp > 0)
        extentRows = temp;

    temp = toInt(cf->getConfig(primitiveServers, "ConnectionsPerPrimProc"));

    if (temp > 0)
        connectionsPerUM = temp;
    else
        connectionsPerUM = 1;

    // set to smallest extent size
    // do not allow to read beyond the end of an extent
    const int MaxReadAheadSz = (extentRows)/BLOCK_SIZE;
    //defaultBufferSize = 512 * 1024; // @bug 2627 - changed default dict buffer from 256K to 512K, allows for cols w/ length of 61.
    defaultBufferSize = 100*1024; // 1/17/12 - made the dict buffer dynamic, max size for a numeric col is 80k + ovrhd

    // This parm controls whether we rotate through the output sockets
    // when deciding where to send response messages, or whether to simply
    // send the response to the socket of origin. Should normally be set
    // to 'y', for install types 1 and 3.
    string strVal = cf->getConfig(primitiveServers, "RotatingDestination");
    //XXX: Permanently disable for now...
    // The assignment below overrides whatever the config said, so the branch
    // that follows is never taken as the code stands.
    strVal = "N";

    if ((strVal == "y") || (strVal == "Y"))
    {
        rotatingDestination = true;

        // Disable destination rotation if UM and PM are running on same
        // server, because we could accidentally end up sending DMLProc
        // responses to ExeMgr and vice versa, if we rotated socket dest.
        temp = toInt(cf->getConfig("Installation", "ServerTypeInstall"));

        if ((temp == oam::INSTALL_COMBINE_DM_UM_PM) ||
                (temp == oam::INSTALL_COMBINE_PM_UM))
            rotatingDestination = false;
    }

    // Percentage of total memory given to the block cache (parsed with atoi, not toInt).
    string strBlockPct = cf->getConfig(dbbc, "NumBlocksPct");
    temp = atoi(strBlockPct.c_str());

    if (temp > 0)
        BRPBlocksPct = temp;

#ifdef _MSC_VER
    MEMORYSTATUSEX memStat;
    memStat.dwLength = sizeof(memStat);

    if (GlobalMemoryStatusEx(&memStat) == 0)
        //FIXME: Assume 2GB?
        BRPBlocks = 2621 * BRPBlocksPct;
    else
    {
#ifndef _WIN64
        memStat.ullTotalPhys = std::min(memStat.ullTotalVirtual, memStat.ullTotalPhys);
#endif
        //We now have the total phys mem in bytes
        BRPBlocks = memStat.ullTotalPhys / (8 * 1024) / 100 * BRPBlocksPct;
    }

#else
    // _SC_PHYS_PAGES is in 4KB units. Dividing by 200 converts to 8KB and gets ready to work in pct
    // _SC_PHYS_PAGES should always be >> 200 so we shouldn't see a total loss of precision
    //BRPBlocks = sysconf(_SC_PHYS_PAGES) / 200 * BRPBlocksPct;
    // Current form: pct of cg.getTotalMemory(), divided by 8192
    // (presumably bytes per block - TODO confirm).
    BRPBlocks = ((BRPBlocksPct/100.0) * (double) cg.getTotalMemory()) / 8192;
#endif
#if 0
    temp = toInt(cf->getConfig(dbbc, "NumThreads"));

    if (temp > 0)
        BRPThreads = temp;

#endif
    temp = toInt(cf->getConfig(dbbc, "NumCaches"));

    if (temp > 0)
        cacheCount = temp;

    temp = toInt(cf->getConfig(dbbc, "NumDeleteBlocks"));

    if (temp > 0)
        deleteBlocks = temp;

    // Cap deleteBlocks at 1% of the cache size.
    if ((uint32_t)(.01 * BRPBlocks) < deleteBlocks)
        deleteBlocks = (uint32_t)(.01 * BRPBlocks);

    temp = toInt(cf->getConfig(primitiveServers, "ColScanBufferSizeBlocks"));

    // Out-of-range values (above MaxReadAheadSz or below 1) fall back to MaxReadAheadSz.
    if (temp > (int) MaxReadAheadSz || temp < 1)
        maxBlocksPerRead = MaxReadAheadSz;
    else if (temp > 0)
        maxBlocksPerRead = temp;

    temp = toInt(cf->getConfig(primitiveServers, "ColScanReadAheadBlocks"));

    // A value of exactly 0 matches neither branch and leaves blocksReadAhead untouched.
    if (temp > (int) MaxReadAheadSz || temp < 0)
        blocksReadAhead = MaxReadAheadSz;
    else if (temp > 0)
    {
        //make sure we've got an integral factor of extent size
        // NOTE(review): MaxExtentSize is initialized to 0 above and never
        // assigned in this function, so (MaxExtentSize % temp) is always 0 and
        // this loop does not adjust temp.
        for (; (MaxExtentSize%temp)!=0; ++temp);

        blocksReadAhead=temp;
    }

    temp = toInt(cf->getConfig(primitiveServers, "PTTrace"));

    if (temp > 0)
        PTTrace = true;

    temp = toInt(cf->getConfig(primitiveServers, "PrefetchThreshold"));

    // Config value is a percentage; anything outside 0..100 disables it (0).
    if (temp < 0 || temp > 100)
        prefetchThreshold = 0;
    else
        prefetchThreshold = temp/100.0;

    int maxPct = 0; //disable by default
    temp = toInt(cf->getConfig(primitiveServers, "MaxPct"));

    if (temp >= 0)
        maxPct = temp;

    // @bug4507, configurable pm aggregation AggregationMemoryCheck
    // We could use this same mechanism for other growing buffers.
    int aggPct = 95;
    temp = toInt(cf->getConfig("SystemConfig", "MemoryCheckPercent"));

    if (temp >= 0)
        aggPct = temp;

    //...Start the thread to monitor our memory usage
    // The thread object is heap-allocated and its pointer is not kept.
    new boost::thread(utils::MonitorProcMem(maxPct, aggPct, 28));

    // config file priority is 40..1 (highest..lowest)
    string sPriority = cf->getConfig(primitiveServers, "Priority");

    if (sPriority.length() > 0)
        temp = toInt(sPriority);
    else
        temp = 21;

    // convert config file value to setpriority(2) value (-20..19, -1 is the default)
    // A converted value of exactly 0 leaves priority at its initial -1.
    if (temp > 0)
        priority = 20 - temp;
    else if (temp < 0)
        priority = 19;

    if (priority < -20) priority = -20;

#ifdef _MSC_VER
    //FIXME:
#else
    setpriority(PRIO_PROCESS, 0, priority);
#endif
    //..Instantiate UmSocketSelector singleton. Disable rotating destination
    //..selection if no UM IP addresses are in the configuration XML file
    //..(the file name in the original comment was garbled; presumably Calpont.xml).
    UmSocketSelector* pUmSocketSelector = UmSocketSelector::instance();

    if (rotatingDestination)
    {
        if (pUmSocketSelector->ipAddressCount() < 1)
            rotatingDestination = false;
    }

    //See if we want to override the calculated #cores
    temp = toInt(cf->getConfig(primitiveServers, "NumCores"));

    if (temp > 0)
        configNumCores = temp;

    if (configNumCores <= 0)
    {
        //count the actual #cores
        numCores = cg.getNumCores();

        // fall back to 8 when the core count comes back as 0
        if (numCores == 0)
            numCores = 8;
    }
    else
        numCores = configNumCores;

    //based on the #cores, calculate some thread parms
    if (numCores > 0)
    {
        BRPThreads = 2 * numCores;
        //there doesn't seem much benefit to having more than this, and sometimes it causes problems.
        //DBBC.NumThreads can override this cap
        BRPThreads = std::min(BRPThreads, 32);
    }

    // the default is ~10% low, 30% medium, 60% high, (where 2*cores = 100%)
    if (highPriorityPercentage == 0 && medPriorityPercentage == 0 &&
            lowPriorityPercentage == 0)
    {
        lowPriorityThreads = max(1, (2*numCores)/10);
        medPriorityThreads = max(1, (2*numCores)/3);
        highPriorityThreads = (2 * numCores) - lowPriorityThreads - medPriorityThreads;
    }
    else
    {
        // Configured split: high gets whatever remains of the (truncated)
        // total after low and med are taken out.
        uint32_t totalThreads = (uint32_t) ((lowPriorityPercentage + medPriorityPercentage +
                                             highPriorityPercentage) / 100.0 * (2*numCores));

        if (totalThreads == 0)
            totalThreads = 1;

        lowPriorityThreads = (uint32_t) (lowPriorityPercentage/100.0 * (2*numCores));
        medPriorityThreads = (uint32_t) (medPriorityPercentage/100.0 * (2*numCores));
        highPriorityThreads = totalThreads - lowPriorityThreads - medPriorityThreads;
    }

    BPPCount = highPriorityThreads + medPriorityThreads + lowPriorityThreads;

    // let the user override if they want
    // (only a value below the computed count is accepted)
    temp = toInt(cf->getConfig(primitiveServers, "BPPCount"));

    if (temp > 0 && temp < (int) BPPCount)
        BPPCount = temp;

    // DBBC.NumThreads, when set, replaces the core-based BRPThreads value
    // (including the cap of 32 applied above).
    temp = toInt(cf->getConfig(dbbc, "NumThreads"));

    if (temp > 0)
        BRPThreads = temp;

#ifndef _MSC_VER
    // @bug4598, switch for O_DIRECT to support gluster fs.
    // directIOFlag == O_DIRECT, by default
    strVal = cf->getConfig(primitiveServers, "DirectIO");

    if ((strVal == "n") || (strVal == "N"))
        directIOFlag = 0;

#endif

    IDBPolicy::configIDBPolicy();

    // no versionbuffer if using HDFS for performance reason
    if (IDBPolicy::useHdfs())
        noVB = 1;

    // Echo the effective settings before starting the server.
    cout << "Starting PrimitiveServer: st = " << serverThreads << ", sq = " << serverQueueSize <<
         ", pw = " << processorWeight << ", pq = " << processorQueueSize <<
         ", nb = " << BRPBlocks << ", nt = " << BRPThreads << ", nc = " << cacheCount <<
         ", ra = " << blocksReadAhead << ", db = " << deleteBlocks << ", mb = " << maxBlocksPerRead <<
         ", rd = " << rotatingDestination << ", tr = " << PTTrace <<
         ", ss = " << PMSmallSide << ", bp = " << BPPCount << endl;

    PrimitiveServer server(serverThreads, serverQueueSize, processorWeight, processorQueueSize,
                           rotatingDestination, BRPBlocks, BRPThreads, cacheCount, maxBlocksPerRead, blocksReadAhead,
                           deleteBlocks, PTTrace, prefetchThreshold, PMSmallSide);

#ifdef QSIZE_DEBUG
    // Optional queue-size monitor thread; compiled in only with QSIZE_DEBUG.
    thread* qszMonThd;

    if (gDebugLevel >= STATS)
    {
#ifdef _MSC_VER
        ofstream* qszLog = new ofstream("C:/Calpont/log/trace/ppqsz.dat");
#else
        ofstream* qszLog = new ofstream("/var/log/Calpont/trace/ppqsz.dat");
#endif

        // If the trace file could not be opened, the monitor runs with a null log pointer.
        if (!qszLog->good())
        {
            qszLog->close();
            delete qszLog;
            qszLog = 0;
        }

        qszMonThd = new thread(QszMonThd(&server, qszLog));
    }

#endif

#ifdef DUMP_CACHE_CONTENTS
    {
        //Need to use pthreads API here...
        // Detached thread running waitForSIGUSR1, with cacheCount passed through the void* argument.
        pthread_t thd1;
        pthread_attr_t attr1;
        pthread_attr_init(&attr1);
        pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED);
        pthread_create(&thd1, &attr1, waitForSIGUSR1, reinterpret_cast<void*>(cacheCount));
    }
#endif

    server.start();

    // Getting here means start() returned; that is reported and treated as a failure exit.
    cerr << "server.start() exited!" << endl;

    return 1;
}
CreateTableProcessor::DDLResult CreateTableProcessor::processPackage( ddlpackage::CreateTableStatement& createTableStmt) { SUMMARY_INFO("CreateTableProcessor::processPackage"); DDLResult result; BRM::TxnID txnID; txnID.id= fTxnid.id; txnID.valid= fTxnid.valid; result.result = NO_ERROR; int rc1 = 0; rc1 = fDbrm->isReadWrite(); if (rc1 != 0 ) { Message::Args args; Message message(9); args.add("Unable to execute the statement due to DBRM is read only"); message.format(args); result.result = CREATE_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } DETAIL_INFO(createTableStmt); ddlpackage::TableDef& tableDef = *createTableStmt.fTableDef; //If schema = CALPONTSYS, do not create table boost::algorithm::to_lower(tableDef.fQualifiedName->fSchema); if (tableDef.fQualifiedName->fSchema == CALPONT_SCHEMA) { //release the transaction fSessionManager.rolledback(txnID); return result; } // Commit current transaction. // all DDL statements cause an implicut commit VERBOSE_INFO("Getting current txnID"); //Check whether the table is existed already boost::shared_ptr<CalpontSystemCatalog> systemCatalogPtr = CalpontSystemCatalog::makeCalpontSystemCatalog(createTableStmt.fSessionID); execplan::CalpontSystemCatalog::TableName tableName; tableName.schema = tableDef.fQualifiedName->fSchema; tableName.table = tableDef.fQualifiedName->fName; execplan::CalpontSystemCatalog::ROPair roPair; roPair.objnum = 0; ByteStream::byte rc = 0; /** @Bug 217 */ /** @Bug 225 */ try { roPair = systemCatalogPtr->tableRID(tableName); } catch (IDBExcept &ie) { // TODO: What is and is not an error here? 
if (ie.errorCode() == ERR_DATA_OFFLINE) { //release transaction fSessionManager.rolledback(txnID); // Return the error for display to user Message::Args args; Message message(9); args.add(ie.what()); message.format(args); result.result = CREATE_ERROR; result.message = message; return result; } else if ( ie.errorCode() == ERR_TABLE_NOT_IN_CATALOG) { roPair.objnum = 0; } else //error out { //release transaction fSessionManager.rolledback(txnID); // Return the error for display to user Message::Args args; Message message(9); args.add(ie.what()); message.format(args); result.result = CREATE_ERROR; result.message = message; return result; } } catch (std::exception& ex) //error out { //release transaction fSessionManager.rolledback(txnID); // Return the error for display to user Message::Args args; Message message(9); args.add(ex.what()); message.format(args); result.result = CREATE_ERROR; result.message = message; return result; } catch (...) //error out { //release transaction fSessionManager.rolledback(txnID); // Return the error for display to user Message::Args args; Message message(9); args.add("Unknown exception caught when checking if the table name is already in use."); message.format(args); result.result = CREATE_ERROR; result.message = message; return result; } //This is a current db bug, it should not turn OID is it cannot find if (roPair.objnum >= 3000) { #ifdef _MSC_VER //FIXME: Why do we need to do this??? systemCatalogPtr->flushCache(); try { roPair = systemCatalogPtr->tableRID(tableName); } catch (...) 
{ roPair.objnum = 0; } if (roPair.objnum < 3000) goto keepGoing; #endif Message::Args args; Message message(9); args.add("Internal create table error for"); args.add(tableName.toString()); args.add(": table already exists"); args.add("(your schema is probably out-of-sync)"); message.format(args); result.result = CREATE_ERROR; result.message = message; //release the transaction fSessionManager.rolledback(txnID); return result; } #ifdef _MSC_VER keepGoing: #endif // Start a new transaction VERBOSE_INFO("Starting a new transaction"); string stmt = createTableStmt.fSql + "|" + tableDef.fQualifiedName->fSchema +"|"; SQLLogger logger(stmt, fDDLLoggingId, createTableStmt.fSessionID, txnID.id); std::string err; execplan::ObjectIDManager fObjectIDManager; OamCache * oamcache = OamCache::makeOamCache(); string errorMsg; //get a unique number uint64_t uniqueId = 0; //Bug 5070. Added exception handling try { uniqueId = fDbrm->getUnique64(); } catch (std::exception& ex) { Message::Args args; Message message(9); args.add(ex.what()); message.format(args); result.result = CREATE_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } catch ( ... 
) { Message::Args args; Message message(9); args.add("Unknown error occured while getting unique number."); message.format(args); result.result = CREATE_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } fWEClient->addQueue(uniqueId); try { //Allocate tableoid table identification VERBOSE_INFO("Allocating object ID for table"); // Allocate a object ID for each column we are about to create VERBOSE_INFO("Allocating object IDs for columns"); uint32_t numColumns = tableDef.fColumns.size(); uint32_t numDictCols = 0; for (unsigned i=0; i < numColumns; i++) { int dataType; dataType = convertDataType(tableDef.fColumns[i]->fType->fType); if ( (dataType == CalpontSystemCatalog::CHAR && tableDef.fColumns[i]->fType->fLength > 8) || (dataType == CalpontSystemCatalog::VARCHAR && tableDef.fColumns[i]->fType->fLength > 7) || (dataType == CalpontSystemCatalog::VARBINARY && tableDef.fColumns[i]->fType->fLength > 7) ) numDictCols++; } fStartingColOID = fObjectIDManager.allocOIDs(numColumns+numDictCols+1); //include column, oids,dictionary oids and tableoid #ifdef IDB_DDL_DEBUG cout << "Create table allocOIDs got the stating oid " << fStartingColOID << endl; #endif if (fStartingColOID < 0) { result.result = CREATE_ERROR; errorMsg = "Error in getting objectid from oidmanager."; Message::Args args; Message message(9); args.add("Create table failed due to "); args.add(errorMsg); message.format(args); result.message = message; fSessionManager.rolledback(txnID); return result; } // Write the table metadata to the systemtable VERBOSE_INFO("Writing meta data to SYSTABLE"); ByteStream bytestream; bytestream << (ByteStream::byte)WE_SVR_WRITE_SYSTABLE; bytestream << uniqueId; bytestream << (uint32_t) createTableStmt.fSessionID; bytestream << (uint32_t)txnID.id; bytestream << (uint32_t)fStartingColOID; bytestream << (uint32_t)createTableStmt.fTableWithAutoi; uint16_t dbRoot; BRM::OID_t sysOid = 1001; //Find out where systable is rc = 
fDbrm->getSysCatDBRoot(sysOid, dbRoot); if (rc != 0) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Error while calling getSysCatDBRoot "); args.add(errorMsg); message.format(args); result.message = message; //release transaction fSessionManager.rolledback(txnID); return result; } int pmNum = 1; bytestream << (uint32_t)dbRoot; tableDef.serialize(bytestream); boost::shared_ptr<messageqcpp::ByteStream> bsIn; boost::shared_ptr<std::map<int, int> > dbRootPMMap = oamcache->getDBRootToPMMap(); pmNum = (*dbRootPMMap)[dbRoot]; try { fWEClient->write(bytestream, (unsigned)pmNum); #ifdef IDB_DDL_DEBUG cout << "create table sending We_SVR_WRITE_SYSTABLE to pm " << pmNum << endl; #endif while (1) { bsIn.reset(new ByteStream()); fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { rc = NETWORK_ERROR; errorMsg = "Lost connection to Write Engine Server while updating SYSTABLES"; break; } else { *bsIn >> rc; if (rc != 0) { errorMsg.clear(); *bsIn >> errorMsg; #ifdef IDB_DDL_DEBUG cout << "Create table We_SVR_WRITE_CREATETABLEFILES: " << errorMsg << endl; #endif } break; } } } catch (runtime_error& ex) //write error { #ifdef IDB_DDL_DEBUG cout << "create table got exception" << ex.what() << endl; #endif rc = NETWORK_ERROR; errorMsg = ex.what(); } catch (...) 
{ rc = NETWORK_ERROR; #ifdef IDB_DDL_DEBUG cout << "create table got unknown exception" << endl; #endif } if (rc != 0) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Create table failed due to "); args.add(errorMsg); message.format( args ); result.message = message; if (rc != NETWORK_ERROR) { rollBackTransaction( uniqueId, txnID, createTableStmt.fSessionID ); //What to do with the error code } //release transaction fSessionManager.rolledback(txnID); return result; } VERBOSE_INFO("Writing meta data to SYSCOLUMN"); bytestream.restart(); bytestream << (ByteStream::byte)WE_SVR_WRITE_CREATE_SYSCOLUMN; bytestream << uniqueId; bytestream << (uint32_t) createTableStmt.fSessionID; bytestream << (uint32_t)txnID.id; bytestream << numColumns; for (unsigned i = 0; i <numColumns; ++i) { bytestream << (uint32_t)(fStartingColOID+i+1); } bytestream << numDictCols; for (unsigned i = 0; i <numDictCols; ++i) { bytestream << (uint32_t)(fStartingColOID+numColumns+i+1); } uint8_t alterFlag = 0; int colPos = 0; bytestream << (ByteStream::byte)alterFlag; bytestream << (uint32_t)colPos; sysOid = 1021; //Find out where syscolumn is rc = fDbrm->getSysCatDBRoot(sysOid, dbRoot); if (rc != 0) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Error while calling getSysCatDBRoot "); args.add(errorMsg); message.format(args); result.message = message; //release transaction fSessionManager.rolledback(txnID); return result; } bytestream << (uint32_t)dbRoot; tableDef.serialize(bytestream); pmNum = (*dbRootPMMap)[dbRoot]; try { fWEClient->write(bytestream, (uint32_t)pmNum); #ifdef IDB_DDL_DEBUG cout << "create table sending We_SVR_WRITE_SYSTABLE to pm " << pmNum << endl; #endif while (1) { bsIn.reset(new ByteStream()); fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { rc = NETWORK_ERROR; errorMsg = "Lost connection to Write Engine Server while updating SYSTABLES"; break; } else { *bsIn >> rc; if (rc != 0) { 
errorMsg.clear(); *bsIn >> errorMsg; #ifdef IDB_DDL_DEBUG cout << "Create table We_SVR_WRITE_CREATETABLEFILES: " << errorMsg << endl; #endif } break; } } } catch (runtime_error& ex) //write error { #ifdef IDB_DDL_DEBUG cout << "create table got exception" << ex.what() << endl; #endif rc = NETWORK_ERROR; errorMsg = ex.what(); } catch (...) { rc = NETWORK_ERROR; #ifdef IDB_DDL_DEBUG cout << "create table got unknown exception" << endl; #endif } if (rc != 0) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Create table failed due to "); args.add(errorMsg); message.format( args ); result.message = message; if (rc != NETWORK_ERROR) { rollBackTransaction( uniqueId, txnID, createTableStmt.fSessionID ); //What to do with the error code } //release transaction fSessionManager.rolledback(txnID); return result; } //Get the number of tables in the database, the current table is included. int tableCount = systemCatalogPtr->getTableCount(); //Calculate which dbroot the columns should start DBRootConfigList dbRootList = oamcache->getDBRootNums(); uint16_t useDBRootIndex = tableCount % dbRootList.size(); //Find out the dbroot# corresponding the useDBRootIndex from oam uint16_t useDBRoot = dbRootList[useDBRootIndex]; VERBOSE_INFO("Creating column files"); ColumnDef* colDefPtr; ddlpackage::ColumnDefList tableDefCols = tableDef.fColumns; ColumnDefList::const_iterator iter = tableDefCols.begin(); bytestream.restart(); bytestream << (ByteStream::byte)WE_SVR_WRITE_CREATETABLEFILES; bytestream << uniqueId; bytestream << (uint32_t)txnID.id; bytestream << (numColumns + numDictCols); unsigned colNum = 0; unsigned dictNum = 0; while (iter != tableDefCols.end()) { colDefPtr = *iter; CalpontSystemCatalog::ColDataType dataType = convertDataType(colDefPtr->fType->fType); if (dataType == CalpontSystemCatalog::DECIMAL || dataType == CalpontSystemCatalog::UDECIMAL) { if (colDefPtr->fType->fPrecision == -1 || colDefPtr->fType->fPrecision == 0) { 
colDefPtr->fType->fLength = 8; } else if ((colDefPtr->fType->fPrecision > 0) && (colDefPtr->fType->fPrecision < 3)) { colDefPtr->fType->fLength = 1; } else if (colDefPtr->fType->fPrecision < 5 && (colDefPtr->fType->fPrecision > 2)) { colDefPtr->fType->fLength = 2; } else if (colDefPtr->fType->fPrecision > 4 && colDefPtr->fType->fPrecision < 10) { colDefPtr->fType->fLength = 4; } else if (colDefPtr->fType->fPrecision > 9 && colDefPtr->fType->fPrecision < 19) { colDefPtr->fType->fLength = 8; } } bytestream << (fStartingColOID + (colNum++) + 1); bytestream << (uint8_t) dataType; bytestream << (uint8_t) false; bytestream << (uint32_t) colDefPtr->fType->fLength; bytestream << (uint16_t) useDBRoot; bytestream << (uint32_t) colDefPtr->fType->fCompressiontype; if ( (dataType == CalpontSystemCatalog::CHAR && colDefPtr->fType->fLength > 8) || (dataType == CalpontSystemCatalog::VARCHAR && colDefPtr->fType->fLength > 7) || (dataType == CalpontSystemCatalog::VARBINARY && colDefPtr->fType->fLength > 7) ) { bytestream << (uint32_t) (fStartingColOID+numColumns+(dictNum++)+1); bytestream << (uint8_t) dataType; bytestream << (uint8_t) true; bytestream << (uint32_t) colDefPtr->fType->fLength; bytestream << (uint16_t) useDBRoot; bytestream << (uint32_t) colDefPtr->fType->fCompressiontype; } ++iter; } //@Bug 4176. save oids to a log file for cleanup after fail over. 
std::vector <CalpontSystemCatalog::OID> oidList; for (unsigned i = 0; i <numColumns; ++i) { oidList.push_back(fStartingColOID+i+1); } bytestream << numDictCols; for (unsigned i = 0; i <numDictCols; ++i) { oidList.push_back(fStartingColOID+numColumns+i+1); } try { createWriteDropLogFile( fStartingColOID, uniqueId, oidList ); } catch (std::exception& ex) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Create table failed due to "); args.add(ex.what()); message.format( args ); result.message = message; if (rc != NETWORK_ERROR) { rollBackTransaction( uniqueId, txnID, createTableStmt.fSessionID ); //What to do with the error code } //release transaction fSessionManager.rolledback(txnID); return result; } pmNum = (*dbRootPMMap)[useDBRoot]; try { fWEClient->write(bytestream, pmNum); while (1) { bsIn.reset(new ByteStream()); fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { rc = NETWORK_ERROR; errorMsg = "Lost connection to Write Engine Server while updating SYSTABLES"; break; } else { *bsIn >> rc; if (rc != 0) { errorMsg.clear(); *bsIn >> errorMsg; #ifdef IDB_DDL_DEBUG cout << "Create table We_SVR_WRITE_CREATETABLEFILES: " << errorMsg << endl; #endif } break; } } if (rc != 0) { //drop the newly created files bytestream.restart(); bytestream << (ByteStream::byte) WE_SVR_WRITE_DROPFILES; bytestream << uniqueId; bytestream << (uint32_t)(numColumns+numDictCols); for (unsigned i = 0; i < (numColumns+numDictCols); i++) { bytestream << (uint32_t)(fStartingColOID + i + 1); } fWEClient->write(bytestream, pmNum); while (1) { bsIn.reset(new ByteStream()); fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { break; } else { break; } } //@Bug 5464. Delete from extent map. 
fDbrm->deleteOIDs(oidList); } } catch (runtime_error&) { errorMsg = "Lost connection to Write Engine Server"; } if (rc != 0) { rollBackTransaction( uniqueId, txnID, createTableStmt.fSessionID); //What to do with the error code fSessionManager.rolledback(txnID); } else { commitTransaction(uniqueId, txnID); fSessionManager.committed(txnID); fWEClient->removeQueue(uniqueId); deleteLogFile(DROPTABLE_LOG, fStartingColOID, uniqueId); } // Log the DDL statement. logDDL(createTableStmt.fSessionID, txnID.id, createTableStmt.fSql, createTableStmt.fOwner); }
//------------------------------------------------------------------------------ // This method is intended to test the messages used to profile db performance. // The method also provides an example on how to use these log messages. // Test can be verified by viewing /var/log/erydb/debug.log. // // Message types are: // // 26 Start Transaction // 27 End Transaction // 28 Start Statement // 29 End Statement // 30 Start Step // 31 End Step // 32 I/O Reads // // The messages should be logged in the following way: // // 1. The application should log a StartTransaction message at the beginning // of a database transaction. // 2. When the application begins processing a statement, the StartStatement // message should be logged. // 3. As each primitive step is executed, it's start time should be recorded // by logging a StartStep message. // 4. During the execution of a step, 1 or more I/O Read messages should be // logged to record the I/O block count used in accessing each object. If // necessary, more than 1 message can be logged for the same object and // step. In these cases, the script that post-processes the syslog will // add up the block counts for the same object and step. // 5. Upon completion of each primitive step, an EndStep message should be // logged. If multiple steps are executing in parallel, the EndStep msg // should be logged as each step completes. After all these stpes are // completed and logged, if needed, a new set of StartStep messages can // be logged (for the same statement), for a new set of parallel steps. // 6. Upon completion of each statement, an EndStatement msg should be logged. // 7. Upon completion of each transaction, an EndTransaction msg should // be logged. // // Some possible enhancements to simplify this profile logging for the // application programmer: // // 1. Define enum or const ints in a common header file to represent the // list of valid subSystem ids. // // 2. 
Add set of helper methods to MessageLog class to reduce the // amount of work for the application. For example a method like: // // void logStartStatement ( int statement, int ver, string SQL ); // // would allow the application to log a StartStatement by simply doing; // // Message msgStartStatement ( M0028 ); // int statementId = 11; // int versionId = 22; // string sql ("SELECT column1, column2 FROM table1 WHERE ... // msgStartStatement.logStartStatement ( statementId, versionId, sql); // // 3. Could also do something similar to #2 except instead of adding helper // methods to MessageLog, we could add specialized classes that derive // from MessageLog or contain a MessageLog. One advantage of doing it // this way is that a derived class like MessageLogStatement "could" be // implemented to log the StartStatement, and its desctructor could then // log the EndStatement automatically, on behalf of the application. // //------------------------------------------------------------------------------ void m5() { int subSystem = 5; // joblist subSystem int session = 100; int transaction = 1; int thread = 0; LoggingID lid1 ( subSystem, session, transaction, thread ); MessageLog msgLog ( lid1 ); Message::Args args; // Log the start time of a transaction Message msgStartTrans ( M0026 ); msgStartTrans.format ( args ); msgLog.logDebugMessage ( msgStartTrans ); // Log the start of execution time for a SQL statement Message msgStartStatement ( M0028 ); int statementId = 11; int versionId = 22; string sql ("SELECT column1, column2 FROM table1 WHERE column1 = 345"); args.reset ( ); args.add ( statementId ); args.add ( versionId ); args.add ( sql ); msgStartStatement.format ( args ); msgLog.logDebugMessage ( msgStartStatement ); const string stepNames[] = { "steponeA", "steptwoA" ,"stepthreeA", "stepfourB","stepfiveB","stepsixB" }; // To process this SQL statement, simulate executing 2 job steps, // with each job step consisting of of 3 parallel primitive steps for (int 
jobStep=0; jobStep<2; jobStep++) { int primStep1 = jobStep * 3; // Log 3 parallel steps starting to execute for (int i=primStep1; i<(primStep1+3); i++) { Message msgStartStep ( M0030 ); // Start Step int stepId = i+1; string stepName = stepNames[i]; args.reset ( ); args.add ( statementId ); args.add ( stepId ); args.add ( stepName ); msgStartStep.format ( args ); msgLog.logDebugMessage ( msgStartStep ); } // Record I/O block count for 0 or more objects per step; // for this example we just record I/O for 1 object per step. // Then log the completion of each step. for (int i=primStep1; i<(primStep1+3); i++) { Message msgBlockCount ( M0032 ); // I/O block count int stepId = i+1; int objectId = stepId * 20; int phyCount = stepId * 30; int logCount = phyCount + 5; args.reset ( ); args.add ( statementId ); args.add ( stepId ); args.add ( objectId ); args.add ( phyCount ); args.add ( logCount ); msgBlockCount.format ( args ); msgLog.logDebugMessage ( msgBlockCount ); Message msgEndStep ( M0031 ); // End Step args.reset ( ); args.add ( statementId ); args.add ( stepId ); msgEndStep.format ( args ); msgLog.logDebugMessage ( msgEndStep ); } } // Log the completion time of the SQL statement Message msgEndStatement ( M0029 ); args.reset ( ); args.add ( statementId ); msgEndStatement.format ( args ); msgLog.logDebugMessage ( msgEndStatement ); // Log the completion time of the transaction Message msgEndTrans ( M0027 ); args.reset ( ); args.add ( string("COMMIT") ); msgEndTrans.format ( args ); msgLog.logDebugMessage ( msgEndTrans ); Config::deleteInstanceMap(); }
/***************************************************************************************** * @brief main function * * purpose: Parse incoming ALARM statement into calAlarm class * *****************************************************************************************/ int main (int argc, char *argv[]) { if (argc != 2) exit (0); Alarm calAlarm; char buf[100]; char* alarmData; char* token; bool successFlag = false; if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("trapHandler Launched"); msg.format(args); ml.logDebugMessage(msg); } // read alarm data while (cin.getline(buf,100)) { // Alarm data if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("Alarm Data:"); args.add(buf); msg.format(args); ml.logDebugMessage(msg); } // search for CALALARM if ((alarmData = strstr(buf, "CALALARM")) == NULL) continue; successFlag = true; token = strtok (alarmData, DELIM); // alarmData format: CALALARM|alarmID|componentID|1(set)/0(clear)|server|process // alarmID try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setAlarmID (atoi(token)); // componentID try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setComponentID (token); // state try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setState (atoi(token)); // sname try { token = getNextToken(); } catch 
(runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setSname (token); // pname try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setPname (token); // distinguish agent trap and process trap. // agent trap set pid and tid 0. if (strcmp (argv[1], AGENT_TRAP) == 0) { calAlarm.setPid (0); calAlarm.setTid (0); } // process trap continues to get pid from alarm data else { // pid try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setPid (atoi(token)); // tid try { token = getNextToken(); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("getNextToken error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } calAlarm.setTid (atoi(token)); } if (::DEBUG){ LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("Alarm Info:"); args.add(calAlarm.getAlarmID()); args.add(calAlarm.getComponentID()); args.add(calAlarm.getState()); msg.format(args); ml.logDebugMessage(msg); cout << calAlarm.getAlarmID() << ":" << calAlarm.getComponentID() << ":" << calAlarm.getState() << endl; } // break while loop. ignore the other info carried by // the trap. May need to retrieve more info in the future. 
break; } // not valid alarm data if no "CALALARM" found if (!successFlag){ LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("Error: not valid alarm data if no 'CALALARM' found"); msg.format(args); ml.logDebugMessage(msg); exit(1); } // Get alarm configuration try { configAlarm (calAlarm); } catch (runtime_error& e) { if (::DEBUG) { LoggingID lid(11); MessageLog ml(lid); Message msg; Message::Args args; args.add("configAlarm error:"); args.add(e.what()); msg.format(args); ml.logDebugMessage(msg); } exit(1); } return 0; }
void m1() { Message::Args args; args.add("hello"); args.add("world"); args.add(123); args.add(1234.55); Message m(100); m.fMsg = "%1% %2% %3% %4%"; m.format(args); //CPPUNIT_ASSERT(m.msg() == "hello world 123 1234.55"); m.reset(); m.fMsg = "%1% %2% %3% %4%"; args.reset(); m.format(args); //CPPUNIT_ASSERT(m.msg() == " "); LoggingID lid(7); MessageLog ml(lid); m.reset(); args.add("hello"); args.add("world"); args.add(123); args.add(1234.55); m.format(args); ml.logDebugMessage(m); args.reset(); args.add("begin CEP generation"); m.reset(); m.format(args); ml.logInfoMessage(m); args.reset(); args.add("end CEP generation"); m.reset(); m.format(args); ml.logInfoMessage(m); args.reset(); args.add("something took too long"); m.reset(); m.format(args); ml.logWarningMessage(m); args.reset(); args.add("something seriously took too long"); m.reset(); m.format(args); ml.logSeriousMessage(m); ml.logErrorMessage(m); args.reset(); args.add("something critical took too long"); m.reset(); m.format(args); ml.logCriticalMessage(m); LoggingID lid1; MessageLog ml1(lid1); args.reset(); m.reset(); args.add("subsystem 0 = erydb test"); m.format(args); ml1.logDebugMessage(m); LoggingID lid2(1000); MessageLog ml2(lid2); args.reset(); m.reset(); args.add("subsystem above MAX = erydb test"); m.format(args); ml2.logDebugMessage(m); LoggingID lid3(7); MessageLog ml3(lid3); args.reset(); m.reset(); args.add("subsystem 7 = eryadmin test"); m.format(args); ml3.logDebugMessage(m); Config::deleteInstanceMap(); }
/***************************************************************************************** * @brief diskMonitor Thread * * purpose: Get current Local and External disk usage and report alarms * *****************************************************************************************/ void diskMonitor() { ServerMonitor serverMonitor; Oam oam; SystemConfig systemConfig; ModuleTypeConfig moduleTypeConfig; typedef std::vector<std::string> LocalFileSystems; LocalFileSystems lfs; struct statvfs buf; // set defaults int localDiskCritical = 90, localDiskMajor = 80, localDiskMinor = 70, ExternalDiskCritical = 90, ExternalDiskMajor = 80, ExternalDiskMinor = 70; // get module types string moduleType; int moduleID=-1; string moduleName; oamModuleInfo_t t; try { t = oam.getModuleInfo(); moduleType = boost::get<1>(t); moduleID = boost::get<2>(t); moduleName = boost::get<0>(t); } catch (exception& e) {} bool Externalflag = false; //check for external disk DBrootList dbrootList; if (moduleType == "pm") { systemStorageInfo_t t; t = oam.getStorageConfig(); if ( boost::get<0>(t) == "external") Externalflag = true; // get dbroot list and storage type from config file DBRootConfigList dbrootConfigList; oam.getPmDbrootConfig(moduleID, dbrootConfigList); DBRootConfigList::iterator pt = dbrootConfigList.begin(); for( ; pt != dbrootConfigList.end() ; pt++) { int dbrootID = *pt; string dbroot = "DBRoot" + oam.itoa(dbrootID); string dbootdir; try{ oam.getSystemConfig(dbroot, dbootdir); } catch(...) {} if ( dbootdir.empty() || dbootdir == "" ) continue; DBrootData dbrootData; dbrootData.dbrootDir = dbootdir; dbrootData.downFlag = false; dbrootList.push_back(dbrootData); } } string cloud = oam::UnassignedName; try { oam.getSystemConfig( "Cloud", cloud); } catch(...) { cloud = oam::UnassignedName; } //get Gluster Config setting string GlusterConfig = "n"; try { oam.getSystemConfig( "GlusterConfig", GlusterConfig); } catch(...) 
{ GlusterConfig = "n"; } int diskSpaceCheck = 0; while(true) { SystemStatus systemstatus; try { oam.getSystemStatus(systemstatus); } catch (exception& ex) {} if (systemstatus.SystemOpState != oam::ACTIVE ) { sleep(5); continue; } // Get Local/External Disk Mount points to monitor and associated thresholds try { oam.getSystemConfig (moduleTypeConfig); localDiskCritical = moduleTypeConfig.ModuleDiskCriticalThreshold; localDiskMajor = moduleTypeConfig.ModuleDiskMajorThreshold; localDiskMinor = moduleTypeConfig.ModuleDiskMinorThreshold; DiskMonitorFileSystems::iterator p = moduleTypeConfig.FileSystems.begin(); for( ; p != moduleTypeConfig.FileSystems.end() ; p++) { string fs = *p; lfs.push_back(fs); if (DISK_DEBUG) { //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Local Config File System to monitor ="); args.add(fs); msg.format(args); ml.logDebugMessage(msg); } } } catch (...) { sleep(5); continue; } // get External info try { oam.getSystemConfig(systemConfig); } catch (...) { sleep(5); continue; } if (Externalflag) { // get External info try { ExternalDiskCritical = systemConfig.ExternalCriticalThreshold; ExternalDiskMajor = systemConfig.ExternalMajorThreshold; ExternalDiskMinor = systemConfig.ExternalMinorThreshold; } catch (...) 
{ sleep(5); continue; } } //check for local file systems LocalFileSystems::iterator p = lfs.begin(); while(p != lfs.end()) { string deviceName = *p; ++p; string fileName; // check local if ( deviceName == "/") { fileName = deviceName + "usr/local/Calpont/releasenum"; } else { fileName = deviceName + "/000.dir"; } uint64_t totalBlocks; uint64_t usedBlocks; if (!statvfs(fileName.c_str(), &buf)) { uint64_t blksize, blocks, freeblks, free; blksize = buf.f_bsize; blocks = buf.f_blocks; freeblks = buf.f_bfree; totalBlocks = blocks * blksize; free = freeblks * blksize; usedBlocks = totalBlocks - free; } else continue; int64_t diskUsage = 0; if ( totalBlocks == 0 ) { diskUsage = 0; //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Total Disk Usage is set to 0"); msg.format(args); ml.logWarningMessage(msg); } else diskUsage = (usedBlocks / (totalBlocks / 100)) + 1; SMSystemDisk sd; sd.deviceName = deviceName; sd.usedPercent = diskUsage; sd.totalBlocks = totalBlocks; sd.usedBlocks = usedBlocks; sdl.push_back(sd); if (DISK_DEBUG) cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl; if ( diskSpaceCheck == 0 ) { if (diskUsage >= localDiskCritical && localDiskCritical > 0 ) { //adjust if over 100% if ( diskUsage > 100 ) diskUsage = 100; if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, (int) diskUsage) ) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Local Disk above Critical Disk threshold with a percentage of "); args.add((int) diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else if (diskUsage >= localDiskMajor && localDiskMajor > 0 ) { if (serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, (int) diskUsage)) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Local Disk above Major Disk threshold with a percentage of "); args.add((int) 
diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else if (diskUsage >= localDiskMinor && localDiskMinor > 0 ) { if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, (int) diskUsage)) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Local Disk above Minor Disk threshold with a percentage of "); args.add((int) diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else serverMonitor.checkDiskAlarm(deviceName); } //check for external file systems/devices if (Externalflag || (!Externalflag && GlusterConfig == "y" && moduleType == "pm") ){ try { DBRootConfigList dbrootConfigList; oam.getPmDbrootConfig(moduleID, dbrootConfigList); DBRootConfigList::iterator pt = dbrootConfigList.begin(); for( ; pt != dbrootConfigList.end() ; pt++) { int dbroot = *pt; string deviceName = systemConfig.DBRoot[dbroot-1]; string fileName = deviceName + "/000.dir"; if (DISK_DEBUG) { //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("DBRoots monitoring"); args.add(dbroot); args.add(" ,file system =" ); args.add(fileName); msg.format(args); ml.logDebugMessage(msg); } uint64_t totalBlocks; uint64_t usedBlocks; if (!statvfs(fileName.c_str(), &buf)) { uint64_t blksize, blocks, freeblks, free; blksize = buf.f_bsize; blocks = buf.f_blocks; freeblks = buf.f_bfree; totalBlocks = blocks * blksize; free = freeblks * blksize; usedBlocks = totalBlocks - free; } else { SMSystemDisk sd; sd.deviceName = deviceName; sd.usedPercent = 0; sd.totalBlocks = 0; sd.usedBlocks = 0; sdl.push_back(sd); continue; } int diskUsage = 0; if ( totalBlocks == 0 ) { diskUsage = 0; //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Total Disk Usage is set to 0"); msg.format(args); ml.logWarningMessage(msg); } else diskUsage = (usedBlocks / (totalBlocks / 100)) + 1; SMSystemDisk sd; sd.deviceName = deviceName; 
sd.usedPercent = diskUsage; sd.totalBlocks = totalBlocks; sd.usedBlocks = usedBlocks; sdl.push_back(sd); if (DISK_DEBUG) cout << "Disk Usage for " << deviceName << " is " << diskUsage << endl; if (diskUsage >= ExternalDiskCritical && ExternalDiskCritical > 0 ) { //adjust if over 100% if ( diskUsage > 100 ) diskUsage = 100; if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_HIGH, SET, diskUsage)) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Disk usage for"); args.add(deviceName); args.add(" above Critical Disk threshold with a percentage of "); args.add((int) diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else if (diskUsage >= ExternalDiskMajor && ExternalDiskMajor > 0 ) { if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_MED, SET, diskUsage)) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Disk usage for"); args.add(deviceName); args.add(" above Major Disk threshold with a percentage of "); args.add((int) diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else if (diskUsage >= ExternalDiskMinor && ExternalDiskMinor > 0 ) { if ( serverMonitor.sendResourceAlarm(deviceName, DISK_USAGE_LOW, SET, diskUsage)) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Disk usage for"); args.add(deviceName); args.add(" above Minor Disk threshold with a percentage of "); args.add((int) diskUsage); msg.format(args); ml.logInfoMessage(msg); } } else serverMonitor.checkDiskAlarm(deviceName); } } catch (exception& e) { cout << endl << "**** getPmDbrootConfig Failed : " << e.what() << endl; } } } //check OAM dbroot test flag to validate dbroot exist if on pm if ( moduleName.find("pm") != string::npos ) { //check OAM dbroot test flag to validate dbroot exist if ( dbrootList.size() != 0 ) { DBrootList::iterator p = dbrootList.begin(); while ( p != dbrootList.end() ) { //get dbroot directory 
string dbrootDir = (*p).dbrootDir; string dbrootName; string dbrootID; //get dbroot name string::size_type pos = dbrootDir.rfind("/",80); if (pos != string::npos) dbrootName = dbrootDir.substr(pos+1,80); //get ID dbrootID = dbrootName.substr(4,80); string fileName = dbrootDir + "/OAMdbrootCheck"; // retry in case we hit the remount window for ( int retry = 0 ; ; retry++ ) { bool fail = false; //first test, check if OAMdbrootCheck exists ifstream file (fileName.c_str()); if (!file) fail = true; else { //second test for amazon, check volume status if ( cloud != oam::UnassignedName ) { string volumeNameID = "PMVolumeName" + dbrootID; string volumeName = oam::UnassignedName; try { oam.getSystemConfig( volumeNameID, volumeName); } catch(...) {} if ( volumeName.empty() || volumeName == oam::UnassignedName ) fail = false; else { string status = oam.getEC2VolumeStatus(volumeName); if ( status == "attached" ) fail = false; else { fail = true; LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("dbroot monitoring: Volume not attached"); args.add(volumeName); args.add("/"); args.add(dbrootName); msg.format(args); ml.logCriticalMessage(msg); } } } else fail = false; } if (fail) { //double check system status before reporting any error BUG 5078 SystemStatus systemstatus; try { oam.getSystemStatus(systemstatus); } catch (exception& ex) {} if (systemstatus.SystemOpState != oam::ACTIVE ) { break; } if ( retry < 10 ) { sleep(3); continue; } else { if ( !(*p).downFlag ) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("dbroot monitoring: Lost access to "); args.add(dbrootDir); msg.format(args); ml.logCriticalMessage(msg); oam.sendDeviceNotification(dbrootName, DBROOT_DOWN, moduleName); (*p).downFlag = true; try{ oam.setDbrootStatus(dbrootID, oam::AUTO_OFFLINE); } catch (exception& ex) {} break; } } } else { if ( (*p).downFlag ) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog 
ml(lid); Message msg; Message::Args args; args.add("dbroot monitoring: Access back to "); args.add(dbrootDir); msg.format(args); ml.logInfoMessage(msg); oam.sendDeviceNotification(dbrootName, DBROOT_UP, moduleName); (*p).downFlag = false; try{ oam.setDbrootStatus(dbrootID, oam::ACTIVE); } catch (exception& ex) {} } file.close(); break; } } p++; } } } //do Gluster status check, if configured if ( GlusterConfig == "y") { bool pass = true; string errmsg = "unknown"; try { string arg1 = ""; string arg2 = ""; int ret = oam.glusterctl(oam::GLUSTER_STATUS, arg1, arg2, errmsg); if ( ret != 0 ) { cerr << "FAILURE: Status check error: " + errmsg << endl; pass = false; } } catch (exception& e) { cerr << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Status check error" << endl; pass = false; } catch (...) { cerr << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Status check error" << endl; pass = false; } if ( !pass ) { // issue log and alarm LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Gluster Status check failure error msg: "); args.add(errmsg); msg.format(args); ml.logWarningMessage(msg); serverMonitor.sendResourceAlarm(errmsg, GLUSTER_DISK_FAILURE, SET, 0); } } // sleep 10 seconds sleep(MONITOR_PERIOD/6); //check disk space every 10 minutes diskSpaceCheck++; if ( diskSpaceCheck >= 60 ) diskSpaceCheck = 0; lfs.clear(); sdl.clear(); } // end of while loop }
void hardwareMonitor(int IPMI_SUPPORT) { ServerMonitor serverMonitor; string data[10]; string SensorName; float SensorValue; string Units; string SensorStatus; float lowFatal; float lowCritical; float lowWarning; float highWarning; float highCritical; float highFatal; char *p; if( IPMI_SUPPORT == 0) { int returnCode = system("ipmitool sensor list > /tmp/harwareMonitor.txt"); if (returnCode) { // System error, Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Error running ipmitool sensor list!!!"); msg.format(args); ml.logWarningMessage(msg); while(TRUE) sleep(10000); } } else { while(TRUE) sleep(10000); } // register for Heartbeat monitoring /* try { ProcHeartbeat procheartbeat; procheartbeat.registerHeartbeat(HW_HEARTBEAT_ID); } catch (exception& ex) { string error = ex.what(); LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on registerHeartbeat: "); args.add(error); msg.format(args); ml.logErrorMessage(msg); } catch(...) 
{ LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); msg.format(args); ml.logErrorMessage(msg); } */ // loop forever reading the hardware status while(TRUE) { // parse output file ifstream File ("/tmp/harwareMonitor.txt"); if (!File){ // System error, Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Error opening /tmp/harwareMonitor.txt!!!"); msg.format(args); ml.logWarningMessage(msg); sleep(300); continue; } char line[200]; while (File.getline(line, 200)) { // parse the line int f = 0; p = strtok(line,"|"); while (p) { data[f]=p; data[f] = serverMonitor.StripWhitespace(data[f]); p = strtok (NULL, "|"); f++; } if( f == 0 ) // nothing on this line, skip continue; SensorName = data[0]; SensorValue = atof(data[1].c_str()); Units = data[2]; SensorStatus = data[3]; lowFatal = atof(data[4].c_str()); lowCritical = atof(data[5].c_str()); lowWarning = atof(data[6].c_str()); highWarning = atof(data[7].c_str()); highCritical = atof(data[8].c_str()); highFatal = atof(data[9].c_str()); // check status and issue apporiate alarm if needed if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") ) { // Status error, check for warning or critical levels if ( SensorValue >= highFatal ) { // issue critical alarm and send message to shutdown Server serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); serverMonitor.sendMsgShutdownServer(); } else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) ) // issue major alarm serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) ) // issue minor alarm serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) ) // issue minor alarm 
serverMonitor.sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) ) // issue major alarm serverMonitor.sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); else if ( SensorValue <= lowFatal ) { // issue critical alarm and send message to shutdown Server serverMonitor.sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); serverMonitor.sendMsgShutdownServer(); } else // check if there are any active alarms that needs to be cleared serverMonitor.checkAlarm(SensorName); } else // check if there are any active alarms that needs to be cleared serverMonitor.checkAlarm(SensorName); } //end of parsing file while File.close(); // send heartbeat message /* try { ProcHeartbeat procheartbeat; procheartbeat.sendHeartbeat(HW_HEARTBEAT_ID); LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Sent Heartbeat Msg"); msg.format(args); ml.logDebugMessage(msg); } catch (exception& ex) { string error = ex.what(); if ( error.find("Disabled") == string::npos ) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: "); args.add(error); msg.format(args); ml.logErrorMessage(msg); } } catch(...) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); msg.format(args); ml.logErrorMessage(msg); } */ // sleep sleep(MONITOR_PERIOD); } //end of forever while loop }
/************************************************************************************************************ * @brief main function * * purpose: Get current hardware status and report alarms * * Parses file generated by the ipmitool * * pattern = what it is | value | units | status | value 1 | value 2 | value 3 | value 4 | value 5 | value 6 * data(0) = what it is * data(1) = value * data(2) = units * data(3) = status * data(4)-data(9) = barrier values * data(4) - low non-recoverable, i.e. fatal * data(5) - low critical * data(6) - low warning * data(7) - high warning * data(8) - high critical * data(9) - high non-recoverable, i.e. fatal * ************************************************************************************************************/ int main (int argc, char** argv) { string data[10]; string SensorName; float SensorValue; string Units; string SensorStatus; float lowFatal; float lowCritical; float lowWarning; float highWarning; float highCritical; float highFatal; char *p; // check for IPMI_SUPPORT FLAG passed in if(argc > 1) IPMI_SUPPORT = atoi(argv[1]); // loop forever reading the hardware status while(true) { if( IPMI_SUPPORT == 0) { int returnCode = system("ipmitool sensor list > /tmp/harwareMonitor.txt"); if (returnCode) { // System error, Log this event LoggingID lid; MessageLog ml(lid); Message msg; Message::Args args; args.add("Error running ipmitool sensor list!!!"); msg.format(args); ml.logWarningMessage(msg); sleep(300); continue; } } // parse output file ifstream File ("/tmp/harwareMonitor.txt"); if (!File){ // System error, Log this event LoggingID lid; MessageLog ml(lid); Message msg; Message::Args args; args.add("Error opening /tmp/harwareMonitor.txt!!!"); msg.format(args); ml.logWarningMessage(msg); sleep(300); continue; } char line[200]; while (File.getline(line, 200)) { // parse the line int f = 0; p = strtok(line,"|"); while (p) { data[f]=p; data[f] = StripWhitespace(data[f]); p = strtok (NULL, "|"); f++; } if( f == 0 ) // nothing on 
this line, skip continue; SensorName = data[0]; SensorValue = atof(data[1].c_str()); Units = data[2]; SensorStatus = data[3]; lowFatal = atof(data[4].c_str()); lowCritical = atof(data[5].c_str()); lowWarning = atof(data[6].c_str()); highWarning = atof(data[7].c_str()); highCritical = atof(data[8].c_str()); highFatal = atof(data[9].c_str()); // check status and issue apporiate alarm if needed if ( (SensorStatus != "ok") && (SensorStatus != "nr") && (SensorStatus != "na") ) { // Status error, check for warning or critical levels if ( SensorValue >= highFatal ) { // issue critical alarm and send message to shutdown Server sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); sendMsgShutdownServer(); } else if ( (SensorValue < highFatal) && (SensorValue >= highCritical) ) // issue major alarm sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); else if ( (SensorValue < highCritical ) && (SensorValue >= highWarning) ) // issue minor alarm sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); else if ( (SensorValue <= lowWarning) && (SensorValue > lowCritical) ) // issue minor alarm sendAlarm(SensorName, HARDWARE_LOW, SET, SensorValue); else if ( (SensorValue <= lowCritical) && (SensorValue > lowFatal) ) // issue major alarm sendAlarm(SensorName, HARDWARE_MED, SET, SensorValue); else if ( SensorValue <= lowFatal ) { // issue critical alarm and send message to shutdown Server sendAlarm(SensorName, HARDWARE_HIGH, SET, SensorValue); sendMsgShutdownServer(); } else // check if there are any active alarms that needs to be cleared checkAlarm(SensorName); } else // check if there are any active alarms that needs to be cleared checkAlarm(SensorName); } //end of parsing file while File.close(); // sleep for 1 minute sleep(60); } //end of forever while loop }
/**
 * @brief procmonMonitor
 *
 * purpose: Periodically send a LOCALHEARTBEAT request to the local Process
 *          Monitor message queue ("<moduleName>_ProcessMonitor") and restart
 *          ProcMon (via "pkill ProcMon") once more than two replies in a row
 *          come back empty.
 *
 * NOTE(review): this definition is cut off in the visible part of the file
 * (it stops at the outer `catch (exception& ex)`), so only the visible part
 * is documented here.
 */
void procmonMonitor()
{
    ServerMonitor serverMonitor;
    Oam oam;

    //wait before monitoring is started
    sleep(60);

    // get current server name
    string moduleName;
    oamModuleInfo_t st;

    try
    {
        st = oam.getModuleInfo();
        moduleName = boost::get<0>(st);
    }
    catch (...)
    {
        // Critical error, Log this event and exit
        LoggingID lid(SERVER_MONITOR_LOG_ID);
        MessageLog ml(lid);
        Message msg;
        Message::Args args;
        args.add("Failed to read local module Info");
        msg.format(args);
        ml.logCriticalMessage(msg);
        exit(-1);
    }

    // name of the queue the heartbeat request is written to
    string msgPort = moduleName + "_ProcessMonitor";

    // number of consecutive empty replies; reset on a matching ACK and after a restart
    int heartbeatCount = 0;

    // loop forever monitoring Local Process Monitor
    while(true)
    {
        ByteStream msg;
        ByteStream::byte requestID = LOCALHEARTBEAT;

        msg << requestID;

        try
        {
            // a new client connection is created for every heartbeat
            MessageQueueClient mqRequest(msgPort);
            mqRequest.write(msg);

            // wait 10 seconds for response
            ByteStream::byte returnACK;
            ByteStream::byte returnRequestID;
            ByteStream::byte requestStatus;     // extracted below but not otherwise used in the visible code
            ByteStream receivedMSG;

            struct timespec ts = { 10, 0 };

            try
            {
                receivedMSG = mqRequest.read(&ts);

                if (receivedMSG.length() > 0)
                {
                    receivedMSG >> returnACK;
                    receivedMSG >> returnRequestID;
                    receivedMSG >> requestStatus;

                    if ( returnACK == oam::ACK && returnRequestID == requestID)
                    {
                        // ACK for this request
                        heartbeatCount = 0;
                    }
                }
                else
                {
                    // an empty reply is treated as a timeout
                    // (this Message shadows the outer ByteStream named msg)
                    LoggingID lid(SERVER_MONITOR_LOG_ID);
                    MessageLog ml(lid);
                    Message msg;
                    Message::Args args;
                    args.add("procmonMonitor: ProcMon Msg timeout!!!");
                    msg.format(args);
                    ml.logWarningMessage(msg);

                    heartbeatCount++;

                    if ( heartbeatCount > 2 )
                    {
                        //Process Monitor not responding, restart it
                        system("pkill ProcMon");

                        LoggingID lid(SERVER_MONITOR_LOG_ID);
                        MessageLog ml(lid);
                        Message msg;
                        Message::Args args;
                        args.add("procmonMonitor: Restarting ProcMon");
                        msg.format(args);
                        ml.logWarningMessage(msg);

                        // wait before the next heartbeat and start counting again
                        sleep(60);
                        heartbeatCount = 0;
                    }
                }

                // not reached when read() or the extraction above throws
                mqRequest.shutdown();
            }
            catch (SocketClosed &ex)
            {
                string error = ex.what();

                LoggingID lid(SERVER_MONITOR_LOG_ID);
                MessageLog ml(lid);
                Message msg;
                Message::Args args;
                args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: " + error);
                msg.format(args);
                ml.logErrorMessage(msg);
            }
            catch (...)
            {
                LoggingID lid(SERVER_MONITOR_LOG_ID);
                MessageLog ml(lid);
                Message msg;
                Message::Args args;
                args.add("procmonMonitor: EXCEPTION ERROR on mqRequest.read: Caught unknown exception");
                msg.format(args);
                ml.logErrorMessage(msg);
            }
        }
        // NOTE(review): the body of this handler and the rest of the function are not visible here.
        catch (exception& ex)
/***************************************************************************************** * @brief cpuMonitor Thread * * purpose: Get current CPU usage, average over 5 readings and report alarms * *****************************************************************************************/ void cpuMonitor() { ServerMonitor serverMonitor; // register for Heartbeat monitoring /* try { ProcHeartbeat procheartbeat; procheartbeat.registerHeartbeat(CPU_HEARTBEAT_ID); } catch (exception& ex) { string error = ex.what(); LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on registerHeartbeat: "); args.add(error); msg.format(args); ml.logErrorMessage(msg); } catch(...) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); msg.format(args); ml.logErrorMessage(msg); } */ int periodCount = 5; float cpuPeriod[periodCount]; int periodCounter = 0; float averageCpuUsage = 0; currentCpuUsage = 0; // set defaults unsigned int cpuCritical = 0, cpuMajor = 0, cpuMinor = 0, cpuMinorClear = 0; // initial cpu Period table for (int i =0;i < periodCount; i++) { cpuPeriod[i] = 0; } while(true) { // Get CPU usage water mark from server configuration and compare ModuleTypeConfig moduleTypeConfig; Oam oam; try { oam.getSystemConfig(moduleTypeConfig); cpuCritical = moduleTypeConfig.ModuleCPUCriticalThreshold; cpuMajor = moduleTypeConfig.ModuleCPUMajorThreshold; cpuMinor = moduleTypeConfig.ModuleCPUMinorThreshold; cpuMinorClear = moduleTypeConfig.ModuleCPUMinorClearThreshold; } catch (...) 
{ sleep(5); continue; } if (RESOURCE_DEBUG) cout << "critical water: " << moduleTypeConfig.ModuleCPUCriticalThreshold << endl; pthread_mutex_lock(&CPU_LOCK); // // get Process and System CPU usage // serverMonitor.getCPUdata(); // store and get average cpuPeriod[periodCounter] = currentCpuUsage; averageCpuUsage = 0; for (int i =0;i < periodCount; i++) { averageCpuUsage += cpuPeriod[i]; } averageCpuUsage = averageCpuUsage / periodCount; // serverMonitor.logCPUactive(averageCpuUsage); if (CPU_DEBUG) { cout << "Current CPU Usage: " << currentCpuUsage << endl; cout << "Average CPU Usage: " << averageCpuUsage << endl; } if (averageCpuUsage >= cpuCritical && cpuCritical > 0 ) { serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_HIGH, SET, (int) averageCpuUsage); } else if (averageCpuUsage >= cpuMajor && cpuMajor > 0 ) serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_MED, SET, (int) averageCpuUsage); else if (averageCpuUsage >= cpuMinor && cpuMinor > 0 ) serverMonitor.sendResourceAlarm("CPU", CPU_USAGE_LOW, SET, (int) averageCpuUsage); else if (averageCpuUsage >= cpuMinorClear && cpuMinorClear > 0 ) { serverMonitor.checkCPUAlarm("CPU", CPU_USAGE_LOW); //Log this event LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Current CPU usage = "); args.add((int) currentCpuUsage); args.add(", Average CPU usage = "); args.add((int) averageCpuUsage); msg.format(args); ml.logInfoMessage(msg); } else serverMonitor.checkCPUAlarm("CPU"); // // check CPU usage by process // ProcessCPUList::iterator p = pcl.begin(); while(p != pcl.end()) { string processName = (*p).processName; double cpuUsage = (*p).usedPercent; p++; if (CPU_DEBUG) { cout << "Process Name : " << processName << endl; cout << "CPU Usage: " << cpuUsage << endl; } // check if a Calpont Process, if so alarm is over thresholds // if not, just log if over thresholds if (cpuUsage >= cpuCritical && cpuCritical > 0) { /* try { t = oam.getMyProcessStatus(processID); processName = 
boost::get<1>(t); serverMonitor.sendResourceAlarm(processName, CPU_USAGE_HIGH, SET, (int) cpuUsage); } catch (...) { */ LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Process"); args.add(processName); args.add(" above Critical CPU threshold with a percentage of "); args.add((int) cpuUsage); msg.format(args); ml.logInfoMessage(msg); // } } else if (cpuUsage >= cpuMajor && cpuMajor > 0) { /* try { t = oam.getMyProcessStatus(processID); processName = boost::get<1>(t); serverMonitor.sendResourceAlarm(processName, CPU_USAGE_MED, SET, (int) cpuUsage); } catch (...) { */ LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Process"); args.add(processName); args.add(" above Major CPU threshold with a percentage of "); args.add((int) cpuUsage); msg.format(args); ml.logInfoMessage(msg); // } } else if (cpuUsage >= cpuMinor && cpuMinor > 0) { /* try { t = oam.getMyProcessStatus(processID); processName = boost::get<1>(t); serverMonitor.sendResourceAlarm(processName, CPU_USAGE_LOW, SET, (int) cpuUsage); } catch (...) { */ LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Process"); args.add(processName); args.add(" above Minor CPU threshold with a percentage of "); args.add((int) cpuUsage); msg.format(args); ml.logInfoMessage(msg); // } } /* else if (cpuUsage >= cpuMinorClear) { try { t = oam.getMyProcessStatus(processID); processName = boost::get<1>(t); serverMonitor.checkCPUAlarm(processName, CPU_USAGE_LOW); } catch (...) 
{} } else serverMonitor.checkCPUAlarm(processName); */ } // send heartbeat message /* try { ProcHeartbeat procheartbeat; procheartbeat.sendHeartbeat(CPU_HEARTBEAT_ID); LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("Sent Heartbeat Msg"); msg.format(args); ml.logInfoMessage(msg); } catch (exception& ex) { string error = ex.what(); if ( error.find("Disabled") == string::npos ) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: "); args.add(error); msg.format(args); ml.logErrorMessage(msg); } } catch(...) { LoggingID lid(SERVER_MONITOR_LOG_ID); MessageLog ml(lid); Message msg; Message::Args args; args.add("EXCEPTION ERROR on sendHeartbeat: Caught unknown exception!"); msg.format(args); ml.logErrorMessage(msg); } */ pthread_mutex_unlock(&CPU_LOCK); // sleep, 5 minutes sleep(MONITOR_PERIOD*5); ++periodCounter; if ( periodCounter >= periodCount ) periodCounter = 0; } // end of while loop }
DropTableProcessor::DDLResult DropTableProcessor::processPackage(ddlpackage::DropTableStatement& dropTableStmt) { SUMMARY_INFO("DropTableProcessor::processPackage"); DDLResult result; result.result = NO_ERROR; std::string err; VERBOSE_INFO(dropTableStmt); // Commit current transaction. // all DDL statements cause an implicit commit VERBOSE_INFO("Getting current txnID"); ByteStream::byte rc = 0; BRM::TxnID txnID; txnID.id= fTxnid.id; txnID.valid= fTxnid.valid; int rc1 = 0; rc1= fDbrm->isReadWrite(); if (rc1 != 0 ) { Message::Args args; Message message(9); args.add("Unable to execute the statement due to DBRM is read only"); message.format(args); result.result = DROP_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } string stmt = dropTableStmt.fSql + "|" + dropTableStmt.fTableName->fSchema +"|"; SQLLogger logger(stmt, fDDLLoggingId, dropTableStmt.fSessionID, txnID.id); std::vector <CalpontSystemCatalog::OID> oidList; CalpontSystemCatalog::RIDList tableColRidList; CalpontSystemCatalog::DictOIDList dictOIDList; execplan::CalpontSystemCatalog::ROPair roPair; std::string errorMsg; ByteStream bytestream; uint64_t uniqueId = 0; //Bug 5070. Added exception handling try { uniqueId = fDbrm->getUnique64(); } catch (std::exception& ex) { Message::Args args; Message message(9); args.add(ex.what()); message.format(args); result.result = DROP_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } catch ( ... 
) { Message::Args args; Message message(9); args.add("Unknown error occured while getting unique number."); message.format(args); result.result = DROP_ERROR; result.message = message; fSessionManager.rolledback(txnID); return result; } fWEClient->addQueue(uniqueId); int pmNum = 1; boost::shared_ptr<messageqcpp::ByteStream> bsIn; uint64_t tableLockId = 0; OamCache* oamcache = OamCache::makeOamCache(); std::vector<int> moduleIds = oamcache->getModuleIds(); try { //check table lock CalpontSystemCatalog *systemCatalogPtr = CalpontSystemCatalog::makeCalpontSystemCatalog(dropTableStmt.fSessionID); systemCatalogPtr->identity(CalpontSystemCatalog::EC); systemCatalogPtr->sessionID(dropTableStmt.fSessionID); CalpontSystemCatalog::TableName tableName; tableName.schema = dropTableStmt.fTableName->fSchema; tableName.table = dropTableStmt.fTableName->fName; roPair = systemCatalogPtr->tableRID( tableName ); u_int32_t processID = ::getpid(); int32_t txnid = txnID.id; int32_t sessionId = dropTableStmt.fSessionID; std::string processName("DDLProc"); int i = 0; std::vector<uint> pms; for (unsigned i=0; i < moduleIds.size(); i++) { pms.push_back((uint)moduleIds[i]); } try { tableLockId = fDbrm->getTableLock(pms, roPair.objnum, &processName, &processID, &sessionId, &txnid, BRM::LOADING ); } catch (std::exception&) { throw std::runtime_error(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); } if ( tableLockId == 0 ) { int waitPeriod = 10; int sleepTime = 100; // sleep 100 milliseconds between checks int numTries = 10; // try 10 times per second waitPeriod = WriteEngine::Config::getWaitPeriod(); numTries = waitPeriod * 10; struct timespec rm_ts; rm_ts.tv_sec = sleepTime/1000; rm_ts.tv_nsec = sleepTime%1000 *1000000; for (; i < numTries; i++) { #ifdef _MSC_VER Sleep(rm_ts.tv_sec * 1000); #else struct timespec abs_ts; do { abs_ts.tv_sec = rm_ts.tv_sec; abs_ts.tv_nsec = rm_ts.tv_nsec; } while(nanosleep(&abs_ts,&rm_ts) < 0); #endif try { processID = ::getpid(); txnid = txnID.id; 
sessionId = dropTableStmt.fSessionID;; processName = "DDLProc"; tableLockId = fDbrm->getTableLock(pms, roPair.objnum, &processName, &processID, &sessionId, &txnid, BRM::LOADING ); } catch (std::exception&) { throw std::runtime_error(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); } if (tableLockId > 0) break; } if (i >= numTries) //error out { Message::Args args; args.add(processName); args.add((uint64_t)processID); args.add(sessionId); throw std::runtime_error(IDBErrorInfo::instance()->errorMsg(ERR_TABLE_LOCKED,args)); } } // 1. Get the OIDs for the columns // 2. Get the OIDs for the dictionaries // 3. Save the OIDs to a log file // 4. Remove the Table from SYSTABLE // 5. Remove the columns from SYSCOLUMN // 6. Commit the changes made to systables // 7. Flush PrimProc Cache // 8. Update extent map // 9. Remove the column and dictionary files // 10.Return the OIDs CalpontSystemCatalog::TableName userTableName; userTableName.schema = dropTableStmt.fTableName->fSchema; userTableName.table = dropTableStmt.fTableName->fName; tableColRidList = systemCatalogPtr->columnRIDs( userTableName ); dictOIDList = systemCatalogPtr->dictOIDs( userTableName ); Oam oam; //Save qualified tablename, all column, dictionary OIDs, and transaction ID into a file in ASCII format for ( unsigned i=0; i < tableColRidList.size(); i++ ) { if ( tableColRidList[i].objnum > 3000 ) oidList.push_back( tableColRidList[i].objnum ); } for ( unsigned i=0; i < dictOIDList.size(); i++ ) { if ( dictOIDList[i].dictOID > 3000 ) oidList.push_back( dictOIDList[i].dictOID ); } //get a unique number VERBOSE_INFO("Removing the SYSTABLE meta data"); #ifdef IDB_DDL_DEBUG cout << "Removing the SYSTABLEs meta data" << endl; #endif bytestream << (ByteStream::byte)WE_SVR_DELETE_SYSTABLES; bytestream << uniqueId; bytestream << (u_int32_t) dropTableStmt.fSessionID; bytestream << (u_int32_t)txnID.id; bytestream << dropTableStmt.fTableName->fSchema; bytestream << dropTableStmt.fTableName->fName; //Find out where 
systable is BRM::OID_t sysOid = 1001; ByteStream::byte rc = 0; u_int16_t dbRoot; rc = fDbrm->getSysCatDBRoot(sysOid, dbRoot); if (rc != 0) { result.result =(ResultCode) rc; Message::Args args; Message message(9); args.add("Error while calling getSysCatDBRoot"); args.add(errorMsg); result.message = message; //release transaction fSessionManager.rolledback(txnID); return result; } boost::shared_ptr<std::map<int, int> > dbRootPMMap = oamcache->getDBRootToPMMap(); pmNum = (*dbRootPMMap)[dbRoot]; try { //cout << "deleting systable entries with txnid " << txnID.id << endl; fWEClient->write(bytestream, (uint)pmNum); #ifdef IDB_DDL_DEBUG cout << "Drop table sending WE_SVR_DELETE_SYSTABLES to pm " << pmNum << endl; #endif while (1) { bsIn.reset(new ByteStream()); fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { rc = NETWORK_ERROR; errorMsg = "Lost connection to Write Engine Server while updating SYSTABLES"; break; } else { *bsIn >> rc; if (rc != 0) { *bsIn >> errorMsg; } break; } } } catch (runtime_error& ex) //write error { #ifdef IDB_DDL_DEBUG cout << "Drop table got exception" << endl; #endif rc = NETWORK_ERROR; errorMsg = ex.what(); } catch (...) 
{ rc = NETWORK_ERROR; #ifdef IDB_DDL_DEBUG cout << "Drop table got unknown exception" << endl; #endif } if (rc != 0) { Message::Args args; Message message(9); args.add("Error in dropping table from systables."); args.add(errorMsg); message.format(args); result.result = (ResultCode)rc; result.message = message; //release table lock and session fSessionManager.rolledback(txnID); (void)fDbrm->releaseTableLock(tableLockId); fWEClient->removeQueue(uniqueId); return result; } rc = commitTransaction(uniqueId, txnID); //cout << "commiting transaction " << txnID.id << " and valid is " << txnID.valid << endl; if (rc != 0) fSessionManager.rolledback(txnID); else fSessionManager.committed(txnID); if (rc != 0) { Message::Args args; Message message(9); ostringstream oss; oss << " Commit failed with error code " << rc; args.add(oss.str()); fSessionManager.rolledback(txnID); (void)fDbrm->releaseTableLock(tableLockId); message.format(args); result.result = (ResultCode)rc; result.message = message; fWEClient->removeQueue(uniqueId); return result; } // Log the DDL statement logDDL(dropTableStmt.fSessionID, txnID.id, dropTableStmt.fSql, dropTableStmt.fOwner); } catch (std::exception& ex) { result.result = DROP_ERROR; Message::Args args; Message message(9); args.add("Drop table failed due to "); args.add(ex.what()); fSessionManager.rolledback(txnID); try { (void)fDbrm->releaseTableLock(tableLockId); } catch (std::exception&) { args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); } message.format( args ); result.message = message; fWEClient->removeQueue(uniqueId); return result; } catch (...) 
{ result.result = DROP_ERROR; errorMsg = "Error in getting information from system catalog or from dbrm."; Message::Args args; Message message(9); args.add("Drop table failed due to "); args.add(errorMsg); fSessionManager.rolledback(txnID); try { (void)fDbrm->releaseTableLock(tableLockId); } catch (std::exception&) { args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); } message.format( args ); result.message = message; fWEClient->removeQueue(uniqueId); return result; } try { (void)fDbrm->releaseTableLock(tableLockId); } catch (std::exception&) { result.result = DROP_ERROR; Message::Args args; Message message(9); args.add("Drop table failed due to "); args.add(IDBErrorInfo::instance()->errorMsg(ERR_HARD_FAILURE)); fSessionManager.rolledback(txnID); message.format( args ); result.message = message; fWEClient->removeQueue(uniqueId); return result; } //Save the oids to a file try { createWriteDropLogFile( roPair.objnum, uniqueId, oidList ); } catch (std::exception& ex) { result.result = WARNING; Message::Args args; Message message(9); args.add("Drop table failed due to "); args.add(ex.what()); message.format(args); result.message = message; fSessionManager.rolledback(txnID); fWEClient->removeQueue(uniqueId); return result; } // Bug 4208 Drop the PrimProcFDCache before droping the column files // FOr Windows, this ensures (most likely) that the column files have // no open handles to hinder the deletion of the files. 
rc = cacheutils::dropPrimProcFdCache(); //Drop files bytestream.restart(); bytestream << (ByteStream::byte)WE_SVR_WRITE_DROPFILES; bytestream << uniqueId; bytestream << (uint32_t) oidList.size(); for (unsigned i=0; i < oidList.size(); i++) { bytestream << (uint32_t) oidList[i]; } #ifdef IDB_DDL_DEBUG cout << "Drop table removing column files" << endl; #endif uint msgRecived = 0; try { fWEClient->write_to_all(bytestream); bsIn.reset(new ByteStream()); ByteStream::byte tmp8; while (1) { if (msgRecived == fWEClient->getPmCount()) break; fWEClient->read(uniqueId, bsIn); if ( bsIn->length() == 0 ) //read error { rc = NETWORK_ERROR; fWEClient->removeQueue(uniqueId); break; } else { *bsIn >> tmp8; rc = tmp8; if (rc != 0) { *bsIn >> errorMsg; fWEClient->removeQueue(uniqueId); break; } else msgRecived++; } }