/**
 * @brief Constructs an expander from a single command-line string.
 * @details The string is broken into fields on white space; quoted strings are extracted as a
 *  single field and escape characters are not decoded.  The non-empty fields are then handed to
 *  BuildCmdLineParameters and BuildExpandedParameterPairs.
 * @param[in] _applicationName  Stored in 'applicationName'.
 * @param[in] _log              Stored in 'log'.
 * @param[in] _cmdLine          Complete command line; a private copy is consumed, the caller's
 *                              string is left untouched.
 */
CmdLineExpander::CmdLineExpander (const KKStr&  _applicationName,
                                  RunLog&       _log,
                                  const KKStr&  _cmdLine
                                 ):
  applicationName (_applicationName),
  log             (_log)
{
  // Start from the same state ExpandCmdLine establishes.  BuildCmdLineParameters only ever
  // clears this flag, so without this assignment a clean parse would never set it.
  parmsGood = true;

  VectorKKStr  initialParameters;

  KKStr  cmdLine (_cmdLine);
  cmdLine.TrimLeft ();
  while  (!cmdLine.Empty ())
  {
    KKStr  nextField = cmdLine.ExtractQuotedStr ("\n\r\t ", false);  // false = Do not decode escape characters
    nextField.TrimRight ();
    if  (!nextField.Empty ())
      initialParameters.push_back (nextField);
    cmdLine.TrimLeft ();
  }

  BuildCmdLineParameters (initialParameters);
  BuildExpandedParameterPairs ();
}
void DataBaseServer::ParseParameterStr (const KKStr& parameterStr) { VectorKKStr parameterPairs = parameterStr.Split ("\t"); VectorKKStr::iterator idx; for (idx = parameterPairs.begin (); idx != parameterPairs.end (); idx++) { VectorKKStr fields = (*idx).Split (":="); // Split by either ':' or '=' if (fields.size () < 2) { // Should be two fields; line must be malformed. continue; } KKStr parameterName = fields[0].ToUpper (); if ((parameterName == "EMBEDDED") || (parameterName == "EMB") || (parameterName == "E")) embedded = fields[1].ToBool (); else if ((parameterName == "MYSQLDATADIR") || (parameterName == "MYSQL") || (parameterName == "MDD")) mySqlDataDir = fields[1]; else if ((parameterName == "DESCRIPTION") || (parameterName == "DESC") || (parameterName == "D")) description = fields[1]; else if ((parameterName == "HOSTNAME") || (parameterName == "HOST") || (parameterName == "H")) hostName = fields[1]; else if ((parameterName == "USERNAME") || (parameterName == "USER") || (parameterName == "U")) userName = fields[1]; else if ((parameterName == "PASSWORD") || (parameterName == "PW") || (parameterName == "P")) passWord = fields[1]; else if ((parameterName == "PORTNUM") || (parameterName == "PN")) portNum = fields[1].ToUint32 (); else if ((parameterName == "DATABASENAME") || (parameterName == "DATABASE") || (parameterName == "DB")) dataBaseName = fields[1]; } if (description.EqualIgnoreCase ("Embedded")) embedded = true; } /* ParseParameterStr */
/**
 * @brief Collects every remaining token of this parser into a vector.
 * @param[in] delStr  Delimiter characters forwarded to GetNextToken.
 * @return Tokens in the order GetNextToken produced them; empty when MoreTokens is already false.
 */
VectorKKStr  KKStrParser::Split (const char*  delStr)
{
  VectorKKStr  result;
  for  (;;)
  {
    if  (!MoreTokens ())
      break;
    result.push_back (GetNextToken (delStr));
  }
  return  result;
}
bool FileInStack (const KKStr& cmdFileName, const VectorKKStr& cmdFileStack ) { VectorKKStr::const_iterator idx; for (idx = cmdFileStack.begin (); idx != cmdFileStack.end (); idx++) { if (*idx == cmdFileName) return true; } return false; } /* FileInStack */
/**
 * @brief Builds one "<line-number>:<message>" string per recorded format error.
 * @details The line number is formatted with the "0000" mask.  When 'formatErrorsLineNums' has
 *  no entry for an error, a blank placeholder is used in place of the number.
 * @return The formatted messages, in the same order as 'formatErrors'.
 */
VectorKKStr  Configuration::FormatErrorsWithLineNumbers ()  const
{
  VectorKKStr  result;

  kkuint32  errIdx = 0;
  while  (errIdx < formatErrors.size ())
  {
    KKStr  prefix;
    if  (errIdx >= formatErrorsLineNums.size ())
      prefix = " ";
    else
      prefix = StrFormatInt (formatErrorsLineNums[errIdx], "0000");

    result.push_back (prefix + ":" + formatErrors[errIdx]);
    ++errIdx;
  }

  return  result;
}  /* FormatErrorsWithLineNumbers */
void KKJob::ProcessStatusStr (const KKStr& statusStr) { log.Level (30) << "KKJob::ProcessStatusStr[" << statusStr << "]" << endl; KKStr fieldName; KKStr fieldValue; VectorKKStr fields = statusStr.Split ('\t'); kkuint32 fieldNum = 0; while (fieldNum < fields.size ()) { fieldName = fields[fieldNum]; fieldNum++; if (fieldNum < fields.size ()) { fieldValue = fields[fieldNum]; fieldNum++; } else { fieldValue = ""; } fieldName.Upper (); fieldValue.TrimLeft ("\n\r\t "); fieldValue.TrimRight ("\n\r\t "); if (fieldName.CompareIgnoreCase ("JOBID") == 0) jobId = atoi (fieldValue.Str ()); else if (fieldName.CompareIgnoreCase ("PARENTID") == 0) parentId = atoi (fieldValue.Str ()); else if (fieldName.CompareIgnoreCase ("STATUS") == 0) status = JobStatusFromStr (fieldValue); else if (fieldName.CompareIgnoreCase ("NumProcessors") == 0) numProcessors = fieldValue.ToInt (); else if (fieldName.CompareIgnoreCase ("NumPorcessesAllowed") == 0) numPorcessesAllowed = fieldValue.ToInt (); else if (fieldName.CompareIgnoreCase ("Prerequisites") == 0) PrerequisitesFromStr (fieldValue); else { ProcessStatusField (fieldName, fieldValue); } } } /* ProcessStatusStr */
void KKJob::PrerequisitesFromStr (const KKStr& s) { prerequisites.clear (); if (s.CompareIgnoreCase ("None") != 0) { VectorKKStr fields = s.Split (','); for (kkuint32 x = 0; x < fields.size (); ++x) { kkint32 p = fields[x].ToInt (); prerequisites.push_back (p); } } } /* PrerequisitesFromStr */
void CmdLineExpander::ExtractParametersFromFile (const KKStr& cmdFileName, VectorKKStr& cmdFileParameters, bool& validFile ) { FILE* in = osFOPEN (cmdFileName.Str (), "r"); if (!in) { log.Level (-1) << endl << endl << endl << "ExtractParametersFromFile *** EROR ***" << endl << endl << " Invalid CmdFile[" << cmdFileName << "]" << endl << endl; validFile = false; return; } KKStr token; bool eof = false; token = osReadNextQuotedStr (in, " \n\r", eof); while (!eof) { cmdFileParameters.push_back (token); token = osReadNextQuotedStr (in, " \n\r", eof); } std::fclose (in); } /* ExtractParametersFromFile */
/**
 * @brief Expands the arguments handed to 'main' into the final parameter list.
 * @details Resets 'parmsGood' to true, copies argv[1] .. argv[argc - 1] (argv[0] is skipped)
 *  and then runs BuildCmdLineParameters followed by BuildExpandedParameterPairs.
 * @param[in] argc  Number of entries in 'argv'.
 * @param[in] argv  Argument strings.
 */
void  CmdLineExpander::ExpandCmdLine (kkint32  argc,
                                      char**   argv
                                     )
{
  parmsGood = true;

  VectorKKStr  initialParameters;
  for  (kkint32 argIdx = 1;  argIdx < argc;  ++argIdx)
    initialParameters.push_back (argv[argIdx]);

  BuildCmdLineParameters (initialParameters);
  BuildExpandedParameterPairs ();
}  /* ExpandCmdLine */
void InstrumentDataPitchAndRoll::ProcessData (const KKStr& txt) { VectorKKStr fields = txt.Split (" \t\n\r"); if (fields.size () < 4) return; float pitch = -999.99f; float roll = -999.99f; KKStr fieldName = ""; KKStr fieldValue = ""; kkuint32 fieldNum = 0; while (fieldNum < fields.size ()) { fieldName = fields[fieldNum]; fieldName.Upper (); fieldNum++; if (fieldNum < fields.size ()) { fieldValue = fields[fieldNum]; fieldNum++; } else { fieldValue = ""; } if (fieldName == "R") { roll = fieldValue.ToFloat (); } else if (fieldName == "P") { pitch = fieldValue.ToFloat (); } } manager->PitchAndRollData (curTextLineStartScanLine, pitch, roll); } /* ProcessData */
void InstrumentDataBatteryMeter::ProcessBatteryData (const KKStr& txt) { // We will be expecting 5 fields; // <Current battery> <\t> <Bat 0 Voltage> <\t> .... <\t> <Bat-3 Voltage> VectorKKStr fields = txt.Split (','); if (fields.size () < (1 + numOfBatteries)) return; // activeBattery is '1' based that is batteries '1' - '4'; so // batteryLevels[0] = battery level for battery 1. kkint32 activeBattery = fields[0].ToInt (); if ((activeBattery < 1) || ((kkuint32)activeBattery > numOfBatteries)) return; kkuint32 x; for (x = 0; x < numOfBatteries; x++) batteryLevels[x] = fields[x + 1].ToFloat (); manager->BatteryData (curTextLineStartScanLine, activeBattery, batteryLevels); } /* ProcessBatteryData */
/**
 * @brief Lists the names of the registered drivers that satisfy the requested capabilities.
 * @param[in] canRead   When true, only drivers whose CanRead () is true are listed.
 * @param[in] canWrite  When true, only drivers whose CanWrite () is true are listed.
 * @return Driver names in registration order; every driver is listed when both flags are false.
 */
VectorKKStr  FeatureFileIO::RegisteredDriverNames (bool  canRead,
                                                   bool  canWrite
                                                  )
{
  vector<FeatureFileIOPtr>*  drivers = RegisteredDrivers ();

  VectorKKStr  names;
  for  (FeatureFileIOPtr driver: *drivers)
  {
    // Short-circuit evaluation keeps the capability queries in the same order as a
    // read check followed by a write check.
    if  (((!canRead)   ||  driver->CanRead  ())  &&
         ((!canWrite)  ||  driver->CanWrite ())
        )
    {
      names.push_back (driver->DriverName ());
    }
  }

  return  names;
}  /* RegisteredDriverNames */
void KKJobManager::ProcessJobXmlBlockOfText (const KKStr& startStr, istream& i ) { if ((startStr.SubStrPart (0, 4) != "<KKJob ") || (startStr.LastChar () != '>')) { log.Level (-1) << endl << "KKJobManager::ProcessJobXmlBlockOfText ***ERROR*** StartStr[" << startStr << "] is not a KKJob String." << endl << endl; return; } KKStr s = startStr.SubStrPart (5); s.TrimLeft (); s.ChopLastChar (); KKStr jobTypeStr = ""; kkint32 jobId = -1; VectorKKStr parameters = s.Split (','); for (kkuint32 x = 0; x < parameters.size (); ++x) { KKStr parameterStr = parameters[x]; parameterStr.TrimLeft (); parameterStr.TrimRight (); KKStr fieldName = parameterStr.ExtractToken2 ("="); fieldName.TrimLeft (); fieldName.TrimRight (); KKStr fieldValue = parameterStr.ExtractToken2 ("="); fieldValue.TrimLeft (); fieldValue.TrimRight (); if (fieldName.CompareIgnoreCase ("JobType") == 0) jobTypeStr = fieldValue; else if (fieldName.CompareIgnoreCase ("JobId") == 0) jobId = fieldValue.ToInt (); } if (jobTypeStr.Empty () || (jobId < 0)) { log.Level (-1) << endl << "KKJobManager::ProcessJobXmlBlockOfText ***ERROR*** StartStr[" << startStr << "]." << endl << " JobType and/or JobId were not provided." << endl << endl; return; } KKJobPtr j = jobs->LookUpByJobId (jobId); if (j == NULL) { // We do not have this job in memory yet. We will have to create it now. KKStr emptyStatusStr = "JobId\t" + StrFormatInt (jobId, "ZZZZ0"); j = KKJob::CallAppropriateConstructor (this, jobTypeStr, emptyStatusStr); } j->CompletedJobDataRead (i); } /* ProcessJobXmlBlockOfText */
/**
 * @brief Constructs a Feature Encoder object.
 * @details For every selected feature the constructor records the source feature number, the
 *  first destination index, the destination cardinality and the action to take.  With Binary
 *  encoding a Nominal or Symbolic feature is expanded into one destination field per nominal
 *  value; with Scaled encoding such a feature is flagged for scaling; in every other case the
 *  feature is carried over as is.  A continuous-data-only FileDesc describing the destination
 *  fields is created, and 'xSpaceNeededPerExample' includes one extra slot for the terminating
 *  (-1) entry.
 * @param[in] _fileDesc          Description of the source features.
 * @param[in] _class1            Stored in 'class1'.
 * @param[in] _class2            Stored in 'class2'.
 * @param[in] _selectedFeatures  Features to encode; copied into 'selectedFeatures'.
 * @param[in] _encodingMethod    How Nominal/Symbolic features are to be encoded.
 * @param[in] _c_Param           Stored in 'c_Param'.
 */
FeatureEncoder::FeatureEncoder (FileDescConstPtr       _fileDesc,
                                MLClassPtr             _class1,
                                MLClassPtr             _class2,
                                const FeatureNumList&  _selectedFeatures,
                                SVM_EncodingMethod     _encodingMethod,
                                double                 _c_Param
                               ):
    cardinalityDest        (NULL),
    class1                 (_class1),
    class2                 (_class2),
    codedNumOfFeatures     (0),
    c_Param                (_c_Param),
    destFeatureNums        (NULL),
    destFileDesc           (NULL),
    destWhatToDo           (NULL),
    encodingMethod         (_encodingMethod),
    fileDesc               (_fileDesc),
    numEncodedFeatures     (0),
    numOfFeatures          (0),
    selectedFeatures       (_selectedFeatures),
    srcFeatureNums         (NULL),
    xSpaceNeededPerExample (0)
{
  numOfFeatures          = selectedFeatures.NumOfFeatures ();
  xSpaceNeededPerExample = 0;

  srcFeatureNums  = new kkint32[numOfFeatures];
  cardinalityDest = new kkint32[numOfFeatures];
  destFeatureNums = new kkint32[numOfFeatures];
  destWhatToDo    = new FeWhatToDo[numOfFeatures];

  VectorKKStr  destFieldNames;

  for  (kkint32 featureIdx = 0;  featureIdx < numOfFeatures;  ++featureIdx)
  {
    kkint32  srcFeatureNum = selectedFeatures[featureIdx];

    // Defaults; overridden below when the feature is expanded or scaled.
    srcFeatureNums  [featureIdx] = srcFeatureNum;
    destFeatureNums [featureIdx] = xSpaceNeededPerExample;
    cardinalityDest [featureIdx] = 1;
    destWhatToDo    [featureIdx] = FeWhatToDo::FeAsIs;

    const Attribute&  attribute     = fileDesc->GetAAttribute (srcFeatureNum);
    AttributeType     attributeType = attribute.Type ();
    kkint32           cardinality   = attribute.Cardinality ();

    bool  categorical = (attributeType == AttributeType::Nominal)  ||
                        (attributeType == AttributeType::Symbolic);

    if  ((encodingMethod == SVM_EncodingMethod::Binary)  &&  categorical)
    {
      // One destination field per nominal value.
      destWhatToDo    [featureIdx] = FeWhatToDo::FeBinary;
      cardinalityDest [featureIdx] = cardinality;
      xSpaceNeededPerExample += cardinalityDest[featureIdx];
      numEncodedFeatures     += cardinalityDest[featureIdx];
      for  (kkint32 valueIdx = 0;  valueIdx < cardinalityDest[featureIdx];  ++valueIdx)
      {
        KKStr  fieldName = attribute.Name () + "_" + attribute.GetNominalValue (valueIdx);
        destFieldNames.push_back (fieldName);
      }
    }
    else
    {
      // Single destination field; only Scaled encoding of a categorical feature changes
      // the action from the 'FeAsIs' default.
      xSpaceNeededPerExample++;
      numEncodedFeatures++;
      if  ((encodingMethod == SVM_EncodingMethod::Scaled)  &&  categorical)
        destWhatToDo [featureIdx] = FeWhatToDo::FeScale;
      else
        destWhatToDo [featureIdx] = FeWhatToDo::FeAsIs;
      destFieldNames.push_back (attribute.Name ());
    }
  }

  codedNumOfFeatures = xSpaceNeededPerExample;

  destFileDesc = FileDesc::NewContinuousDataOnly (destFieldNames);

  xSpaceNeededPerExample++;  // Add one more for the terminating (-1)
}
void AbundanceCorrectionStatsBuilder::Main () { if (Abort ()) return; if (reportFileName.Empty ()) { DateTime d = osGetLocalDateTime (); KKStr reportDir = osAddSlash (SipperVariables::PicesReportDir ()) + "AbundanceAdjustments"; osCreateDirectoryPath (reportDir); if (configFileName.Empty ()) reportFileName = osAddSlash (reportDir) + "NoConfigFile" + "_" + d.YYYYMMDDHHMMSS () + ".txt"; else reportFileName = osAddSlash (reportDir) + osGetRootName (configFileName) + "_" + d.YYYYMMDDHHMMSS () + ".txt"; } report = new ofstream (reportFileName.Str ()); PrintComandLineParameters (); if (configFileName.Empty ()) { log.Level (-1) << endl << endl << "AbundanceCorrectionStatsBuilder::Main ***ERROR*** Configuration File was not specified." << endl << endl; Abort (true); *report << endl << "*** NO CONFIGURATION FILE SPECIFIED ***" << endl << endl; return; } delete config; config = new TrainingConfiguration2 (fileDesc, configFileFullPath, log, true /**< 'true' = validateDirectories. */ ); if (!config->FormatGood ()) { log.Level (-1) << endl << "AbundanceCorrectionStatsBuilder::Main Config[" << configFileName << "] has invalid format." << endl << endl; VectorKKStr errors = config->FormatErrorsWithLineNumbers (); VectorKKStr::const_iterator idx; log.Level (-1) << endl; for (idx = errors.begin (); idx != errors.end (); ++idx) log.Level (-1) << (*idx) << endl; log.Level (-1) << endl << endl; *report << endl << endl << "*** Configuratiuon file[" << configFileName << " contains formatting errors." << endl << endl; config->PrintFormatErrors (*report); return; } bool changesMadeToTrainingLibraries = false; bool cancelFlag = false; DateTime latestImageTimeStamp; delete trainLibData; trainLibData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag); if (!trainLibData) { log.Level (-1) << endl << "AbundanceCorrectionStatsBuilder::Main ***ERROR*** No training data was loaded." 
<< endl << endl; *report << endl << "*** Failed to load training data ***" << endl; return; } otherClass = config->OtherClass (); if (!otherClass) otherClass = MLClass::CreateNewMLClass ("Other", -1); configClasses = config->ExtractClassList (); configClasses->SortByName (); if (configClasses->PtrToIdx (otherClass) >= 0) { log.Level (-1) << endl << "AbundanceCorrectionStatsBuilder::Main ***ERROR*** OtherClass[" << otherClass->Name () << "] is specified as a Training Class; it must be swepcified separatly." << endl << endl; *report << endl << "*** Failed to load other class data ***" << endl; return; } trainLibDataClasses = trainLibData->ExtractMLClassConstList (); trainLibDataClasses->SortByName (); if ((*configClasses) != (*trainLibDataClasses)) { Abort (true); log.Level (-1) << endl << "AbundanceCorrectionStatsBuilder::Main ***ERROR*** Class make up of training data does not correspond to configuration file." << endl << endl; *report << endl << "*** Training data contains different classes that Confg File ***" << endl; return; } allClasses = new MLClassConstList (*trainLibDataClasses); allClasses->PushOnBack (otherClass); otherClassData = config->LoadOtherClasssExamples (); if ((!otherClassData) || (otherClassData->QueueSize () < 1)) { Abort (true); log.Level (-1) << endl << "AbundanceCorrectionStatsBuilder::Main ***ERROR*** No Other[" << otherClass->Name () << "] class examples found in training library." << endl << endl; *report << endl << "*** No other class data loaded. ***" << endl; return; } PrintStartStatistics (); RemoveDuplicateImages (); normalizationParms = new NormalizationParms (config, *trainLibData, log); normalizationParms->NormalizeImages (trainLibData); normalizationParms->NormalizeImages (otherClassData); CreateInitialThreadInstaces (); ManageThreads (); if (oneOrMoreThreadsCrashed) { GenerateCrashReport (); } else { GenerateReportAndStats (); } } /* Main */
void ImportGPSDataGPGGA (const KKStr& fileName) { RunLog log; DataBasePtr dbConn = new DataBase (log); ifstream i (fileName.Str ()); if (!i.is_open ()) { log.Level (-1) << endl << endl << "ImpotrtGPSData Could not open file[" << fileName << "]" << endl << endl; return; } log.Level (10) << endl << endl << endl << endl << endl << "ImpotrtGPSData FileName[" << fileName << "]" << endl << endl << endl; char buff[20480]; bool firstPass = true; int lastMinute = 0; int linesRead = 0; KKStr ln (256); DateTime lastDateTime; while (i.getline (buff, sizeof (buff))) { linesRead++; ln = buff; ln.TrimLeft (); if (!ln.LocateStr ("GPGGA")) continue; VectorKKStr fields = ln.Parse (","); if (fields.size () < 8) continue; if (!fields[2].EqualIgnoreCase ("$GPGGA")) continue; /* 0 1 2 3 4 5 6 7 8 06/01/2010, 23:59:59.818, $GPGGA, 235958, 2840.927, N, 08828.458, W, 2, 09,22.10,0,M,,,14,0000*12 06/02/2010, 00:00:10.818, $GPGGA, 000009, 2840.931, N, 08828.482, W, 1, 09,0.89,0,M,,,,*2D 06/02/2010, 00:00:21.802, $GPGGA, 000020, 2840.929, N, 08828.505, W, 1, 09,0.89,0,M,,,,*21 06/02/2010, 00:00:31.818, $GPGGA, 000030, 2840.924, N, 08828.526, W, 1, 09,0.89,0,M,,,,*2C 06/02/2010, 00:00:42.818, $GPGGA, 000041, 2840.917, N, 08828.547, W, 1, 09,0.89,0,M,,,,*2D 06/02/2010, 00:00:53.802, $GPGGA, 000052, 2840.906, N, 08828.568, W, 1, 09,1.00,0,M,,,,*22 06/02/2010, 00:01:03.802, $GPGGA, 000102, 2840.895, N, 08828.585, W, 1, 09,0.89,0,M,,,,*2E 06/02/2010, 00:01:13.818, $GPGGA, 000112, 2840.883, N, 08828.600, W, 1, 09,0.89,0,M,,,,*26 */ KKStr dateStr = fields[0]; KKStr timeStr = fields[1]; KKStr latStr = fields[4]; KKStr logStr = fields[6]; auto x = latStr.LocateCharacter ('.'); if (!x) continue; KKStr latMinStr = latStr.SubStrPart (x - 2); KKStr latDegStr = latStr.SubStrSeg (0, x - 2); double latitude = latDegStr.ToDouble () + latMinStr.ToDouble () / 60.0; if (fields[5].EqualIgnoreCase ("S")) latitude = 0.0 - latitude; x = logStr.LocateCharacter ('.'); if (!x) continue; KKStr logMinStr = 
logStr.SubStrPart (x - 2); KKStr logDegStr = logStr.SubStrSeg (0, x - 2); double longitude = logDegStr.ToDouble () + logMinStr.ToDouble () / 60.0; if (fields[7].EqualIgnoreCase ("W")) longitude = 0.0 - longitude; DateType gmtDate (dateStr); TimeType gmtTime (timeStr); DateTime gmtDateTime (gmtDate, gmtTime); DateTime localTime = gmtDateTime; localTime.HoursAdd (-4); DateTime startDT = localTime; DateTime endDT = localTime; if (firstPass) { firstPass = false; startDT.SecondsAdd (-180); } else { DateTime deltaDT = localTime - lastDateTime; long deltaSecs = (long)deltaDT.Seconds (); startDT.SecondsAdd (-(deltaSecs / 2)); } endDT.SecondsAdd (30); if (gmtTime.Minute () != lastMinute) { lastMinute = gmtTime.Minute (); log.Level (10) << "LinesRead[" << linesRead << "] File[" << osGetRootName (fileName) << "] GMT Time[" << gmtDate.MMM_DD_YYYY () << " - " << gmtTime.HH_MM_SS () << "]" << endl; } if ((endDT.Month () < 6) && (endDT.Day () < 28)) { } else { dbConn->InstrumentDataUpdateLatitudeAndLongitude (startDT, endDT, latitude, longitude); } lastDateTime = localTime; } i.close (); delete dbConn; dbConn = NULL; } /* ImportGPSDataGPGGA */
void Test () { RunLog log; DataBasePtr dbConn = new DataBase (log); // InstrumentDataPtr id = dbConn->InstrumentDataGetByScanLine ("TestCruise_01", 4022); //{ // ImageFeaturesPtr fv = NULL; // KKStr imageFileName = "TestCruise_01_00006156_3701"; // DataBaseImagePtr dbi = dbConn->ImageLoad (imageFileName); // if (dbi) // fv = dbConn->FeatureDataRecLoad (dbi); // delete fv; // delete dbi; //} { SipperCruiseListPtr cruises = dbConn->SipperCruiseLoadAllCruises (); delete cruises; } bool cancelFlag = false; { DataBaseImageGroupPtr existingGroup = dbConn->ImageGroupLoad ("TestGroup2"); if (existingGroup) { VectorUint* depthStats = dbConn->ImageGetDepthStatistics (existingGroup, "", // sipperFileName 10.0f, // depthIncrements, NULL, // mlClass, 'P', // 'p' = Use Predicted Class 0.0f, 1.0f, // minProb, maxProb, 0, 0 // minSize, maxSize ); delete depthStats; depthStats = NULL; ClassStatisticListPtr stats = dbConn->ImageGetClassStatistics (existingGroup, "ETP2008_8A_06", NULL, 'P', // 'P' = Use Predicted Class 0.0f, 1.0f, // MinProb, MaxProb 0, 0, // MinSize, MaxSize 0.0f, 0.0f // MinDepth, MaxDepth ); delete stats; stats = NULL; } DataBaseImageListPtr images = dbConn->ImagesQuery (existingGroup, true, cancelFlag); } DataBaseImageGroupPtr g = new DataBaseImageGroup (-1, "TestGroup2", "Description of group", 0); dbConn->ImageGroupInsert (*g); if (dbConn->DuplicateKey ()) { DataBaseImageGroupPtr existingGroup = dbConn->ImageGroupLoad (g->Name ()); if (existingGroup) { g->ImageGroupId (existingGroup->ImageGroupId ()); dbConn->ImageGroupDelete (existingGroup->ImageGroupId ()); dbConn->ImageGroupInsert (*g); delete existingGroup; existingGroup = NULL; } } DataBaseImageListPtr images = dbConn->ImagesQuery (NULL, "ETP2008", "8", "A", NULL, 'P', // 'P' = Use Predicted Class 0.0f, 1.0f, // MinProb, MaxProb 0, 0, // MinSize, MaxSize 290.0f, 293.0f, // MinDepth, MaxDepth 0, // Restart Image -1, // unlimited Limit true, // true=Include ThumbNail cancelFlag ); VectorKKStr fileNames; { 
DataBaseImageList::iterator idx; for (idx = images->begin (); idx != images->end (); idx++) fileNames.push_back ((*idx)->ImageFileName ()); } dbConn->ImageGroupEntriesInsert (g->ImageGroupId (), fileNames); delete dbConn; } /* Test */
/**
 * @brief Compares observed nearest-neighbor distances with those of randomized locations.
 * @details For each class (or only 'fromPlankton' when it is set) that has images, a private
 *  copy of the images is made, nearest-neighbor statistics are computed for it, a RandomNND
 *  report is written to a per-class text file and a Z-Score is derived.  When 'fromPlankton' is
 *  not set the same is done once more for all classes combined.  A Z-Score summary, the class
 *  summary report and the spatial overlap report are written to '*report', and the LLoyds bins
 *  data is saved.
 * @param[in] images  Images to analyze.
 */
void  OurNeighbors::RandomReport (ImageFeaturesList&  images)
{
  double  allClassesMeanNNDAnyClass    = 0.0f;
  double  allClassesMeanStdDevAnyClass = 0.0f;

  ClassSummaryList  classSummaries (log);

  VectorKKStr  zScoreSummaryLines;

  for  (MLClassPtr mlClass: *mlClasses)
  {
    if  (fromPlankton  &&  (fromPlankton != mlClass))
      continue;

    double  sampleMeanNND   = 0.0f;
    double  sampleStdDevNND = 0.0f;
    double  sampleMaxDist   = 0.0f;
    double  sampleMinDist   = 0.0f;

    ImageFeaturesListPtr  imagesInClass = images.ExtractExamplesForAGivenClass (mlClass);
    if  (imagesInClass->QueueSize () > 0)
    {
      // Work on a list that owns duplicate 'ImageFeatures' instances so that randomizing the
      // 'sfCentroidRow' and 'sfCentroidCol' locations does not disturb the original data.
      ImageFeaturesListPtr  imagesToRandomize
             = new ImageFeaturesList (*imagesInClass,
                                      true   // 'true' = own the data; new instances are created.
                                     );

      LLoydsEntryListPtr  lloydsEntries = DeriveAllLLoydsBins (*imagesToRandomize);

      {
        // Mean and stdDev of the nearest-neighbor distance for this class.
        NeighborList  neighbors (*imagesToRandomize, log);
        neighbors.FindNearestNeighbors (neighborType, mlClass);
        neighbors.CalcStatistics (sampleMeanNND, sampleStdDevNND, sampleMaxDist, sampleMinDist);
      }

      KKStr  randomReportFileName;
      if  (!reportFileName.Empty ())
        randomReportFileName = osRemoveExtension (reportFileName) + "_Random";
      else
        randomReportFileName = "RandomDistanceReport";

      randomReportFileName << "_" << mlClass->Name () << ".txt";

      ofstream  randomReport (randomReportFileName.Str ());

      randomReport << "Random Distribution Report" << endl
                   << endl;
      randomReport << "Source Directory [" << sourceRootDirPath << "]" << endl;
      randomReport << "Class [" << mlClass->Name () << "]" << endl;

      RandomNND  randomizeLocations (lastScanLine,
                                     *imagesToRandomize,
                                     numOfIterations,
                                     numOfBuckets,
                                     bucketSize,
                                     randomReport,
                                     log
                                    );
      randomizeLocations.GenerateReport ();

      double  randomMeanNND   = randomizeLocations.NND_Mean ();
      double  randomStdDevNND = randomizeLocations.NND_StdDev ();
      double  realDataU2Stat  = randomizeLocations.RealDataU2Stat ();

      double  z_Score = Z_Score (sampleMeanNND, randomMeanNND, randomStdDevNND, imagesToRandomize->QueueSize ());

      randomReport << endl << endl << endl
                   << "Z-Score" << endl
                   << "=======" << endl
                   << endl
                   << "SampleMeanNND " << "\t" << sampleMeanNND << endl
                   << "SampleStdDevNND " << "\t" << sampleStdDevNND << endl
                   << "RandomMeanNND " << "\t" << randomMeanNND << endl
                   << "RandomStdDevNND " << "\t" << randomStdDevNND << endl
                   << "------- Z-Score " << "\t" << z_Score << endl
                   << endl;

      KKStr  zScoreSummaryLine = mlClass->Name () + "\t"
                               + StrFormatDouble (sampleMeanNND,   "###,##0.00") + "\t"
                               + StrFormatDouble (sampleStdDevNND, "###,##0.00") + "\t"
                               + StrFormatDouble (randomMeanNND,   "###,##0.00") + "\t"
                               + StrFormatDouble (randomStdDevNND, "###,##0.00") + "\t"
                               + StrFormatDouble (z_Score,         "###,##0.000");
      zScoreSummaryLines.push_back (zScoreSummaryLine);

      // 'lloydsEntries' is handed to the new 'ClassSummary' instance.
      classSummaries.PushOnBack (new ClassSummary (mlClass, lloydsEntries, (float)realDataU2Stat, (float)z_Score));

      delete  imagesToRandomize;
      imagesToRandomize = NULL;
    }

    delete  imagesInClass;
    imagesInClass = NULL;
  }

  if  (!fromPlankton)
  {
    LLoydsEntryListPtr  allClassesLLoydsEntries = DeriveAllLLoydsBins (images);

    // Report covering all classes together.
    KKStr  randomReportFileName;
    if  (!reportFileName.Empty ())
      randomReportFileName = osRemoveExtension (reportFileName) + "_Random_All.txt";
    else
      randomReportFileName = "RandomDistanceReport_All.txt";

    ofstream  randomReport (randomReportFileName.Str ());

    randomReport << "Source Directory [" << sourceRootDirPath << "]" << endl;
    randomReport << "Class [" << "All" << "]" << endl;

    {
      // Mean and stddev of Nearest Neighbor regardless of class.
      NeighborList  allClassesNeighbors (images, log);
      allClassesNeighbors.FindNearestNeighbors (NeighborType::AnyPlankton, fromPlankton);

      double  allClassesMinDistAnyClass = 0.0f;
      double  allClassesMaxDistAnyClass = 0.0f;

      allClassesNeighbors.CalcStatistics (allClassesMeanNNDAnyClass,
                                          allClassesMeanStdDevAnyClass,
                                          allClassesMinDistAnyClass,
                                          allClassesMaxDistAnyClass
                                         );
    }

    RandomNND  randomizeLocations (lastScanLine,
                                   images,
                                   numOfIterations,
                                   numOfBuckets,
                                   bucketSize,
                                   randomReport,
                                   log
                                  );
    randomizeLocations.GenerateReport ();

    // All classes Z-Score
    double  allClassesRandomMeanNND   = randomizeLocations.NND_Mean ();
    double  allClassesRandomStdDevNND = randomizeLocations.NND_StdDev ();
    double  allClassesRealDataU2Stat  = randomizeLocations.RealDataU2Stat ();
    double  z_Score = Z_Score (allClassesMeanNNDAnyClass, allClassesRandomMeanNND, allClassesRandomStdDevNND, images.QueueSize ());

    KKStr  zScoreSummaryLine = KKStr ("All-Classes") + "\t"
                             + StrFormatDouble (allClassesMeanNNDAnyClass,    "###,##0.00") + "\t"
                             + StrFormatDouble (allClassesMeanStdDevAnyClass, "###,##0.00") + "\t"
                             + StrFormatDouble (allClassesRandomMeanNND,      "###,##0.00") + "\t"
                             + StrFormatDouble (allClassesRandomStdDevNND,    "###,##0.00") + "\t"
                             + StrFormatDouble (z_Score,                      "###,##0.00");
    zScoreSummaryLines.push_back (zScoreSummaryLine);

    randomReport << endl << endl << endl
                 << "Z-Score" << endl
                 << "=======" << endl
                 << endl
                 << "SampleMeanNND " << "\t" << allClassesMeanNNDAnyClass << endl
                 << "SampleStdDevNND " << "\t" << allClassesMeanStdDevAnyClass << endl
                 << "RandomMeanNND " << "\t" << allClassesRandomMeanNND << endl
                 << "RandomStdDevNND " << "\t" << allClassesRandomStdDevNND << endl
                 << "------- Z-Score " << "\t" << z_Score << endl
                 << endl;

    classSummaries.PushOnBack (new ClassSummary (MLClass::CreateNewMLClass (KKStr ("AllClasses")),
                                                 allClassesLLoydsEntries,
                                                 (float)allClassesRealDataU2Stat,
                                                 (float)z_Score
                                                )
                              );
  }

  {
    // Z-Score Summary Report
    *report << std::endl << std::endl
            << "Z-Score Summary By Class" << std::endl
            << std::endl
            << "ClassName" << "\t" << "SampleMean" << "\t" << "SampleStdDev" << "\t" << "RandomMean" << "\t" << "RandomStdDev" << "\t" << "Z-Score" << std::endl
            << "=========" << "\t" << "==========" << "\t" << "============" << "\t" << "==========" << "\t" << "============" << "\t" << "=======" << std::endl;

    for  (const auto& summaryLine: zScoreSummaryLines)
      *report << summaryLine << std::endl;
  }

  *report << endl << endl << endl;
  classSummaries.SummaryReport (*report);

  *report << endl << endl << endl;
  classSummaries.SpatialOverlapReport (*report);

  classSummaries.SaveLLoydsBinsData (lloydsBinsFileName, sourceRootDirPath, lastScanLine, baseLLoydsBinSize);
}  /* RandomReport */
void CmdLineExpander::BuildCmdLineParameters (const VectorKKStr& argv) { kkuint32 x = 0; while (x < argv.size ()) { KKStr s = argv[x]; x++; KKStr sUpper = s.ToUpper(); if ((sUpper == "-L") || (sUpper == "-LOGFILE")) { if (x < argv.size ()) { if (argv[x][(kkint16)0] != '-') { logFileName = argv[x]; if (!logFileName.Empty ()) log.AttachFile (logFileName); x++; } } if (logFileName.Empty ()) { log.Level (-1) << std::endl << std::endl; log.Level (-1) << applicationName << " - Invalid Log File Parameter (-L)." << endl; log.Level (-1) << " Name of log file required." << endl; log.Level (-1) << endl; parmsGood = false; } } else if (sUpper == "-CMDFILE") { KKStr cmdFileName = ""; if (x < argv.size ()) { if (argv[x][(kkint16)0] != '-') { cmdFileName = argv[x]; x++; } } if (cmdFileName.Empty ()) { log.Level (-1) << endl << endl << endl << applicationName << " " << "BuildCmdLineParameters *** ERROR ***" << endl << endl << "-CMDFILE option did not define a file name." << endl << endl; parmsGood = false; } else { if (FileInStack (cmdFileName, cmdFileStack)) { log.Level (-1) << endl << endl << endl << applicationName << " BuildCmdLineParameters *** ERROR ***" << endl << endl << "-CMDFILE [" << cmdFileName << "] is being called recursively." << endl << endl; parmsGood = false; } else { bool validFile = true; cmdFileStack.push_back (cmdFileName); VectorKKStr cmdFileParameters; ExtractParametersFromFile (cmdFileName, cmdFileParameters, validFile); BuildCmdLineParameters (cmdFileParameters); cmdFileStack.pop_back (); if (!validFile) parmsGood = false; } } } else { expandedParameters.push_back (s); } } } /* BuildCmdLineParameters */
/**
 *@brief  Constructs a Feature Encoder object.
 *@details  For every feature selected by '_param' the constructor records the source feature
 *  number, the first destination index, the destination cardinality and the action to take.
 *  With Binary encoding a Nominal or Symbolic feature is expanded into one destination field
 *  per nominal value; with Scaled encoding such a feature is flagged for scaling; in every other
 *  case the feature is carried over as is.  A continuous-data-only FileDesc describing the
 *  destination fields is created as 'encodedFileDesc'.
 *@param[in]  _param     Supplies the selected features and the encoding method.
 *@param[in]  _fileDesc  Description of the source features.
 */
FeatureEncoder2::FeatureEncoder2 (const ModelParam&  _param,
                                  FileDescConstPtr   _fileDesc
                                 ):
    attributeVector    (_fileDesc->AttributeVector ()),
    cardinalityDest    (NULL),
    cardinalityVector  (_fileDesc->CardinalityVector ()),
    codedNumOfFeatures (0),
    destFeatureNums    (NULL),
    destWhatToDo       (NULL),
    encodedFileDesc    (NULL),
    encodingMethod     (ModelParam::EncodingMethodType::NoEncoding),
    fileDesc           (_fileDesc),
    numOfFeatures      (0),
    srcFeatureNums     (NULL),
    param              (_param)
{
  FeatureNumListConstPtr  selectedFeatures = param.SelectedFeatures ();
  numOfFeatures  = param.SelectedFeatures ()->NumOfFeatures ();
  encodingMethod = param.EncodingMethod ();

  srcFeatureNums  = new kkuint16  [numOfFeatures];
  cardinalityDest = new kkint32   [numOfFeatures];
  destFeatureNums = new kkint32   [numOfFeatures];
  destWhatToDo    = new FeWhatToDo[numOfFeatures];

  VectorKKStr  destFieldNames;

  for  (kkint32 featureIdx = 0;  featureIdx < numOfFeatures;  ++featureIdx)
  {
    kkuint16  srcFeatureNum = (*selectedFeatures)[featureIdx];

    // Defaults; overridden below when the feature is expanded or scaled.
    srcFeatureNums  [featureIdx] = srcFeatureNum;
    destFeatureNums [featureIdx] = codedNumOfFeatures;
    cardinalityDest [featureIdx] = 1;
    destWhatToDo    [featureIdx] = FeWhatToDo::FeAsIs;

    Attribute  srcAttribute = (fileDesc->Attributes ())[srcFeatureNum];

    bool  categorical = (attributeVector[srcFeatureNum] == AttributeType::Nominal)  ||
                        (attributeVector[srcFeatureNum] == AttributeType::Symbolic);

    if  ((encodingMethod == ModelParam::EncodingMethodType::Binary)  &&  categorical)
    {
      // One destination field per nominal value.
      destWhatToDo    [featureIdx] = FeWhatToDo::FeBinary;
      cardinalityDest [featureIdx] = cardinalityVector[srcFeatureNums [featureIdx]];
      codedNumOfFeatures += cardinalityDest[featureIdx];
      for  (kkint32 valueIdx = 0;  valueIdx < cardinalityDest[featureIdx];  ++valueIdx)
      {
        KKStr  fieldName = srcAttribute.Name () + "_" + srcAttribute.GetNominalValue (valueIdx);
        destFieldNames.push_back (fieldName);
      }
    }
    else
    {
      // Single destination field; only Scaled encoding of a categorical feature changes
      // the action from the 'FeAsIs' default.
      codedNumOfFeatures++;
      if  ((encodingMethod == ModelParam::EncodingMethodType::Scaled)  &&  categorical)
        destWhatToDo [featureIdx] = FeWhatToDo::FeScale;
      else
        destWhatToDo [featureIdx] = FeWhatToDo::FeAsIs;
      destFieldNames.push_back (srcAttribute.Name ());
    }
  }

  encodedFileDesc = FileDesc::NewContinuousDataOnly (destFieldNames);
}
void ClassificationBiasMatrix::ReadSimpleConfusionMatrix (istream& sr, MLClassListPtr fileClasses ) { // 'classes' - The class order that the owner of this object is expecting. // 'fileClasses' - The order that the classes are stored in the text file. if ((classes == NULL) || (fileClasses == NULL)) { KKStr errMsg = "ReadSimpleConfusionMatrix ***ERROR*** The 'Classes' line was never provided."; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } kkint32 classesColIdx = 0; char buff[10240]; KKStr l; while (!sr.eof ()) { sr.getline (buff, sizeof (buff)); l = buff; l.TrimLeft (); l.TrimRight (); if (l.CompareIgnoreCase ("</SimpleConfusionMatrix>") == 0) break; KKStr lineName = l.ExtractToken2 ("\t"); if (lineName.CompareIgnoreCase ("DataRow") == 0) { if (fileClasses == NULL) { KKStr errMsg = "ReadSimpleConfusionMatrix ***ERROR*** 'Classes' was not provided before 'DataRow'."; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } KKStr className = l.ExtractToken2 ("\t"); KKStr data = l.ExtractToken2 ("\t"); MLClassPtr pc = MLClass::CreateNewMLClass (className); kkint32 classesIdx = classes->PtrToIdx (pc); kkint32 fileClassesIdx = fileClasses->PtrToIdx (pc); if (classesIdx < 0) { KKStr errMsg = "ReadSimpleConfusionMatrix ***ERROR*** DataRow specifies class[" + className + "] which is not defined by caller"; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } if (fileClassesIdx < 0) { KKStr errMsg = "ReadSimpleConfusionMatrix ***ERROR*** DataRow specifies class[" + className + "] was not defined in 'Classes' line."; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } kkint32 classesRowIdx = classesIdx; VectorKKStr dataFields = data.Split (','); if (dataFields.size () != (kkuint32)numClasses) { KKStr errMsg = "ReadSimpleConfusionMatrix ***ERROR*** DataRow Class[" + className + "] number[" + StrFormatInt ((kkint32)dataFields.size (), "ZZZ0") + "] of values 
provided does not match number of Classes."; runLog.Level (-1) << errMsg << endl; valid = false; throw KKException (errMsg); } for (kkint32 c = 0; c < numClasses; c++) { pc = fileClasses->IdxToPtr (c); classesColIdx = classes->PtrToIdx (pc); VectorKKStr parts = dataFields[c].Split (':'); if (parts.size () > 1) { (*counts) [classesRowIdx][classesColIdx] = parts[0].ToDouble (); (*probabilities)[classesRowIdx][classesColIdx] = parts[1].ToDouble (); } } } } } /* ReadSimpleConfusionMatrix */
void RandomSplitJobManager::GenerateFinalResultsReport () { KKStr reportFileName = osGetRootName (ManagerName ()) + "_Results.html;"; ofstream f (reportFileName.Str ()); f << "Run Time Parameters" << endl << "Run Time" << "\t" << osGetLocalDateTime () << endl << "configFileName" << "\t" << configFileName << endl << "DataFileName" << "\t" << dataFileName << endl << "Format" << "\t" << format->DriverName () << endl << "DataIndexFileName" << "\t" << dataIndexFileName << endl << "NumFolds" << "\t" << numFolds << endl << "NumSplits" << "\t" << numSplits << endl << "splitFraction" << "\t" << splitFraction << endl << endl; KKJobList::const_iterator idx; ConfusionMatrix2 avgResults (*(this->MLClasses ())); KKB::uint x = 0; for (idx = Jobs ()->begin (); idx != Jobs ()->end (); idx++) { RandomSplitJobPtr j = dynamic_cast<RandomSplitJobPtr> (*idx); if (j->RandomSplitsResults () != NULL) { f << endl << "Random Split[" << j->SplitNum () << "]" << endl; j->RandomSplitsResults ()->PrintConfusionMatrixTabDelimited (f); f << endl << endl; j->PrintClassCounts (f); f << endl << endl; avgResults.AddIn (*(j->RandomSplitsResults ()), log); x++; } } f << endl << "Mean Average of all random Splits." 
<< endl; avgResults.FactorCounts (1.0 / (double)x); avgResults.PrintConfusionMatrixTabDelimited (f); f << endl << endl << endl << endl << "Class Counts" << endl << endl; kkuint32 numClasses = (kkuint32)mlClasses->size (); VectorFloat classAccs; VectorDouble knownCounts; VectorDouble predCounts; VectorDouble adjCounts; VectorDouble adjCountsStdError; VectorDouble predDelta; VectorDouble adjDelta; KKStr l1, l2, l3; mlClasses->ExtractThreeTitleLines (l1, l2, l3); VectorKKStr knownCountLines; VectorKKStr predCountLines; VectorKKStr adjCountLines; VectorKKStr deltaPredCountLines; VectorKKStr deltaAdjCountLines; VectorKKStr accLines; ConfusionMatrix2 totalCM (*MLClasses ()); int totalCmCount = 0; // Known Counts for (idx = Jobs ()->begin (); idx != Jobs ()->end (); idx++) { RandomSplitJobPtr j = dynamic_cast<RandomSplitJobPtr> (*idx); if (j->RandomSplitsResults () != NULL) { KKStr splitNumStr = StrFormatInt (j->SplitNum (), "ZZZ0"); j->GetClassCounts (classAccs, knownCounts, predCounts, adjCounts, adjCountsStdError, predDelta, adjDelta); totalCM.AddIn (*(j->RandomSplitsResults ()), log); totalCmCount++; KKStr accLine = "Acc By Class\t" + splitNumStr; KKStr knownLine = "Known\t" + splitNumStr; KKStr predLine = "Predicted\t" + splitNumStr; KKStr adjLine = "Adjusted\t" + splitNumStr; KKStr deltaPredLine = "Delta Pred\t" + splitNumStr; KKStr deltaAdjLine = "Delta Adj\t" + splitNumStr; double totalAcc = 0.0; double totalDeltaPred = 0.0; double totalDeltaAdj = 0.0; for (x = 0; x < numClasses; x++) { accLine << "\t" << StrFormatDouble (classAccs [x], "zz0.00") << "%"; knownLine << "\t" << StrFormatDouble (knownCounts [x], "-Z,ZZZ,ZZ0.0"); predLine << "\t" << StrFormatDouble (predCounts [x], "-Z,ZZZ,ZZ0.0"); adjLine << "\t" << StrFormatDouble (adjCounts [x], "-Z,ZZZ,ZZ0.0"); deltaPredLine << "\t" << StrFormatDouble (predDelta [x], "-Z,ZZZ,ZZ0.0"); deltaAdjLine << "\t" << StrFormatDouble (adjDelta [x], "-Z,ZZZ,ZZ0.0"); totalAcc += classAccs [x]; totalDeltaPred += fabs 
(predDelta[x]); totalDeltaAdj += fabs (adjDelta[x]); } accLine << "\t" << StrFormatDouble ((totalAcc / (double)classAccs.size ()), "ZZ0.00") << "%"; deltaPredLine << "\t" << StrFormatDouble ((totalDeltaPred / (double)predDelta.size ()), "ZZ0.00"); deltaAdjLine << "\t" << StrFormatDouble ((totalDeltaAdj / (double)adjDelta.size ()), "ZZ0.00"); accLines.push_back (accLine); knownCountLines.push_back (knownLine); predCountLines.push_back (predLine); adjCountLines.push_back (adjLine); deltaPredCountLines.push_back (deltaPredLine); deltaAdjCountLines.push_back (deltaAdjLine); } } double factor = 0.0; if (totalCmCount > 0) factor = 1.0 / (double)totalCmCount; totalCM.FactorCounts (factor); f << endl << endl << "Average Confusion Matrix" << endl << endl; totalCM.PrintConfusionMatrixTabDelimited (f); f << "" << "\t" << "" << "\t" << l1 << endl << "" << "\t" << "Split" << "\t" << l2 << endl << "Description" << "\t" << "Num" << "\t" << l3 << endl; f << endl << endl; for (x = 0; x < knownCountLines.size (); x++) f << knownCountLines[x] << endl; f << endl << endl; for (x = 0; x < predCountLines.size (); x++) f << predCountLines[x] << endl; f << endl << endl; for (x = 0; x < adjCountLines.size (); x++) f << adjCountLines[x] << endl; f << endl << endl; for (x = 0; x < deltaPredCountLines.size (); x++) f << deltaPredCountLines[x] << endl; f << endl << endl; for (x = 0; x < deltaAdjCountLines.size (); x++) f << deltaAdjCountLines[x] << endl; f << endl << endl; for (x = 0; x < knownCountLines.size (); x++) f << accLines[x] << endl; VectorFloat avgAccuracies = totalCM.AccuracyByClass (); f << "Avg-Accuracies"; for (x = 0; x < avgAccuracies.size (); x++) f << "\t" << StrFormatDouble (avgAccuracies[x], "zz0.00") << "%"; f << "\t" << StrFormatDouble (totalCM.Accuracy (), "zz0.00") << "%"; f << endl; f << endl << endl; f.close (); } /* GenerateFinalResultsReport */