/**
 * Counts how many examples fall into each Lloyd's bin and computes the Lloyd's
 * index of patchiness for the resulting histogram.
 *
 * The number of bins is the member 'lastScanLine' divided by 'lloydsBinSize',
 * rounded up.  Each example is assigned to the bin given by its
 * SfCentroidRow () divided by 'lloydsBinSize'.
 *
 * @param examples       Examples to distribute over the bins.
 * @param lloydsBinSize  Size of one bin; must be greater than zero because it is
 *                       used as a divisor.
 * @return A newly allocated LLoydsEntry built from the bin size, the bin counts
 *         and the computed index.  NOTE(review): the caller presumably takes
 *         ownership of the entry (and the entry of the bin vector) - confirm.
 */
LLoydsEntryPtr  OurNeighbors::DeriveLLoydsBins (const ImageFeaturesList&  examples,
                                                kkint32                   lloydsBinSize
                                               )
{
  double  lloydsIndex = 0.0;

  kkint32  numLLoydsBins = kkint32 (lastScanLine + lloydsBinSize - 1) / kkint32 (lloydsBinSize);

  VectorIntPtr  lloydsBins = new VectorInt (numLLoydsBins, 0);

  ImageFeaturesList::const_iterator  idx;
  for  (idx = examples.begin ();  idx != examples.end ();  idx++)
  {
    const ImageFeaturesPtr  i = *idx;

    kkint32  lloydsBin = kkint32 (i->SfCentroidRow () / double (lloydsBinSize));

    // Both ends of the range are checked: a negative centroid row would produce a
    // negative index, which is just as invalid as one past the last bin.
    if  ((lloydsBin < 0)  ||  (lloydsBin >= numLLoydsBins))
    {
      // This should not happen; if it does there is a programming error upstream.
      log.Level (-1) << endl << endl << endl
                     << "OurNeighbors::DeriveLLoydsBins **** ERROR ****" << endl
                     << endl
                     << " An invalid lloydsBin[" << lloydsBin << "] was derived." << endl
                     << " must be in range of [0 - " << (numLLoydsBins - 1) << "]" << endl
                     << endl;

      // With no bins at all there is nothing valid to clamp to; skip the example
      // rather than write outside the vector.
      if  (numLLoydsBins < 1)
        continue;

      lloydsBin = (lloydsBin < 0) ? 0 : (numLLoydsBins - 1);
    }

    (*lloydsBins)[lloydsBin]++;
  }

  lloydsIndex = LLoydsIndexOfPatchiness (*lloydsBins);

  return  new LLoydsEntry (lloydsBinSize, lloydsBins, lloydsIndex);
}  /* DeriveLLoydsBins */
/**
 * Scans 'images' and returns the highest centroid row found, rounded to an
 * integer by adding 0.5 and truncating.
 *
 * @param images  Examples whose SfCentroidRow () values are examined.
 * @return The rounded row of the last example that exceeded the running
 *         maximum; 0 when no example has a row greater than 0.
 */
kkint32  OurNeighbors::LastScanLine (const ImageFeaturesList&  images)  const
{
  kkint32  highestRow = 0;

  for  (ImageFeaturesList::const_iterator it = images.begin ();  it != images.end ();  ++it)
  {
    const ImageFeaturesPtr  example = *it;

    // Nothing to do unless this example lies beyond the current maximum.
    if  (!(example->SfCentroidRow () > highestRow))
      continue;

    highestRow = (kkint32)(example->SfCentroidRow () + 0.5f);
  }

  return  highestRow;
}  /* LastScanLine */
/**
 * Builds a list with one SipperFile entry for every distinct SIPPER file name
 * referenced by the file names of 'examples'.
 *
 * For each example the SIPPER file name is parsed out of ExampleFileName ().
 * The first time a name is seen its record is loaded through 'dbConn'; when no
 * record is returned a new SipperFile is created with fixed CTD extension and
 * instrument settings, inserted through 'dbConn', and used instead.
 *
 * @param dbConn    Database connection used to load and insert SipperFile records.
 * @param examples  Examples whose file names identify the SIPPER files.
 * @param log       Not used by this function.
 * @return A newly allocated SipperFileList (constructed with 'true') holding one
 *         entry per distinct SIPPER file name, in order of first appearance.
 */
SipperFileListPtr  GetListOfSipperFiles (DataBasePtr           dbConn,
                                         ImageFeaturesListPtr  examples,
                                         RunLog&               log
                                        )
{
  // SIPPER file names already handled; only the keys are of interest.
  map<KKStr,KKStr>  namesAlreadySeen;

  SipperFileListPtr  result = new SipperFileList (true);

  for  (ImageFeaturesList::iterator it = examples->begin ();  it != examples->end ();  it++)
  {
    ImageFeaturesPtr  example = *it;

    KKStr     exampleFileName = example->ExampleFileName ();
    KKStr     sipperFileName;
    kkuint32  scanLineNum = 0;
    kkuint32  scanCol     = 0;

    PicesVariables::ParseImageFileName (exampleFileName, sipperFileName, scanLineNum, scanCol);

    if  (namesAlreadySeen.find (sipperFileName) != namesAlreadySeen.end ())
      continue;

    namesAlreadySeen.insert (pair<KKStr, KKStr>(sipperFileName, sipperFileName));

    SipperFilePtr  sipperFile = dbConn->SipperFileRecLoad (sipperFileName);
    if  (!sipperFile)
    {
      // No record was returned; create one with the fixed settings below and insert it.
      sipperFile = new SipperFile (sipperFileName);
      sipperFile->CtdExt0 ("TRN");
      sipperFile->CtdExt1 ("OXG");
      sipperFile->CtdExt2 ("FLO");
      sipperFile->CtdExt3 ("TUR");
      sipperFile->Sp0 (Instrument::LookUpByShortName ("CTD"));
      sipperFile->Sp1 (Instrument::LookUpByShortName ("P-R"));
      sipperFile->Sp2 (Instrument::LookUpByShortName ("BAY"));
      dbConn->SipperFileInsert (*sipperFile);
    }

    result->PushOnBack (sipperFile);
  }

  return  result;
}  /* GetListOfSipperFiles */
/**
 * Replaces 'examples' with a new list that contains only the examples whose
 * class name is not found in the member 'excludedClasses'.
 *
 * The incoming list is sorted by class so the exclusion lookup is only repeated
 * when the class changes from one example to the next.  Examples that are
 * dropped are deleted; the ones kept are moved into a new list constructed as
 * owner of its contents.  The original list object is deleted and 'examples' is
 * pointed at the new list.
 *
 * 'excludedClasses' is dereferenced without a check, so it must not be NULL.
 *
 * @param examples  In/out: list to filter; on return refers to the filtered list.
 */
void  OurNeighbors::RemoveExcludedClasses (ImageFeaturesListPtr&  examples)
{
  examples->SortByClass ();

  ImageFeaturesListPtr  survivors = new ImageFeaturesList (examples->FileDesc (),
                                                           true  // true = the new list owns its examples.
                                                          );

  // The old list gives up ownership so deleting it below leaves the examples alone.
  examples->Owner (false);

  MLClassPtr  classOfCurrentRun = NULL;
  bool        keepCurrentRun    = true;

  for  (ImageFeaturesList::iterator it = examples->begin ();  it != examples->end ();  it++)
  {
    ImageFeaturesPtr  example      = *it;
    MLClassPtr        exampleClass = example->MLClass ();

    if  (exampleClass != classOfCurrentRun)
    {
      // Class changed; decide once whether this class is kept.
      classOfCurrentRun = exampleClass;
      keepCurrentRun    = (excludedClasses->LookUpByName (classOfCurrentRun->Name ()) == NULL);
    }

    if  (!keepCurrentRun)
    {
      delete  example;
      continue;
    }

    survivors->PushOnBack (example);
  }

  delete  examples;
  examples = survivors;
}  /* RemoveExcludedClasses */
/**
 * Runs parameter-search jobs for the two classes of 'curProcess' until its
 * C parameter exceeds 17000.
 *
 * A non-owning list is built with the examples from the member 'images' that
 * belong to Class0 () or Class1 () of 'curProcess'.  Then, repeatedly, a
 * ParameterJob is created from the current parameters of 'curProcess' and handed
 * to ProcessNextJob ().  NOTE(review): the loop only terminates if
 * ProcessNextJob () advances curProcess->CParm () - confirm.
 *
 * On non-Windows builds the function waits before each job while the system
 * load average is high: waiting starts above 7.2 and continues until the load
 * drops to 6.6 or less.  'weWereSleeping' is set while waiting.
 *
 * @param curProcess  Process record supplying the two classes, job id and the
 *                    current C, Gamma and A parameters.
 */
void  ParameterSearchBinaryCombo::ProcessTwoClassCombo (ParameterProcessPtr  curProcess)
{
  log.Level (10) << "ParameterSearchBinaryCombo::ProcessTwoClassCombo - Started" << endl;

  ImageFeaturesListPtr  twoClassImages = new ImageFeaturesList (false, log, 5000);
  {
    ImageFeaturesPtr  image = NULL;
    ImageFeaturesListIterator  idx (images);
    for  (idx.Reset ();  (image = idx.CurPtr ()) != NULL;  ++idx)
    {
      if  ((image->ImageClass () == curProcess->Class0 ())  ||
           (image->ImageClass () == curProcess->Class1 ())
          )
      {
        twoClassImages->PushOnBack (image);
      }
    }
  }

  ImageClassListPtr  twoClasses = new ImageClassList (false, log);
  twoClasses->PushOnBack (curProcess->Class0 ());
  twoClasses->PushOnBack (curProcess->Class1 ());

  bool  weAreAllDone = false;

  while  (!weAreAllDone)
  {
    #ifndef  WIN32
    // Two thresholds: start waiting above the first, resume once the load is at
    // or below the second.
    const double  loadAvgStartWaiting = 7.2;
    const double  loadAvgKeepWaiting  = 6.6;

    double  la;
    if  (LoadAverage () > loadAvgStartWaiting)
    {
      while  ((la = LoadAverage ()) > loadAvgKeepWaiting)
      {
        // The system is busy; to be fair to everyone else go to sleep for a while.
        // The random part keeps several processes from waking at the same moment.
        int  randSleepTime = rand () % 20;
        int  sleepSeconds  = 20 + randSleepTime;

        log.Level (20) << "ParameterSearchBinaryCombo::ProcessTwoClassCombo - Load Average[" << la
                       << "] is greater than " << loadAvgKeepWaiting << "."
                       << endl;
        log.Level (20) << " Will go to sleep for " << sleepSeconds << " seconds and try again." << endl;

        weWereSleeping = true;
        sleep (sleepSeconds);
      }
    }

    weWereSleeping = false;
    #endif

    ParameterJobPtr  curJob = new ParameterJob (log,
                                                ourProcessNum,
                                                curProcess->JobId (),
                                                curProcess->CParm (),
                                                curProcess->GammaParm (),
                                                curProcess->AParm ()
                                               );

    ProcessNextJob (curProcess, curJob, twoClassImages, twoClasses);

    delete  curJob;
    curJob = NULL;

    if  (curProcess->CParm () > 17000)
    {
      weAreAllDone = true;
    }
  }

  delete  twoClasses;
  delete  twoClassImages;
}  /* ProcessTwoClassCombo */
/**
 * Compares the predicted class of every example in 'examplesToGrade' with the
 * class of the ground-truth example that has the same root file name, and
 * reports one confusion matrix per class-hierarchy level.
 *
 * Examples with no ground-truth match are graded against the class returned by
 * mlClasses->GetUnKnownClass ().  For each hierarchy level the confusion matrix
 * is written tab delimited to 'report' and a SummaryRec referring to it is
 * appended to 'resultsSummary'.
 *
 * NOTE(review): the merged class list and the per-level class lists created
 * here are not deleted in this function; confirm whether ConfusionMatrix2 keeps
 * references to them before freeing them.
 *
 * @param examplesToGrade  Examples carrying the predicted classes.
 * @param groundTruth      Examples carrying the true classes; sorted by root
 *                         name as a side effect.
 */
void  GradeClassification::GradeExamplesAgainstGroundTruth (FeatureVectorListPtr  examplesToGrade,
                                                            FeatureVectorListPtr  groundTruth
                                                           )
{
  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth" << endl;

  groundTruth->SortByRootName ();

  MLClassConstPtr  unknownClass = mlClasses->GetUnKnownClass ();

  // Merge the classes that occur in either list into a single list.
  MLClassConstListPtr  gradedClasses = examplesToGrade->ExtractMLClassConstList ();
  MLClassConstListPtr  truthClasses  = groundTruth->ExtractMLClassConstList ();
  MLClassConstListPtr  classes       = MLClassConstList::MergeClassList (*gradedClasses, *truthClasses);
  delete  gradedClasses;
  delete  truthClasses;

  // Deepest hierarchy level found among the merged classes.
  uint16  maxHierarchialLevel = 0;
  for  (MLClassConstList::iterator classIt = classes->begin ();  classIt != classes->end ();  classIt++)
  {
    MLClassConstPtr  mlClass = *classIt;
    maxHierarchialLevel = Max (maxHierarchialLevel, mlClass->NumHierarchialLevels ());
  }

  // Create a ConfusionMatrix2 object for each possible level of hierarchy.  The 'resultsSummary' vector
  // will end up owning these instances and the destructor will be responsible for deleting them.
  vector<ConfusionMatrix2Ptr>  cmByLevel;
  for  (uint level = 0;  level < maxHierarchialLevel;  level++)
  {
    MLClassConstListPtr  classesThisLevel = classes->ExtractListOfClassesForAGivenHierarchialLevel (level);
    ConfusionMatrix2Ptr  levelMatrix = new ConfusionMatrix2 (*classesThisLevel);
    cmByLevel.push_back (levelMatrix);
  }

  // Matrix over all classes regardless of level; it is updated below but not printed.
  ConfusionMatrix2  overallMatrix (*classes);

  for  (ImageFeaturesList::iterator exampleIt = examplesToGrade->begin ();  exampleIt != examplesToGrade->end ();  exampleIt++)
  {
    ImageFeaturesPtr  exampleToGrade = *exampleIt;
    MLClassConstPtr   predictedClass = exampleToGrade->MLClass ();
    float             origSize       = exampleToGrade->OrigSize ();
    float             probability    = exampleToGrade->Probability ();

    KKStr  rootName = osGetRootName (exampleToGrade->ImageFileName ());
    FeatureVectorPtr  groundTruthExample = groundTruth->LookUpByRootName (rootName);

    // Fall back to the unknown class when there is no matching ground-truth example.
    MLClassConstPtr  groundTruthClass = unknownClass;
    if  (groundTruthExample)
      groundTruthClass = groundTruthExample->MLClass ();

    overallMatrix.Increment (groundTruthClass, predictedClass, (int)origSize, probability, log);

    for  (uint level = 0;  level < maxHierarchialLevel;  level++)
    {
      MLClassConstPtr  truthThisLevel     = groundTruthClass->MLClassForGivenHierarchialLevel (level);
      MLClassConstPtr  predictedThisLevel = predictedClass->MLClassForGivenHierarchialLevel (level);

      cmByLevel[level]->Increment (truthThisLevel, predictedThisLevel, (int)origSize, probability, log);
    }
  }

  //overallMatrix.PrintTrueFalsePositivesTabDelimited (*report);

  // Report the hierarchical results, one confusion matrix per level.
  for  (uint level = 0;  level < maxHierarchialLevel;  level++)
  {
    log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth Printing Level[" << level << "]" << endl;

    *report << endl << endl << endl
            << "Confusion Matrix Training Level[" << maxHierarchialLevel << "] Preduction Level[" << (level + 1) << "]" << endl
            << endl;

    cmByLevel[level]->PrintConfusionMatrixTabDelimited (*report);

    resultsSummary.push_back (SummaryRec (maxHierarchialLevel, level + 1, cmByLevel[level]));
  }
  *report << endl << endl << endl;

  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth Exiting" << endl;
}  /* GradeExamplesAgainstGroundTruth */
void OurNeighbors::LookForNeighbors () { ImageFeaturesListPtr currentImageFeatures = NULL; KKStr relativeDir; log.Level (10) << "OurNeighbors::LookForNeighbors" << endl; /* * create an image feature list from the source directory that corresponds to the * current locations of the actual image files. Where possible, the feature data * file will be used. However, if an image has been moved it's features will have * to be recalculated (which is handled by the function call) and we'll have to * look in the origImageFeatures list for the original predicted class. We must do * this since the predicted class for an image file should NEVER change between * classification runs. */ FeatureFileIOPices::Driver ()->LoadInSubDirectoryTree (PicesFVProducerFactory::Factory (&log), sourceRootDirPath, *mlClasses, false, // useDirectoryNameForClassName, DB (), cancelFlag, false, // rewiteRootFeatureFile log ); currentImageFeatures->FixSipperFileScanLineAndColFields (); lastScanLine = LastScanLine (*currentImageFeatures); { // Make sure Class name matches subdirectory that Example was found in. 
ImageFeaturesList::iterator idx; for (idx = currentImageFeatures->begin (); idx != currentImageFeatures->end (); idx++) { ImageFeaturesPtr image = *idx; MLClassPtr mlClass = DetermineClassFromFileName (image->ExampleFileName ()); if (mlClass) image->MLClass (mlClass); } } if (excludedClasses) { if (excludedClasses->QueueSize () > 0) RemoveExcludedClasses (currentImageFeatures); } //if (randomizeLocations) // RandomizeLocations (*currentImageFeatures); if (!fromPlanktonName.Empty ()) { fromPlankton = mlClasses->LookUpByName (fromPlanktonName); if (!fromPlankton) { log.Level (-1) << endl << endl << endl << "LookForNeighbors ****** ERROR *******" << endl << endl << "No images that are of PlanktonName[" << fromPlanktonName << "]" << endl << endl; osWaitForEnter (); exit (-1); } } // We will now build Neighbor List NeighborList neighbors (*currentImageFeatures, log); neighbors.FindNearestNeighbors (neighborType, fromPlankton); double allClassesMeanNND = 0.0f; double allClassesMeanStdDev = 0.0f; double allClassesMinDist = 0.0f; double allClassesMaxDist = 0.0f; neighbors.CalcStatistics (allClassesMeanNND, allClassesMeanStdDev, allClassesMinDist, allClassesMaxDist ); if (fromPlankton) neighbors.ReportClassRowRestricted (mlClasses, *report, fromPlankton); else neighbors.ReportClassRow (mlClasses, *report); neighbors.ReportClassNeighbor (mlClasses, *report); if (randomizeLocations) RandomReport (*currentImageFeatures); log.Level (10) << "OurNeighbors::LookForNeighbors Exiting" << endl; } /* LookForNeighbors */
void DeleteDuplicateImages::DeleteImages () { PrintStandardHeaderInfo (*r); *r << "Root Directory :" << rootDir << "]" << endl; *r << "Report File :" << reportFileName << "]" << endl; *r << endl; ImageFeaturesListPtr images = FeatureFileIOPices::Driver ()->LoadInSubDirectoryTree (PicesFVProducerFactory::Factory (&log), rootDir, mlClasses, false, // false = DONT _useDirectoryNameForClassName DB (), cancelFlag, false, // false = DONT _rewiteRootFeatureFile log ); KKStr rootFeatureFileName = osAddSlash (rootDir) + osGetRootName (rootDir) + ".data"; //images->WriteImageFeaturesToFile (rootFeatureFileName, RawFormat, FeatureNumList::AllFeatures ()); bool successful = false; uint numExamplesWritten = 0; FeatureFileIOPices::Driver ()->SaveFeatureFile (rootFeatureFileName, images->AllFeatures (), *images, numExamplesWritten, cancelFlag, successful, log); *r << "Class Statistics" << endl; *r << images->ClassStatisticsStr (); *r << endl; // We can now look for duplicates in list. DuplicateImagesPtr dupLocator = new DuplicateImages (images, log); DuplicateImageListPtr setsOfDupImages = dupLocator->DupExamples (); *r << "Number of Duplicate Sets [" << setsOfDupImages->QueueSize () << "]" << endl; *r << endl; int dupsDeleted = 0; int dupsFailedToDelete = 0; DuplicateImageList::iterator dsIDX; for (dsIDX = setsOfDupImages->begin (); dsIDX != setsOfDupImages->end (); ++dsIDX) { DuplicateImagePtr dupSet = *dsIDX; ImageFeaturesListPtr dups = new ImageFeaturesList (*(dupSet->DuplicatedImages ()), false); ImageFeaturesPtr imageToKeep = (ImageFeaturesPtr)dupSet->ExampleWithSmallestScanLine (); ImageFeaturesPtr firstImage = NULL; bool allTheSameClass = dupSet->AllTheSameClass (); if (!allTheSameClass) { imageToKeep = NULL; *r << endl << "Images in this set of duplicates are of different classes, will delete all of them." << endl; } else { *r << endl << "Duplicate Class[" << imageToKeep->MLClassName () << "] FileName[" << imageToKeep->ExampleFileName () << "] Keeping." 
<< endl; } for (ImageFeaturesList::iterator iIDX = dups->begin (); iIDX != dups->end (); iIDX++) { ImageFeaturesPtr i = *iIDX; if (!firstImage) firstImage = i; if (i != imageToKeep) { KKStr fullFileName = osAddSlash (rootDir) + i->ExampleFileName (); *r << " Class[" << i->MLClassName () << "] FileName[" << i->ExampleFileName () << "] "; if (!duplicateImagesDir.Empty ()) { // We will move image to duplicates directory int dupCount = 0; KKStr newFileName; do { if (imageToKeep) { newFileName = osAddSlash (duplicateImagesDir) + osGetRootName (imageToKeep->ExampleFileName ()) + "-" + osGetRootName (i->ExampleFileName ()); } else { newFileName = osAddSlash (duplicateImagesDir) + osGetRootName (firstImage->ExampleFileName ()) + "-" + osGetRootName (i->ExampleFileName ()); } if (dupCount > 0) newFileName << "-" << dupCount; newFileName << ".bmp"; dupCount++; } while (osFileExists (newFileName)); osCopyFile (fullFileName, newFileName); *r << "moved to[" << newFileName << "] "; cout << "Coping [" << fullFileName << "] to Duplicates List" << endl; } bool deleted = osDeleteFile (fullFileName); if (deleted) { *r << "* DELETED *" << endl; dupsDeleted++; images->DeleteEntry (i); } else { *r << "--- Failed To Delete ---" << endl; dupsFailedToDelete++; } } } } *r << endl; *r << "Duplicates Successfully Deleted[" << dupsDeleted << "]" << endl; *r << "Duplicates Failed to Delete [" << dupsFailedToDelete << "]" << endl; *r << endl; // We will write out the feature file again with the dup images removed. //images->WriteImageFeaturesToFile (rootFeatureFileName, RawFormat, FeatureNumList::AllFeatures ()); numExamplesWritten= 0; FeatureFileIOPices::Driver ()->SaveFeatureFile (rootFeatureFileName, images->AllFeatures (), *images, numExamplesWritten, cancelFlag, successful, log); delete dupLocator; dupLocator = NULL; delete images; images = NULL; } /* DeleteDuplicateImages */