/**
 * Collects the distinct classes referenced by the examples in this list.
 *
 * Walks the examples in order; an example whose class equals the class of the
 * previously examined example is skipped without a lookup.  Because the
 * "previous" class starts out as NULL, examples at the head of the list that
 * have a NULL class are not recorded.
 *
 * @return A list allocated with 'new' holding each distinct class once, in the
 *         iteration order of the pointer-keyed map used for de-duplication.
 */
MLClassListPtr  FeatureVectorList::ExtractListOfClasses ()  const
{
  typedef  map<MLClassPtr,MLClassPtr>  ClassIndex;

  ClassIndex  distinctClasses;
  MLClassPtr  previousClass = NULL;

  for  (FeatureVectorList::const_iterator it = begin ();  it != end ();  ++it)
  {
    FeatureVectorPtr  example      = *it;
    MLClassPtr        currentClass = example->MLClass ();

    // Runs of the same class are common; only a change of class costs a map lookup.
    if  (currentClass != previousClass)
    {
      previousClass = currentClass;
      if  (distinctClasses.find (currentClass) == distinctClasses.end ())
        distinctClasses.insert (pair<MLClassPtr,MLClassPtr> (currentClass, currentClass));
    }
  }

  MLClassListPtr  result = new MLClassList ();
  for  (ClassIndex::iterator entry = distinctClasses.begin ();  entry != distinctClasses.end ();  ++entry)
    result->PushOnBack (entry->first);

  return  result;
}  /* ExtractListOfClasses */
/**
 * Produces an encoded copy of 'srcData'.
 *
 * When every field is numeric the list is simply duplicated.  Otherwise each
 * example is run through EncodeAExample and the resulting node array, which is
 * terminated by a node whose index is -1, is copied into a new FeatureVector
 * that carries the source example's class.
 *
 * @param srcData  Examples to encode.
 * @return A newly allocated list built with 'destFileDesc' and owner flag 'true'
 *         (or the result of DuplicateListAndContents for all-numeric data).
 */
FeatureVectorListPtr  FeatureEncoder::CreateEncodedFeatureVector (FeatureVectorList&  srcData)
{
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  result = new FeatureVectorList (destFileDesc, true);

  for  (FeatureVectorList::iterator it = srcData.begin ();  it != srcData.end ();  ++it)
  {
    FeatureVectorPtr  original = *it;
    XSpacePtr         nodes    = EncodeAExample (original);

    FeatureVectorPtr  encoded = new FeatureVector (codedNumOfFeatures);
    for  (kkint32 n = 0;  nodes[n].index != -1;  ++n)
      encoded->AddFeatureData (nodes[n].index, (float)nodes[n].value);

    encoded->MLClass (original->MLClass ());
    result->PushOnBack (encoded);

    // NOTE(review): released with scalar 'delete'; confirm this matches how
    // EncodeAExample allocates the node array.
    delete  nodes;
    nodes = NULL;
  }

  return  result;
}  /* CreateEncodedFeatureVector */
/**
 * Records a new feature count on the list and forwards it to every example
 * the list currently holds.
 *
 * @param newNumOfFeatures  Feature count to apply to the list and its examples.
 */
void  FeatureVectorList::ResetNumOfFeaturs (kkint32  newNumOfFeatures)
{
  numOfFeatures = newNumOfFeatures;

  iterator  it = begin ();
  while  (it != end ())
  {
    FeatureVectorPtr  example = *it;
    example->ResetNumOfFeatures (newNumOfFeatures);
    ++it;
  }
}  /* ResetNumOfFeaturs */
/**
 * Estimates the memory used by this list.
 *
 * @return sizeof (FeatureVectorList) plus the estimate reported by 'fileName'
 *         plus the estimate reported by each example reachable through the list.
 */
kkMemSize  FeatureVectorList::MemoryConsumedEstimated ()  const
{
  kkMemSize  total = sizeof (FeatureVectorList) + fileName.MemoryConsumedEstimated ();

  for  (FeatureVectorList::const_iterator it = begin ();  it != end ();  ++it)
  {
    FeatureVectorPtr  example = *it;
    total += example->MemoryConsumedEstimated ();
  }

  return  total;
}  /* MemoryConsumedEstimated */
void Strip () { bool cancelFlag = false; bool successful = false; bool changesMade = false; RunLog log; FeatureFileIOPtr driver = FeatureFileIO::FileFormatFromStr ("C45"); MLClassList mlClasses; FeatureVectorListPtr data = driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data", mlClasses, -1, cancelFlag, successful, changesMade, log ); FeatureVectorListPtr stripped = new FeatureVectorList (data->FileDesc (), false); FeatureVectorList::const_iterator idx; for (idx = data->begin (); idx != data->end (); ++idx) { FeatureVectorPtr fv = *idx; KKStr fn = fv->ExampleFileName (); if (fn.StartsWith ("SML") || (fn.StartsWith ("SMP"))) { } else { stripped->PushOnBack (fv); } } kkuint32 numExamplesWritten = 90; driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data", data->AllFeatures (), *stripped, numExamplesWritten, cancelFlag, successful, log ); }
/**
 * Replaces the file description of this list and re-synchronises the feature
 * count of the list and of every example it holds with the new description.
 *
 * The precondition check now tests the incoming 'newFileDesc'; it previously
 * tested the current member 'fileDesc', so a NULL argument was not rejected
 * and was dereferenced on the following line.  The message now also names the
 * correct class.
 *
 * @param newFileDesc  New file description; must not be NULL.
 */
void  FeatureVectorList::ResetFileDesc (FileDescConstPtr  newFileDesc)
{
  KKCheck (newFileDesc, "FeatureVectorList::ResetFileDesc ***ERROR*** newFileDesc == NULL.")

  fileDesc      = newFileDesc;
  numOfFeatures = fileDesc->NumOfFields ();

  for  (iterator idx = begin ();  idx != end ();  ++idx)
  {
    FeatureVectorPtr  example = *idx;
    example->ResetNumOfFeatures (numOfFeatures);
  }
}  /* ResetFileDesc */
/**
 * Lexicographically compares the feature data of two examples.
 *
 * @param i1  Left-hand example; its feature count bounds the comparison.
 * @param i2  Right-hand example.
 * @return -1 if the first differing feature is smaller in 'i1', 1 if it is
 *         larger, 0 if no feature differs.
 *
 * NOTE(review): 'i2' is read up to i1->NumOfFeatures () entries; this assumes
 * both examples hold at least that many features - confirm with callers.
 */
kkint32  ImageFeaturesNodeKey::CompareTwoExamples (const FeatureVectorPtr  i1,
                                                   const FeatureVectorPtr  i2
                                                  )  const
{
  const float*  left  = i1->FeatureDataConst ();
  const float*  right = i2->FeatureDataConst ();

  kkint32  pos = 0;
  while  (pos < i1->NumOfFeatures ())
  {
    if  (left[pos] < right[pos])
      return -1;

    if  (left[pos] > right[pos])
      return 1;

    ++pos;
  }

  return 0;
}  /* CompareTwoExamples */
/**
 * Builds an encoded copy of 'srcData'.
 *
 * All-numeric data is returned as a duplicate of the source list.  Otherwise
 * each example is encoded with EncodeAExample, given the class of its source
 * example, and appended to a new list created with 'encodedFileDesc' and owner
 * flag 'true'.
 *
 * @param srcData  Examples to encode.
 * @return Newly allocated list of encoded examples.
 */
FeatureVectorListPtr  FeatureEncoder2::EncodedFeatureVectorList (const FeatureVectorList&  srcData)  const
{
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  result = new FeatureVectorList (encodedFileDesc, true);

  FeatureVectorList::const_iterator  it = srcData.begin ();
  while  (it != srcData.end ())
  {
    FeatureVectorPtr  original = *it;
    FeatureVectorPtr  encoded  = EncodeAExample (original);
    encoded->MLClass (original->MLClass ());
    result->PushOnBack (encoded);
    ++it;
  }

  return  result;
}  /* EncodedFeatureVectorList */
/**
 * Counts how many XSpace nodes are needed to hold the examples in 'src'.
 *
 * Per example and per source feature:
 *  - FeAsIs / FeScale : one node when the feature value is non-zero;
 *  - FeBinary         : one node when the value, truncated to an integer, lies
 *                       in [0, cardinalityDest[x]) - i.e. when exactly one of
 *                       the binary indicator positions would be non-zero.
 * One extra node is counted per example (presumably the terminating node that
 * ends each example's node run - TODO confirm).
 *
 * @param src  Examples to size.
 * @return Total number of nodes counted over all examples.
 */
kkint32  FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr  src)  const
{
  kkint32  nodeCount = 0;

  for  (FeatureVectorList::const_iterator it = src->begin ();  it != src->end ();  ++it)
  {
    FeatureVectorPtr  example = *it;
    const float*      values  = example->FeatureData ();

    for  (kkint32 f = 0;  f < numOfFeatures;  ++f)
    {
      const float  value = values[srcFeatureNums[f]];

      switch  (destWhatToDo[f])
      {
      case  FeWhatToDo::FeAsIs:
      case  FeWhatToDo::FeScale:
        if  (value != 0.0f)
          ++nodeCount;
        break;

      case  FeWhatToDo::FeBinary:
        {
          // At most one indicator position can match the truncated value.
          const kkint32  category = (kkint32)value;
          if  ((category >= 0)  &&  (category < cardinalityDest[f]))
            ++nodeCount;
        }
        break;
      }
    }

    ++nodeCount;
  }

  return  nodeCount;
}  /* DetermineNumberOfNeededXspaceNodes */
/**
 * Removes every example that reports missing data.
 *
 * The examples are first gathered into a separate vector and removed in a
 * second pass, so the list is not modified while it is being iterated.  When
 * this list reports itself as owner, each removed example is also deleted.
 *
 * @param log  Receives a level-50 trace line naming this method.
 */
void  FeatureVectorList::RemoveEntriesWithMissingFeatures (RunLog&  log)
{
  log.Level (50) << "FeatureVectorList::RemoveEntriesWithMissingFeatures" << endl;

  vector<FeatureVectorPtr>  incomplete;
  for  (iterator it = begin ();  it != end ();  ++it)
  {
    FeatureVectorPtr  candidate = *it;
    if  (candidate->MissingData ())
      incomplete.push_back (candidate);
  }

  for  (vector<FeatureVectorPtr>::iterator victim = incomplete.begin ();  victim != incomplete.end ();  ++victim)
  {
    FeatureVectorPtr  example = *victim;
    DeleteEntry (example);
    if  (Owner ())
      delete  example;
  }
}  /* RemoveEntriesWithMissingFeatures */
void SplitForestCoverFile () { RunLog log; MLClassConstList mlClasses; bool cancelFlag = false; bool successful; bool changesMade = false; FeatureVectorListPtr images = FeatureFileIOC45::Driver ()->LoadFeatureFile ("covtype_alpha.data", mlClasses, -1, cancelFlag, successful, changesMade, log); FileDescPtr fileDesc = images->FileDesc (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); MLClassConstPtr lodgepolePine = mlClasses.GetMLClassPtr ("Lodgepole_Pine"); MLClassConstPtr spruceFir = mlClasses.GetMLClassPtr ("Spruce_Fir"); int lodgepolePineTrainCount = 0; int spruceFirTrainCount = 0; FeatureVectorList::iterator idx; FeatureVectorListPtr trainData = new FeatureVectorList (fileDesc, false, log, 10000); FeatureVectorListPtr testData = new FeatureVectorList (fileDesc, false, log, 10000); int c = 0; for (idx = images->begin (); idx != images->end (); idx++) { FeatureVectorPtr i = *idx; if (c % 5000) cout << c << endl; if (i->MLClass () == lodgepolePine) { if (lodgepolePineTrainCount < 56404) { trainData->PushOnBack (i); lodgepolePineTrainCount++; } else { testData->PushOnBack (i); } } else if (i->MLClass () == spruceFir) { if (spruceFirTrainCount < 42480) { trainData->PushOnBack (i); spruceFirTrainCount++; } else { testData->PushOnBack (i); } } c++; } KKU::uint numExamplesWritten = 0; FeatureFileIOC45::Driver ()->SaveFeatureFile ("CovType_TwoClass.data", trainData->AllFeatures (), *trainData, numExamplesWritten, cancelFlag, successful, log ); FeatureFileIOC45::Driver ()->SaveFeatureFile ("CovType_TwoClass.test", testData->AllFeatures (), *testData, numExamplesWritten, cancelFlag, successful, log ); delete trainData; delete testData; delete images; } /* SplitForestCoverFile */
/**
 * Converts the loaded feature data and writes it to the destination file.
 *
 * Steps, in order:
 *  1. Examples whose features are all 0.0 are separated from the rest; 'data'
 *     is replaced by a new list holding only the remaining examples.
 *  2. The number of all-zero examples and class statistics are written to
 *     '*report' (with per-class feature statistics when 'statistics' is set).
 *  3. When 'enumerateClasses' is set, every example's class is replaced by a
 *     class whose name is the formatted index of the original class in the
 *     name-sorted class list.
 *  4. The data is either passed to EncodeFeatureData () or saved through
 *     'destFileFormat'.
 */
void FeatureFileConverter::ConvertData ()
{
  cout << endl << "Saving [" << data->QueueSize () << "] records to data file[" << destFileName << "]" << endl << endl;

  bool successful = false;
  int numOfFeatures = data->NumOfFeatures ();
  int numWithAllZeros = 0;   // How many examples have 0 for every feature.

  {
    FeatureVectorListPtr newData = new FeatureVectorList (srcFileDesc, true, log);

    // Examples that have zero for all features are stored in the "zeroData"
    // container.  This way they can be deleted from memory later and not result
    // in a memory leak.  This has to be done because they are not going to
    // be placed into newData, which is going to become the owner of all the
    // other examples.
    FeatureVectorListPtr zeroData = new FeatureVectorList (srcFileDesc, true, log);

    FeatureVectorList::iterator idx;
    for (idx = data->begin (); idx != data->end (); idx++)
    {
      FeatureVectorPtr i = *idx;
      bool allZeros = true;
      // Stops at the first non-zero feature; 'allZeros' stays true when there
      // are no features at all.
      for (int featureNum = 0; featureNum < numOfFeatures; featureNum++)
      {
        allZeros = (i->FeatureData (featureNum) == 0.0f);
        if (!allZeros)
          break;
      }

      if (allZeros)
      {
        numWithAllZeros++;
        zeroData->PushOnBack (i);
      }
      else
      {
        newData->PushOnBack (i);
      }
    }

    // Ownership is switched off on the old list before it is deleted, because
    // its examples now belong to either 'newData' or 'zeroData'.
    data->Owner (false);
    delete data;
    data = newData;
    delete zeroData;
  }

  *report << endl << endl << "Num of data items with all zero feature data [" << numWithAllZeros << "]" << endl << endl;
  *report << data->ClassStatisticsStr ();
  *report << endl << endl << endl;

  if (statistics)
  {
    *report << "Class Statistics:" << endl;
    data->PrintClassStatistics (*report);
    *report << endl << endl;
    *report << "Feature Statistics:" << endl;
    data->PrintFeatureStatisticsByClass (*report);
  }

  if (enumerateClasses)
  {
    // We are going to change the names of the classes to numbers enumerated by class name.
    MLClassConstListPtr mlClasses = data->ExtractMLClassConstList ();
    mlClasses->SortByName ();
    MLClassConstListPtr newClassNames = new MLClassConstList ();
    int classIdx = 0;
    MLClassConstList::iterator idx;
    for (idx = mlClasses->begin (); idx != mlClasses->end (); idx++)
    {
      KKStr newName = StrFormatInt (classIdx, "zzz0");
      // The returned pointer is not used here; the call is kept for its effect
      // on 'newClassNames' (presumably it adds the class when it is not already
      // in the list - TODO confirm).
      MLClassConstPtr mlClass = newClassNames->GetMLClassPtr (newName);
      classIdx++;
    }

    // Each example is remapped by position: the index of its class in
    // 'mlClasses' selects the entry at the same index in 'newClassNames'.
    FeatureVectorList::iterator idx2;
    for (idx2 = data->begin (); idx2 != data->end (); idx2++)
    {
      MLClassConstPtr c = (*idx2)->MLClass ();
      int classIndex = mlClasses->PtrToIdx (c);
      (*idx2)->MLClass (newClassNames->IdxToPtr (classIndex));
    }

    // NOTE(review): the examples still point at classes obtained through
    // 'newClassNames' after it is deleted; this assumes the list does not own
    // the class objects - confirm.
    delete mlClasses; mlClasses = NULL;
    delete newClassNames; newClassNames = NULL;
  }

  if (encodeFeatureData)
  {
    EncodeFeatureData ();
  }
  else
  {
    uint numExamplesWritten = 0;
    destFileFormat->SaveFeatureFile (destFileName, *features, *data, numExamplesWritten, cancelFlag, successful, log );
  }
} /* ConvertData */
void GradeClassification::ValidateThatBothListsHaveSameEntries (FeatureVectorList& groundTruth, FeatureVectorList& examplesToGrade, bool& theyAreTheSame ) { theyAreTheSame = true; // We will assume that they are the same to start with. int missingExamplesToGrade = 0; int missingGroundTruthExamples = 0; groundTruth.SortByRootName (); examplesToGrade.SortByRootName (); *report << endl << endl << endl << "Missing Examples To Grade" << endl << "=========================" << endl; ImageFeaturesList::iterator idx; for (idx = groundTruth.begin (); idx != groundTruth.end (); idx++) { FeatureVectorPtr groundTruthExample = *idx; KKStr rootName = osGetRootName (groundTruthExample->ImageFileName ()); FeatureVectorPtr exampleToGrade = examplesToGrade.LookUpByRootName (rootName); if (!exampleToGrade) { theyAreTheSame = false; *report << rootName << "\t" << "*** MISSING ***" << "\t" << groundTruthExample->ImageFileName () << endl; missingExamplesToGrade++; } } if (missingExamplesToGrade == 0) { *report << " *** None ***" << endl; } *report << endl << endl << endl << "Missing Ground Truth Examples" << endl << "=============================" << endl; for (idx = examplesToGrade.begin (); idx != examplesToGrade.end (); idx++) { FeatureVectorPtr exampleToGrade = *idx; KKStr rootName = osGetRootName (exampleToGrade->ImageFileName ()); FeatureVectorPtr groundTruthExample = groundTruth.LookUpByRootName (rootName); if (!groundTruthExample) { theyAreTheSame = false; *report << rootName << "\t" << "*** MISSING ***" << "\t" << exampleToGrade->ImageFileName () << "\t" << endl; missingGroundTruthExamples++; } } if (missingGroundTruthExamples == 0) { *report << " *** None ***" << endl; } } /* ValidateThatBothListsHaveSameEntries */
/**
 * Inserts 'example' at the front of the list and marks the list as unsorted.
 *
 * @param example  Example to insert; its feature count must equal the list's
 *                 'numOfFeatures' (enforced with KKCheck).
 */
void  FeatureVectorList::PushOnFront (FeatureVectorPtr  example)
{
  KKCheck (numOfFeatures == example->NumOfFeatures (),
           "FeatureVectorList::PushOnFront Mismatch numOfFeatures: " << numOfFeatures << " example->NumOfFeaturess: " << example->NumOfFeatures ())

  KKQueue<FeatureVector>::PushOnFront (example);

  // Any previously established ordering no longer holds after an insertion.
  curSortOrder = IFL_SortOrder::IFL_UnSorted;
}  /* PushOnFront */
/**
 * Grades predicted classes against ground truth and reports one confusion
 * matrix per hierarchy level.
 *
 * For every example in 'examplesToGrade' the ground-truth example with the
 * same root name is looked up; when none exists the unknown class is used as
 * the ground-truth class.  Each per-level confusion matrix is printed to
 * '*report' and appended, wrapped in a SummaryRec, to 'resultsSummary'.
 *
 * 'groundTruth' is sorted by root name as a side effect.
 *
 * @param examplesToGrade  Examples whose MLClass () is taken as the prediction.
 * @param groundTruth      Examples whose MLClass () is taken as the truth.
 */
void GradeClassification::GradeExamplesAgainstGroundTruth (FeatureVectorListPtr examplesToGrade,
                                                           FeatureVectorListPtr groundTruth
                                                          )
{
  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth" << endl;

  groundTruth->SortByRootName ();

  MLClassConstPtr unknownClass = mlClasses->GetUnKnownClass ();

  // 'classes' becomes the merge of the classes found in both lists; the two
  // intermediate lists are released as soon as the merge has been made.
  MLClassConstListPtr classes = NULL;
  {
    MLClassConstListPtr examplesToGradeClasses = examplesToGrade->ExtractMLClassConstList ();
    MLClassConstListPtr groundTruthClasses = groundTruth->ExtractMLClassConstList ();
    classes = MLClassConstList::MergeClassList (*examplesToGradeClasses, *groundTruthClasses);
    delete examplesToGradeClasses;
    delete groundTruthClasses;
  }

  // Largest number of hierarchy levels reported by any class in the merged list.
  uint16 maxHierarchialLevel = 0;
  {
    MLClassConstList::iterator idx;
    for (idx = classes->begin (); idx != classes->end (); idx++)
    {
      MLClassConstPtr c = *idx;
      maxHierarchialLevel = Max (maxHierarchialLevel, c->NumHierarchialLevels ());
    }
  }

  // Create ConfusionMatrix objects for each possible level of hierarchy.  The 'resultsSummary' vector will
  // end up owning the instances of 'ConfusionMatrix2' and the destructor will be responsible for deleting them.
  // NOTE(review): neither 'classes' nor the per-level 'classesThisLevel' lists are deleted in this
  // function; whether the confusion matrices keep referring to them is not visible here - confirm
  // before adding deletes.
  uint curLevel = 0;
  vector<ConfusionMatrix2Ptr> cmByLevel;
  for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
  {
    MLClassConstListPtr classesThisLevel = classes->ExtractListOfClassesForAGivenHierarchialLevel (curLevel);
    ConfusionMatrix2Ptr cm = new ConfusionMatrix2 (*classesThisLevel);
    cmByLevel.push_back (cm);
  }

  // Confusion matrix over the full (non-truncated) class names; it is filled
  // below but only printed by the commented-out call further down.
  ConfusionMatrix2 cm (*classes);

  ImageFeaturesList::iterator idx;

  for (idx = examplesToGrade->begin (); idx != examplesToGrade->end (); idx++)
  {
    ImageFeaturesPtr exampleToGrade = *idx;
    MLClassConstPtr predictedClass = exampleToGrade->MLClass ();
    float origSize = exampleToGrade->OrigSize ();
    float probability = exampleToGrade->Probability ();

    KKStr rootName = osGetRootName (exampleToGrade->ImageFileName ());
    FeatureVectorPtr groundTruthExample = groundTruth->LookUpByRootName (rootName);
    // Examples with no ground-truth counterpart are graded against the unknown class.
    MLClassConstPtr groundTruthClass = unknownClass;
    if (groundTruthExample)
      groundTruthClass = groundTruthExample->MLClass ();

    cm.Increment (groundTruthClass, predictedClass, (int)origSize, probability, log);

    // Record the same outcome at every hierarchy level, using each class's
    // representative for that level.
    for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
    {
      MLClassConstPtr groundTruthClasssThisLevel = groundTruthClass->MLClassForGivenHierarchialLevel (curLevel);
      MLClassConstPtr predictedClassThisLevel = predictedClass->MLClassForGivenHierarchialLevel (curLevel);
      cmByLevel[curLevel]->Increment (groundTruthClasssThisLevel, predictedClassThisLevel, (int)origSize, probability, log);
    }
  }

  //cm.PrintTrueFalsePositivesTabDelimited (*report);

  {
    // Report hierarchical results; levels are printed 1-based.
    for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
    {
      log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth Printing Level[" << curLevel << "]" << endl;
      *report << endl << endl << endl << "Confusion Matrix Training Level[" << maxHierarchialLevel << "] Preduction Level[" << (curLevel + 1) << "]" << endl << endl;
      cmByLevel[curLevel]->PrintConfusionMatrixTabDelimited (*report);
      resultsSummary.push_back (SummaryRec (maxHierarchialLevel, curLevel + 1, cmByLevel[curLevel]));
    }
    *report << endl << endl << endl;
  }

  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth Exiting" << endl;
} /* GradeExamplesAgainstGroundTruth */
/**
 * Computes the feature vector of a gray-scale image.
 *
 * The image is reduced by an integer multiple until its bounding rectangle
 * holds no more than 'totPixsForMorphOps' pixels; moments, morphological
 * opening/closing areas, convex-hull area, an intensity histogram and
 * dark-spot counts are then computed on the (possibly reduced) image and
 * stored in the new feature vector.
 *
 * Changes from the previous version:
 *  - the three row-pointer tables are allocated with 'new[]' and are now
 *    released with 'delete[]' instead of scalar 'delete';
 *  - the feature vector allocated at the top is deleted before returning NULL
 *    when 'ReductionByMultiple' throws; it used to leak.
 * No feature computation has been altered.
 *
 * @param srcImage              Image to extract features from.
 * @param knownClass            Class assigned to the resulting feature vector.
 * @param intermediateImages    When not NULL, intermediate rasters are saved
 *                              into it through SaveIntermediateImage.
 * @param priorReductionFactor  Reduction already applied to 'srcImage'; used to
 *                              scale size-related features.
 * @param runLog                Receives the error message when reduction fails.
 * @return Feature vector allocated with 'new'; NULL when reduction throws.
 *         When the area before reduction is below 20, every feature is set to
 *         9999999 and the vector is returned immediately.
 */
FeatureVectorPtr  GrayScaleImagesFVProducer::ComputeFeatureVector (const Raster&     srcImage,
                                                                   const MLClassPtr  knownClass,
                                                                   RasterListPtr     intermediateImages,
                                                                   float             priorReductionFactor,
                                                                   RunLog&           runLog
                                                                  )
{
  FeatureVectorPtr  fv = new FeatureVector (maxNumOfFeatures);
  fv->MLClass (knownClass);
  float*  featureData = fv->FeatureDataAlter ();

  fv->Version (Version ());

  kkint32  areaBeforeReduction = 0;
  float    weighedSizeBeforeReduction = 0.0f;

  kkint32  row = 0;

  kkuint32  intensityHistBuckets[8];
  srcImage.CalcAreaAndIntensityFeatures (areaBeforeReduction,
                                         weighedSizeBeforeReduction,
                                         intensityHistBuckets
                                        );

  kkint32  srcHeight = srcImage.Height ();
  kkint32  srcWidth  = srcImage.Width ();

  kkint32  reducedHeight = srcHeight;
  kkint32  reducedWidth  = srcWidth;

  kkint32  reducedSquareArea = reducedHeight * reducedWidth;

  // Smallest integer multiple that brings the image within 'totPixsForMorphOps' pixels.
  kkint32  reductionMultiple = 1;
  while  (reducedSquareArea > totPixsForMorphOps)
  {
    ++reductionMultiple;
    reducedHeight = (srcHeight + reductionMultiple - 1) / reductionMultiple;
    reducedWidth  = (srcWidth  + reductionMultiple - 1) / reductionMultiple;
    reducedSquareArea = reducedHeight * reducedWidth;
  }

  float  totalReductionMultiple        = priorReductionFactor * (float)reductionMultiple;
  float  totalReductionMultipleSquared = totalReductionMultiple * totalReductionMultiple;

  // The row tables are arrays, so they must be released with the array form of delete.
  delete[]  workRaster1Rows;  workRaster1Rows = new uchar*[reducedHeight];
  delete[]  workRaster2Rows;  workRaster2Rows = new uchar*[reducedHeight];
  delete[]  workRaster3Rows;  workRaster3Rows = new uchar*[reducedHeight];

  // Point each row at its slice of the corresponding pre-existing work area.
  uchar*  wp1 = workRaster1Area;
  uchar*  wp2 = workRaster2Area;
  uchar*  wp3 = workRaster3Area;

  for  (row = 0;  row < reducedHeight;  ++row)
  {
    workRaster1Rows[row] = wp1;
    workRaster2Rows[row] = wp2;
    workRaster3Rows[row] = wp3;
    wp1 += reducedWidth;
    wp2 += reducedWidth;
    wp3 += reducedWidth;
  }

  Raster  workRaster1 (reducedHeight, reducedWidth, workRaster1Area, workRaster1Rows);
  Raster  workRaster2 (reducedHeight, reducedWidth, workRaster2Area, workRaster2Rows);
  Raster  workRaster3 (reducedHeight, reducedWidth, workRaster3Area, workRaster3Rows);

  Raster const *  initRaster = NULL;
  RasterPtr       wr1        = NULL;
  RasterPtr       wr2        = NULL;

  if  (reductionMultiple > 1)
  {
    try
    {
      ReductionByMultiple (reductionMultiple, srcImage, workRaster1);
    }
    catch  (...)
    {
      runLog.Level (-1) << endl << "GrayScaleImagesFVProducer::ComputeFeatureVector ***ERROR*** Exception calling 'ReductionByMultiple'." << endl << endl;
      delete  fv;
      fv = NULL;
      return NULL;
    }
    // The reduced image occupies work raster 1; rasters 2 and 3 serve as scratch.
    initRaster = &workRaster1;
    wr1        = &workRaster2;
    wr2        = &workRaster3;
  }
  else
  {
    initRaster = &srcImage;
    wr1        = &workRaster1;
    wr2        = &workRaster2;
  }

  if  (areaBeforeReduction < 20)
  {
    for  (kkint32 tp = 0;  tp < maxNumOfFeatures;  tp++)
      featureData[tp] = 9999999;
    return  fv;
  }

  float  convexf = 0.0;
  float  centralMoments[9];
  float  centralMomentsWeighted[9];

  kkint32  pixelCountReduced = 0;
  float    pixelCountWeightedReduced = 0.0f;

  initRaster->ComputeCentralMoments (pixelCountReduced, pixelCountWeightedReduced, centralMoments, centralMomentsWeighted);

  // Edge moments.  Presumably each morphological call writes its result into
  // the raster passed as argument - TODO confirm.
  float  edgeMomentf[9];

  initRaster->Dilation (wr1);
  wr1->Dilation (wr2);
  wr2->FillHole (wr1);
  wr1->Erosion (wr2);
  wr2->Edge (wr1);
  wr1->CentralMoments (edgeMomentf);
  if  (intermediateImages)
  {
    kkint32  numEdgePixelsFound = (kkint32)(edgeMomentf[0]);
    // NOTE(review): the moments are taken from 'wr1' but 'wr2' is what gets saved - confirm intent.
    SaveIntermediateImage (*wr2, "Edge_Image_" + StrFormatInt (numEdgePixelsFound, "ZZZZ0"), intermediateImages);
  }

  kkint32  area = (kkint32)(centralMoments[0] + 0.5f);   // Moment-0 is the same as the number of foreground pixels in example.
  float    areaF = (float)area;

  {
    ConvexHullPtr  ch = new ConvexHull ();
    ch->Filter (*initRaster, wr1);
    convexf = (float)ch->ConvexArea ();

    if  (intermediateImages)
    {
      KKStr  convexImageFileName = "ConvexHull_" + StrFormatInt ((kkint32)convexf, "ZZZZZ0");
      SaveIntermediateImage (*wr1, convexImageFileName, intermediateImages);
    }

    delete  ch;
    ch = NULL;
  }

  // Openings (erosion followed by dilation) with increasing mask sizes.
  initRaster->Erosion (wr1);
  wr1->Dilation (wr2);
  float  areaOpen3 = (float)(wr2->ForegroundPixelCount());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Opening3_" + StrFormatInt ((kkint32)areaOpen3, "ZZZZZZ0"), intermediateImages);

  initRaster->Erosion (wr1, MorphOp::MaskTypes::SQUARE5);
  wr1->Dilation (wr2, MorphOp::MaskTypes::SQUARE5);
  float  areaOpen5 = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Opening5_" + StrFormatInt ((kkint32)areaOpen5, "ZZZZZZ0"), intermediateImages);

  initRaster->Erosion (wr1, MorphOp::MaskTypes::SQUARE7);
  wr1->Dilation (wr2, MorphOp::MaskTypes::SQUARE7);
  float  areaOpen7 = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Opening7_" + StrFormatInt ((kkint32)areaOpen7, "ZZZZZZ0"), intermediateImages);

  // Unlike the other openings, this one starts from 'wr2' (the size-7 result) rather than 'initRaster'.
  wr2->Erosion (wr1, MorphOp::MaskTypes::SQUARE9);
  wr1->Dilation (wr2, MorphOp::MaskTypes::SQUARE9);
  float  areaOpen9 = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Opening9_" + StrFormatInt ((kkint32)areaOpen9, "ZZZZZZ0"), intermediateImages);

  // Closings (dilation followed by erosion).
  initRaster->Dilation (wr1);
  wr1->Erosion (wr2);
  float  areaClose3 = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Close3_" + StrFormatInt ((kkint32)areaClose3, "ZZZZZZ0"), intermediateImages);

  // NOTE(review): FillHole is given 'wr1', yet the count and the saved image are taken from 'wr2' - confirm intent.
  wr2->FillHole (wr1);
  float  tranf = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "FillHole_" + StrFormatInt ((kkint32)tranf, "ZZZZZZ0"), intermediateImages);

  initRaster->Dilation (wr1, MorphOp::MaskTypes::SQUARE5);
  wr1->Erosion (wr2, MorphOp::MaskTypes::SQUARE5);
  float  areaClose5 = (float)(wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Close5_" + StrFormatInt ((kkint32)areaClose5, "ZZZZZZ0"), intermediateImages);

  initRaster->Dilation (wr1, MorphOp::MaskTypes::SQUARE7);
  wr1->Erosion (wr2, MorphOp::MaskTypes::SQUARE7);
  float  areaClose7 = float (wr2->ForegroundPixelCount ());
  if  (intermediateImages)
    SaveIntermediateImage (*wr2, "Close7_" + StrFormatInt ((kkint32)areaClose7, "ZZZZZZ0"), intermediateImages);

  {
    featureData[SizeIndex]    = float ((float)areaBeforeReduction * priorReductionFactor);
    featureData[Moment1Index] = float (centralMoments[1]);
    featureData[Moment2Index] = float (centralMoments[2]);
    featureData[Moment3Index] = float (centralMoments[3]);
    featureData[Moment4Index] = float (centralMoments[4]);
    featureData[Moment5Index] = float (centralMoments[5]);
    featureData[Moment6Index] = float (centralMoments[6]);
    featureData[Moment7Index] = float (centralMoments[7]);
    featureData[Moment8Index] = float (centralMoments[8]);

    featureData[WeighedMoment0Index] = centralMomentsWeighted[0] * totalReductionMultiple;
    featureData[WeighedMoment1Index] = centralMomentsWeighted[1];
    featureData[WeighedMoment2Index] = centralMomentsWeighted[2];
    featureData[WeighedMoment3Index] = centralMomentsWeighted[3];
    featureData[WeighedMoment4Index] = centralMomentsWeighted[4];
    featureData[WeighedMoment5Index] = centralMomentsWeighted[5];
    featureData[WeighedMoment6Index] = centralMomentsWeighted[6];
    featureData[WeighedMoment7Index] = centralMomentsWeighted[7];
    featureData[WeighedMoment8Index] = centralMomentsWeighted[8];

    featureData[EdgeSizeIndex]    = (float)edgeMomentf[0] * totalReductionMultiple;
    featureData[EdgeMoment1Index] = (float)edgeMomentf[1];
    featureData[EdgeMoment2Index] = (float)edgeMomentf[2];
    featureData[EdgeMoment3Index] = (float)edgeMomentf[3];
    featureData[EdgeMoment4Index] = (float)edgeMomentf[4];
    featureData[EdgeMoment5Index] = (float)edgeMomentf[5];
    featureData[EdgeMoment6Index] = (float)edgeMomentf[6];
    featureData[EdgeMoment7Index] = (float)edgeMomentf[7];
    featureData[EdgeMoment8Index] = (float)edgeMomentf[8];
  }

  // NOTE(review): when 'convexf' is 0 the else branch and two assignments below divide by zero - confirm intent.
  if  ((area > convexf)  &&  (convexf > 0))
    featureData[TransparancyConvexHullIndex] = 1.0;
  else
    featureData[TransparancyConvexHullIndex] = (float)area / (float)convexf;

  featureData[TransparancyPixelCountIndex] = areaF / (float)tranf;
  featureData[TransparancyOpen3Index]      = (float)(areaF - areaOpen3)  / (float)area;
  featureData[TransparancyOpen5Index]      = (float)(areaF - areaOpen5)  / (float)area;
  featureData[TransparancyOpen7Index]      = (float)(areaF - areaOpen7)  / (float)area;
  featureData[TransparancyOpen9Index]      = (float)(areaF - areaOpen9)  / (float)area;
  featureData[TransparancyClose3Index]     = (float)(areaF - areaClose3) / (float)area;
  featureData[TransparancyClose5Index]     = (float)(areaF - areaClose5) / (float)area;
  featureData[TransparancyClose7Index]     = (float)(areaF - areaClose7) / (float)area;

  featureData[ConvexAreaIndex]       = convexf * totalReductionMultipleSquared;
  featureData[TransparancySizeIndex] = (float)(centralMoments[0] / convexf);
  featureData[TransparancyWtdIndex]  = (float)(centralMomentsWeighted[0] / convexf);

  // Histogram buckets 1..7 as a fraction of the area before reduction; bucket 0 is not stored.
  float  areaD = float (areaBeforeReduction);

  featureData[IntensityHist1Index] = ((float)intensityHistBuckets[1] / areaD);
  featureData[IntensityHist2Index] = ((float)intensityHistBuckets[2] / areaD);
  featureData[IntensityHist3Index] = ((float)intensityHistBuckets[3] / areaD);
  featureData[IntensityHist4Index] = ((float)intensityHistBuckets[4] / areaD);
  featureData[IntensityHist5Index] = ((float)intensityHistBuckets[5] / areaD);
  featureData[IntensityHist6Index] = ((float)intensityHistBuckets[6] / areaD);
  featureData[IntensityHist7Index] = ((float)intensityHistBuckets[7] / areaD);

  {
    // Dark spots: binarize on the range 200..255, erode twice, then bucket the
    // resulting blobs by floor (log3 (pixel count)), clamped to 0..9.
    BinarizeImageByThreshold (200, 255, *initRaster, *wr1);

    wr1->Erosion (wr2, MorphOp::MaskTypes::SQUARE3);
    wr2->Erosion (wr1, MorphOp::MaskTypes::SQUARE3);

    KKB::BlobListPtr  blobs = wr1->ExtractBlobs (3);

    int  darkSpotFreq[10];
    int  x;
    for  (x = 0;  x < 10;  ++x)
      darkSpotFreq[x] = 0;

    KKB::BlobList::iterator  idx;
    for  (idx = blobs->begin ();  idx != blobs->end ();  ++idx)
    {
      KKB::BlobPtr  b = idx->second;

      double  l = ::log ((double)(b->PixelCount ()));
      double  logOfThree = ::log (3.0);
      int  index = (int)floor (l / logOfThree);
      index = Min (Max (0, index), 9);
      ++(darkSpotFreq[index]);
    }

    featureData[DarkSpotCount0] = (float)darkSpotFreq[0];
    featureData[DarkSpotCount1] = (float)darkSpotFreq[1];
    featureData[DarkSpotCount2] = (float)darkSpotFreq[2];
    featureData[DarkSpotCount3] = (float)darkSpotFreq[3];
    featureData[DarkSpotCount4] = (float)darkSpotFreq[4];
    featureData[DarkSpotCount5] = (float)darkSpotFreq[5];
    featureData[DarkSpotCount6] = (float)darkSpotFreq[6];
    featureData[DarkSpotCount7] = (float)darkSpotFreq[7];
    featureData[DarkSpotCount8] = (float)darkSpotFreq[8];
    featureData[DarkSpotCount9] = (float)darkSpotFreq[9];

    delete  blobs;
    blobs = NULL;
  }

  if  (intermediateImages)
  {
    RasterPtr  thinnedImage = initRaster->ThinContour ();
    SaveIntermediateImage (*thinnedImage, "_Thinned", intermediateImages);
    delete  thinnedImage;
    thinnedImage = NULL;
  }

  featureData[0] = (float)areaBeforeReduction;   // In case the example was reduced.
  fv->OrigSize ((float)areaBeforeReduction);

  return  fv;
}  /* ComputeFeatureVector */
/**
 * Synchronises a feature data file with the image files in a directory.
 *
 * The existing feature file is loaded (when possible) and every supported
 * image file in '_dirName' is visited in sorted order.  For an image that
 * already has an entry computed with the current producer version the existing
 * feature vector is reused; otherwise the image is read and its features are
 * computed.  When anything changed, and the operation was not cancelled, the
 * resulting list is written back to the feature file.
 *
 * Changes from the previous version:
 *  - the per-image error log now streams the file name itself; it used to
 *    stream the 'KKStrPtr' pointer;
 *  - the image read for feature computation is released even when
 *    'ComputeFeatureVector' throws; it used to leak on that path;
 *  - the hard-coded "Stop Here." debugging trap for one specific file name and
 *    a redundant second initialisation of the file-name iterator were removed.
 *
 * @param _fvProducerFactory             Supplies the feature-vector producer,
 *                                       empty lists and the expected FileDesc.
 * @param _dirName                       Directory holding the image files.
 * @param _fileName                      Name of the feature data file.
 * @param _unknownClass                  Class given to newly computed examples;
 *                                       when NULL the static unknown class is used.
 * @param _useDirectoryNameForClassName  When true, reused examples whose class
 *                                       differs from '_unknownClass' are reassigned to it.
 * @param _mlClasses                     Passed through to LoadFeatureFile.
 * @param _cancelFlag                    Polled; when set, processing stops early.
 * @param _changesMade                   Set to true when the resulting list differs
 *                                       from what was loaded.
 * @param _timeStamp                     Set to the feature file's time stamp when it is
 *                                       current, or to the local time when rewritten.
 * @param _log                           Log sink.
 * @return Newly manufactured list holding one example per processed image.
 *
 * NOTE(review): 'fullFeatureFileName' (used for saving) always prefixes
 * osAddSlash (_dirName), whereas 'fileNameToOpen' (used for loading) does not
 * when '_dirName' is empty - confirm the two agree for an empty directory name.
 */
FeatureVectorListPtr  FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                        const KKStr&          _dirName,
                                                        const KKStr&          _fileName,
                                                        MLClassPtr            _unknownClass,
                                                        bool                  _useDirectoryNameForClassName,
                                                        MLClassList&          _mlClasses,
                                                        VolConstBool&         _cancelFlag,
                                                        bool&                 _changesMade,
                                                        KKB::DateTime&        _timeStamp,
                                                        RunLog&               _log
                                                       )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if  (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr  className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink dirName: " << _dirName << endl
                  << " fileName: " << _fileName << " UnKnownClass: " << className << endl;

  KKStr  fullFeatureFileName = osAddSlash (_dirName) + _fileName;

  bool  successful = true;

  KKStr  fileNameToOpen;
  if  (_dirName.Empty ())
    fileNameToOpen = _fileName;
  else
    fileNameToOpen = osAddSlash (_dirName) + _fileName;

  bool  versionsAreSame = false;

  FeatureVectorListPtr  origFeatureVectorData
        = LoadFeatureFile (fileNameToOpen, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if  (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if  (_cancelFlag)
  {
    delete  origFeatureVectorData;
    origFeatureVectorData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // The loaded list is only reused when it has the list type and file
  // description the factory expects; otherwise an empty list takes its place.
  FeatureVectorListPtr  origFeatureData = NULL;

  if  (successful  &&
       (&typeid (*origFeatureVectorData) == _fvProducerFactory->FeatureVectorListTypeId ())  &&
       ((*(origFeatureVectorData->FileDesc ())) == (*(_fvProducerFactory->FileDesc ())))
      )
  {
    origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete  origFeatureVectorData;
    origFeatureVectorData = NULL;
  }

  KKStr  fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr  fileNameList = osGetListOfFiles (fileSpec);

  if  (!fileNameList)
  {
    // There are no Image Files, so we need to return an empty list of Image Features.
    if  (origFeatureData->QueueSize () > 0)
      _changesMade = true;

    delete  origFeatureData;
    origFeatureData = NULL;

    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr  fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if  (successful)
  {
    if  (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (fileNameToOpen);
    }
    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete  origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr  extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  KKStrList::iterator  fnIDX;
  KKStrPtr  imageFileName;

  kkuint32  numImagesFoundInOrigFeatureData = 0;
  kkuint32  numOfNewFeatureExtractions = 0;

  for  (fnIDX = fileNameList->begin ();  (fnIDX != fileNameList->end ())  &&  (!_cancelFlag);  ++fnIDX)
  {
    imageFileName = *fnIDX;

    bool  validImageFileFormat = SupportedImageFileFormat (*imageFileName);
    if  (!validImageFileFormat)
      continue;

    bool  featureVectorCoputaionSuccessful = false;

    FeatureVectorPtr  origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if  (origFV)
      numImagesFoundInOrigFeatureData++;

    if  (origFV  &&  versionsAreSame)
    {
      featureVectorCoputaionSuccessful = true;
      if  (_useDirectoryNameForClassName)
      {
        if  (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }
      else if  ((origFV->MLClass ()->UnDefined ())  &&  (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      // Move the example from the original list to the result list.
      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // We either DON'T have an original image or versions are not the same.
      KKStr  fullFileName = osAddSlash (_dirName) + (*imageFileName);
      FeatureVectorPtr  fv    = NULL;
      RasterPtr         image = NULL;   // Declared outside the 'try' so it can be released on every path.
      try
      {
        image = ReadImage (fullFileName);
        if  (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);

        if  (fv)
          featureVectorCoputaionSuccessful = true;
        else
          featureVectorCoputaionSuccessful = false;
      }
      catch  (...)
      {
        _log.Level (-1) << endl << endl
                        << "FeatureDataReSink ***ERROR***" << endl
                        << " Exception occurred calling constructor 'ComputeFeatureVector'." << endl
                        << endl;
        featureVectorCoputaionSuccessful = false;
        fv = NULL;
      }

      delete  image;
      image = NULL;

      if  (!featureVectorCoputaionSuccessful)
      {
        _log.Level (-1) << " FeatureFileIOKK::FeatureDataReSink *** ERROR ***, Processing Image File[" << *imageFileName << "]." << endl;
        delete  fv;
        fv = NULL;
      }
      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << " " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;

        if  ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  // A count mismatch means images were added to, or entries were missing from, the original file.
  if  (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;

  extractedFeatures->Version (fvProducer->Version ());

  if  ((_changesMade)  &&  (!_cancelFlag))
  {
    kkuint32  numExamplesWritten = 0;

    SaveFeatureFile (fullFeatureFileName,
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     successful,
                     _log
                    );

    _timeStamp = osGetLocalDateTime ();
  }

  delete  fvProducer;       fvProducer      = NULL;
  delete  fileNameList;     fileNameList    = NULL;
  delete  origFeatureData;  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink Exiting Dir: " << _dirName << endl;

  return  extractedFeatures;
}  /* FeatureDataReSink */
/**
 * Encodes one example into a new FeatureVector.
 *
 * The class, predicted class and training weight are copied from 'src'.  Each
 * source feature is then written according to its 'destWhatToDo' entry:
 *  - FeAsIs   : the value is copied to its destination feature;
 *  - FeBinary : 'cardinalityDest[x]' consecutive destination features are
 *               written, 1 where the truncated value equals the position and
 *               0 elsewhere;
 *  - FeScale  : the value divided by 'cardinalityDest[x]' is written.
 *
 * @param encodedFileDesc  Not referenced by this implementation.
 * @param src              Example to encode.
 * @return Newly allocated encoded example with 'numEncodedFeatures' features.
 */
FeatureVectorPtr  FeatureEncoder::EncodeAExample (FileDescConstPtr  encodedFileDesc,
                                                  FeatureVectorPtr  src
                                                 )
{
  FeatureVectorPtr  result = new FeatureVector (numEncodedFeatures);
  result->MLClass        (src->MLClass ());
  result->PredictedClass (src->PredictedClass ());
  //result->Version      (src->Version ());
  result->TrainWeight    (src->TrainWeight ());

  const float*  sourceValues = src->FeatureData ();

  for  (kkint32 f = 0;  f < numOfFeatures;  ++f)
  {
    const float  value   = sourceValues[srcFeatureNums[f]];
    kkint32      destIdx = destFeatureNums[f];

    switch  (destWhatToDo[f])
    {
    case  FeWhatToDo::FeAsIs:
      result->AddFeatureData (destIdx, value);
      break;

    case  FeWhatToDo::FeBinary:
      for  (kkint32 category = 0;  category < cardinalityDest[f];  ++category, ++destIdx)
      {
        float  indicator = ((kkint32)value == category);
        result->AddFeatureData (destIdx, indicator);
      }
      break;

    case  FeWhatToDo::FeScale:
      result->AddFeatureData (destIdx, (value / (float)cardinalityDest[f]));
      break;
    }
  }

  return  result;
}  /* EncodeAExample */
void FeatureEncoder::EncodeIntoSparseMatrix (FeatureVectorListPtr src, ClassAssignments& assignments, XSpacePtr& xSpace, kkint32& totalxSpaceUsed, struct svm_problem& prob, RunLog& log ) { FeatureVectorListPtr compressedExamples = NULL; FeatureVectorListPtr examplesToUseFoXSpace = NULL; kkint32 xSpaceUsed = 0; totalxSpaceUsed = 0; examplesToUseFoXSpace = src; kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize (); //kkint32 elements = numOfExamples * xSpaceNeededPerExample; prob.l = numOfExamples; prob.y = (double*)malloc (prob.l * sizeof (double)); prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*)); prob.index = new kkint32[prob.l]; prob.exampleNames.clear (); kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace); kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node); // I added '10' to elements because I am paranoid xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded); if (xSpace == NULL) { log.Level (-1) << endl << endl << endl << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl << endl << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl << " Num of Examples [" << numOfExamples << "]" << endl << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl << endl; // we sill have to allocate space for each individual training example separately. //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed."; } prob.W = NULL; kkint32 i = 0; FeatureVectorPtr example = NULL; MLClassPtr lastMlClass = NULL; kkint16 lastClassNum = -1; kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node); for (i = 0; i < prob.l; i++) { if (totalxSpaceUsed > numNeededXspaceNodes) { log.Level (-1) << endl << endl << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated." 
<< endl << endl; } example = examplesToUseFoXSpace->IdxToPtr (i); if (example->MLClass () != lastMlClass) { lastMlClass = example->MLClass (); lastClassNum = assignments.GetNumForClass (lastMlClass); } prob.y[i] = lastClassNum; prob.index[i] = i; prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ())); if (prob.W) { prob.W[i] = example->TrainWeight () * c_Param; if (example->TrainWeight () <= 0.0f) { log.Level (-1) << endl << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl << endl; prob.W[i] = 1.0 * c_Param; } } if (xSpace == NULL) { struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample); prob.x[i] = xSpaceThisExample; EncodeAExample (example, prob.x[i], xSpaceUsed); if (xSpaceUsed < xSpaceNeededPerExample) { kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node); struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample); memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample); free (xSpaceThisExample); prob.x[i] = smallerXSpaceThisExample; } } else { prob.x[i] = &xSpace[totalxSpaceUsed]; EncodeAExample (example, prob.x[i], xSpaceUsed); } totalxSpaceUsed += xSpaceUsed; } delete compressedExamples; return; } /* Compress */
void GradeClassification::GradeUsingTrainingConfiguration () { log.Level (10) << "GradeClassification::GradeUsingTrainingConfiguration" << endl; delete mlClasses; mlClasses = config->ExtractClassList (); bool changesMadeToTrainingLibraries = false; KKU::DateTime latestImageTimeStamp; log.Level (10) << "GradeUsingTrainingConfiguration Loading Training Data." << endl; FeatureVectorListPtr trainingData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag); if (!trainingData) { log.Level (-1) << endl << endl << endl << "GradedlClassification::GradeUsingTrainingConfiguration ***ERROR***" << endl << endl << " Could not load training data file Configuration File[" << configFileName << "]" << endl << endl << endl; Abort (true); return; } uint maxLevelsOfHiearchy = config->NumHierarchialLevels (); uint hierarchyLevel = 0; while (hierarchyLevel < maxLevelsOfHiearchy) { log.Level (10) << "GradeUsingTrainingConfiguration Hierarchy Level[" << hierarchyLevel << "]" << endl; TrainingConfiguration2Ptr configThisLevel = config->GenerateAConfiguraionForAHierarchialLevel (hierarchyLevel); FeatureVectorListPtr trainingDataThisLevel = trainingData->ExtractExamplesForHierarchyLevel (hierarchyLevel); FeatureVectorListPtr groundTruthThisLevel = groundTruth->ExtractExamplesForHierarchyLevel (hierarchyLevel); FeatureVectorListPtr groundTruthThisLevelClassified = new FeatureVectorList (*groundTruthThisLevel, true); KKStr statusMessage; TrainingProcess2 trainer (configThisLevel, trainingDataThisLevel, NULL, // No report file, trainingDataThisLevel->FileDesc (), log, false, // false = features are not already normalized. 
cancelFlag, statusMessage ); trainer.CreateModelsFromTrainingData (); { Classifier2 classifier (&trainer, log); FeatureVectorList::iterator idx; for (idx = groundTruthThisLevelClassified->begin (); idx != groundTruthThisLevelClassified->end (); idx++) { FeatureVectorPtr fv = *idx; MLClassConstPtr ic = classifier.ClassifyAImage (*fv); fv->MLClass (ic); } } GradeExamplesAgainstGroundTruth (groundTruthThisLevelClassified, groundTruthThisLevel); delete groundTruthThisLevelClassified; groundTruthThisLevelClassified = NULL; delete groundTruthThisLevel; groundTruthThisLevel = NULL; delete trainingDataThisLevel; trainingDataThisLevel = NULL; hierarchyLevel++; } ReportResults (); delete trainingData; } /* GradeUsingTrainingConfiguration */
/** * @brief Converts a single example into the svm_problem format. * @param[in] The example That we're converting * @param[in] The row kkint32 he svm_problem structure that the converted data will be stored */ void FeatureEncoder::EncodeAExample (FeatureVectorPtr example, svm_node* xSpace, kkint32& xSpaceUsed ) { const float* featureData = example->FeatureData (); kkint32 x; xSpaceUsed = 0; for (x = 0; x < numOfFeatures; x++) { float featureVal = featureData [srcFeatureNums[x]]; kkint32 y = destFeatureNums[x]; if (y >= xSpaceNeededPerExample) { KKStr errMsg (128); errMsg << "FeatureEncoder::EncodeAExample ***ERROR*** xSpaceNeededPerExample[" << xSpaceNeededPerExample << "]."; cerr << endl << "FeatureEncoder::EncodeAExample *** ERROR ***" << endl << " " << errMsg << endl << endl; throw KKException (errMsg); } switch (destWhatToDo[x]) { case FeWhatToDo::FeAsIs: { if (featureVal != 0.0) { xSpace[xSpaceUsed].index = y; xSpace[xSpaceUsed].value = featureVal; xSpaceUsed++; } } break; case FeWhatToDo::FeBinary: { for (kkint32 z = 0; z < cardinalityDest[x]; z++) { float bVal = ((kkint32)featureVal == z); if (bVal != 0.0) { xSpace[xSpaceUsed].index = y; xSpace[xSpaceUsed].value = bVal; xSpaceUsed++; } y++; } } break; case FeWhatToDo::FeScale: { if (featureVal != (float)0.0) { xSpace[xSpaceUsed].index = y; xSpace[xSpaceUsed].value = featureVal / (float)cardinalityDest[x]; xSpaceUsed++; } } break; } } xSpace[xSpaceUsed].index = -1; xSpace[xSpaceUsed].value = -1; xSpaceUsed++; } /* EncodeAExample */