/**
 * Builds a new list whose examples have all symbolic fields encoded as numeric
 * features.  If the source data is already all-numeric a deep copy is returned
 * unchanged.  The returned list owns its contents; caller is responsible for it.
 */
FeatureVectorListPtr  FeatureEncoder::CreateEncodedFeatureVector (FeatureVectorList&  srcData)
{
  // Nothing to encode when every field is already numeric; hand back a deep copy.
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  // Owning list (2nd ctor arg == true) that will collect the encoded vectors.
  FeatureVectorListPtr  result = new FeatureVectorList (destFileDesc, true);

  for  (FeatureVectorList::iterator  it = srcData.begin ();  it != srcData.end ();  ++it)
  {
    FeatureVectorPtr  srcExample = *it;

    // EncodeAExample returns a sparse (index, value) array terminated by index == -1.
    XSpacePtr  sparse = EncodeAExample (srcExample);

    FeatureVectorPtr  encoded = new FeatureVector (codedNumOfFeatures);
    for  (kkint32 i = 0;  sparse[i].index != -1;  ++i)
      encoded->AddFeatureData (sparse[i].index, (float)sparse[i].value);

    encoded->MLClass (srcExample->MLClass ());
    result->PushOnBack (encoded);   // 'result' takes ownership of 'encoded'.

    // NOTE(review): if EncodeAExample allocates the array with new[], this should
    // be delete[] — confirm against EncodeAExample's implementation.
    delete  sparse;
    sparse = NULL;
  }

  return  result;
}  /* CreateEncodedFeatureVector */
void FeatureFileIO::SaveFile (FeatureVectorList& _data, const KKStr& _fileName, FeatureNumListConst& _selFeatures, std::ostream& _out, kkuint32& _numExamplesWritten, VolConstBool& _cancelFlag, bool& _successful, KKStr& _errorMessage, RunLog& _log ) { _errorMessage = "Driver: '" + this->DriverName () + "' does not implement 'SaveFile' method."; _log.Level (-1) << endl << "FeatureFileIO::SaveFile ***ERROR*** " << _errorMessage << endl << " _data.size : " << _data.size () << endl << " _fileName : " << _fileName << endl << " _selFeatures : " << _selFeatures.ToCommaDelStr () << endl << " _out.fail : " << _out.fail () << endl << " _cancelFlag : " << _cancelFlag << endl << endl; _successful = false; _numExamplesWritten = 0; return; }
/**
 * Produces a new list containing encoded versions of every example in
 * 'srcData'.  All-numeric input is returned as a plain deep copy.  The
 * returned list owns its contents; caller must delete it.
 */
FeatureVectorListPtr  FeatureEncoder2::EncodedFeatureVectorList (const FeatureVectorList&  srcData)  const
{
  // Already numeric?  Then encoding is a no-op; duplicate and return.
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  // Owning list (2nd ctor arg == true) for the encoded vectors.
  FeatureVectorListPtr  result = new FeatureVectorList (encodedFileDesc, true);

  for  (FeatureVectorList::const_iterator  it = srcData.begin ();  it != srcData.end ();  ++it)
  {
    FeatureVectorPtr  srcExample = *it;

    FeatureVectorPtr  encoded = EncodeAExample (srcExample);
    encoded->MLClass (srcExample->MLClass ());
    result->PushOnBack (encoded);   // Ownership transfers to 'result'.
  }

  return  result;
}  /* EncodedFeatureVectorList */
void FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr& _fileName, FeatureNumListConst& _selFeatures, FeatureVectorList& _examples, VolConstBool& _cancelFlag, bool& _successful, RunLog& _log ) { kkuint32 numExamplesWritten = 0; SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log); if (_cancelFlag || (!_successful)) return; if (_examples.QueueSize () > 64000) { kkint32 numPartsNeeded = (_examples.QueueSize () / 64000); if ((_examples.QueueSize () % 64000) > 0) numPartsNeeded++; kkuint32 maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1; kkint32 partNum = 0; FeatureVectorList::const_iterator idx = _examples.begin (); while ((idx != _examples.end ()) && (_successful) && (!_cancelFlag)) { FeatureVectorListPtr part = _examples.ManufactureEmptyList (false); while ((idx != _examples.end ()) && (part->QueueSize () < maxPartSize)) { part->PushOnBack (*idx); idx++; } KKStr partFileName = osRemoveExtension (_fileName) + "-" + StrFormatInt (partNum, "00") + "." + osGetFileExtension (_fileName); SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log); partNum++; delete part; part = NULL; } } } /* SaveFeatureFileMultipleParts */
/**
 * Normalizes 'examples' in place.  If a saved normalization-parameters file
 * ('nornParmsFileName') exists it is loaded and used; otherwise fresh
 * parameters are derived from 'examples' (per 'param') and saved for future
 * runs.
 *
 * @param param     Model parameters used when deriving fresh normalization parms.
 * @param examples  Examples to normalize; modified in place.
 * @throws KKStrException  If the parameters file can neither be loaded nor saved.
 *
 * Fixes over the previous version:
 *  - 'normParms' was leaked on both throw paths; it is now deleted first.
 *  - removed unused local 'parameterStr'.
 */
void  FeatureFileConverter::NormalizeExamples (ModelParam&         param,
                                               FeatureVectorList&  examples
                                              )
{
  NormalizationParmsPtr  normParms = NULL;

  if  (osFileExists (nornParmsFileName))
  {
    // Reuse previously saved normalization parameters.
    bool  successful = false;
    normParms = new NormalizationParms (examples.FileDesc (), nornParmsFileName, successful, log);
    if  (!successful)
    {
      KKStr  errMsg = "Could not load normalization parameters file[" + nornParmsFileName + "]";
      log.Level (-1) << endl << "NormalizeExamples *** ERROR *** " << errMsg << endl << endl;
      delete  normParms;   // Was leaked here before.
      normParms = NULL;
      throw KKStrException (errMsg);
    }
  }
  else
  {
    // Derive fresh parameters from 'examples' and persist them for future runs.
    bool  successful = false;
    normParms = new NormalizationParms (param, examples, log);
    normParms->Save (nornParmsFileName, successful);
    if  (!successful)
    {
      KKStr  errMsg = "Could not save normalization parameters file[" + nornParmsFileName + "]";
      log.Level (-1) << endl << "NormalizeExamples *** ERROR *** " << errMsg << endl << endl;
      delete  normParms;   // Was leaked here before.
      normParms = NULL;
      throw KKStrException (errMsg);
    }
  }

  normParms->NormalizeImages (&examples);

  delete  normParms;
  normParms = NULL;
}  /* NormalizeExamples */
/**
 *@details
 * Determines whether 'otherData' has the same underlying field layout as this
 * list — every field of the same type and meaning — so we can tell whether one
 * list holds Apples while the other holds Oranges.  Symbolic-field differences
 * are tolerated; everything else must match.
 */
bool  FeatureVectorList::SameExceptForSymbolicData (const FeatureVectorList&  otherData,
                                                    RunLog&                   log
                                                   )  const
{
  // Field-by-field comparison is delegated to the FileDesc objects.
  bool  layoutsMatch = fileDesc->SameExceptForSymbolicData (*(otherData.FileDesc ()), log);
  return  layoutsMatch;
}
/**
 * @brief Will run M number of Train then Test passes.
 *
 * @param[in] numExamplsToUseForTraining  The number examples in each ordering(group)
 *            that are to be used for training, the remaining examples will be
 *            used as test data.
 * @param[in] log  Logger for progress and errors.
 *
 * For each of the 'numOfOrderings' orderings: splits the ordering into a
 * training set (first 'numExamplsToUseForTraining' examples) and a test set
 * (the remainder), runs a CrossValidation, and accumulates accuracy, support
 * vector count, train/test times, and a confusion matrix.  At the end the
 * per-ordering statistics are reduced to means/std-devs and the summed
 * confusion matrix is scaled to a per-ordering average.
 */
void  CrossValidationMxN::RunTrainAndTest (kkuint32  numExamplsToUseForTraining,
                                           RunLog&   log
                                          )
{
  // Release results from any previous run before accumulating new ones.
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  kkuint32  cvIDX = 0;
  MLClassListPtr  mlClasses = orderings->MLClasses ();

  for  (cvIDX = 0;  cvIDX < numOfOrderings;  cvIDX++)
  {
    FeatureVectorListPtr  data = orderings->Ordering (cvIDX);

    // Non-owning lists (2nd ctor arg == false): the examples themselves remain
    // owned by 'orderings'; these lists just partition them.
    FeatureVectorList  trainingData (fileDesc, false);
    FeatureVectorList  testData     (fileDesc, false);

    // First 'numExamplsToUseForTraining' examples train; the rest test.
    FeatureVectorList::iterator  fvIDX;
    for  (fvIDX = data->begin ();  fvIDX != data->end ();  fvIDX++)
    {
      FeatureVectorPtr  example = *fvIDX;
      if  (trainingData.QueueSize () < numExamplsToUseForTraining)
        trainingData.PushOnBack (example);
      else
        testData.PushOnBack (example);
    }

    CrossValidationPtr  cv = new CrossValidation (config,
                                                  &trainingData,
                                                  mlClasses,
                                                  numOfFolds,
                                                  false,
                                                  fileDesc,
                                                  log,
                                                  cancelFlag
                                                 );

    cv->RunValidationOnly (&testData,
                           NULL,    // No McNemars test going to be performed.
                           log
                          );

    // Record this ordering's results for the mean/std-dev reduction below.
    accuracies.push_back    (cv->Accuracy ());
    supportPoints.push_back ((float)cv->NumOfSupportVectors ());
    trainingTimes.push_back (cv->TrainTimeTotal ());
    testTimes.push_back     (cv->TestTimeTotal ());

    meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);

    delete  cv;
  }

  CalcMeanAndStdDev (accuracies,    accuracyMean,      accuracyStdDev);
  CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes, trainingTimeMean,  trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes,     testTimeMean,      testTimeStdDev);

  // Scale the summed confusion matrix down to a per-ordering average.
  double  factor = 1.0 / (double)numOfOrderings;
  meanConfusionMatrix->FactorCounts (factor);
}  /* RunTrainAndTest */
void GradeClassification::ValidateThatBothListsHaveSameEntries (FeatureVectorList& groundTruth, FeatureVectorList& examplesToGrade, bool& theyAreTheSame ) { theyAreTheSame = true; // We will assume that they are the same to start with. int missingExamplesToGrade = 0; int missingGroundTruthExamples = 0; groundTruth.SortByRootName (); examplesToGrade.SortByRootName (); *report << endl << endl << endl << "Missing Examples To Grade" << endl << "=========================" << endl; ImageFeaturesList::iterator idx; for (idx = groundTruth.begin (); idx != groundTruth.end (); idx++) { FeatureVectorPtr groundTruthExample = *idx; KKStr rootName = osGetRootName (groundTruthExample->ImageFileName ()); FeatureVectorPtr exampleToGrade = examplesToGrade.LookUpByRootName (rootName); if (!exampleToGrade) { theyAreTheSame = false; *report << rootName << "\t" << "*** MISSING ***" << "\t" << groundTruthExample->ImageFileName () << endl; missingExamplesToGrade++; } } if (missingExamplesToGrade == 0) { *report << " *** None ***" << endl; } *report << endl << endl << endl << "Missing Ground Truth Examples" << endl << "=============================" << endl; for (idx = examplesToGrade.begin (); idx != examplesToGrade.end (); idx++) { FeatureVectorPtr exampleToGrade = *idx; KKStr rootName = osGetRootName (exampleToGrade->ImageFileName ()); FeatureVectorPtr groundTruthExample = groundTruth.LookUpByRootName (rootName); if (!groundTruthExample) { theyAreTheSame = false; *report << rootName << "\t" << "*** MISSING ***" << "\t" << exampleToGrade->ImageFileName () << "\t" << endl; missingGroundTruthExamples++; } } if (missingGroundTruthExamples == 0) { *report << " *** None ***" << endl; } } /* ValidateThatBothListsHaveSameEntries */