/**
 * Builds a new FeatureVectorList in which every example of 'srcData' has been run
 * through 'EncodeAExample'.  If the source data is already all numeric, no encoding
 * is needed and a deep copy of the source list is returned instead.
 * The returned list owns its contents; caller is responsible for deleting it.
 */
FeatureVectorListPtr FeatureEncoder::CreateEncodedFeatureVector (FeatureVectorList& srcData)
{
  // Nothing to encode when every field is already numeric.
  if (srcData.AllFieldsAreNumeric ())
    return srcData.DuplicateListAndContents ();

  FeatureVectorListPtr encodedFeatureVectorList = new FeatureVectorList (destFileDesc, true);

  FeatureVectorList::iterator idx;
  for (idx = srcData.begin (); idx != srcData.end (); idx++)
  {
    FeatureVectorPtr srcExample = *idx;
    // EncodeAExample returns a sparse array of (index, value) nodes,
    // terminated by a node whose 'index' is -1 (libsvm-style sentinel).
    XSpacePtr encodedData = EncodeAExample (srcExample);
    kkint32 zed = 0;
    FeatureVectorPtr encodedFeatureVector = new FeatureVector (codedNumOfFeatures);
    // Copy each sparse node into the dense encoded feature vector until the sentinel.
    while (encodedData[zed].index != -1)
    {
      encodedFeatureVector->AddFeatureData (encodedData[zed].index, (float)encodedData[zed].value);
      zed++;
    }
    encodedFeatureVector->MLClass (srcExample->MLClass ());
    encodedFeatureVectorList->PushOnBack (encodedFeatureVector);
    // NOTE(review): 'encodedData' is indexed like an array; if EncodeAExample allocates
    // it with new[] this should be 'delete[]' (or 'free' if malloc'd) — confirm against
    // EncodeAExample's allocation and fix if mismatched.
    delete encodedData;
    encodedData = NULL;
  }

  return encodedFeatureVectorList;
} /* CreateEncodedFeatureVector */
/**
 * @brief Left over from BitReduction days; removed all code except that which processed the NO bit reduction option.
 *        Simply appends the source examples onto the destination list without reducing anything.
 * @param[in]  srcExamples         The list of examples you want to attempt to reduce.
 * @param[out] compressedExamples  Receives the (un-reduced) examples; left NOT owning its
 *                                 contents, since the pointers are shared with 'srcExamples'.
 * @param[in]  assignments         Unused; retained for interface compatibility with callers.
 */
void FeatureEncoder::CompressExamples (FeatureVectorListPtr srcExamples, FeatureVectorListPtr compressedExamples, ClassAssignments& assignments)
{
  // Removed dead timing code: 'time_before'/'time_after' were captured via
  // osGetSystemTimeUsed() but never read.
  compressedExamples->AddQueue (*srcExamples);
  // The destination only borrows the example pointers; 'srcExamples' retains ownership.
  compressedExamples->Owner (false);
} /* CompressExamples */
/**
 * Encodes every example in 'srcData' against a freshly created encoded FileDesc.
 * The returned list owns the newly encoded examples; caller must delete it.
 */
FeatureVectorListPtr FeatureEncoder::EncodeAllExamples (const FeatureVectorListPtr srcData)
{
  FileDescConstPtr encodedFileDesc = CreateEncodedFileDesc (NULL);
  FeatureVectorListPtr results = new FeatureVectorList (encodedFileDesc, true);

  for (FeatureVectorList::const_iterator it = srcData->begin (); it != srcData->end (); ++it)
  {
    const FeatureVectorPtr original = *it;
    results->PushOnBack (EncodeAExample (encodedFileDesc, original));
  }

  return results;
} /* EncodeAllExamples */
/**
 * Writes '_examples' to '_fileName'; when the list exceeds 64000 examples it ALSO writes
 * the same data again as a sequence of numbered part files ("name-00.ext", "name-01.ext", ...)
 * of roughly equal size.  Stops early if the save fails or '_cancelFlag' is raised.
 */
void FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr& _fileName, FeatureNumListConst& _selFeatures, FeatureVectorList& _examples, VolConstBool& _cancelFlag, bool& _successful, RunLog& _log)
{
  kkuint32 numExamplesWritten = 0;
  // Always write the complete file first.
  SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log);

  if (_cancelFlag || (!_successful))
    return;

  if (_examples.QueueSize () > 64000)
  {
    // Number of parts = ceil(QueueSize / 64000).
    kkint32 numPartsNeeded = (_examples.QueueSize () / 64000);
    if ((_examples.QueueSize () % 64000) > 0)
      numPartsNeeded++;

    kkuint32 maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1;

    kkint32 partNum = 0;
    // NOTE: one iterator is shared across both while loops so each part picks up
    // exactly where the previous part stopped.
    FeatureVectorList::const_iterator idx = _examples.begin ();

    while ((idx != _examples.end ()) && (_successful) && (!_cancelFlag))
    {
      // Part list does not own the examples; '_examples' retains ownership.
      FeatureVectorListPtr part = _examples.ManufactureEmptyList (false);

      while ((idx != _examples.end ()) && (part->QueueSize () < maxPartSize))
      {
        part->PushOnBack (*idx);
        idx++;
      }

      // NOTE(review): "00" formats only two digits; with > 100 parts
      // (> ~6.4M examples) names may collide — confirm StrFormatInt behavior.
      KKStr partFileName = osRemoveExtension (_fileName) + "-" + StrFormatInt (partNum, "00") + "." + osGetFileExtension (_fileName);

      SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log);

      partNum++;
      delete part;
      part = NULL;
    }
  }
} /* SaveFeatureFileMultipleParts */
/**
 * Runs every example of 'srcData' through EncodeAExample and collects the results
 * into a new list that owns its contents.  Caller must delete the returned list.
 */
FeatureVectorListPtr FeatureEncoder2::EncodeAllExamples (const FeatureVectorListPtr srcData)
{
  FeatureVectorListPtr encoded = new FeatureVectorList (encodedFileDesc, true /* owns its contents */);

  for (FeatureVectorList::const_iterator it = srcData->begin (); it != srcData->end (); ++it)
  {
    const FeatureVectorPtr original = *it;
    encoded->PushOnBack (EncodeAExample (original));
  }

  return encoded;
} /* EncodeAllExamples */
/**
 * Returns an encoded copy of 'srcData'.  When the data is already entirely numeric,
 * encoding is unnecessary and a deep copy is returned.  The result owns its contents.
 */
FeatureVectorListPtr FeatureEncoder2::EncodedFeatureVectorList (const FeatureVectorList& srcData) const
{
  // All-numeric data requires no encoding.
  if (srcData.AllFieldsAreNumeric ())
    return srcData.DuplicateListAndContents ();

  FeatureVectorListPtr result = new FeatureVectorList (encodedFileDesc, true);

  for (FeatureVectorList::const_iterator it = srcData.begin (); it != srcData.end (); ++it)
  {
    FeatureVectorPtr original = *it;
    FeatureVectorPtr encoded = EncodeAExample (original);
    encoded->MLClass (original->MLClass ());
    result->PushOnBack (encoded);
  }

  return result;
} /* EncodedFeatureVectorList */
void RandomSplitJobManager::RetrieveRandomSplit (int splitNum, FeatureVectorListPtr& trainData, FeatureVectorListPtr& testData ) { trainData = NULL; testData = NULL; if ((splitNum < 0) || (splitNum >= (int)splits->NumOfOrderings ())) { log.Level (-1) << endl << endl << "RandomSplitJobManager::RetrieveRandomSplit ***ERROR*** Invalid SplitNum[" << splitNum << "]" << endl << endl; return; } const FeatureVectorListPtr ordering = splits->Ordering (splitNum); trainData = ordering->ManufactureEmptyList (false); testData = ordering->ManufactureEmptyList (false); MLClassList::const_iterator classIDX; for (classIDX = mlClasses->begin (); classIDX != mlClasses->end (); classIDX++) { MLClassPtr ic = *classIDX; FeatureVectorListPtr examplesThisClass = ordering->ExtractExamplesForAGivenClass (ic); int numTrainExamplesNeeded = (int)(0.5 + (double)(examplesThisClass->QueueSize ()) * (double)splitFraction); int numExamplesAddToTrainSet = 0; FeatureVectorList::const_iterator idx; for (idx = examplesThisClass->begin (); idx != examplesThisClass->end (); idx++) { FeatureVectorPtr example = *idx; if (numExamplesAddToTrainSet < numTrainExamplesNeeded) { trainData->PushOnBack (example); numExamplesAddToTrainSet++; } else { testData->PushOnBack (example); } } } } /* RetrieveRandomSplit */
/**
 * Counts how many sparse 'svm_node' entries will be needed to encode all examples
 * in 'src': one node per non-zero encoded feature value plus one terminating
 * sentinel node per example.
 * @param[in] src  List of examples that will later be encoded.
 * @return Total number of xSpace nodes required.
 */
kkint32 FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr src) const
{
  kkint32 xSpaceNodesNeeded = 0;

  FeatureVectorList::const_iterator idx;
  for (idx = src->begin (); idx != src->end (); ++idx)
  {
    FeatureVectorPtr fv = *idx;
    const float* featureData = fv->FeatureData ();

    for (kkint32 x = 0; x < numOfFeatures; x++)
    {
      float featureVal = featureData[srcFeatureNums[x]];
      // Removed dead local 'y' (destFeatureNums[x] was read and incremented but never used).
      switch (destWhatToDo[x])
      {
        case FeWhatToDo::FeAsIs:
        case FeWhatToDo::FeScale:
          // As-is and scaled fields each emit one node when the value is non-zero.
          if (featureVal != 0.0f)
            xSpaceNodesNeeded++;
          break;

        case FeWhatToDo::FeBinary:
          // Binary (one-hot) expansion: exactly one bit is set when the value maps
          // to a bin in [0, cardinalityDest[x]); that bit costs one node.
          for (kkint32 z = 0; z < cardinalityDest[x]; z++)
          {
            if ((kkint32)featureVal == z)
              xSpaceNodesNeeded++;
          }
          break;
      }
    }

    xSpaceNodesNeeded++;  // Sentinel (index = -1) node terminating this example.
  }

  return xSpaceNodesNeeded;
} /* DetermineNumberOfNeededXspaceNodes */
/**
 * Determines how many images remain after bit-reduction compression of the training data.
 * Works on a private copy of 'trainData'; normalizes it, compresses it, and returns the
 * post-compression image count.
 */
int RandomSampleJobList::DetermineCompressedImageCount (FeatureVectorListPtr trainData, TrainingConfigurationPtr config)
{
  FileDescPtr fileDesc = trainData->FileDesc ();

  FeatureVectorListPtr workingCopy = trainData->DuplicateListAndContents ();
  FeatureVectorListPtr trainingSet = new FeatureVectorList (fileDesc, false, log, 10000);
  MLClassListPtr classList = workingCopy->ExtractListOfClasses ();

  // Regroup the copy class-by-class into 'trainingSet'.
  {
    for (MLClassList::const_iterator it = classList->begin (); it != classList->end (); it++)
    {
      MLClassPtr oneClass = *it;
      FeatureVectorListPtr classMembers = workingCopy->ExtractImagesForAGivenClass (oneClass);
      trainingSet->AddQueue (*classMembers);
      delete classMembers;
    }
  }

  NormalizationParms normParms (config, *trainingSet, log);
  normParms.NormalizeImages (trainingSet);

  ClassAssignments classAssignments (*classList, log);

  FeatureVectorListPtr compressedList = new FeatureVectorList (fileDesc, true, log, 10000);

  BitReduction br (config->SVMparamREF (), fileDesc, trainData->AllFeatures ());
  CompressionStats stats = br.compress (*trainingSet, compressedList, classAssignments);

  int compressedImageCount = stats.num_images_after;
  log.Level (10) << "DetermineCompressedImageCount compressedImageCount[" << compressedImageCount << "]" << endl;

  // Tear down in reverse order of dependence; 'trainingSet' does not own its contents.
  delete compressedList;
  compressedList = NULL;
  delete classList;
  classList = NULL;
  delete trainingSet;
  trainingSet = NULL;
  delete workingCopy;
  workingCopy = NULL;

  return compressedImageCount;
} /* DetermineCompressedImageCount */
void FeatureFileConverter::EncodeFeatureData () { bool successful = false; ModelParamKnn param (srcFileDesc, log); param.EncodingMethod (encodingMethod); param.CompressionMethod (ModelParam::BRnoCompression); if (features) param.SelectedFeatures (*features); FeatureEncoder2 encoder (param, srcFileDesc, log); // We do the next line to generate a report of the encoded field assignments. *report << endl; FileDescPtr encodedFileDesc = encoder.CreateEncodedFileDesc (report); NormalizeExamples (param, *data); FeatureVectorListPtr featureVectorEncodedData = encoder.EncodeAllExamples (data); uint numExamplesWritten = 0; destFileFormat->SaveFeatureFile (destFileName, featureVectorEncodedData->AllFeatures (), *featureVectorEncodedData, numExamplesWritten, cancelFlag, successful, log ); // FileDesc objects never get deleted until very end of application; "FileDesc::FinalCleanUp" will delete it. //delete encodedFileDesc; encodedFileDesc = NULL; delete featureVectorEncodedData; featureVectorEncodedData = NULL; } /* EncodeFeatureData */
void Strip () { bool cancelFlag = false; bool successful = false; bool changesMade = false; RunLog log; FeatureFileIOPtr driver = FeatureFileIO::FileFormatFromStr ("C45"); MLClassList mlClasses; FeatureVectorListPtr data = driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data", mlClasses, -1, cancelFlag, successful, changesMade, log ); FeatureVectorListPtr stripped = new FeatureVectorList (data->FileDesc (), false); FeatureVectorList::const_iterator idx; for (idx = data->begin (); idx != data->end (); ++idx) { FeatureVectorPtr fv = *idx; KKStr fn = fv->ExampleFileName (); if (fn.StartsWith ("SML") || (fn.StartsWith ("SMP"))) { } else { stripped->PushOnBack (fv); } } kkuint32 numExamplesWritten = 90; driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data", data->AllFeatures (), *stripped, numExamplesWritten, cancelFlag, successful, log ); }
void NormalizeAllValidatdData () { MLClassConstList classes; bool _cancelFlag = false; bool _successful = false; bool _changesMade = false; RunLog log; FeatureVectorListPtr fd = FeatureFileIOC45::Driver ()->LoadFeatureFile ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImages.data", classes, -1, _cancelFlag, _successful, _changesMade, log ); uint32 numWritten = 0; NormalizationParms parms (true, *fd, log); parms.NormalizeImages (fd); FeatureFileIOC45::Driver ()->SaveFeatureFile ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImagesNorm.data", fd->AllFeatures (), *fd, numWritten, _cancelFlag, _successful, log ); parms.Save ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImagesNorm.parms.txt", _successful); } /* NormalizeAllValidatdData */
void AbundanceCorrectionStatsBuilder::RemoveDuplicateImages () { *report << endl << endl; FeatureVectorListPtr allExamples = new FeatureVectorList (fileDesc, false, // 'false' = will not own contents. log ); allExamples->AddQueue (*trainLibData); allExamples->AddQueue (*otherClassData); DuplicateImages dupChecker (allExamples, log); if (dupChecker.DuplicatesFound ()) { *report << "DUPLICATE IMAGES DETECTED." << endl; dupChecker.ReportDuplicates (*report); FeatureVectorListPtr dups = dupChecker.ListOfExamplesToDelete (); if (dups) { FeatureVectorList::iterator idx; for (idx = dups->begin (); idx != dups->end (); ++idx) { FeatureVectorPtr fv = *idx; trainLibData->DeleteEntry (fv); otherClassData->DeleteEntry (fv); } delete dups; dups = NULL; } } else { *report << "No duplicates detected." << endl; } delete allExamples; allExamples = NULL; *report << endl << endl; } /* RemoveDuplicateImages */
/**
 * Re-synchronizes the feature file in '_dirName'/'_fileName' with the image files actually
 * present in '_dirName'.  Existing feature vectors are reused when the feature-extraction
 * version matches; otherwise features are recomputed from the images.  The (possibly
 * updated) feature file is rewritten when anything changed.
 *
 * @param[in]  _fvProducerFactory  Factory for the feature-vector producer and its lists.
 * @param[in]  _dirName            Directory holding the images and the feature file.
 * @param[in]  _fileName           Name of the feature file within '_dirName'.
 * @param[in]  _unknownClass       Class to assign unknown examples; defaults to the global
 *                                 "unknown" class when NULL.
 * @param[in]  _useDirectoryNameForClassName  When true, force every example to '_unknownClass'.
 * @param[out] _mlClasses          Receives the classes encountered while loading.
 * @param[in]  _cancelFlag         Monitored; aborts processing when raised.
 * @param[out] _changesMade        Set true when the data on disk no longer matches.
 * @param[out] _timeStamp          Timestamp of the (re)written or reused feature file.
 * @return New list (owner of its contents) of the up-to-date feature vectors; caller deletes.
 */
FeatureVectorListPtr FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr _fvProducerFactory, const KKStr& _dirName, const KKStr& _fileName, MLClassPtr _unknownClass, bool _useDirectoryNameForClassName, MLClassList& _mlClasses, VolConstBool& _cancelFlag, bool& _changesMade, KKB::DateTime& _timeStamp, RunLog& _log)
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink  dirName: " << _dirName << endl << "               fileName: " << _fileName << "  UnKnownClass: " << className << endl;

  KKStr fullFeatureFileName = osAddSlash (_dirName) + _fileName;

  bool successful = true;

  KKStr fileNameToOpen;
  if (_dirName.Empty ())
    fileNameToOpen = _fileName;
  else
    fileNameToOpen = osAddSlash (_dirName) + _fileName;

  bool versionsAreSame = false;

  FeatureVectorListPtr origFeatureVectorData = LoadFeatureFile (fileNameToOpen, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if (_cancelFlag)
  {
    delete origFeatureVectorData;
    origFeatureVectorData = NULL;
    return _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // Only reuse the loaded list if it is of the factory's list type AND built
  // against the factory's FileDesc; otherwise start from an empty list.
  FeatureVectorListPtr origFeatureData = NULL;
  if (successful && (&typeid (*origFeatureVectorData) == _fvProducerFactory->FeatureVectorListTypeId ()) && ((*(origFeatureVectorData->FileDesc ())) == (*(_fvProducerFactory->FileDesc ()))))
  {
    origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete origFeatureVectorData;
    origFeatureVectorData = NULL;
  }

  KKStr fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr fileNameList = osGetListOfFiles (fileSpec);
  if (!fileNameList)
  {
    // There are no Image Files, so we need to return a Empty List of Image Features.
    if (origFeatureData->QueueSize () > 0)
      _changesMade = true;
    delete origFeatureData;
    origFeatureData = NULL;
    return _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if (successful)
  {
    if (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (fileNameToOpen);
    }
    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  KKStrList::iterator fnIDX;
  KKStrPtr imageFileName;

  kkuint32 numImagesFoundInOrigFeatureData = 0;
  kkuint32 numOfNewFeatureExtractions = 0;

  for (fnIDX = fileNameList->begin (); (fnIDX != fileNameList->end ()) && (!_cancelFlag); ++fnIDX)
  {
    imageFileName = *fnIDX;

    // BUG FIX: removed leftover debugging code that matched a hard-coded root
    // name ("pv414-_002_20140414-162243_02068814-1261") and printed "Stop Here.".

    bool validImageFileFormat = SupportedImageFileFormat (*imageFileName);
    if (!validImageFileFormat)
      continue;

    bool featureVectorCoputaionSuccessful = false;

    FeatureVectorPtr origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if (origFV)
      numImagesFoundInOrigFeatureData++;

    if (origFV && versionsAreSame)
    {
      // Reuse existing feature vector; only class assignment may need updating.
      featureVectorCoputaionSuccessful = true;
      if (_useDirectoryNameForClassName)
      {
        if (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }
      else if ((origFV->MLClass ()->UnDefined ()) && (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }
      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // We either DON'T have an original image or versions are not the same.
      KKStr fullFileName = osAddSlash (_dirName) + (*imageFileName);
      FeatureVectorPtr fv = NULL;
      try
      {
        RasterPtr image = ReadImage (fullFileName);
        if (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
        delete image;
        image = NULL;
        if (fv)
          featureVectorCoputaionSuccessful = true;
        else
          featureVectorCoputaionSuccessful = false;
      }
      catch (...)
      {
        _log.Level (-1) << endl << endl << "FeatureDataReSink   ***ERROR***" << endl << "       Exception occurred calling constructor 'ComputeFeatureVector'." << endl << endl;
        featureVectorCoputaionSuccessful = false;
        fv = NULL;
      }

      if (!featureVectorCoputaionSuccessful)
      {
        _log.Level (-1) << " FeatureFileIOKK::FeatureDataReSink  *** ERROR ***, Processing Image File[" << imageFileName << "]." << endl;
        delete fv;
        fv = NULL;
      }
      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << "  " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;
        if ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  if (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;

  extractedFeatures->Version (fvProducer->Version ());

  if ((_changesMade) && (!_cancelFlag))
  {
    //extractedFeatures->WriteImageFeaturesToFile (fullFeatureFileName, RawFormat, FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()));
    kkuint32 numExamplesWritten = 0;
    SaveFeatureFile (fullFeatureFileName, FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()), *extractedFeatures, numExamplesWritten, _cancelFlag, successful, _log);
    _timeStamp = osGetLocalDateTime ();
  }

  delete fvProducer;
  fvProducer = NULL;
  delete fileNameList;
  fileNameList = NULL;
  delete origFeatureData;
  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink  Exiting  Dir: " << _dirName << endl;
  return extractedFeatures;
} /* FeatureDataReSink */
/**
 * Drives the conversion of the loaded feature data: drops all-zero examples, prints
 * statistics, optionally renumbers class names, and finally either encodes the data
 * (when 'encodeFeatureData' is set) or writes it out through 'destFileFormat'.
 */
void FeatureFileConverter::ConvertData ()
{
  cout << endl << "Saving [" << data->QueueSize () << "] records to data file[" << destFileName << "]" << endl << endl;
  bool successful = false;

  int numOfFeatures = data->NumOfFeatures ();
  int numWithAllZeros = 0;

  {
    // Filter out examples whose every feature is zero; 'data' is replaced by the
    // filtered list.
    FeatureVectorListPtr newData = new FeatureVectorList (srcFileDesc, true, log);
    // Will store examples that have all zero's for all features in "zeroData"
    // container. This way they can be deleted from memory later and not result
    // in a memory leak. This has to be done because they are not going to
    // be placed into newData which is going to become the owner of all the
    // examples.
    FeatureVectorListPtr zeroData = new FeatureVectorList (srcFileDesc, true, log); // How many have all 0's for feature data.

    FeatureVectorList::iterator idx;
    for (idx = data->begin (); idx != data->end (); idx++)
    {
      FeatureVectorPtr i = *idx;
      bool allZeros = true;
      // Breaks out on the first non-zero feature; 'allZeros' keeps the last test result.
      for (int featureNum = 0; featureNum < numOfFeatures; featureNum++)
      {
        allZeros = (i->FeatureData (featureNum) == 0.0f);
        if (!allZeros)
          break;
      }
      if (allZeros)
      {
        numWithAllZeros++;
        zeroData->PushOnBack (i);
      }
      else
      {
        newData->PushOnBack (i);
      }
    }
    // Ownership of every example has moved to either 'newData' or 'zeroData';
    // release the old container without deleting its (shared) contents.
    data->Owner (false);
    delete data;
    data = newData;
    delete zeroData;
  }

  *report << endl << endl << "Num of data items with all zero feature data [" << numWithAllZeros << "]" << endl << endl;
  *report << data->ClassStatisticsStr ();
  *report << endl << endl << endl;

  if (statistics)
  {
    *report << "Class Statistics:" << endl;
    data->PrintClassStatistics (*report);
    *report << endl << endl;
    *report << "Feature Statistics:" << endl;
    data->PrintFeatureStatisticsByClass (*report);
  }

  if (enumerateClasses)
  {
    // We are going to change the name of the classes to numbers enumberated by className
    MLClassConstListPtr mlClasses = data->ExtractMLClassConstList ();
    mlClasses->SortByName ();
    MLClassConstListPtr newClassNames = new MLClassConstList ();
    int classIdx = 0;
    MLClassConstList::iterator idx;
    for (idx = mlClasses->begin (); idx != mlClasses->end (); idx++)
    {
      KKStr newName = StrFormatInt (classIdx, "zzz0");
      // NOTE(review): 'mlClass' looks unused, but GetMLClassPtr presumably
      // creates/registers the class in 'newClassNames' as a side effect — confirm.
      MLClassConstPtr mlClass = newClassNames->GetMLClassPtr (newName);
      classIdx++;
    }

    // Re-point every example from its sorted original class to the class of the
    // same index in the renumbered list.
    FeatureVectorList::iterator idx2;
    for (idx2 = data->begin (); idx2 != data->end (); idx2++)
    {
      MLClassConstPtr c = (*idx2)->MLClass ();
      int classIndex = mlClasses->PtrToIdx (c);
      (*idx2)->MLClass (newClassNames->IdxToPtr (classIndex));
    }
    // NOTE(review): examples now reference classes obtained via 'newClassNames';
    // assumes MLClass objects are globally owned so this delete does not dangle — verify.
    delete mlClasses;
    mlClasses = NULL;
    delete newClassNames;
    newClassNames = NULL;
  }

  if (encodeFeatureData)
  {
    EncodeFeatureData ();
  }
  else
  {
    uint numExamplesWritten = 0;
    destFileFormat->SaveFeatureFile (destFileName, *features, *data, numExamplesWritten, cancelFlag, successful, log);
  }
} /* ConvertData */
void SplitForestCoverFile () { RunLog log; MLClassConstList mlClasses; bool cancelFlag = false; bool successful; bool changesMade = false; FeatureVectorListPtr images = FeatureFileIOC45::Driver ()->LoadFeatureFile ("covtype_alpha.data", mlClasses, -1, cancelFlag, successful, changesMade, log); FileDescPtr fileDesc = images->FileDesc (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); images->RandomizeOrder (); MLClassConstPtr lodgepolePine = mlClasses.GetMLClassPtr ("Lodgepole_Pine"); MLClassConstPtr spruceFir = mlClasses.GetMLClassPtr ("Spruce_Fir"); int lodgepolePineTrainCount = 0; int spruceFirTrainCount = 0; FeatureVectorList::iterator idx; FeatureVectorListPtr trainData = new FeatureVectorList (fileDesc, false, log, 10000); FeatureVectorListPtr testData = new FeatureVectorList (fileDesc, false, log, 10000); int c = 0; for (idx = images->begin (); idx != images->end (); idx++) { FeatureVectorPtr i = *idx; if (c % 5000) cout << c << endl; if (i->MLClass () == lodgepolePine) { if (lodgepolePineTrainCount < 56404) { trainData->PushOnBack (i); lodgepolePineTrainCount++; } else { testData->PushOnBack (i); } } else if (i->MLClass () == spruceFir) { if (spruceFirTrainCount < 42480) { trainData->PushOnBack (i); spruceFirTrainCount++; } else { testData->PushOnBack (i); } } c++; } KKU::uint numExamplesWritten = 0; FeatureFileIOC45::Driver ()->SaveFeatureFile ("CovType_TwoClass.data", trainData->AllFeatures (), *trainData, numExamplesWritten, cancelFlag, successful, log ); FeatureFileIOC45::Driver ()->SaveFeatureFile ("CovType_TwoClass.test", testData->AllFeatures (), *testData, numExamplesWritten, cancelFlag, successful, log ); delete trainData; delete testData; delete images; } /* SplitForestCoverFile */
void JobValidation::EvaluateNode () { log.Level (9) << " " << endl; log.Level (9) << "JobValidation::EvaluteNode JobId[" << jobId << "]" << endl; status = BinaryJobStatus::Started; bool configFileFormatGood = true; TrainingConfiguration2Ptr config = new TrainingConfiguration2 (); config->Load (configFileName, false, log); if (!config->FormatGood ()) configFileFormatGood; config->SetFeatureNums (features); config->C_Param (cParm); config->Gamma (gammaParm); config->A_Param (aParm); config->SelectionMethod (processor->SelectionMethod ()); switch (processor->ResultType ()) { case FinalResultType::MfsFeaturesSel: case FinalResultType::NoTuningAllFeatures: case FinalResultType::MfsParmsTuned: case FinalResultType::MfsParmsTunedFeaturesSel: config->MachineType (SVM_MachineType::OneVsOne); break; case FinalResultType::BfsFeaturesSel: case FinalResultType::BfsParmsTuned: case FinalResultType::BfsFeaturesSelParmsTuned: config->MachineType (SVM_MachineType::BinaryCombos); break; } bool cancelFlag = false; FeatureVectorListPtr trainData = processor->TrainingData (); FeatureVectorListPtr validationData = processor->ValidationData (); VectorDouble trainDataMeans = trainData->ExtractMeanFeatureValues (); VectorDouble validationDataMeans = validationData->ExtractMeanFeatureValues (); CrossValidationPtr crossValidation = new CrossValidation (config, trainData, processor->MLClasses (), processor->NumOfFolds (), processor->AlreadyNormalized (), processor->FileDesc (), log, cancelFlag ); delete classedCorrectly; classedCorrectlySize = validationData->QueueSize (); classedCorrectly = new bool[classedCorrectlySize]; crossValidation->RunValidationOnly (validationData, classedCorrectly, log); testAccuracy = crossValidation->Accuracy (); testAccuracyNorm = crossValidation->AccuracyNorm (); testAvgPredProb = (float)crossValidation->AvgPredProb () * 100.0f; testFMeasure = (float)crossValidation->ConfussionMatrix ()->FMeasure (processor->PositiveClass (), log); if (processor->GradingMethod 
() == GradingMethodType::Accuracy) testGrade = testAccuracy; else if (processor->GradingMethod () == GradingMethodType::AccuracyNorm) testGrade = testAccuracyNorm; else if (processor->GradingMethod () == GradingMethodType::FMeasure) testGrade = testFMeasure; else testGrade = testAccuracy; testNumSVs = crossValidation->NumOfSupportVectors (); { // Save results of this Split in Results file. processor->Block (); { uint fn = 0; ofstream rl ("FinalResults.log", ios_base::app); rl << endl << endl << "ConfigFileName" << "\t" << configFileName << "\t" << "Format Good[" << (configFileFormatGood ? "Yes" : "No") << endl << "SummaryResultsFileName" << "\t" << processor->SummaryResultsFileName () << endl << "Configuration CmdLine" << "\t" << config->SVMparamREF (log).ToString () << endl << "ImagesPerClass" << "\t" << config->ImagesPerClass () << endl << endl; rl << endl << endl << "Training Data Status" << endl << endl; trainData->PrintClassStatistics (rl); rl << endl << endl; rl << "TrainingDataMeans"; for (fn = 0; fn < trainDataMeans.size (); fn++) rl << "\t" << trainDataMeans[fn]; rl << endl; rl << "ValidationDataMeans"; for (fn = 0; fn < validationDataMeans.size (); fn++) rl << "\t" << validationDataMeans[fn]; rl << endl << endl; crossValidation->ConfussionMatrix ()->PrintConfusionMatrixTabDelimited (rl); rl << endl << endl << endl << endl; rl.close (); } { ofstream f (processor->SummaryResultsFileName ().Str (), ios_base::app); ValidationResults r (processor->ResultType (), config, crossValidation, trainData, osGetHostName ().value_or ("*** unknown ***"), classedCorrectlySize, classedCorrectly, this, log ); r.Write (f); f.close (); } processor->EndBlock (); } delete crossValidation; crossValidation = NULL; delete config; config = NULL; status = BinaryJobStatus::Done; } /* EvaluateNode */
/**
 * Grades classification quality level-by-level through the class hierarchy: for each
 * hierarchy level, trains a classifier from the configured training libraries, classifies
 * a copy of the ground-truth examples, and compares predictions against the ground truth.
 * Aborts (via Abort(true)) if the training data cannot be loaded.
 */
void GradeClassification::GradeUsingTrainingConfiguration ()
{
  log.Level (10) << "GradeClassification::GradeUsingTrainingConfiguration" << endl;

  // Replace any previously extracted class list with the one from the configuration.
  delete mlClasses;
  mlClasses = config->ExtractClassList ();

  bool changesMadeToTrainingLibraries = false;
  KKU::DateTime latestImageTimeStamp;

  log.Level (10) << "GradeUsingTrainingConfiguration   Loading Training Data." << endl;
  FeatureVectorListPtr trainingData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag);
  if (!trainingData)
  {
    log.Level (-1) << endl << endl << endl << "GradedlClassification::GradeUsingTrainingConfiguration      ***ERROR***" << endl << endl << "                      Could not load training data file Configuration File[" << configFileName << "]" << endl << endl << endl;
    Abort (true);
    return;
  }

  uint maxLevelsOfHiearchy = config->NumHierarchialLevels ();
  uint hierarchyLevel = 0;

  while (hierarchyLevel < maxLevelsOfHiearchy)
  {
    log.Level (10) << "GradeUsingTrainingConfiguration   Hierarchy Level[" << hierarchyLevel << "]" << endl;

    // NOTE(review): 'configThisLevel' is never deleted here; verify whether
    // TrainingProcess2 takes ownership — otherwise this leaks once per level.
    TrainingConfiguration2Ptr configThisLevel = config->GenerateAConfiguraionForAHierarchialLevel (hierarchyLevel);

    FeatureVectorListPtr trainingDataThisLevel = trainingData->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr groundTruthThisLevel = groundTruth->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    // Deep copy that will receive the classifier's predictions, leaving
    // 'groundTruthThisLevel' untouched for comparison.
    FeatureVectorListPtr groundTruthThisLevelClassified = new FeatureVectorList (*groundTruthThisLevel, true);

    KKStr statusMessage;
    TrainingProcess2 trainer (configThisLevel, trainingDataThisLevel, NULL, // No report file,
                              trainingDataThisLevel->FileDesc (), log, false, // false = features are not already normalized.
                              cancelFlag, statusMessage);
    trainer.CreateModelsFromTrainingData ();

    {
      // Classify every copied ground-truth example and store the predicted class on it.
      Classifier2 classifier (&trainer, log);
      FeatureVectorList::iterator idx;
      for (idx = groundTruthThisLevelClassified->begin (); idx != groundTruthThisLevelClassified->end (); idx++)
      {
        FeatureVectorPtr fv = *idx;
        MLClassConstPtr ic = classifier.ClassifyAImage (*fv);
        fv->MLClass (ic);
      }
    }

    GradeExamplesAgainstGroundTruth (groundTruthThisLevelClassified, groundTruthThisLevel);

    delete groundTruthThisLevelClassified;
    groundTruthThisLevelClassified = NULL;
    delete groundTruthThisLevel;
    groundTruthThisLevel = NULL;
    delete trainingDataThisLevel;
    trainingDataThisLevel = NULL;

    hierarchyLevel++;
  }

  ReportResults ();
  delete trainingData;
} /* GradeUsingTrainingConfiguration */
/**
 * @brief Will run M number of Train then Test passes.
 *
 * @param[in] numExamplsToUseForTraining  The number examples in each ordering(group)
 *            that are to be used for training; the remaining examples in that
 *            ordering are used as test data.
 * @param[in] log  Logger for progress and diagnostics.
 */
void CrossValidationMxN::RunTrainAndTest (kkuint32 numExamplsToUseForTraining, RunLog& log)
{
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  MLClassListPtr classList = orderings->MLClasses ();

  for (kkuint32 orderingIdx = 0; orderingIdx < numOfOrderings; orderingIdx++)
  {
    FeatureVectorListPtr ordering = orderings->Ordering (orderingIdx);

    // Neither working list owns its examples; the ordering does.
    FeatureVectorList trainSet (fileDesc, false);
    FeatureVectorList testSet (fileDesc, false);

    // First 'numExamplsToUseForTraining' examples train; the rest test.
    for (FeatureVectorList::iterator it = ordering->begin (); it != ordering->end (); it++)
    {
      FeatureVectorPtr fv = *it;
      if (trainSet.QueueSize () < numExamplsToUseForTraining)
        trainSet.PushOnBack (fv);
      else
        testSet.PushOnBack (fv);
    }

    CrossValidationPtr validator = new CrossValidation (config, &trainSet, classList, numOfFolds, false, fileDesc, log, cancelFlag);

    validator->RunValidationOnly (&testSet, NULL, // No McNemars test going to be performed.
                                  log);

    accuracies.push_back (validator->Accuracy ());
    supportPoints.push_back ((float)validator->NumOfSupportVectors ());
    trainingTimes.push_back (validator->TrainTimeTotal ());
    testTimes.push_back (validator->TestTimeTotal ());

    meanConfusionMatrix->AddIn (*(validator->ConfussionMatrix ()), log);
    delete validator;
  }

  CalcMeanAndStdDev (accuracies, accuracyMean, accuracyStdDev);
  CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes, trainingTimeMean, trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes, testTimeMean, testTimeStdDev);

  // Average the accumulated confusion counts over all orderings.
  double factor = 1.0 / (double)numOfOrderings;
  meanConfusionMatrix->FactorCounts (factor);
} /* RunTrainAndTest */
/**
 * Compares the predicted class of every example in 'examplesToGrade' against the
 * matching (by root file name) example in 'groundTruth', accumulating confusion
 * matrices both overall and per hierarchy level.  Per-level matrices are written
 * to '*report' and pushed onto 'resultsSummary'.
 */
void GradeClassification::GradeExamplesAgainstGroundTruth (FeatureVectorListPtr examplesToGrade, FeatureVectorListPtr groundTruth)
{
  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth" << endl;

  // Sorted so LookUpByRootName below can find matches.
  groundTruth->SortByRootName ();

  MLClassConstPtr unknownClass = mlClasses->GetUnKnownClass ();

  // Union of the classes present in either list.
  // NOTE(review): 'classes' is never deleted in this function — possible leak; confirm.
  MLClassConstListPtr classes = NULL;
  {
    MLClassConstListPtr examplesToGradeClasses = examplesToGrade->ExtractMLClassConstList ();
    MLClassConstListPtr groundTruthClasses = groundTruth->ExtractMLClassConstList ();
    classes = MLClassConstList::MergeClassList (*examplesToGradeClasses, *groundTruthClasses);
    delete examplesToGradeClasses;
    delete groundTruthClasses;
  }

  // Deepest hierarchy depth found among all classes.
  uint16 maxHierarchialLevel = 0;
  {
    MLClassConstList::iterator idx;
    for (idx = classes->begin (); idx != classes->end (); idx++)
    {
      MLClassConstPtr c = *idx;
      maxHierarchialLevel = Max (maxHierarchialLevel, c->NumHierarchialLevels ());
    }
  }

  // Create ConfusionMatrix objects for each possible level of the hierarchy.  The
  // 'resultsSummary' vector will end up owning the 'ConfusionMatrix2' instances and
  // its destructor is responsible for deleting them.
  uint curLevel = 0;
  vector<ConfusionMatrix2Ptr> cmByLevel;
  for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
  {
    // NOTE(review): 'classesThisLevel' appears to be leaked each iteration — confirm
    // whether ConfusionMatrix2 takes ownership.
    MLClassConstListPtr classesThisLevel = classes->ExtractListOfClassesForAGivenHierarchialLevel (curLevel);
    ConfusionMatrix2Ptr cm = new ConfusionMatrix2 (*classesThisLevel);
    cmByLevel.push_back (cm);
  }

  ConfusionMatrix2 cm (*classes);

  ImageFeaturesList::iterator idx;
  for (idx = examplesToGrade->begin (); idx != examplesToGrade->end (); idx++)
  {
    ImageFeaturesPtr exampleToGrade = *idx;
    MLClassConstPtr predictedClass = exampleToGrade->MLClass ();
    float origSize = exampleToGrade->OrigSize ();
    float probability = exampleToGrade->Probability ();
    // Match ground truth by root file name; unmatched examples grade against 'unknownClass'.
    KKStr rootName = osGetRootName (exampleToGrade->ImageFileName ());
    FeatureVectorPtr groundTruthExample = groundTruth->LookUpByRootName (rootName);
    MLClassConstPtr groundTruthClass = unknownClass;
    if (groundTruthExample)
      groundTruthClass = groundTruthExample->MLClass ();

    cm.Increment (groundTruthClass, predictedClass, (int)origSize, probability, log);

    // Also tally the prediction at every ancestor level of the hierarchy.
    for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
    {
      MLClassConstPtr groundTruthClasssThisLevel = groundTruthClass->MLClassForGivenHierarchialLevel (curLevel);
      MLClassConstPtr predictedClassThisLevel = predictedClass->MLClassForGivenHierarchialLevel (curLevel);
      cmByLevel[curLevel]->Increment (groundTruthClasssThisLevel, predictedClassThisLevel, (int)origSize, probability, log);
    }
  }

  //cm.PrintTrueFalsePositivesTabDelimited (*report);

  {
    // report Hierarchial results
    for (curLevel = 0; curLevel < maxHierarchialLevel; curLevel++)
    {
      log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth    Printing Level[" << curLevel << "]" << endl;
      *report << endl << endl << endl << "Confusion Matrix   Training Level[" << maxHierarchialLevel << "]    Preduction Level[" << (curLevel + 1) << "]" << endl << endl;
      cmByLevel[curLevel]->PrintConfusionMatrixTabDelimited (*report);
      resultsSummary.push_back (SummaryRec (maxHierarchialLevel, curLevel + 1, cmByLevel[curLevel]));
    }
    *report << endl << endl << endl;
  }

  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth   Exiting" << endl;
} /* GradeExamplesAgainstGroundTruth */
/**
 * @brief Builds the LIBSVM 'svm_problem' structure ('prob') plus the sparse feature array 'xSpace'
 *        from the feature vectors in 'src'.
 * @param[in]  src              Examples to encode; not modified and not taken ownership of.
 * @param[in]  assignments      Maps each MLClass to the numeric label written into 'prob.y'.
 * @param[out] xSpace           Single malloc'ed array of svm_node that 'prob.x[i]' entries point into.
 *                              Caller owns it (free with 'free', not 'delete').  May come back NULL if
 *                              the allocation failed, in which case each example got its own malloc'ed
 *                              node array instead (see loop below).
 * @param[out] totalxSpaceUsed  Total number of svm_node slots consumed across all examples.
 * @param[out] prob             LIBSVM problem; 'y', 'x' are malloc'ed here, 'index' is new[]'ed here.
 *                              Caller is responsible for releasing them with the matching deallocators.
 * @param[in]  log              Destination for error/diagnostic messages.
 */
void FeatureEncoder::EncodeIntoSparseMatrix (FeatureVectorListPtr src,
                                             ClassAssignments& assignments,
                                             XSpacePtr& xSpace,
                                             kkint32& totalxSpaceUsed,
                                             struct svm_problem& prob,
                                             RunLog& log
                                            )
{
  FeatureVectorListPtr compressedExamples = NULL;   // Never assigned in this version; 'delete' at the end is a no-op.
  FeatureVectorListPtr examplesToUseFoXSpace = NULL;
  kkint32 xSpaceUsed = 0;
  totalxSpaceUsed = 0;
  examplesToUseFoXSpace = src;
  kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize ();
  //kkint32 elements = numOfExamples * xSpaceNeededPerExample;
  prob.l = numOfExamples;
  // LIBSVM frees these with 'free', so they must be malloc'ed, not new[]'ed.
  prob.y = (double*)malloc (prob.l * sizeof (double));
  prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
  prob.index = new kkint32[prob.l];
  prob.exampleNames.clear ();
  kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);
  kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node);  // I added '10' to elements because I am paranoid
  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
  if (xSpace == NULL)
  {
    // Allocation failure is reported but NOT fatal: the per-example fallback
    // path in the loop below allocates each example's nodes separately.
    log.Level (-1) << endl << endl << endl
                   << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl << endl
                   << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl
                   << " Num of Examples [" << numOfExamples << "]" << endl
                   << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl
                   << endl;
    // we sill have to allocate space for each individual training example separately.
    //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed.";
  }
  // NOTE(review): 'prob.W' is set NULL here and never allocated in this function,
  // so the 'if (prob.W)' weighting block below is dead code as written.  It is
  // kept in case a variant of this routine pre-allocates W — confirm before removing.
  prob.W = NULL;
  kkint32 i = 0;
  FeatureVectorPtr example = NULL;
  MLClassPtr lastMlClass = NULL;
  kkint16 lastClassNum = -1;
  kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);
  for (i = 0; i < prob.l; i++)
  {
    if (totalxSpaceUsed > numNeededXspaceNodes)
    {
      // Diagnostic only: we keep writing even though we have overrun the estimate.
      log.Level (-1) << endl << endl
                     << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated."
                     << endl << endl;
    }
    example = examplesToUseFoXSpace->IdxToPtr (i);
    // Examples are expected to arrive grouped by class; cache the numeric label
    // so GetNumForClass is only called when the class changes.
    if (example->MLClass () != lastMlClass)
    {
      lastMlClass = example->MLClass ();
      lastClassNum = assignments.GetNumForClass (lastMlClass);
    }
    prob.y[i] = lastClassNum;
    prob.index[i] = i;
    prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));
    if (prob.W)
    {
      // Per-example training weight; non-positive weights fall back to 1.0.
      prob.W[i] = example->TrainWeight () * c_Param;
      if (example->TrainWeight () <= 0.0f)
      {
        log.Level (-1) << endl
                       << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl
                       << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl
                       << endl;
        prob.W[i] = 1.0 * c_Param;
      }
    }
    if (xSpace == NULL)
    {
      // Fallback path (big xSpace malloc failed): allocate a worst-case buffer
      // for this one example, encode into it, then shrink it to the size
      // actually used to conserve memory.
      struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
      prob.x[i] = xSpaceThisExample;
      EncodeAExample (example, prob.x[i], xSpaceUsed);
      if (xSpaceUsed < xSpaceNeededPerExample)
      {
        kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
        struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
        memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
        free (xSpaceThisExample);
        prob.x[i] = smallerXSpaceThisExample;
      }
    }
    else
    {
      // Normal path: each example's nodes occupy the next slice of the shared xSpace array.
      prob.x[i] = &xSpace[totalxSpaceUsed];
      EncodeAExample (example, prob.x[i], xSpaceUsed);
    }
    totalxSpaceUsed += xSpaceUsed;
  }
  delete compressedExamples;
  return;
}  /* Compress */
/**
 * @brief Runs one random-sampling evaluation: takes the first 'numExamplesToKeep' examples of
 *        ordering 'orderingNum', cross-validates a classifier on them, then grades it against
 *        'validationData'.  Results are stored in the member fields accuracy, trainTime,
 *        testTime and supportVectors; 'status' transitions rjStarted -> rjDone.
 * @param[in] validationData  Examples used by RunValidationOnly to measure accuracy; not owned.
 * @param[in] classes         Full class list the random sample is expected to cover; not owned.
 *
 * Fix: removed the unused local 'fileDesc' ('config->FileDesc ()' result was never read;
 * the code uses 'srcExamples->FileDesc ()' / 'trainingData->FileDesc ()' instead).
 */
void RandomSampleJob::EvaluteNode (FeatureVectorListPtr validationData, MLClassListPtr classes )
{
  log.Level (9) << " " << endl;
  log.Level (9) << " " << endl;
  log.Level (9) << "RandomSampleJob::EvaluteNode JobId[" << jobId << "] Ordering[" << orderingNum << "]" << endl;
  status = rjStarted;
  // Configure the training parameters this job is evaluating.
  config->CompressionMethod (BRnoCompression);
  config->KernalType (kernelType);
  config->EncodingMethod (encodingMethod);
  config->C_Param (c);
  config->Gamma (gamma);
  const FeatureVectorListPtr srcExamples = orderings->Ordering (orderingNum);
  if (numExamplesToKeep > srcExamples->QueueSize ())
  {
    // Requested sample size exceeds the training set; this is a configuration error.
    log.Level (-1) << endl << endl << endl
                   << "RandomSampleJob::EvaluteNode *** ERROR *** RandomExamples to large" << endl << endl
                   << " RandomExamples > num in Training set." << endl
                   << endl;
    osWaitForEnter ();
    exit (-1);
  }
  // Non-owning list: holds pointers into 'srcExamples', so only the container is deleted below.
  FeatureVectorListPtr trainingData = new FeatureVectorList (srcExamples->FileDesc (), false, log, 10000);
  for (int x = 0; x < numExamplesToKeep; x++)
  {
    trainingData->PushOnBack (srcExamples->IdxToPtr (x));
  }
  // Sanity check: warn if the random sample is missing any of the expected classes.
  // NOTE(review): 'allClassesRepresented' is only consumed by the commented-out
  // block below; kept so that block can be re-enabled unchanged.
  bool allClassesRepresented = true;
  {
    MLClassListPtr classesInRandomSample = trainingData->ExtractListOfClasses ();
    if (*classesInRandomSample != (*classes))
    {
      log.Level (-1) << endl << endl
                     << "RandomSampling *** ERROR ***" << endl << endl
                     << " Missing Classes From Random Sample." << endl << endl
                     << "MLClasses[" << classes->ToCommaDelimitedStr () << "]" << endl
                     << "Found [" << classesInRandomSample->ToCommaDelimitedStr () << "]" << endl
                     << endl;
      allClassesRepresented = false;
    }
    delete classesInRandomSample;
    classesInRandomSample = NULL;
  }
  //if (!allClassesRepresented)
  //{
  //  accuracy = 0.0;
  //  trainTime = 0.0;
  //  testTime = 0.0;
  //}
  //else
  {
    delete crossValidation;
    crossValidation = NULL;
    compMethod = config->CompressionMethod ();
    bool cancelFlag = false;
    crossValidation = new CrossValidation (config,
                                           trainingData,
                                           classes,
                                           10,
                                           false,   // False = Features are not normalized already.
                                           trainingData->FileDesc (),
                                           log,
                                           cancelFlag
                                          );
    crossValidation->RunValidationOnly (validationData, NULL);
    accuracy = crossValidation->Accuracy ();
    trainTime = crossValidation->TrainTimeMean ();
    testTime = crossValidation->TestTimeMean ();
    supportVectors = crossValidation->SupportPointsMean ();
  }
  delete trainingData;   // Container only; the feature vectors still belong to 'srcExamples'.
  status = rjDone;
}  /* EvaluteNode */
/**
 * @brief Recursively loads (and optionally regenerates) feature data for '_rootDir' and all of
 *        its sub-directories, returning one combined owning FeatureVectorList.
 * @param[in]     _fvProducerFactory           Factory used to (re)compute feature vectors.
 * @param[in]     _rootDir                     Directory to scan; a trailing slash is appended locally.
 * @param[in,out] _mlClasses                   Class list; classes found are looked up / added here.
 * @param[in]     _useDirectoryNameForClassName  When true every example in a directory is forced
 *                                               to the class derived from that directory's name.
 * @param[in]     _cancelFlag                  Checked between sub-directories so a caller can abort.
 * @param[in]     _rewiteRootFeatureFile       When true the root directory's ".data" feature file is
 *                                             re-synced from the images (sub-directories always use true).
 * @param[in]     _log                         Logger.
 * @return  Owning list of all examples found in the tree; each ExampleFileName is prefixed with its
 *          sub-directory path relative to '_rootDir'.  Caller must delete.
 */
FeatureVectorListPtr FeatureFileIO::LoadInSubDirectoryTree (FactoryFVProducerPtr _fvProducerFactory,
                                                            KKStr _rootDir,
                                                            MLClassList& _mlClasses,
                                                            bool _useDirectoryNameForClassName,
                                                            VolConstBool& _cancelFlag,
                                                            bool _rewiteRootFeatureFile,
                                                            RunLog& _log
                                                           )
{
  _log.Level (10) << "FeatureFileIO::LoadInSubDirectoryTree rootDir[" << _rootDir << "]." << endl;
  osAddLastSlash (_rootDir);
  // Feature file is named after the directory itself; an empty root falls back to "Root.data".
  KKStr featureFileName ("");
  KKStr fullFeatureFileName ("");
  if (!_rootDir.Empty ())
  {
    featureFileName = osGetRootNameOfDirectory (_rootDir) + ".data";
    fullFeatureFileName = _rootDir + featureFileName;
  }
  else
  {
    featureFileName = "Root.data";
    fullFeatureFileName = "Root.data";
  }
  MLClassPtr unKnownClass = _mlClasses.GetUnKnownClass ();
  if (_useDirectoryNameForClassName)
  {
    // The directory name, not the feature file contents, decides the class of every example here.
    KKStr className = MLClass::GetClassNameFromDirName (_rootDir);
    unKnownClass = _mlClasses.GetMLClassPtr (className);
  }
  bool changesMade = false;
  FeatureVectorListPtr dirImages = NULL;
  if (_rewiteRootFeatureFile)
  {
    DateTime timeStamp;
    dirImages = FeatureDataReSink (_fvProducerFactory,
                                   _rootDir,
                                   featureFileName,
                                   unKnownClass,
                                   _useDirectoryNameForClassName,
                                   _mlClasses,
                                   _cancelFlag,
                                   changesMade,
                                   timeStamp,
                                   _log
                                  );
    if (_useDirectoryNameForClassName)
    {
      // Force every example to the directory-derived class; rewrite the feature
      // file only if at least one example actually changed class.
      FeatureVectorList::iterator idx;
      for (idx = dirImages->begin (); idx != dirImages->end (); idx++)
      {
        if ((*idx)->MLClass () != unKnownClass)
        {
          (*idx)->MLClass (unKnownClass);
          changesMade = true;
        }
      }
      if (changesMade)
      {
        KKStr fullFileName = osAddSlash (_rootDir) + featureFileName;
        kkuint32 numExamplesWritten = 0;
        bool cancel = false;
        bool successful = false;
        SaveFeatureFile (fullFileName,
                         dirImages->AllFeatures (),
                         *dirImages,
                         numExamplesWritten,
                         cancel,
                         successful,
                         _log
                        );
      }
    }
  }
  else
  {
    // No re-sync requested: start from an empty owning list; only sub-directory
    // results (below) will populate it.
    dirImages = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }
  // Now that we have processed all image files in "rootDir",
  // lets process any sub-directories.
  KKStr dirSearchPath = osAddSlash (_rootDir) + "*.*";
  KKStrListPtr subDirectories = osGetListOfDirectories (dirSearchPath);
  if (subDirectories)
  {
    KKStrList::iterator idx;
    for (idx = subDirectories->begin (); (idx != subDirectories->end () && (!_cancelFlag)); idx++)
    {
      KKStr subDirName (**idx);
      if (subDirName == "BorderImages")
      {
        // We ignore this directory.
        continue;
      }
      KKStr newDirPath = osAddSlash (_rootDir) + subDirName;
      FeatureVectorListPtr subDirExamples = LoadInSubDirectoryTree (_fvProducerFactory,
                                                                    newDirPath,
                                                                    _mlClasses,
                                                                    _useDirectoryNameForClassName,
                                                                    _cancelFlag,
                                                                    true, // true = ReWriteRootFeatureFile
                                                                    _log
                                                                   );
      osAddLastSlash (subDirName);
      // We want to add the directory path to the ExampleFileName so that we can later locate the source image.
      for (auto fv: *subDirExamples)
      {
        KKStr newImageFileName = subDirName + fv->ExampleFileName ();
        fv->ExampleFileName (newImageFileName);
      }
      // Transfer ownership of the examples to 'dirImages' before deleting the
      // sub-list container: Owner(false) stops the delete from destroying them.
      dirImages->AddQueue (*subDirExamples);
      subDirExamples->Owner (false);
      delete subDirExamples;
      subDirExamples = NULL;
    }
    delete subDirectories;
    subDirectories = NULL;
  }
  _log.Level (10) << "LoadInSubDirectoryTree - Done" << endl;
  return dirImages;
}  /* LoadInSubDirectoryTree */
void MergeFeatureFiles::Main () { if (Abort ()) return; { // Will first load source files. uint srcIdx = 0; for (srcIdx = 0; srcIdx < srcFileNames.size (); srcIdx++) { bool cancelFlag = false; bool successful = false; bool changesMade = false; KKStr srcFileName = srcFileNames[srcIdx]; FeatureFileIOPtr srcFormat = srcFormats [srcIdx]; MLClassList classes; FeatureVectorListPtr s = NULL; s = srcFormat->LoadFeatureFile (srcFileName, classes, -1, cancelFlag, successful, changesMade, log); if ((s == NULL) || (!successful)) { log.Level (-1) << endl << endl << "MergeFeatureFiles::Main ***ERROR*** Could not load file[" << srcFileName << "]" << endl << endl; delete s; s = NULL; Abort (true); return; } if (!srcData) srcData = new FeatureVectorList (s->FileDesc (), true); if ((*(s->FileDesc ())) != (*(srcData->FileDesc ()))) { // The last source file read does not have the same attribute sets as previous feature files already read. log.Level (-1) << endl << endl << "MergeFeatureFiles::Main ***ERROR*** Feature File[" << srcFileName << "] does not have the same Attributes as previous feature files already read in." << endl << endl; Abort (true); delete s; s = NULL; return; } srcData->AddQueue (*s); s->Owner (false); delete s; } } if (!Abort ()) { if (randomize) { DateTime dt = osGetLocalDateTime (); srcData->RandomizeOrder (dt.Seconds ()); srcData->RandomizeOrder (); } if (stratify) { FeatureVectorListPtr stratifiedSrc = srcData->StratifyAmoungstClasses (numOfFolds, log); srcData->Owner (false); stratifiedSrc->Owner (true); delete srcData; srcData = stratifiedSrc; stratifiedSrc = NULL; } bool cancelFlag = false; bool successful = false; uint numExamplesWritten = 0; destFormat->SaveFeatureFile (destFileName, srcData->AllFeatures (), *srcData, numExamplesWritten, cancelFlag, successful, log); if (!successful) { log.Level (-1) << endl << endl << "MergeFeatureFiles::Main ***ERROR*** Could not save to file[" << destFileName << "]" << endl << endl; Abort (true); } } } /* Main */
/**
 * @brief Builds one TrainTestThread per fold and pushes them onto 'queueReady'.
 *        Training data is stratified across 'numOfFolds'; for each fold the fold's slice of the
 *        training examples becomes the test set (the rest train) and the fold's slice of the
 *        other-class examples is appended to the test set.
 *        Side effects: allocates 'msgQueue' and populates 'queueReady'; each thread receives
 *        (and per the inline comments, owns) its trainData, testData, and RunLog.
 */
void AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces ()
{
  log.Level (10) << "AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces" << endl;
  FeatureVectorListPtr stratifiedTrainData = trainLibData->StratifyAmoungstClasses (numOfFolds);
  FeatureVectorListPtr stratifiedOtherData = otherClassData->StratifyAmoungstClasses (numOfFolds);
  int32 numTrainExamples = stratifiedTrainData->QueueSize ();
  int32 numOtherExamples = stratifiedOtherData->QueueSize ();
  msgQueue = new MsgQueue ("AbundanceCorrectionStatsBuilder");
  // Fold boundaries are computed incrementally: [firstFvInFold, lastFvInFold]
  // covers fold 'foldNum'; the integer arithmetic distributes remainders evenly.
  int32 lastFvInFold = -1;
  int32 firstFvInFold = 0;
  int32 firstOtherFvInFold = 0;
  int32 lastOtherFvInFold = -1;
  for (int32 foldNum = 0; foldNum < numOfFolds; ++foldNum)
  {
    firstFvInFold = lastFvInFold + 1;
    lastFvInFold = (numTrainExamples * (foldNum + 1) / numOfFolds) - 1;
    firstOtherFvInFold = lastOtherFvInFold + 1;
    lastOtherFvInFold = (numOtherExamples * (foldNum + 1) / numOfFolds) - 1;
    // Owner flag false: these lists hold pointers into the stratified lists,
    // they do not own the feature vectors themselves.
    FeatureVectorListPtr trainData = new FeatureVectorList (fileDesc, false, log);
    FeatureVectorListPtr testData = new FeatureVectorList (fileDesc, false, log);
    // Examples inside this fold's range go to test; everything else trains.
    for (int32 idx = 0; idx < numTrainExamples; ++idx)
    {
      FeatureVectorPtr fv = stratifiedTrainData->IdxToPtr (idx);
      if ((idx >= firstFvInFold) && (idx <= lastFvInFold))
        testData->PushOnBack (fv);
      else
        trainData->PushOnBack (fv);
    }
    // Add OtherClass exampes to test data.
    for (int32 idx = firstOtherFvInFold; idx <= lastOtherFvInFold; ++idx)
    {
      FeatureVectorPtr fv = stratifiedOtherData->IdxToPtr (idx);
      testData->PushOnBack (fv);
    }
    RunLogPtr threadRunLog = new RunLog ();
    threadRunLog->AttachMsgQueue (msgQueue);
    KKStr threadName = "AbundanceCorrFold" + StrFormatInt (foldNum, "00");
    TrainTestThreadPtr thread = new TrainTestThread ("Fold_" + StrFormatInt (foldNum, "00"),
                                                     this,
                                                     config,
                                                     allClasses,
                                                     trainData,  // Will take ownesrship and delete in its destructor.
                                                     trainLibDataClasses,
                                                     testData,   // Will take ownesrship and delete in its destructor.
                                                     otherClass,
                                                     threadName,
                                                     msgQueue,   // Will take ownesrship and delete in its destructor.
                                                     threadRunLog
                                                    );
    queueReady->PushOnBack (thread);
  }
  // NOTE(review): deleting the stratified lists here is only safe if
  // StratifyAmoungstClasses returns NON-owning lists; otherwise the feature
  // vectors referenced by the per-fold lists handed to the threads would be
  // destroyed.  Confirm against StratifyAmoungstClasses' ownership contract.
  delete stratifiedOtherData;
  stratifiedOtherData = NULL;
  delete stratifiedTrainData;
  stratifiedTrainData = NULL;
}  /* CreateInitialThreadInstaces */