void RandomSplitJobManager::RetrieveRandomSplit (int splitNum, FeatureVectorListPtr& trainData, FeatureVectorListPtr& testData ) { trainData = NULL; testData = NULL; if ((splitNum < 0) || (splitNum >= (int)splits->NumOfOrderings ())) { log.Level (-1) << endl << endl << "RandomSplitJobManager::RetrieveRandomSplit ***ERROR*** Invalid SplitNum[" << splitNum << "]" << endl << endl; return; } const FeatureVectorListPtr ordering = splits->Ordering (splitNum); trainData = ordering->ManufactureEmptyList (false); testData = ordering->ManufactureEmptyList (false); MLClassList::const_iterator classIDX; for (classIDX = mlClasses->begin (); classIDX != mlClasses->end (); classIDX++) { MLClassPtr ic = *classIDX; FeatureVectorListPtr examplesThisClass = ordering->ExtractExamplesForAGivenClass (ic); int numTrainExamplesNeeded = (int)(0.5 + (double)(examplesThisClass->QueueSize ()) * (double)splitFraction); int numExamplesAddToTrainSet = 0; FeatureVectorList::const_iterator idx; for (idx = examplesThisClass->begin (); idx != examplesThisClass->end (); idx++) { FeatureVectorPtr example = *idx; if (numExamplesAddToTrainSet < numTrainExamplesNeeded) { trainData->PushOnBack (example); numExamplesAddToTrainSet++; } else { testData->PushOnBack (example); } } } } /* RetrieveRandomSplit */
void FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr& _fileName, FeatureNumListConst& _selFeatures, FeatureVectorList& _examples, VolConstBool& _cancelFlag, bool& _successful, RunLog& _log ) { kkuint32 numExamplesWritten = 0; SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log); if (_cancelFlag || (!_successful)) return; if (_examples.QueueSize () > 64000) { kkint32 numPartsNeeded = (_examples.QueueSize () / 64000); if ((_examples.QueueSize () % 64000) > 0) numPartsNeeded++; kkuint32 maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1; kkint32 partNum = 0; FeatureVectorList::const_iterator idx = _examples.begin (); while ((idx != _examples.end ()) && (_successful) && (!_cancelFlag)) { FeatureVectorListPtr part = _examples.ManufactureEmptyList (false); while ((idx != _examples.end ()) && (part->QueueSize () < maxPartSize)) { part->PushOnBack (*idx); idx++; } KKStr partFileName = osRemoveExtension (_fileName) + "-" + StrFormatInt (partNum, "00") + "." + osGetFileExtension (_fileName); SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log); partNum++; delete part; part = NULL; } } } /* SaveFeatureFileMultipleParts */
void JobValidation::EvaluateNode () { log.Level (9) << " " << endl; log.Level (9) << "JobValidation::EvaluteNode JobId[" << jobId << "]" << endl; status = BinaryJobStatus::Started; bool configFileFormatGood = true; TrainingConfiguration2Ptr config = new TrainingConfiguration2 (); config->Load (configFileName, false, log); if (!config->FormatGood ()) configFileFormatGood; config->SetFeatureNums (features); config->C_Param (cParm); config->Gamma (gammaParm); config->A_Param (aParm); config->SelectionMethod (processor->SelectionMethod ()); switch (processor->ResultType ()) { case FinalResultType::MfsFeaturesSel: case FinalResultType::NoTuningAllFeatures: case FinalResultType::MfsParmsTuned: case FinalResultType::MfsParmsTunedFeaturesSel: config->MachineType (SVM_MachineType::OneVsOne); break; case FinalResultType::BfsFeaturesSel: case FinalResultType::BfsParmsTuned: case FinalResultType::BfsFeaturesSelParmsTuned: config->MachineType (SVM_MachineType::BinaryCombos); break; } bool cancelFlag = false; FeatureVectorListPtr trainData = processor->TrainingData (); FeatureVectorListPtr validationData = processor->ValidationData (); VectorDouble trainDataMeans = trainData->ExtractMeanFeatureValues (); VectorDouble validationDataMeans = validationData->ExtractMeanFeatureValues (); CrossValidationPtr crossValidation = new CrossValidation (config, trainData, processor->MLClasses (), processor->NumOfFolds (), processor->AlreadyNormalized (), processor->FileDesc (), log, cancelFlag ); delete classedCorrectly; classedCorrectlySize = validationData->QueueSize (); classedCorrectly = new bool[classedCorrectlySize]; crossValidation->RunValidationOnly (validationData, classedCorrectly, log); testAccuracy = crossValidation->Accuracy (); testAccuracyNorm = crossValidation->AccuracyNorm (); testAvgPredProb = (float)crossValidation->AvgPredProb () * 100.0f; testFMeasure = (float)crossValidation->ConfussionMatrix ()->FMeasure (processor->PositiveClass (), log); if (processor->GradingMethod 
() == GradingMethodType::Accuracy) testGrade = testAccuracy; else if (processor->GradingMethod () == GradingMethodType::AccuracyNorm) testGrade = testAccuracyNorm; else if (processor->GradingMethod () == GradingMethodType::FMeasure) testGrade = testFMeasure; else testGrade = testAccuracy; testNumSVs = crossValidation->NumOfSupportVectors (); { // Save results of this Split in Results file. processor->Block (); { uint fn = 0; ofstream rl ("FinalResults.log", ios_base::app); rl << endl << endl << "ConfigFileName" << "\t" << configFileName << "\t" << "Format Good[" << (configFileFormatGood ? "Yes" : "No") << endl << "SummaryResultsFileName" << "\t" << processor->SummaryResultsFileName () << endl << "Configuration CmdLine" << "\t" << config->SVMparamREF (log).ToString () << endl << "ImagesPerClass" << "\t" << config->ImagesPerClass () << endl << endl; rl << endl << endl << "Training Data Status" << endl << endl; trainData->PrintClassStatistics (rl); rl << endl << endl; rl << "TrainingDataMeans"; for (fn = 0; fn < trainDataMeans.size (); fn++) rl << "\t" << trainDataMeans[fn]; rl << endl; rl << "ValidationDataMeans"; for (fn = 0; fn < validationDataMeans.size (); fn++) rl << "\t" << validationDataMeans[fn]; rl << endl << endl; crossValidation->ConfussionMatrix ()->PrintConfusionMatrixTabDelimited (rl); rl << endl << endl << endl << endl; rl.close (); } { ofstream f (processor->SummaryResultsFileName ().Str (), ios_base::app); ValidationResults r (processor->ResultType (), config, crossValidation, trainData, osGetHostName ().value_or ("*** unknown ***"), classedCorrectlySize, classedCorrectly, this, log ); r.Write (f); f.close (); } processor->EndBlock (); } delete crossValidation; crossValidation = NULL; delete config; config = NULL; status = BinaryJobStatus::Done; } /* EvaluateNode */
/**
 * Encodes the FeatureVectors in 'src' into the libsvm sparse-matrix structure 'prob'.
 *
 * @param src              Source examples; used as-is (not compressed or copied).
 * @param assignments      Maps each MLClass to its numeric class label (prob.y).
 * @param xSpace           Output; one contiguous malloc'd array of svm_node that
 *                         all prob.x[i] rows point into (when the big allocation
 *                         succeeds).  Caller takes ownership.
 * @param totalxSpaceUsed  Output; number of svm_node slots actually consumed.
 * @param prob             Output; l, y, x, index, exampleNames (and W=NULL) are filled in.
 *
 * If the single big xSpace allocation fails, each example's row is malloc'd
 * individually instead (and trimmed to its exact size) rather than aborting.
 */
void FeatureEncoder::EncodeIntoSparseMatrix (FeatureVectorListPtr  src,
                                             ClassAssignments&     assignments,
                                             XSpacePtr&            xSpace,
                                             kkint32&              totalxSpaceUsed,
                                             struct svm_problem&   prob,
                                             RunLog&               log
                                            )
{
  FeatureVectorListPtr compressedExamples   = NULL;  // Never assigned; the trailing 'delete' is effectively a no-op (legacy of a removed compression step, presumably).
  FeatureVectorListPtr examplesToUseFoXSpace = NULL;
  kkint32 xSpaceUsed = 0;

  totalxSpaceUsed = 0;

  examplesToUseFoXSpace = src;

  kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize ();
  //kkint32 elements = numOfExamples * xSpaceNeededPerExample;

  // One label, one row pointer, and one index entry per example.
  prob.l = numOfExamples;
  prob.y = (double*)malloc (prob.l * sizeof (double));
  prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
  prob.index = new kkint32[prob.l];
  prob.exampleNames.clear ();

  kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);

  kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node);  // I added '10' to elements because I am paranoid

  // Try to allocate all sparse nodes as one contiguous block.
  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
  if (xSpace == NULL)
  {
    log.Level (-1) << endl << endl << endl
                   << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl << endl
                   << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl
                   << " Num of Examples [" << numOfExamples << "]" << endl
                   << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl
                   << endl;
    // we sill have to allocate space for each individual training example separately.
    //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed.";
  }

  // NOTE(review): prob.W is set to NULL here and never allocated in this function,
  // so the per-example weight branch below is dead code as written — presumably W
  // is meant to be allocated by a caller or a future revision; verify.
  prob.W = NULL;

  kkint32 i = 0;

  FeatureVectorPtr example = NULL;
  MLClassPtr lastMlClass = NULL;
  kkint16 lastClassNum = -1;

  kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);

  for (i = 0; i < prob.l; i++)
  {
    // Sanity check: warn if we are about to write past the pre-sized xSpace block.
    if (totalxSpaceUsed > numNeededXspaceNodes)
    {
      log.Level (-1) << endl << endl
                     << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated."
                     << endl << endl;
    }

    example = examplesToUseFoXSpace->IdxToPtr (i);

    // Cache the numeric label; examples of the same class are typically adjacent.
    if (example->MLClass () != lastMlClass)
    {
      lastMlClass  = example->MLClass ();
      lastClassNum = assignments.GetNumForClass (lastMlClass);
    }

    prob.y[i] = lastClassNum;
    prob.index[i] = i;
    prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));

    if (prob.W)
    {
      // Weight each example by its TrainWeight scaled by C; non-positive weights
      // are invalid and fall back to 1.0.
      prob.W[i] = example->TrainWeight () * c_Param;
      if (example->TrainWeight () <= 0.0f)
      {
        log.Level (-1) << endl
                       << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl
                       << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl
                       << endl;
        prob.W[i] = 1.0 * c_Param;
      }
    }

    if (xSpace == NULL)
    {
      // Fallback path: big block failed, allocate this example's row on its own.
      struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
      prob.x[i] = xSpaceThisExample;
      EncodeAExample (example, prob.x[i], xSpaceUsed);
      if (xSpaceUsed < xSpaceNeededPerExample)
      {
        // Shrink the row to the number of nodes actually used.
        kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
        struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
        memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
        free (xSpaceThisExample);
        prob.x[i] = smallerXSpaceThisExample;
      }
    }
    else
    {
      // Normal path: row i points into the shared contiguous block.
      prob.x[i] = &xSpace[totalxSpaceUsed];
      EncodeAExample (example, prob.x[i], xSpaceUsed);
    }
    totalxSpaceUsed += xSpaceUsed;
  }

  delete compressedExamples;
  return;
}  /* Compress */
/**
 * Re-synchronizes the feature file in '_dirName' with the image files actually
 * present there: feature vectors whose image still exists and whose feature
 * version matches the producer's are reused; all other images are recomputed.
 * If anything changed, the feature file is rewritten and '_timeStamp' updated.
 *
 * @param _fvProducerFactory  Manufactures the FeatureVector lists/producer used here.
 * @param _unknownClass       Class assigned to (re)computed vectors; defaults to
 *                            the global UnKnown class when NULL.
 * @param _useDirectoryNameForClassName  When true every example is forced to '_unknownClass'.
 * @param _changesMade        Output; true if the resulting list differs from the file on disk.
 * @param _timeStamp          Output; file timestamp (reuse) or current time (rewrite).
 * @return  Newly manufactured data-owning list of feature vectors; caller owns it.
 */
FeatureVectorListPtr FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                       const KKStr&          _dirName,
                                                       const KKStr&          _fileName,
                                                       MLClassPtr            _unknownClass,
                                                       bool                  _useDirectoryNameForClassName,
                                                       MLClassList&          _mlClasses,
                                                       VolConstBool&         _cancelFlag,
                                                       bool&                 _changesMade,
                                                       KKB::DateTime&        _timeStamp,
                                                       RunLog&               _log
                                                      )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink dirName: " << _dirName << endl
                  << " fileName: " << _fileName << " UnKnownClass: " << className << endl;

  KKStr fullFeatureFileName = osAddSlash (_dirName) + _fileName;

  bool successful = true;

  KKStr fileNameToOpen;
  if (_dirName.Empty ())
    fileNameToOpen = _fileName;
  else
    fileNameToOpen = osAddSlash (_dirName) + _fileName;

  bool versionsAreSame = false;

  FeatureVectorListPtr origFeatureVectorData
    = LoadFeatureFile (fileNameToOpen, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if (_cancelFlag)
  {
    delete origFeatureVectorData;
    origFeatureVectorData = NULL;
    return _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // Only reuse the loaded list if it is the exact list type and file description
  // this factory produces; otherwise start from an empty list.
  FeatureVectorListPtr origFeatureData = NULL;
  if (successful &&
      (&typeid (*origFeatureVectorData) == _fvProducerFactory->FeatureVectorListTypeId ()) &&
      ((*(origFeatureVectorData->FileDesc ())) == (*(_fvProducerFactory->FileDesc ())))
     )
  {
    origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete origFeatureVectorData;
    origFeatureVectorData = NULL;
  }

  KKStr fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr fileNameList = osGetListOfFiles (fileSpec);
  if (!fileNameList)
  {
    // There are no Image Files, so we need to return a Empty List of Image Features.
    if (origFeatureData->QueueSize () > 0)
      _changesMade = true;
    delete origFeatureData;
    origFeatureData = NULL;
    return _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if (successful)
  {
    if (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (fileNameToOpen);
    }
    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  KKStrList::iterator fnIDX;
  KKStrPtr imageFileName;

  kkuint32 numImagesFoundInOrigFeatureData = 0;
  kkuint32 numOfNewFeatureExtractions = 0;

  // BUG FIX: removed leftover debugging code that compared each root name against
  // the hard-coded file "pv414-_002_20140414-162243_02068814-1261" and wrote
  // "Stop Here." to cout (a breakpoint anchor that escaped into production),
  // along with the now-unused 'rootName' local and a redundant pre-loop
  // 'fnIDX = fileNameList->begin ();' assignment.
  for (fnIDX = fileNameList->begin (); (fnIDX != fileNameList->end ()) && (!_cancelFlag); ++fnIDX)
  {
    imageFileName = *fnIDX;

    bool validImageFileFormat = SupportedImageFileFormat (*imageFileName);
    if (!validImageFileFormat)
      continue;

    bool featureVectorCoputaionSuccessful = false;

    FeatureVectorPtr origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if (origFV)
      numImagesFoundInOrigFeatureData++;

    if (origFV && versionsAreSame)
    {
      // Reuse the existing feature vector; just reconcile its class assignment.
      featureVectorCoputaionSuccessful = true;
      if (_useDirectoryNameForClassName)
      {
        if (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }
      else if ((origFV->MLClass ()->UnDefined ()) && (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // We either DON'T have an original image or versions are not the same.
      KKStr fullFileName = osAddSlash (_dirName) + (*imageFileName);
      FeatureVectorPtr fv = NULL;
      try
      {
        RasterPtr image = ReadImage (fullFileName);
        if (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
        delete image;
        image = NULL;
        if (fv)
          featureVectorCoputaionSuccessful = true;
        else
          featureVectorCoputaionSuccessful = false;
      }
      catch (...)
      {
        _log.Level (-1) << endl << endl
                        << "FeatureDataReSink ***ERROR***" << endl
                        << " Exception occurred calling constructor 'ComputeFeatureVector'." << endl
                        << endl;
        featureVectorCoputaionSuccessful = false;
        fv = NULL;
      }

      if (!featureVectorCoputaionSuccessful)
      {
        // BUG FIX: the original streamed the KKStrPtr pointer itself, logging an
        // address instead of the file name; dereference to log the string.
        _log.Level (-1) << " FeatureFileIOKK::FeatureDataReSink *** ERROR ***, Processing Image File["
                        << *imageFileName << "]." << endl;
        delete fv;
        fv = NULL;
      }
      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << " " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;
        if ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  // Any vector that was in the file but no longer has an image (or vice versa)
  // means the file needs to be rewritten.
  if (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;

  extractedFeatures->Version (fvProducer->Version ());

  if ((_changesMade) && (!_cancelFlag))
  {
    //extractedFeatures->WriteImageFeaturesToFile (fullFeatureFileName, RawFormat, FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()));

    kkuint32 numExamplesWritten = 0;

    SaveFeatureFile (fullFeatureFileName,
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     successful,
                     _log
                    );

    _timeStamp = osGetLocalDateTime ();
  }

  delete fvProducer;      fvProducer      = NULL;
  delete fileNameList;    fileNameList    = NULL;
  delete origFeatureData; origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink Exiting Dir: " << _dirName << endl;

  return extractedFeatures;
}  /* FeatureDataReSink */
/**
 * Evaluates one random-sample job: takes the first 'numExamplesToKeep' examples
 * of ordering 'orderingNum', cross-validates a classifier configured with this
 * job's kernel/encoding/C/Gamma parameters, then runs it against
 * 'validationData' and records accuracy, train/test times and support-vector
 * counts in this job's members.
 *
 * NOTE: exits the whole process (after waiting for Enter) if
 * 'numExamplesToKeep' exceeds the ordering's size.
 */
void RandomSampleJob::EvaluteNode (FeatureVectorListPtr  validationData,
                                   MLClassListPtr        classes
                                  )
{
  log.Level (9) << " " << endl;
  log.Level (9) << " " << endl;
  log.Level (9) << "RandomSampleJob::EvaluteNode JobId[" << jobId << "] Ordering[" << orderingNum << "]" << endl;

  status = rjStarted;

  // Configure the training parameters for this particular job.
  config->CompressionMethod (BRnoCompression);
  config->KernalType (kernelType);
  config->EncodingMethod (encodingMethod);
  config->C_Param (c);
  config->Gamma (gamma);

  FileDescPtr fileDesc = config->FileDesc ();

  const FeatureVectorListPtr srcExamples = orderings->Ordering (orderingNum);

  if (numExamplesToKeep > srcExamples->QueueSize ())
  {
    // Fatal configuration error: cannot sample more examples than exist.
    log.Level (-1) << endl << endl << endl
                   << "RandomSampleJob::EvaluteNode *** ERROR *** RandomExamples to large" << endl << endl
                   << " RandomExamples > num in Training set." << endl
                   << endl;
    osWaitForEnter ();
    exit (-1);
  }

  // Training set = first 'numExamplesToKeep' examples of the (pre-randomized)
  // ordering; the list does not own the FeatureVectors.
  FeatureVectorListPtr trainingData = new FeatureVectorList (srcExamples->FileDesc (), false, log, 10000);
  for (int x = 0; x < numExamplesToKeep; x++)
  {
    trainingData->PushOnBack (srcExamples->IdxToPtr (x));
  }

  // Warn if the random sample failed to include every expected class.
  bool allClassesRepresented = true;
  {
    MLClassListPtr classesInRandomSample = trainingData->ExtractListOfClasses ();
    if (*classesInRandomSample != (*classes))
    {
      log.Level (-1) << endl << endl
                     << "RandomSampling *** ERROR ***" << endl << endl
                     << " Missing Classes From Random Sample." << endl << endl
                     << "MLClasses[" << classes->ToCommaDelimitedStr () << "]" << endl
                     << "Found [" << classesInRandomSample->ToCommaDelimitedStr () << "]" << endl
                     << endl;
      allClassesRepresented = false;
    }
    delete classesInRandomSample;
    classesInRandomSample = NULL;
  }

  // NOTE(review): the early-out on 'allClassesRepresented' is deliberately
  // disabled; evaluation currently proceeds even when classes are missing.
  //if (!allClassesRepresented)
  //{
  //  accuracy = 0.0;
  //  trainTime = 0.0;
  //  testTime = 0.0;
  //}
  //else
  {
    delete crossValidation;
    crossValidation = NULL;
    compMethod = config->CompressionMethod ();

    bool cancelFlag = false;

    crossValidation = new CrossValidation (config,
                                           trainingData,
                                           classes,
                                           10,
                                           false,   // False = Features are not normalized already.
                                           trainingData->FileDesc (),
                                           log,
                                           cancelFlag
                                          );

    crossValidation->RunValidationOnly (validationData, NULL);

    accuracy       = crossValidation->Accuracy ();
    trainTime      = crossValidation->TrainTimeMean ();
    testTime       = crossValidation->TestTimeMean ();
    supportVectors = crossValidation->SupportPointsMean ();
  }

  delete trainingData;
  status = rjDone;
}  /* EvaluteNode */
void AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces () { log.Level (10) << "AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces" << endl; FeatureVectorListPtr stratifiedTrainData = trainLibData->StratifyAmoungstClasses (numOfFolds); FeatureVectorListPtr stratifiedOtherData = otherClassData->StratifyAmoungstClasses (numOfFolds); int32 numTrainExamples = stratifiedTrainData->QueueSize (); int32 numOtherExamples = stratifiedOtherData->QueueSize (); msgQueue = new MsgQueue ("AbundanceCorrectionStatsBuilder"); int32 lastFvInFold = -1; int32 firstFvInFold = 0; int32 firstOtherFvInFold = 0; int32 lastOtherFvInFold = -1; for (int32 foldNum = 0; foldNum < numOfFolds; ++foldNum) { firstFvInFold = lastFvInFold + 1; lastFvInFold = (numTrainExamples * (foldNum + 1) / numOfFolds) - 1; firstOtherFvInFold = lastOtherFvInFold + 1; lastOtherFvInFold = (numOtherExamples * (foldNum + 1) / numOfFolds) - 1; FeatureVectorListPtr trainData = new FeatureVectorList (fileDesc, false, log); FeatureVectorListPtr testData = new FeatureVectorList (fileDesc, false, log); for (int32 idx = 0; idx < numTrainExamples; ++idx) { FeatureVectorPtr fv = stratifiedTrainData->IdxToPtr (idx); if ((idx >= firstFvInFold) && (idx <= lastFvInFold)) testData->PushOnBack (fv); else trainData->PushOnBack (fv); } // Add OtherClass exampes to test data. for (int32 idx = firstOtherFvInFold; idx <= lastOtherFvInFold; ++idx) { FeatureVectorPtr fv = stratifiedOtherData->IdxToPtr (idx); testData->PushOnBack (fv); } RunLogPtr threadRunLog = new RunLog (); threadRunLog->AttachMsgQueue (msgQueue); KKStr threadName = "AbundanceCorrFold" + StrFormatInt (foldNum, "00"); TrainTestThreadPtr thread = new TrainTestThread ("Fold_" + StrFormatInt (foldNum, "00"), this, config, allClasses, trainData, // Will take ownesrship and delete in its destructor. trainLibDataClasses, testData, // Will take ownesrship and delete in its destructor. 
otherClass, threadName, msgQueue, // Will take ownesrship and delete in its destructor. threadRunLog ); queueReady->PushOnBack (thread); } delete stratifiedOtherData; stratifiedOtherData = NULL; delete stratifiedTrainData; stratifiedTrainData = NULL; } /* CreateInitialThreadInstaces */