void Strip () { bool cancelFlag = false; bool successful = false; bool changesMade = false; RunLog log; FeatureFileIOPtr driver = FeatureFileIO::FileFormatFromStr ("C45"); MLClassList mlClasses; FeatureVectorListPtr data = driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data", mlClasses, -1, cancelFlag, successful, changesMade, log ); FeatureVectorListPtr stripped = new FeatureVectorList (data->FileDesc (), false); FeatureVectorList::const_iterator idx; for (idx = data->begin (); idx != data->end (); ++idx) { FeatureVectorPtr fv = *idx; KKStr fn = fv->ExampleFileName (); if (fn.StartsWith ("SML") || (fn.StartsWith ("SMP"))) { } else { stripped->PushOnBack (fv); } } kkuint32 numExamplesWritten = 90; driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data", data->AllFeatures (), *stripped, numExamplesWritten, cancelFlag, successful, log ); }
void FeatureEncoder::EncodeIntoSparseMatrix (FeatureVectorListPtr src, ClassAssignments& assignments, XSpacePtr& xSpace, kkint32& totalxSpaceUsed, struct svm_problem& prob, RunLog& log ) { FeatureVectorListPtr compressedExamples = NULL; FeatureVectorListPtr examplesToUseFoXSpace = NULL; kkint32 xSpaceUsed = 0; totalxSpaceUsed = 0; examplesToUseFoXSpace = src; kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize (); //kkint32 elements = numOfExamples * xSpaceNeededPerExample; prob.l = numOfExamples; prob.y = (double*)malloc (prob.l * sizeof (double)); prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*)); prob.index = new kkint32[prob.l]; prob.exampleNames.clear (); kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace); kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node); // I added '10' to elements because I am paranoid xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded); if (xSpace == NULL) { log.Level (-1) << endl << endl << endl << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl << endl << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl << " Num of Examples [" << numOfExamples << "]" << endl << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl << endl; // we sill have to allocate space for each individual training example separately. //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed."; } prob.W = NULL; kkint32 i = 0; FeatureVectorPtr example = NULL; MLClassPtr lastMlClass = NULL; kkint16 lastClassNum = -1; kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node); for (i = 0; i < prob.l; i++) { if (totalxSpaceUsed > numNeededXspaceNodes) { log.Level (-1) << endl << endl << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated." 
<< endl << endl; } example = examplesToUseFoXSpace->IdxToPtr (i); if (example->MLClass () != lastMlClass) { lastMlClass = example->MLClass (); lastClassNum = assignments.GetNumForClass (lastMlClass); } prob.y[i] = lastClassNum; prob.index[i] = i; prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ())); if (prob.W) { prob.W[i] = example->TrainWeight () * c_Param; if (example->TrainWeight () <= 0.0f) { log.Level (-1) << endl << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl << endl; prob.W[i] = 1.0 * c_Param; } } if (xSpace == NULL) { struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample); prob.x[i] = xSpaceThisExample; EncodeAExample (example, prob.x[i], xSpaceUsed); if (xSpaceUsed < xSpaceNeededPerExample) { kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node); struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample); memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample); free (xSpaceThisExample); prob.x[i] = smallerXSpaceThisExample; } } else { prob.x[i] = &xSpace[totalxSpaceUsed]; EncodeAExample (example, prob.x[i], xSpaceUsed); } totalxSpaceUsed += xSpaceUsed; } delete compressedExamples; return; } /* Compress */
/**
 * @brief Brings the feature file of a directory in line with the image files found in it.
 * @details Loads '_fileName' from '_dirName', lists the files of the directory and, for every
 *          file with a supported image format, either re-uses the matching entry of the loaded
 *          feature data (only when its version equals that of the producer) or computes a new
 *          feature vector from the image.  When anything changed, and the operation was not
 *          cancelled, the resulting list is written back to the feature file.
 *
 * @param[in]     _fvProducerFactory  Creates the producer and the (empty) feature vector lists.
 * @param[in]     _dirName            Directory holding the images and the feature file.
 * @param[in]     _fileName           Name of the feature file inside '_dirName'.
 * @param[in]     _unknownClass       Class for new/undefined examples; NULL selects
 *                                    'MLClass::GetUnKnownClassStatic ()'.
 * @param[in]     _useDirectoryNameForClassName  When true every re-used example is forced to
 *                                    '_unknownClass'; otherwise only examples whose class reports
 *                                    'UnDefined ()' are.
 * @param[in,out] _mlClasses          Passed through to 'LoadFeatureFile'.
 * @param[in]     _cancelFlag         Polled; when set the scan stops and nothing is saved.
 * @param[out]    _changesMade        True when the returned data differs from what was loaded.
 * @param[out]    _timeStamp          Time stamp of the feature file, or local time after a save.
 * @param[in]     _log                Receives progress and error messages.
 * @return List of feature vectors, one per supported image that could be processed; the caller
 *         receives a list created with 'ManufacturFeatureVectorList (true)'.
 */
FeatureVectorListPtr  FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                        const KKStr&          _dirName,
                                                        const KKStr&          _fileName,
                                                        MLClassPtr            _unknownClass,
                                                        bool                  _useDirectoryNameForClassName,
                                                        MLClassList&          _mlClasses,
                                                        VolConstBool&         _cancelFlag,
                                                        bool&                 _changesMade,
                                                        KKB::DateTime&        _timeStamp,
                                                        RunLog&               _log
                                                       )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if  (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr  className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink  dirName: " << _dirName << endl
                  << "               fileName: " << _fileName << "  UnKnownClass: " << className << endl;

  bool  successful = true;

  // One path serves both the load and the save.  The save used to build its own path without
  // the empty-directory special case, so the file could be written somewhere other than where
  // it was read from.
  KKStr  featureFileName;
  if  (_dirName.Empty ())
    featureFileName = _fileName;
  else
    featureFileName = osAddSlash (_dirName) + _fileName;

  bool  versionsAreSame = false;

  FeatureVectorListPtr  origFeatureVectorData
        = LoadFeatureFile (featureFileName, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if  (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if  (_cancelFlag)
  {
    delete  origFeatureVectorData;  origFeatureVectorData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // The loaded data is only usable when it has the list type and the file description that the
  // factory produces.  'std::type_info' objects are compared by value; their addresses are not
  // guaranteed to be unique per type.
  bool  loadedDataIsCompatible = false;
  if  (successful)
  {
    const std::type_info*  expectedListType = _fvProducerFactory->FeatureVectorListTypeId ();
    loadedDataIsCompatible = (expectedListType != NULL)
                             &&  (typeid (*origFeatureVectorData) == *expectedListType)
                             &&  ((*(origFeatureVectorData->FileDesc ())) == (*(_fvProducerFactory->FileDesc ())));
  }

  FeatureVectorListPtr  origFeatureData = NULL;
  if  (loadedDataIsCompatible)
  {
    origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete  origFeatureVectorData;
  }
  origFeatureVectorData = NULL;

  KKStr  fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr  fileNameList = osGetListOfFiles (fileSpec);

  if  (!fileNameList)
  {
    // There are no image files, so an empty list of feature vectors is returned.
    if  (origFeatureData->QueueSize () > 0)
      _changesMade = true;
    delete  origFeatureData;  origFeatureData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr  fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if  (successful)
  {
    if  (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (featureFileName);
    }
    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete  origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // Both lists are sorted so that 'BinarySearchByName' can be used while walking the file names.
  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr  extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  kkuint32  numImagesFoundInOrigFeatureData = 0;
  kkuint32  numOfNewFeatureExtractions      = 0;

  KKStrList::iterator  fnIDX;
  for  (fnIDX = fileNameList->begin ();  (fnIDX != fileNameList->end ())  &&  (!_cancelFlag);  ++fnIDX)
  {
    KKStrPtr  imageFileName = *fnIDX;

    if  (!SupportedImageFileFormat (*imageFileName))
      continue;

    FeatureVectorPtr  origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if  (origFV)
      numImagesFoundInOrigFeatureData++;

    if  (origFV  &&  versionsAreSame)
    {
      // Re-use the existing feature vector, correcting its class where required.
      if  (_useDirectoryNameForClassName)
      {
        if  (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }
      else if  ((origFV->MLClass ()->UnDefined ())  &&  (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      // Hand the entry over from the original list to the result list.
      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // Either there is no existing feature vector for this image or the versions differ.
      KKStr  fullFileName = osAddSlash (_dirName) + (*imageFileName);

      FeatureVectorPtr  fv    = NULL;
      RasterPtr         image = NULL;   // declared outside the 'try' so the handler can release it
      try
      {
        image = ReadImage (fullFileName);
        if  (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
        delete  image;
        image = NULL;
      }
      catch  (...)
      {
        _log.Level (-1) << endl << endl
                        << "FeatureDataReSink   ***ERROR***" << endl
                        << "       Exception occurred calling constructor 'ComputeFeatureVector'." << endl
                        << endl;
        delete  image;
        image = NULL;
        fv = NULL;
      }

      if  (fv == NULL)
      {
        _log.Level (-1) << " FeatureFileIOKK::FeatureDataReSink  *** ERROR ***, Processing Image File["
                        << *imageFileName << "]."
                        << endl;
      }
      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << "  " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;

        if  ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  // NOTE(review): entries that remain in 'origFeatureData' (no matching image file any more) do
  // not set '_changesMade' here, so a feature file holding only such stale entries is not
  // rewritten - confirm whether that is intended.
  if  (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;

  extractedFeatures->Version (fvProducer->Version ());

  if  ((_changesMade)  &&  (!_cancelFlag))
  {
    kkuint32  numExamplesWritten = 0;

    SaveFeatureFile (featureFileName,
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     successful,
                     _log
                    );

    _timeStamp = osGetLocalDateTime ();
  }

  delete  fvProducer;       fvProducer      = NULL;
  delete  fileNameList;     fileNameList    = NULL;
  delete  origFeatureData;  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink  Exiting  Dir: " << _dirName << endl;

  return  extractedFeatures;
}  /* FeatureDataReSink */