Example No. 1
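// Loads a C45-format feature file, removes every example whose file name starts with
// "SML" or "SMP", and writes the remaining examples out to a new C45 feature file.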
void  Strip ()
{
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  RunLog  log;

  FeatureFileIOPtr driver =  FeatureFileIO::FileFormatFromStr ("C45");

  MLClassList  mlClasses;
  FeatureVectorListPtr  data = 
        driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data",
                                 mlClasses,
                                 -1,
                                 cancelFlag,
                                 successful,
                                 changesMade,
                                 log
                               );

  // 'false' -> the new list does not take ownership of the feature vectors; 'data' still owns them.
  FeatureVectorListPtr  stripped = new FeatureVectorList (data->FileDesc (), false);

  FeatureVectorList::const_iterator  idx;
  for  (idx = data->begin ();  idx != data->end ();  ++idx)
  {
    FeatureVectorPtr  fv = *idx;
    KKStr  fn = fv->ExampleFileName ();
    // Keep only the examples whose file names do not start with "SML" or "SMP".
    if  (!fn.StartsWith ("SML")  &&  !fn.StartsWith ("SMP"))
      stripped->PushOnBack (fv);
  }


  kkuint32  numExamplesWritten = 0;   // Out-parameter; SaveFeatureFile sets the actual count.
  driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data",
                           data->AllFeatures (), 
                           *stripped, 
                           numExamplesWritten,
                           cancelFlag,
                           successful,
                           log
                          );

  delete  stripped;  stripped = NULL;   // Non-owning list; deleting it leaves the feature vectors intact.
  delete  data;      data     = NULL;   // 'data' owns the feature vectors loaded from disk.
}
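A detail worth calling out in Example No. 1 (and reused in the later examples) is the "owner" flag passed to the FeatureVectorList constructor. The sketch below is only an illustration built from calls that already appear on this page; the variable names are placeholders and 'data' is assumed to be a non-empty list loaded by a FeatureFileIO driver. A non-owning list can be deleted without destroying the feature vectors, which still belong to the list that loaded them.

// Minimal sketch of the ownership pattern (placeholder names; 'data' owns its feature vectors).
FeatureVectorListPtr  view = new FeatureVectorList (data->FileDesc (), false);  // false = non-owning
view->PushOnBack (data->IdxToPtr (0));   // shares the pointer with 'data'; nothing is copied
delete  view;                            // frees only the container
delete  data;                            // this is what actually frees the feature vectors

Example No. 4 below uses the same idea in reverse: after handing its contents to srcData with AddQueue, it calls s->Owner (false) so that deleting s frees only the container.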
Example No. 2
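// Estimates how many training examples remain after bit-reduction compression:
// the training data is regrouped by class, normalized, and run through
// BitReduction::compress; the CompressionStats result supplies the returned count.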
int   RandomSampleJobList::DetermineCompressedImageCount (FeatureVectorListPtr       trainData,
                                                          TrainingConfigurationPtr   config
                                                         )
{
  FileDescPtr  fileDesc = trainData->FileDesc ();
  FeatureVectorListPtr  srcImages = trainData->DuplicateListAndContents ();
  FeatureVectorListPtr  imagesToTrain = new FeatureVectorList (fileDesc, false, log, 10000);

  MLClassListPtr mlClasses = srcImages->ExtractListOfClasses ();

  {
    MLClassList::const_iterator  idx;

    for  (idx = mlClasses->begin ();  idx != mlClasses->end ();  idx++)
    {
      MLClassPtr  mlClass = *idx;
      FeatureVectorListPtr  imagesThisClass = srcImages->ExtractImagesForAGivenClass (mlClass);
      imagesToTrain->AddQueue (*imagesThisClass);
      delete  imagesThisClass;
    }
  }

  NormalizationParms  normParms (config, *imagesToTrain, log);
  normParms.NormalizeImages (imagesToTrain);

  ClassAssignments  classAssignments (*mlClasses, log);
  FeatureVectorListPtr  compressedImageList = new FeatureVectorList (fileDesc, true, log, 10000);

  BitReduction br (config->SVMparamREF (), fileDesc, trainData->AllFeatures ());

  CompressionStats compressionStats 
              = br.compress (*imagesToTrain, 
                             compressedImageList, 
                             classAssignments
                            );


  int  compressedImageCount = compressionStats.num_images_after;

  log.Level (10) << "DetermineCompressedImageCount  compressedImageCount[" << compressedImageCount << "]" << endl;

  delete  compressedImageList;  compressedImageList = NULL;
  delete  mlClasses;            mlClasses           = NULL;
  delete  imagesToTrain;        imagesToTrain       = NULL;
  delete  srcImages;            srcImages           = NULL;

  return compressedImageCount;
}  /* DetermineCompressedImageCount */
Example No. 3
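// Splits the Forest-Cover data set into a two-class train/test pair: after shuffling,
// the first 56,404 Lodgepole_Pine and 42,480 Spruce_Fir examples go to the training
// file and the remainder to the test file, both written with the C45 driver.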
void  SplitForestCoverFile ()
{
  RunLog  log;

  MLClassConstList  mlClasses;  
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  FeatureVectorListPtr  images = FeatureFileIOC45::Driver ()->LoadFeatureFile 
                    ("covtype_alpha.data", mlClasses, -1, cancelFlag, successful, changesMade, log);

  FileDescPtr  fileDesc = images->FileDesc ();

  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();

  MLClassConstPtr  lodgepolePine = mlClasses.GetMLClassPtr ("Lodgepole_Pine");
  MLClassConstPtr  spruceFir     = mlClasses.GetMLClassPtr ("Spruce_Fir");

  int  lodgepolePineTrainCount = 0;
  int  spruceFirTrainCount     = 0;
  FeatureVectorList::iterator  idx;

  FeatureVectorListPtr  trainData = new FeatureVectorList (fileDesc, false, log, 10000);
  FeatureVectorListPtr  testData  = new FeatureVectorList (fileDesc, false, log, 10000);

  int  c = 0;

  for  (idx = images->begin ();  idx != images->end ();  idx++)
  {
    FeatureVectorPtr i = *idx;

    if  ((c % 5000) == 0)
      cout << c << endl;    // Progress indicator every 5,000 examples.

    if  (i->MLClass () == lodgepolePine)
    {
      if  (lodgepolePineTrainCount < 56404)
      {
        trainData->PushOnBack (i);
        lodgepolePineTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }
    else if  (i->MLClass () == spruceFir)
    {
      if  (spruceFirTrainCount < 42480)
      {
        trainData->PushOnBack (i);
        spruceFirTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }

    c++;
  }

  KKU::uint  numExamplesWritten = 0;
  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.data", 
                   trainData->AllFeatures (),
                   *trainData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.test", 
                   testData->AllFeatures (),
                   *testData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  delete  trainData;
  delete  testData;
  delete  images;
}  /* SplitForestCoverFile */
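Note that both SaveFeatureFile calls above reuse the same numExamplesWritten variable, so after the second call it holds only the count for the test file; use a separate counter per call if both totals are needed.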
Example No. 4
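// Merges several feature files into one destination file: each source file is loaded
// with its own driver, checked against the first file's attribute layout, and appended
// to srcData, which is optionally randomized and stratified before being written out.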
void   MergeFeatureFiles::Main ()
{
  if  (Abort ())
    return;

  {
    // Load the source files first.
    uint  srcIdx = 0;
    for  (srcIdx = 0;  srcIdx < srcFileNames.size ();  srcIdx++)
    {
      bool  cancelFlag  = false;
      bool  successful  = false;
      bool  changesMade = false;

      KKStr             srcFileName = srcFileNames[srcIdx];
      FeatureFileIOPtr  srcFormat   = srcFormats  [srcIdx];
      
      MLClassList   classes;
      
      FeatureVectorListPtr  s = NULL;

      s = srcFormat->LoadFeatureFile (srcFileName, classes, -1, cancelFlag, successful, changesMade, log);

      if  ((s == NULL)  ||  (!successful))
      {
        log.Level (-1) << endl << endl
                       << "MergeFeatureFiles::Main   ***ERROR***    Could not load file[" << srcFileName << "]" << endl
                       << endl;
        delete  s;  s = NULL;
        Abort (true);
        return;
      }

      if  (!srcData)
        srcData = new FeatureVectorList (s->FileDesc (), true);

      if  ((*(s->FileDesc ())) != (*(srcData->FileDesc ())))
      {
        // The last source file read does not have the same attribute sets as previous feature files already read.
        log.Level (-1) << endl << endl
                       << "MergeFeatureFiles::Main   ***ERROR***    Feature File[" << srcFileName << "] does not have the same Attributes as previous feature files already read in." << endl
                       << endl;
        Abort (true);
        delete  s;  s = NULL;
        return;
      }
      
      srcData->AddQueue (*s);
      s->Owner (false);
      delete  s;
    }
  }


  if  (!Abort ())
  {
    if  (randomize)
    {
      DateTime  dt = osGetLocalDateTime ();
      srcData->RandomizeOrder (dt.Seconds ());
      srcData->RandomizeOrder ();
    }

    if  (stratify)
    {
      FeatureVectorListPtr  stratifiedSrc = srcData->StratifyAmoungstClasses (numOfFolds, log);
      srcData->Owner (false);
      stratifiedSrc->Owner (true);
      delete  srcData;
      srcData = stratifiedSrc;
      stratifiedSrc = NULL;
    }

    bool  cancelFlag  = false;
    bool  successful  = false;

    uint  numExamplesWritten = 0;
    destFormat->SaveFeatureFile (destFileName, srcData->AllFeatures (), *srcData, numExamplesWritten, cancelFlag, successful, log);
    if  (!successful)
    {
      log.Level (-1) << endl << endl
                     << "MergeFeatureFiles::Main   ***ERROR***    Could not save to file[" << destFileName << "]" << endl
                     << endl;
      Abort (true);
    }
  }
}  /* Main */
Example No. 5
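// Re-synchronizes a feature-data file with the images in a directory: existing feature
// vectors are reused when the file's feature-extraction version matches the current
// producer; otherwise features are recomputed from the image files, and the updated
// list is saved back to disk whenever anything changed.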
FeatureVectorListPtr  FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                        const KKStr&          _dirName,
                                                        const KKStr&          _fileName, 
                                                        MLClassPtr            _unknownClass,
                                                        bool                  _useDirectoryNameForClassName,
                                                        MLClassList&          _mlClasses,
                                                        VolConstBool&         _cancelFlag,
                                                        bool&                 _changesMade,
                                                        KKB::DateTime&        _timeStamp,
                                                        RunLog&               _log
                                                      )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if  (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr  className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink  dirName: " << _dirName << endl
                  << "               fileName: " << _fileName << "  UnKnownClass: " << className << endl;

  KKStr  fullFeatureFileName = osAddSlash (_dirName) +  _fileName;

  bool  successful = true;

  KKStr fileNameToOpen;
  if  (_dirName.Empty ())
    fileNameToOpen = _fileName;
  else
    fileNameToOpen = osAddSlash (_dirName) + _fileName;

  bool  versionsAreSame = false;

  FeatureVectorListPtr  origFeatureVectorData 
        = LoadFeatureFile (fileNameToOpen, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if  (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if  (_cancelFlag)
  {
    delete  origFeatureVectorData;  origFeatureVectorData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorListPtr  origFeatureData = NULL;

  if  (successful  &&
       (&typeid (*origFeatureVectorData) == _fvProducerFactory->FeatureVectorListTypeId ())  &&
       ((*(origFeatureVectorData->FileDesc ())) ==  (*(_fvProducerFactory->FileDesc ())))
      )
  {
     origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete  origFeatureVectorData;
    origFeatureVectorData = NULL;
  }

  KKStr  fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr   fileNameList = osGetListOfFiles (fileSpec);

  if  (!fileNameList)
  {
    // There are no image files, so we need to return an empty list of image features.

    if  (origFeatureData->QueueSize () > 0)
      _changesMade = true;

    delete  origFeatureData;  origFeatureData = NULL;

    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr  fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if  (successful)
  {
    if  (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (fileNameToOpen);
    }

    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete  origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr  extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  KKStrList::iterator  fnIDX;

  KKStrPtr  imageFileName;

  kkuint32  numImagesFoundInOrigFeatureData = 0;
  kkuint32  numOfNewFeatureExtractions = 0;

  for  (fnIDX = fileNameList->begin ();  (fnIDX != fileNameList->end ())  &&  (!_cancelFlag);  ++fnIDX)
  {
    imageFileName = *fnIDX;

    // Debugging hook left in the original source for one specific image
    // (pv414-_002_20140414-162243_02068814-1261.bmp); harmless during normal runs.
    KKStr  rootName = osGetRootName (*imageFileName);
    if  (rootName == "pv414-_002_20140414-162243_02068814-1261")
      cout << "Stop Here." << endl;

    bool validImageFileFormat = SupportedImageFileFormat (*imageFileName);
    
    if  (!validImageFileFormat)
      continue;

    bool  featureVectorComputationSuccessful = false;

    FeatureVectorPtr  origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if  (origFV)
      numImagesFoundInOrigFeatureData++;

    if  (origFV  &&  versionsAreSame)
    {
      featureVectorComputationSuccessful = true;
      if  (_useDirectoryNameForClassName)
      {
        if  (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }

      else if  ((origFV->MLClass ()->UnDefined ())  &&  (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // We either DON'T have an original image, or the versions are not the same.

      KKStr  fullFileName = osAddSlash (_dirName) + (*imageFileName);
      FeatureVectorPtr fv = NULL;
      try
      {
        RasterPtr image = ReadImage (fullFileName);
        if  (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
        delete image;
        image = NULL;
        featureVectorComputationSuccessful = (fv != NULL);
      }
      catch  (...)
      {
        _log.Level (-1) << endl << endl
          << "FeatureDataReSink   ***ERROR***"  << endl
          << "       Exception occurred calling constructor 'ComputeFeatureVector'." << endl
          << endl;
        featureVectorComputationSuccessful = false;
        fv = NULL;
      }

      if  (!featureVectorComputationSuccessful)
      {
        _log.Level (-1) << "FeatureFileIO::FeatureDataReSink  *** ERROR ***, Processing Image File["
                        << *imageFileName << "]."
                        << endl;
        delete  fv;
        fv = NULL;
      }

      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << "  " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;

        if  ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  if  (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;
  
  extractedFeatures->Version (fvProducer->Version ());

  if  ((_changesMade)  &&  (!_cancelFlag))
  {
    //extractedFeatures->WriteImageFeaturesToFile (fullFeatureFileName, RawFormat, FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()));

    kkuint32  numExamplesWritten = 0;

    SaveFeatureFile (fullFeatureFileName,  
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     successful,
                     _log
                    );

    _timeStamp = osGetLocalDateTime ();
  }

  delete fvProducer;       fvProducer      = NULL;
  delete fileNameList;     fileNameList    = NULL;
  delete origFeatureData;  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink  Exiting  Dir: "  << _dirName << endl;

  return  extractedFeatures;
}  /* FeatureDataReSink */
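For reference, here is a hedged usage sketch of FeatureDataReSink built only from the signature shown above. The driver and factory are assumed to have been obtained elsewhere (for example, a driver from FeatureFileIO::FileFormatFromStr as in Example No. 1); the directory and file names are placeholders, and VolConstBool is taken to be the library's const-volatile bool typedef.

// Hypothetical call site; 'driver' and 'fvProducerFactory' are assumed available, paths are placeholders.
RunLog         log;
MLClassList    classes;
VolConstBool   cancelFlag  = false;
bool           changesMade = false;
KKB::DateTime  timeStamp;

FeatureVectorListPtr  examples =
      driver->FeatureDataReSink (fvProducerFactory,                   // FactoryFVProducerPtr
                                 "C:\\TrainingImages\\Copepod",       // _dirName  (placeholder)
                                 "Copepod.data",                      // _fileName (placeholder)
                                 MLClass::GetUnKnownClassStatic (),   // class to assign when none is known
                                 false,                               // don't force the directory name as the class
                                 classes,
                                 cancelFlag,
                                 changesMade,
                                 timeStamp,
                                 log
                                );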
Example No. 6
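// Evaluates one random-sample job: the first numExamplesToKeep examples of the selected
// ordering become the training set, every class is checked to still be represented, and
// a CrossValidation run in validation-only mode records accuracy, timing, and
// support-vector statistics.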
void  RandomSampleJob::EvaluteNode (FeatureVectorListPtr  validationData,
                                    MLClassListPtr     classes
                                   )
{
  log.Level (9) << "  " << endl;
  log.Level (9) << "  " << endl;
  log.Level (9) << "RandomSampleJob::EvaluteNode JobId[" << jobId << "] Ordering[" << orderingNum << "]" << endl;

  status = rjStarted;

  config->CompressionMethod (BRnoCompression);
  config->KernalType        (kernelType);
  config->EncodingMethod    (encodingMethod);
  config->C_Param           (c);
  config->Gamma             (gamma);

  FileDescPtr fileDesc = config->FileDesc ();


  const FeatureVectorListPtr  srcExamples = orderings->Ordering (orderingNum);

  if  (numExamplesToKeep > srcExamples->QueueSize ())
  {
    log.Level (-1) << endl << endl << endl
                   << "RandomSampleJob::EvaluteNode     *** ERROR ***    RandomExamples too large" << endl
                   << endl
                   << "                     RandomExamples > num in Training set." << endl
                   << endl;
    osWaitForEnter ();
    exit (-1);
  }



  FeatureVectorListPtr  trainingData = new FeatureVectorList (srcExamples->FileDesc (), false, log, 10000);
  for  (int x = 0;  x < numExamplesToKeep;  x++)
  {
    trainingData->PushOnBack (srcExamples->IdxToPtr (x));
  }

  bool  allClassesRepresented = true;
  {
    MLClassListPtr  classesInRandomSample = trainingData->ExtractListOfClasses ();
    if  (*classesInRandomSample != (*classes))
    {
      log.Level (-1) << endl << endl
                     << "RandomSampling    *** ERROR ***" << endl
                     << endl
                     << "                  Missing Classes From Random Sample." << endl
                     << endl
                     << "MLClasses [" << classes->ToCommaDelimitedStr ()               << "]" << endl
                     << "Found     [" << classesInRandomSample->ToCommaDelimitedStr () << "]" << endl
                     << endl;

      allClassesRepresented = false;

    }

    delete  classesInRandomSample;  classesInRandomSample = NULL;
  }


  //if  (!allClassesRepresented)
  //{
  //  accuracy  = 0.0;
  //  trainTime = 0.0;
  //  testTime  = 0.0;
  //}
  //else
  {
    delete  crossValidation;  crossValidation = NULL;

    compMethod = config->CompressionMethod ();

    bool  cancelFlag = false;

    crossValidation = new CrossValidation 
                              (config,
                               trainingData,
                               classes,
                               10,
                               false,   //  False = Features are not normalized already.
                               trainingData->FileDesc (),
                               log,
                               cancelFlag
                              );

    crossValidation->RunValidationOnly (validationData, NULL);

    accuracy  = crossValidation->Accuracy ();
    trainTime = crossValidation->TrainTimeMean ();
    testTime  = crossValidation->TestTimeMean ();
    supportVectors = crossValidation->SupportPointsMean ();
  }

  delete  trainingData;

  status = rjDone;
}  /* EvaluteNode */
Example No. 7
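// Grades classifier output against ground truth one hierarchy level at a time: for each
// level a level-specific configuration and training set are built, a TrainingProcess2 /
// Classifier2 pair is trained, a copy of the ground-truth examples is classified, and
// the predictions are compared with GradeExamplesAgainstGroundTruth.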
void  GradeClassification::GradeUsingTrainingConfiguration ()
{
  log.Level (10) << "GradeClassification::GradeUsingTrainingConfiguration" << endl;

  delete  mlClasses;
  mlClasses = config->ExtractClassList ();

  bool  changesMadeToTrainingLibraries = false;

  KKU::DateTime  latestImageTimeStamp;

  log.Level (10) << "GradeUsingTrainingConfiguration  Loading Training Data." << endl;

  FeatureVectorListPtr  trainingData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag);
  if  (!trainingData)
  {
    log.Level (-1) << endl << endl << endl
                   << "GradeClassification::GradeUsingTrainingConfiguration      ***ERROR***" << endl
                   << endl
                   << "               Could not load training data for Configuration File[" << configFileName << "]" << endl
                   << endl
                   << endl;
    Abort (true);
    return;
  }

  uint  maxLevelsOfHierarchy = config->NumHierarchialLevels ();
  uint  hierarchyLevel = 0;

  while  (hierarchyLevel < maxLevelsOfHierarchy)
  {
    log.Level (10) << "GradeUsingTrainingConfiguration    Hierarchy Level[" << hierarchyLevel << "]" << endl;

    TrainingConfiguration2Ptr  configThisLevel = config->GenerateAConfiguraionForAHierarchialLevel (hierarchyLevel);

    FeatureVectorListPtr  trainingDataThisLevel = trainingData->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevel  = groundTruth->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevelClassified  = new FeatureVectorList (*groundTruthThisLevel, true);

    KKStr  statusMessage;

    TrainingProcess2 trainer (configThisLevel,
                              trainingDataThisLevel,
                              NULL,                               // No report file,
                              trainingDataThisLevel->FileDesc (),
                              log,
                              false,                              // false = features are not already normalized.
                              cancelFlag,
                              statusMessage
                             );
    trainer.CreateModelsFromTrainingData ();

    {
      Classifier2 classifier (&trainer, log);
      FeatureVectorList::iterator  idx;

      for  (idx = groundTruthThisLevelClassified->begin ();  idx != groundTruthThisLevelClassified->end ();  idx++)
      {
        FeatureVectorPtr  fv = *idx;
        MLClassConstPtr  ic = classifier.ClassifyAImage (*fv);
        fv->MLClass (ic);
      }
    }

    GradeExamplesAgainstGroundTruth (groundTruthThisLevelClassified, groundTruthThisLevel);

    delete  groundTruthThisLevelClassified;  groundTruthThisLevelClassified = NULL;
    delete  groundTruthThisLevel;            groundTruthThisLevel           = NULL;
    delete  trainingDataThisLevel;           trainingDataThisLevel          = NULL;

    hierarchyLevel++;
  }

  ReportResults ();

  delete  trainingData;
}  /* GradeUsingTrainingConfiguration */