Example #1
void  Strip ()
{
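  // Loads a validated feature data file in C4.5 format, drops every example whose
  // file name starts with "SML" or "SMP", and writes the remaining examples out
  // to a new file.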
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  RunLog  log;

  FeatureFileIOPtr driver =  FeatureFileIO::FileFormatFromStr ("C45");

  MLClassList  mlClasses;
  FeatureVectorListPtr  data = 
        driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data",
                                 mlClasses,
                                 -1,
                                 cancelFlag,
                                 successful,
                                 changesMade,
                                 log
                               );

  FeatureVectorListPtr  stripped = new FeatureVectorList (data->FileDesc (), false);

  FeatureVectorList::const_iterator  idx;
  for  (idx = data->begin ();  idx != data->end ();  ++idx)
  {
    FeatureVectorPtr  fv = *idx;
    KKStr  fn = fv->ExampleFileName ();
    // Keep only the examples whose file names do not start with "SML" or "SMP".
    if  (!fn.StartsWith ("SML")  &&  !fn.StartsWith ("SMP"))
      stripped->PushOnBack (fv);
  }


  kkuint32  numExamplesWritten = 0;  // Output parameter; receives the number of examples written.
  driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data",
                           stripped->AllFeatures (), 
                           *stripped, 
                           numExamplesWritten,
                           cancelFlag,
                           successful,
                           log
                          );

  // 'stripped' does not own its contents while 'data' (presumably) does;
  // delete the non-owning container first, then the loaded data.
  delete  stripped;  stripped = NULL;
  delete  data;      data     = NULL;
}
Example #2
void   RandomSplitJobManager::RetrieveRandomSplit (int                    splitNum,
                                                   FeatureVectorListPtr&  trainData,
                                                   FeatureVectorListPtr&  testData
                                                  )
{
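  // Retrieves ordering 'splitNum' and splits it into training and test lists,
  // taking roughly 'splitFraction' of each class's examples for training.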
  trainData = NULL;
  testData  = NULL;

  if  ((splitNum < 0)  ||  (splitNum >= (int)splits->NumOfOrderings ()))
  {
    log.Level (-1) << endl << endl
                   << "RandomSplitJobManager::RetrieveRandomSplit    ***ERROR***   Invalid SplitNum[" << splitNum << "]" << endl
                   << endl;
    return;
  }
  
  const FeatureVectorListPtr  ordering = splits->Ordering (splitNum);

  trainData = ordering->ManufactureEmptyList (false);
  testData  = ordering->ManufactureEmptyList (false);

  MLClassList::const_iterator  classIDX;
  for  (classIDX = mlClasses->begin ();  classIDX != mlClasses->end ();  classIDX++)
  {
    MLClassPtr  ic = *classIDX;

    FeatureVectorListPtr  examplesThisClass = ordering->ExtractExamplesForAGivenClass (ic);
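    // Round to the nearest whole example: 'splitFraction' of this class's examples go to training.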
    int  numTrainExamplesNeeded = (int)(0.5 + (double)(examplesThisClass->QueueSize ()) * (double)splitFraction);

    int  numExamplesAddToTrainSet = 0;

    FeatureVectorList::const_iterator  idx;
    for  (idx = examplesThisClass->begin ();  idx != examplesThisClass->end ();  idx++)
    {
      FeatureVectorPtr  example = *idx;

      if  (numExamplesAddToTrainSet < numTrainExamplesNeeded)
      {
        trainData->PushOnBack (example);
        numExamplesAddToTrainSet++;
      }
      else
      {
        testData->PushOnBack (example);
      }
    }

    // The extracted per-class list is a holding container that does not own its
    // contents; free the container only.
    delete  examplesThisClass;  examplesThisClass = NULL;
  }
}  /* RetrieveRandomSplit */
Example #3
FeatureVectorListPtr  FeatureEncoder::EncodeAllExamples (const FeatureVectorListPtr  srcData)
{
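  // Builds a FileDesc describing the encoded feature layout, then encodes each
  // source example into a new list that owns the newly allocated copies.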
  FileDescConstPtr  encodedFileDesc = CreateEncodedFileDesc (NULL);

  FeatureVectorListPtr  encodedExamples = new FeatureVectorList (encodedFileDesc, true);

  FeatureVectorList::const_iterator  idx;

  for  (idx = srcData->begin ();  idx !=  srcData->end ();   idx++)
  {
    const FeatureVectorPtr srcExample = *idx;
    FeatureVectorPtr  encodedExample = EncodeAExample (encodedFileDesc, srcExample);
    encodedExamples->PushOnBack (encodedExample);
  }

  return  encodedExamples;
}  /* EncodeAllExamples */
Example #4
FeatureVectorListPtr  FeatureEncoder2::EncodeAllExamples (const FeatureVectorListPtr  srcData)
{
  FeatureVectorListPtr  encodedExamples = new FeatureVectorList (encodedFileDesc, 
                                                                  true                  // Will own the contents 
                                                                );

  FeatureVectorList::const_iterator  idx;

  for  (idx = srcData->begin ();  idx !=  srcData->end ();   idx++)
  {
    const FeatureVectorPtr srcExample = *idx;
    FeatureVectorPtr  encodedExample = EncodeAExample (srcExample);
    encodedExamples->PushOnBack (encodedExample);
  }

  return  encodedExamples;
}  /* EncodeAllExamples */
Example #5
kkint32  FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr   src)  const
{
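  // Counts the sparse feature nodes needed to encode 'src': one node for every
  // nonzero encoded feature value, plus one extra node per example (see below).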
  kkint32  xSpaceNodesNeeded = 0;
  FeatureVectorList::const_iterator  idx;
  for  (idx = src->begin ();  idx != src->end ();  ++idx)
  {
    FeatureVectorPtr fv = *idx;
    const float*  featureData = fv->FeatureData ();

    for  (kkint32 x = 0;  x < numOfFeatures; x++)
    {
      float  featureVal = featureData [srcFeatureNums[x]];
      kkint32  y = destFeatureNums[x];
  
      switch (destWhatToDo[x])
      {
      case  FeWhatToDo::FeAsIs:
        if  (featureVal != 0.0f)
          xSpaceNodesNeeded++;
        break;

      case  FeWhatToDo::FeBinary:
        for  (kkint32 z = 0;  z < cardinalityDest[x];  z++)
        {
          float  bVal = (float)((kkint32)featureVal == z);
          if  (bVal != 0.0f)
            xSpaceNodesNeeded++;
          y++;
        }
        break;

      case  FeWhatToDo::FeScale:
        if  (featureVal != 0.0f)
          xSpaceNodesNeeded++;
        break;
      }
    }
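    // One extra node per example, presumably the end-of-row terminator used by
    // the sparse (xSpace) representation.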
    xSpaceNodesNeeded++;
  }

  return xSpaceNodesNeeded;
}  /* DetermineNumberOfNeededXspaceNodes */
Example #6
void  AbundanceCorrectionStatsBuilder::RemoveDuplicateImages ()
{
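  // Pools the training-library and other-class examples into one non-owning list,
  // reports any duplicate images found, and deletes the duplicates from both
  // source lists.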
  *report << endl << endl;

  FeatureVectorListPtr  allExamples = new FeatureVectorList (fileDesc, 
                                                             false,    // 'false' = will not own contents.
                                                             log
                                                            ); 
  allExamples->AddQueue (*trainLibData);
  allExamples->AddQueue (*otherClassData);
  
  DuplicateImages  dupChecker (allExamples, log);
  if  (dupChecker.DuplicatesFound ())
  {
    *report << "DUPLICATE IMAGES DETECTED." << endl;
    dupChecker.ReportDuplicates (*report);
    
    FeatureVectorListPtr  dups = dupChecker.ListOfExamplesToDelete ();
    if  (dups)
    {
      FeatureVectorList::iterator  idx;
      for  (idx = dups->begin ();  idx != dups->end ();  ++idx)
      {
        FeatureVectorPtr  fv = *idx;
        trainLibData->DeleteEntry (fv);
        otherClassData->DeleteEntry (fv);
      }
      delete  dups;
      dups = NULL;
    }
  }
  else
  {
    *report << "No duplicates detected." << endl;
  }

  delete  allExamples;  allExamples = NULL;

  *report << endl << endl;
}  /* RemoveDuplicateImages */
Example #7
void  SplitForestCoverFile ()
{
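  // Splits the forest-cover feature file into two-class train and test files,
  // using fixed per-class training counts for Lodgepole_Pine and Spruce_Fir.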
  RunLog  log;

  MLClassConstList  mlClasses;  
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  FeatureVectorListPtr  images = FeatureFileIOC45::Driver ()->LoadFeatureFile 
                    ("covtype_alpha.data", mlClasses, -1, cancelFlag, successful, changesMade, log);

  FileDescPtr  fileDesc = images->FileDesc ();

  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();

  MLClassConstPtr  lodgepolePine = mlClasses.GetMLClassPtr ("Lodgepole_Pine");
  MLClassConstPtr  spruceFir     = mlClasses.GetMLClassPtr ("Spruce_Fir");

  int  lodgepolePineTrainCount = 0;
  int  spruceFirTrainCount     = 0;
  FeatureVectorList::iterator  idx;

  FeatureVectorListPtr  trainData = new FeatureVectorList (fileDesc, false, log, 10000);
  FeatureVectorListPtr  testData  = new FeatureVectorList (fileDesc, false, log, 10000);

  int  c = 0;

  for  (idx = images->begin ();  idx != images->end ();  idx++)
  {
    FeatureVectorPtr i = *idx;

    // Progress indicator every 5000 examples.
    if  ((c % 5000) == 0)
      cout << c << endl;

    if  (i->MLClass () == lodgepolePine)
    {
      if  (lodgepolePineTrainCount < 56404)
      {
        trainData->PushOnBack (i);
        lodgepolePineTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }
    else if  (i->MLClass () == spruceFir)
    {
      if  (spruceFirTrainCount < 42480)
      {
        trainData->PushOnBack (i);
        spruceFirTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }

    c++;
  }

  KKU::uint  numExamplesWritten = 0;
  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.data", 
                   trainData->AllFeatures (),
                   *trainData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.test", 
                   testData->AllFeatures (),
                   *testData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  delete  trainData;
  delete  testData;
  delete  images;
}  /* SplitForestCoverFile */
Example #8
FeatureVectorListPtr  FeatureFileIO::LoadInSubDirectoryTree 
                         (FactoryFVProducerPtr  _fvProducerFactory,
                          KKStr                 _rootDir,
                          MLClassList&          _mlClasses,
                          bool                  _useDirectoryNameForClassName,
                          VolConstBool&         _cancelFlag, 
                          bool                  _rewriteRootFeatureFile,
                          RunLog&               _log
                         )
{
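  // Recursively loads feature data for '_rootDir' and all subdirectories,
  // optionally rebuilding the root directory's feature file first, and prefixes
  // each example's file name with its subdirectory path.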
  _log.Level (10) << "FeatureFileIO::LoadInSubDirectoryTree    rootDir[" << _rootDir << "]." << endl;

  osAddLastSlash (_rootDir);

  KKStr  featureFileName ("");
  KKStr  fullFeatureFileName ("");

  if  (!_rootDir.Empty ())
  {
    featureFileName = osGetRootNameOfDirectory (_rootDir) + ".data";
    fullFeatureFileName = _rootDir + featureFileName;
  }
  else
  {
    featureFileName     = "Root.data";
    fullFeatureFileName = "Root.data";
  }

  MLClassPtr  unKnownClass = _mlClasses.GetUnKnownClass ();
  if  (_useDirectoryNameForClassName)
  {
    KKStr className = MLClass::GetClassNameFromDirName (_rootDir);
    unKnownClass    = _mlClasses.GetMLClassPtr (className);
  }

  bool  changesMade = false;

  FeatureVectorListPtr  dirImages = NULL;

  if  (_rewriteRootFeatureFile)
  {
    DateTime  timeStamp;
    dirImages = FeatureDataReSink (_fvProducerFactory,
                                   _rootDir,
                                   featureFileName,
                                   unKnownClass,
                                   _useDirectoryNameForClassName,
                                   _mlClasses,
                                   _cancelFlag,
                                   changesMade,
                                   timeStamp,
                                   _log
                                  );
    if  (_useDirectoryNameForClassName)
    {
      FeatureVectorList::iterator  idx;
      for  (idx = dirImages->begin ();  idx != dirImages->end ();  idx++)
      {
        if  ((*idx)->MLClass () != unKnownClass)
        {
          (*idx)->MLClass (unKnownClass);
          changesMade = true;
        }
      }

      if  (changesMade)
      {
        KKStr  fullFileName = osAddSlash (_rootDir) + featureFileName;
        kkuint32  numExamplesWritten = 0;
        bool  cancel     = false;
        bool  successful = false;
        SaveFeatureFile (fullFileName, 
                         dirImages->AllFeatures (), 
                         *dirImages, 
                         numExamplesWritten,
                         cancel,
                         successful,
                         _log
                        );
      }
    }
  }
  else
  {
    dirImages =  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // Now that we have processed all image files in "rootDir",
  // let's process any subdirectories.

  KKStr  dirSearchPath = osAddSlash (_rootDir) + "*.*";

  KKStrListPtr  subDirectories = osGetListOfDirectories (dirSearchPath);
  if  (subDirectories)
  {
    KKStrList::iterator  idx;

    for  (idx = subDirectories->begin ();  (idx != subDirectories->end ()  &&  (!_cancelFlag));   idx++)
    {
      KKStr  subDirName (**idx);
      if  (subDirName == "BorderImages")
      {
        // We ignore this directory.
        continue;
      }

      KKStr  newDirPath = osAddSlash (_rootDir) + subDirName;

      FeatureVectorListPtr  subDirExamples = LoadInSubDirectoryTree (_fvProducerFactory,
                                                                     newDirPath, 
                                                                     _mlClasses, 
                                                                     _useDirectoryNameForClassName, 
                                                                     _cancelFlag,
                                                                     true,     // true = ReWriteRootFeatureFile
                                                                     _log
                                                                    );
      osAddLastSlash (subDirName);

      // We want to add the directory path to the ExampleFileName so that we can later locate the source image.
      for  (auto fv: *subDirExamples)
      {
        KKStr  newImageFileName = subDirName + fv->ExampleFileName ();
        fv->ExampleFileName (newImageFileName);
      }

      dirImages->AddQueue (*subDirExamples);
      subDirExamples->Owner (false);
      delete  subDirExamples;
      subDirExamples = NULL;
    }

    delete  subDirectories;  subDirectories = NULL;
  }

  _log.Level (10) << "LoadInSubDirectoryTree - Done" << endl;

  return  dirImages;
}  /* LoadInSubDirectoryTree */
Example #9
/**
 * @brief Runs M train-then-test passes, one per ordering.
 *
 * @param[in] numExamplsToUseForTraining The number of examples in each ordering (group)
 *            to use for training; the remaining examples in the ordering are used
 *            as test data.
 */
void  CrossValidationMxN::RunTrainAndTest (kkuint32  numExamplsToUseForTraining,
                                           RunLog&   log
                                          )
{ 
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  kkuint32  cvIDX = 0;

  MLClassListPtr  mlClasses = orderings->MLClasses ();

  for  (cvIDX = 0;  cvIDX < numOfOrderings;  cvIDX++)
  {
    FeatureVectorListPtr  data = orderings->Ordering (cvIDX);

    FeatureVectorList  trainingData (fileDesc, false);
    FeatureVectorList  testData     (fileDesc, false);

    FeatureVectorList::iterator  fvIDX;

    for  (fvIDX = data->begin ();  fvIDX != data->end ();  fvIDX++)
    {
      FeatureVectorPtr example = *fvIDX;

      if  (trainingData.QueueSize () < numExamplsToUseForTraining)
        trainingData.PushOnBack (example);
      else
        testData.PushOnBack (example);
    }

    CrossValidationPtr  cv = new CrossValidation (config,
                                                  &trainingData,
                                                  mlClasses,
                                                  numOfFolds,
                                                  false,
                                                  fileDesc,
                                                  log,
                                                  cancelFlag
                                                 );

    cv->RunValidationOnly (&testData, 
                           NULL,        // No McNemars test going to be performed.
                           log
                          );

    accuracies.push_back    (cv->Accuracy       ());
    supportPoints.push_back ((float)cv->NumOfSupportVectors ());
    trainingTimes.push_back (cv->TrainTimeTotal ());
    testTimes.push_back     (cv->TestTimeTotal  ());

    meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);

    delete  cv;
  }

  CalcMeanAndStdDev (accuracies,      accuracyMean,       accuracyStdDev);
  CalcMeanAndStdDev (supportPoints,   supportPointsMean,  supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes,   trainingTimeMean,   trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes,       testTimeMean,       testTimeStdDev);

  double  factor = 1.0 / (double)numOfOrderings;

  meanConfusionMatrix->FactorCounts (factor);
}  /* RunTrainAndTest */
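For reference, a minimal self-contained sketch of the mean/standard-deviation reduction that 'CalcMeanAndStdDev' presumably performs over each metric vector (its actual signature is not shown above; the names and float types here are assumptions):

#include <cmath>
#include <vector>

// Hypothetical stand-in for CalcMeanAndStdDev: computes the population mean and
// standard deviation of one metric vector (accuracies, training times, ...).
static void  CalcMeanAndStdDevSketch (const std::vector<float>&  values,
                                      float&                     mean,
                                      float&                     stdDev
                                     )
{
  mean   = 0.0f;
  stdDev = 0.0f;
  if  (values.empty ())
    return;

  double  sum = 0.0;
  for  (float v : values)
    sum += v;
  mean = (float)(sum / (double)values.size ());

  double  sumSqDev = 0.0;
  for  (float v : values)
    sumSqDev += ((double)v - (double)mean) * ((double)v - (double)mean);
  stdDev = (float)std::sqrt (sumSqDev / (double)values.size ());
}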
Example #10
void  GradeClassification::GradeExamplesAgainstGroundTruth (FeatureVectorListPtr  examplesToGrade,
                                                            FeatureVectorListPtr  groundTruth
                                                           )
{
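  // Matches each classified example to its ground-truth counterpart by root file
  // name and accumulates confusion matrices, overall and per hierarchy level.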
  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth" << endl;

  groundTruth->SortByRootName ();

  MLClassConstPtr  unknownClass = mlClasses->GetUnKnownClass ();

  MLClassConstListPtr classes = NULL;
  {
    MLClassConstListPtr examplesToGradeClasses = examplesToGrade->ExtractMLClassConstList ();
    MLClassConstListPtr groundTruthClasses     = groundTruth->ExtractMLClassConstList ();
    classes = MLClassConstList::MergeClassList (*examplesToGradeClasses, *groundTruthClasses);
    delete  examplesToGradeClasses;
    delete  groundTruthClasses;
  }

  uint16  maxHierarchialLevel = 0;
  {
    MLClassConstList::iterator  idx;
    for  (idx = classes->begin ();  idx != classes->end ();  idx++)
    {
      MLClassConstPtr  c = *idx;
      maxHierarchialLevel = Max (maxHierarchialLevel, c->NumHierarchialLevels ());
    }
  }

  // Create ConfusionMatrix objects for each possible level of the hierarchy.  The 'resultsSummary'
  // vector will end up owning the instances of 'ConfusionMatrix2', and the destructor will be
  // responsible for deleting them.
  uint  curLevel = 0;
  vector<ConfusionMatrix2Ptr>  cmByLevel;
  for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
  {
    MLClassConstListPtr  classesThisLevel = classes->ExtractListOfClassesForAGivenHierarchialLevel (curLevel);
    ConfusionMatrix2Ptr  cm = new ConfusionMatrix2 (*classesThisLevel);
    cmByLevel.push_back (cm);
    delete  classesThisLevel;  // Assuming ConfusionMatrix2 keeps its own copy of the class list.

  ConfusionMatrix2  cm (*classes);


  ImageFeaturesList::iterator  idx;

  for  (idx = examplesToGrade->begin ();  idx !=  examplesToGrade->end ();  idx++)
  {
    ImageFeaturesPtr  exampleToGrade = *idx;
    MLClassConstPtr  predictedClass = exampleToGrade->MLClass ();
    float          origSize       = exampleToGrade->OrigSize ();
    float          probability    = exampleToGrade->Probability ();

    KKStr  rootName = osGetRootName (exampleToGrade->ImageFileName ());
    FeatureVectorPtr  groundTruthExample = groundTruth->LookUpByRootName (rootName);
    MLClassConstPtr  groundTruthClass = unknownClass;
    if  (groundTruthExample)
      groundTruthClass = groundTruthExample->MLClass ();

    cm.Increment (groundTruthClass, predictedClass, (int)origSize, probability, log);

    for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
    {
      MLClassConstPtr  groundTruthClasssThisLevel = groundTruthClass->MLClassForGivenHierarchialLevel (curLevel);
      MLClassConstPtr  predictedClassThisLevel    = predictedClass->MLClassForGivenHierarchialLevel   (curLevel);

      cmByLevel[curLevel]->Increment (groundTruthClasssThisLevel, predictedClassThisLevel, (int)origSize, probability, log);
    }
  }    


  //cm.PrintTrueFalsePositivesTabDelimited (*report);

  {
    // report Hierarchial results
    for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
    {
      log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth   Printing Level[" << curLevel << "]" << endl;
      *report << endl << endl << endl
              << "Confusion Matrix   Training Level[" << maxHierarchialLevel << "]       Preduction Level[" << (curLevel + 1) << "]" << endl
              << endl;
      cmByLevel[curLevel]->PrintConfusionMatrixTabDelimited (*report);
      resultsSummary.push_back (SummaryRec (maxHierarchialLevel, curLevel + 1, cmByLevel[curLevel]));
    }

    *report << endl << endl << endl;
  }

  delete  classes;  classes = NULL;

  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth     Exiting"  << endl;
}  /* GradeExamplesAgainstGroundTruth */
Example #11
void  GradeClassification::GradeUsingTrainingConfiguration ()
{
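  // For each hierarchy level in the configuration: train a classifier on the
  // training-library data, classify the ground-truth examples at that level, and
  // grade the predictions against the ground truth.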
  log.Level (10) << "GradeClassification::GradeUsingTrainingConfiguration" << endl;

  delete  mlClasses;
  mlClasses = config->ExtractClassList ();

  bool  changesMadeToTrainingLibraries = false;

  KKU::DateTime  latestImageTimeStamp;

  log.Level (10) << "GradeUsingTrainingConfiguration  Loading Training Data." << endl;

  FeatureVectorListPtr  trainingData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag);
  if  (!trainingData)
  {
    log.Level (-1) << endl << endl << endl
                   << "GradeClassification::GradeUsingTrainingConfiguration      ***ERROR***" << endl
                   << endl
                   << "               Could not load training data for Configuration File[" << configFileName << "]" << endl
                   << endl
                   << endl;
    Abort (true);
    return;
  }

  uint  maxLevelsOfHierarchy = config->NumHierarchialLevels ();
  uint  hierarchyLevel = 0;

  while  (hierarchyLevel < maxLevelsOfHierarchy)
  {
    log.Level (10) << "GradeUsingTrainingConfiguration    Hierarchy Level[" << hierarchyLevel << "]" << endl;

    TrainingConfiguration2Ptr  configThisLevel = config->GenerateAConfiguraionForAHierarchialLevel (hierarchyLevel);

    FeatureVectorListPtr  trainingDataThisLevel = trainingData->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevel  = groundTruth->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevelClassified  = new FeatureVectorList (*groundTruthThisLevel, true);

    KKStr  statusMessage;

    TrainingProcess2 trainer (configThisLevel,
                              trainingDataThisLevel,
                              NULL,                               // No report file,
                              trainingDataThisLevel->FileDesc (),
                              log,
                              false,                              // false = features are not already normalized.
                              cancelFlag,
                              statusMessage
                             );
    trainer.CreateModelsFromTrainingData ();

    {
      Classifier2 classifier (&trainer, log);
      FeatureVectorList::iterator  idx;

      for  (idx = groundTruthThisLevelClassified->begin ();  idx != groundTruthThisLevelClassified->end ();  idx++)
      {
        FeatureVectorPtr  fv = *idx;
        MLClassConstPtr  ic = classifier.ClassifyAImage (*fv);
        fv->MLClass (ic);
      }
    }

    GradeExamplesAgainstGroundTruth (groundTruthThisLevelClassified, groundTruthThisLevel);

    delete  groundTruthThisLevelClassified;  groundTruthThisLevelClassified = NULL;
    delete  groundTruthThisLevel;            groundTruthThisLevel           = NULL;
    delete  trainingDataThisLevel;           trainingDataThisLevel          = NULL;

    hierarchyLevel++;
  }

  ReportResults ();

  delete  trainingData;
}  /* GradeUsingTrainingConfiguration */