Example #1
FeatureVectorListPtr  FeatureEncoder::CreateEncodedFeatureVector (FeatureVectorList&  srcData)
{
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  encodedFeatureVectorList = new FeatureVectorList (destFileDesc, true);

  FeatureVectorList::iterator  idx;
  for  (idx = srcData.begin ();   idx != srcData.end ();  idx++)
  {
    FeatureVectorPtr  srcExample = *idx;
    XSpacePtr  encodedData = EncodeAExample (srcExample);

    kkint32  zed = 0;
    FeatureVectorPtr  encodedFeatureVector = new FeatureVector (codedNumOfFeatures);
    while  (encodedData[zed].index != -1)
    {
      encodedFeatureVector->AddFeatureData (encodedData[zed].index, (float)encodedData[zed].value);
      zed++;
    }

    encodedFeatureVector->MLClass (srcExample->MLClass ());
    encodedFeatureVectorList->PushOnBack (encodedFeatureVector);

    delete  encodedData;
    encodedData = NULL;
  }

  return  encodedFeatureVectorList;
}  /* CreateEncodedFeatureVector */
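
A minimal call-site sketch; 'encoder' and 'srcData' are illustrative names assumed to be set up elsewhere:

// Hypothetical usage of CreateEncodedFeatureVector.
FeatureVectorListPtr  encoded = encoder.CreateEncodedFeatureVector (srcData);

// The returned list was constructed with owner = true (or is a duplicate of
// the source's contents), so deleting it also deletes the FeatureVector
// instances it holds.
delete  encoded;  encoded = NULL;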
Example #2
/**
 * @brief  Left over from the BitReduction days; all code was removed except the path that handles the NO bit-reduction option.
 * @param[in]  srcExamples  The list of examples you want to attempt to reduce.
 * @param[out] compressedExamples  The reduced list of examples.
 * @param[in]  assignments  Class assignments; unused now that only the no-compression path remains.
 */
void  FeatureEncoder::CompressExamples (FeatureVectorListPtr    srcExamples,
                                        FeatureVectorListPtr    compressedExamples,
                                        ClassAssignments&       assignments
                                       )
{
  compressedExamples->AddQueue (*srcExamples);
  compressedExamples->Owner (false);
}  /* CompressExamples */
Example #3
FeatureVectorListPtr  FeatureEncoder::EncodeAllExamples (const FeatureVectorListPtr  srcData)
{
  FileDescConstPtr  encodedFileDesc = CreateEncodedFileDesc (NULL);

  FeatureVectorListPtr  encodedExamples = new FeatureVectorList (encodedFileDesc, true);

  FeatureVectorList::const_iterator  idx;

  for  (idx = srcData->begin ();  idx !=  srcData->end ();   idx++)
  {
    const FeatureVectorPtr srcExample = *idx;
    FeatureVectorPtr  encodedExample = EncodeAExample (encodedFileDesc, srcExample);
    encodedExamples->PushOnBack (encodedExample);
  }

  return  encodedExamples;
}  /* EncodeAllExamples */
Example #4
void  FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr&          _fileName, 
                                                   FeatureNumListConst&  _selFeatures,
                                                   FeatureVectorList&    _examples,
                                                   VolConstBool&         _cancelFlag,
                                                   bool&                 _successful,
                                                   RunLog&               _log
                                                  )
{
  kkuint32  numExamplesWritten = 0;
  SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log);

  if  (_cancelFlag  ||  (!_successful))
    return;

  if  (_examples.QueueSize () > 64000)
  {
    kkint32  numPartsNeeded = (_examples.QueueSize () / 64000);
    if  ((_examples.QueueSize () % 64000) > 0)
      numPartsNeeded++;

    kkuint32  maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1;

    kkint32  partNum = 0;
    FeatureVectorList::const_iterator idx = _examples.begin ();

    while  ((idx != _examples.end ())  &&  (_successful)  &&  (!_cancelFlag))
    {
      FeatureVectorListPtr  part = _examples.ManufactureEmptyList (false);

      while  ((idx != _examples.end ())  &&  (part->QueueSize () < maxPartSize))
      {
        part->PushOnBack (*idx);
        idx++;
      }

      KKStr  partFileName = osRemoveExtension (_fileName) + "-" + 
                            StrFormatInt (partNum, "00") + "." +
                            osGetFileExtension (_fileName);

      SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log);

      partNum++;
      delete  part; part = NULL;
    }
  }
}  /* SaveFeatureFileMultipleParts */
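
For example, with 150,000 examples the method first writes the full file, then computes numPartsNeeded = 150000 / 64000 = 2, bumped to 3 by the non-zero remainder, and maxPartSize = 150000 / 3 + 1 = 50001; assuming _fileName is "features.data" and StrFormatInt's "00" mask yields two-digit numbers, the parts are written as features-00.data, features-01.data, and features-02.data.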
Example #5
FeatureVectorListPtr  FeatureEncoder2::EncodeAllExamples (const FeatureVectorListPtr  srcData)
{
  FeatureVectorListPtr  encodedExamples = new FeatureVectorList (encodedFileDesc, 
                                                                  true                  // Will own the contents 
                                                                );

  FeatureVectorList::const_iterator  idx;

  for  (idx = srcData->begin ();  idx !=  srcData->end ();   idx++)
  {
    const FeatureVectorPtr srcExample = *idx;
    FeatureVectorPtr  encodedExample = EncodeAExample (srcExample);
    encodedExamples->PushOnBack (encodedExample);
  }

  return  encodedExamples;
}  /* EncodeAllExamples */
Example #6
FeatureVectorListPtr  FeatureEncoder2::EncodedFeatureVectorList (const FeatureVectorList&  srcData)  const
{
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  encodedFeatureVectorList = new FeatureVectorList (encodedFileDesc, true);

  FeatureVectorList::const_iterator  idx;
  for  (idx = srcData.begin ();   idx != srcData.end ();  idx++)
  {
    FeatureVectorPtr  srcExample = *idx;
    FeatureVectorPtr  encodedFeatureVector = EncodeAExample (srcExample);
    encodedFeatureVector->MLClass (srcExample->MLClass ());
    encodedFeatureVectorList->PushOnBack (encodedFeatureVector);
  }

  return  encodedFeatureVectorList;
}  /* EncodedFeatureVectorList */
Example #7
void   RandomSplitJobManager::RetrieveRandomSplit (int                    splitNum,
                                                   FeatureVectorListPtr&  trainData,
                                                   FeatureVectorListPtr&  testData
                                                  )
{
  trainData = NULL;
  testData  = NULL;

  if  ((splitNum < 0)  ||  (splitNum >= (int)splits->NumOfOrderings ()))
  {
    log.Level (-1) << endl << endl
                   << "RandomSplitJobManager::RetrieveRandomSplit    ***ERROR***   Invalid SplitNum[" << splitNum << "]" << endl
                   << endl;
    return;
  }
  
  const FeatureVectorListPtr  ordering = splits->Ordering (splitNum);

  trainData = ordering->ManufactureEmptyList (false);
  testData  = ordering->ManufactureEmptyList (false);

  MLClassList::const_iterator  classIDX;
  for  (classIDX = mlClasses->begin ();  classIDX != mlClasses->end ();  classIDX++)
  {
    MLClassPtr  ic = *classIDX;

    FeatureVectorListPtr  examplesThisClass = ordering->ExtractExamplesForAGivenClass (ic);
    int  numTrainExamplesNeeded = (int)(0.5 + (double)(examplesThisClass->QueueSize ()) * (double)splitFraction);

    int  numExamplesAddToTrainSet = 0;

    FeatureVectorList::const_iterator  idx;
    for  (idx = examplesThisClass->begin ();  idx != examplesThisClass->end ();  idx++)
    {
      FeatureVectorPtr  example = *idx;

      if  (numExamplesAddToTrainSet < numTrainExamplesNeeded)
      {
        trainData->PushOnBack (example);
        numExamplesAddToTrainSet++;
      }
      else
      {
        testData->PushOnBack (example);
      }
    }
  }
}  /* RetrieveRandomSplit */
Example #8
kkint32  FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr   src)  const
{
  kkint32  xSpaceNodesNeeded = 0;
  FeatureVectorList::const_iterator  idx;
  for  (idx = src->begin ();  idx != src->end ();  ++idx)
  {
    FeatureVectorPtr fv = *idx;
    const float*  featureData = fv->FeatureData ();

    for  (kkint32 x = 0;  x < numOfFeatures; x++)
    {
      float  featureVal = featureData [srcFeatureNums[x]];

      switch (destWhatToDo[x])
      {
      case  FeWhatToDo::FeAsIs:
        if  (featureVal != 0.0f)
          xSpaceNodesNeeded++;
        break;

      case  FeWhatToDo::FeBinary:
        // One node for each binary-expansion slot whose value would be non-zero.
        for  (kkint32 z = 0;  z < cardinalityDest[x];  z++)
        {
          if  ((kkint32)featureVal == z)
            xSpaceNodesNeeded++;
        }
        break;

      case  FeWhatToDo::FeScale:
        if  (featureVal != 0.0f)
          xSpaceNodesNeeded++;
        break;
      }
    }

    // Reserve one node for the (index == -1) terminator appended per example.
    xSpaceNodesNeeded++;
  }

  return xSpaceNodesNeeded;
}  /* DetermineNumberOfNeededXspaceNodes */
Example #9
int   RandomSampleJobList::DetermineCompressedImageCount (FeatureVectorListPtr       trainData,
                                                          TrainingConfigurationPtr   config
                                                         )
{
  FileDescPtr  fileDesc = trainData->FileDesc ();
  FeatureVectorListPtr  srcImages = trainData->DuplicateListAndContents ();
  FeatureVectorListPtr  imagesToTrain = new FeatureVectorList (fileDesc, false, log, 10000);

  MLClassListPtr mlClasses = srcImages->ExtractListOfClasses ();

  {
    MLClassList::const_iterator  idx;

    for  (idx = mlClasses->begin ();  idx != mlClasses->end ();  idx++)
    {
      MLClassPtr  mlClass = *idx;
      FeatureVectorListPtr  imagesThisClass = srcImages->ExtractImagesForAGivenClass (mlClass);
      imagesToTrain->AddQueue (*imagesThisClass);
      delete  imagesThisClass;
    }
  }

  NormalizationParms  normParms (config, *imagesToTrain, log);
  normParms.NormalizeImages (imagesToTrain);

  ClassAssignments  classAssignments (*mlClasses, log);
  FeatureVectorListPtr  compressedImageList = new FeatureVectorList (fileDesc, true, log, 10000);

  BitReduction br (config->SVMparamREF (), fileDesc, trainData->AllFeatures ());

  CompressionStats compressionStats 
              = br.compress (*imagesToTrain, 
                             compressedImageList, 
                             classAssignments
                            );


  int  compressedImageCount = compressionStats.num_images_after;

  log.Level (10) << "DetermineCompressedImageCount  compressedImageCount[" << compressedImageCount << "]" << endl;

  delete  compressedImageList;  compressedImageList = NULL;
  delete  mlClasses;            mlClasses           = NULL;
  delete  imagesToTrain;        imagesToTrain       = NULL;
  delete  srcImages;            srcImages           = NULL;

  return compressedImageCount;
}  /* DetermineCompressedImageCount */
Example #10
void  FeatureFileConverter::EncodeFeatureData ()
{
  bool  successful = false;

  ModelParamKnn  param (srcFileDesc, log);

  param.EncodingMethod    (encodingMethod);
  param.CompressionMethod (ModelParam::BRnoCompression);
  if  (features)
    param.SelectedFeatures  (*features);

  FeatureEncoder2  encoder (param, srcFileDesc, log);
  
  // We do the next line to generate a report of the encoded field assignments.
  *report << endl;
  FileDescPtr  encodedFileDesc = encoder.CreateEncodedFileDesc (report);

  NormalizeExamples (param, *data);

  FeatureVectorListPtr  featureVectorEncodedData = encoder.EncodeAllExamples (data);

  uint  numExamplesWritten = 0;
  destFileFormat->SaveFeatureFile (destFileName,
                                   featureVectorEncodedData->AllFeatures (),
                                   *featureVectorEncodedData,
                                   numExamplesWritten,
                                   cancelFlag,
                                   successful,
                                   log
                                  );

  // FileDesc objects never get deleted until very end of application; "FileDesc::FinalCleanUp" will delete it.
  //delete  encodedFileDesc;  encodedFileDesc = NULL;

  delete  featureVectorEncodedData;      featureVectorEncodedData = NULL;
}  /* EncodeFeatureData */
Example #11
void  Strip ()
{
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  RunLog  log;

  FeatureFileIOPtr driver =  FeatureFileIO::FileFormatFromStr ("C45");

  MLClassList  mlClasses;
  FeatureVectorListPtr  data = 
        driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data",
                                 mlClasses,
                                 -1,
                                 cancelFlag,
                                 successful,
                                 changesMade,
                                 log
                               );

  FeatureVectorListPtr  stripped = new FeatureVectorList (data->FileDesc (), false);

  FeatureVectorList::const_iterator  idx;
  for  (idx = data->begin ();  idx != data->end ();  ++idx)
  {
    FeatureVectorPtr  fv = *idx;
    KKStr  fn = fv->ExampleFileName ();
    // Skip examples whose file names start with "SML" or "SMP"; keep the rest.
    if  (!fn.StartsWith ("SML")  &&  !fn.StartsWith ("SMP"))
      stripped->PushOnBack (fv);
  }


  kkuint32  numExamplesWritten = 0;
  driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data",
                           data->AllFeatures (), 
                           *stripped, 
                           numExamplesWritten,
                           cancelFlag,
                           successful,
                           log
                          );

  delete  stripped;  stripped = NULL;
  delete  data;      data     = NULL;
}  /* Strip */
Example #12
void  NormalizeAllValidatdData ()
{
  MLClassConstList  classes;
  bool  _cancelFlag = false;
  bool  _successful = false;
  bool  _changesMade = false;

  RunLog log;

  FeatureVectorListPtr  fd =   FeatureFileIOC45::Driver ()->LoadFeatureFile 
    ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImages.data",
     classes,
     -1,
     _cancelFlag,
     _successful,
     _changesMade,
     log
    );

  uint32  numWritten = 0;

  NormalizationParms parms (true, *fd, log);
  parms.NormalizeImages (fd);
  FeatureFileIOC45::Driver ()->SaveFeatureFile 
    ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImagesNorm.data",
     fd->AllFeatures (),
     *fd,
     numWritten,
     _cancelFlag,
     _successful,
     log
    );

  parms.Save ("C:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages\\AllValidatedImagesNorm.parms.txt", _successful);

  delete  fd;  fd = NULL;
}  /* NormalizeAllValidatdData */
Example #13
void  AbundanceCorrectionStatsBuilder::RemoveDuplicateImages ()
{
  *report << endl << endl;

  FeatureVectorListPtr  allExamples = new FeatureVectorList (fileDesc, 
                                                             false,    // 'false' = will not own contents.
                                                             log
                                                            ); 
  allExamples->AddQueue (*trainLibData);
  allExamples->AddQueue (*otherClassData);
  
  DuplicateImages  dupChecker (allExamples, log);
  if  (dupChecker.DuplicatesFound ())
  {
    *report << "DUPLICATE IMAGES DETECTED." << endl;
    dupChecker.ReportDuplicates (*report);
    
    FeatureVectorListPtr  dups = dupChecker.ListOfExamplesToDelete ();
    if  (dups)
    {
      FeatureVectorList::iterator  idx;
      for  (idx = dups->begin ();  idx != dups->end ();  ++idx)
      {
        FeatureVectorPtr  fv = *idx;
        trainLibData->DeleteEntry (fv);
        otherClassData->DeleteEntry (fv);
      }
      delete  dups;
      dups = NULL;
    }
  }
  else
  {
    *report << "No duplicates detected." << endl;
  }

  delete  allExamples;  allExamples = NULL;

  *report << endl << endl;
}  /* RemoveDuplicateImages */
Example #14
FeatureVectorListPtr  FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                        const KKStr&          _dirName,
                                                        const KKStr&          _fileName, 
                                                        MLClassPtr            _unknownClass,
                                                        bool                  _useDirectoryNameForClassName,
                                                        MLClassList&          _mlClasses,
                                                        VolConstBool&         _cancelFlag,
                                                        bool&                 _changesMade,
                                                        KKB::DateTime&        _timeStamp,
                                                        RunLog&               _log
                                                      )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if  (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr  className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink  dirName: " << _dirName << endl
                  << "               fileName: " << _fileName << "  UnKnownClass: " << className << endl;

  KKStr  fullFeatureFileName = osAddSlash (_dirName) +  _fileName;

  bool  successful = true;

  KKStr fileNameToOpen;
  if  (_dirName.Empty ())
    fileNameToOpen = _fileName;
  else
    fileNameToOpen = osAddSlash (_dirName) + _fileName;

  bool  versionsAreSame = false;

  FeatureVectorListPtr  origFeatureVectorData 
        = LoadFeatureFile (fileNameToOpen, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if  (origFeatureVectorData == NULL)
  {
    successful = false;
    origFeatureVectorData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  if  (_cancelFlag)
  {
    delete  origFeatureVectorData;  origFeatureVectorData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorListPtr  origFeatureData = NULL;

  if  (successful  &&
       (&typeid (*origFeatureVectorData) == _fvProducerFactory->FeatureVectorListTypeId ())  &&
       ((*(origFeatureVectorData->FileDesc ())) ==  (*(_fvProducerFactory->FileDesc ())))
      )
  {
     origFeatureData = origFeatureVectorData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete  origFeatureVectorData;
    origFeatureVectorData = NULL;
  }

  KKStr  fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr   fileNameList = osGetListOfFiles (fileSpec);

  if  (!fileNameList)
  {
    // There are no image files, so we need to return an empty list of image features.

    if  (origFeatureData->QueueSize () > 0)
      _changesMade = true;

    delete  origFeatureData;  origFeatureData = NULL;

    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr  fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  if  (successful)
  {
    if  (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (fileNameToOpen);
    }

    else
    {
      _changesMade = true;
    }
  }
  else
  {
    delete  origFeatureData;
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr  extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  KKStrList::iterator  fnIDX;

  KKStrPtr  imageFileName;

  kkuint32  numImagesFoundInOrigFeatureData = 0;
  kkuint32  numOfNewFeatureExtractions = 0;

  for  (fnIDX = fileNameList->begin ();  (fnIDX != fileNameList->end ())  &&  (!_cancelFlag);  ++fnIDX)
  {
    imageFileName = *fnIDX;

    bool validImageFileFormat = SupportedImageFileFormat (*imageFileName);
    
    if  (!validImageFileFormat)
      continue;

    bool  featureVectorCoputaionSuccessful = false;

    FeatureVectorPtr  origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if  (origFV)
      numImagesFoundInOrigFeatureData++;

    if  (origFV  &&  versionsAreSame)
    {
      featureVectorCoputaionSuccessful = true;
      if  (_useDirectoryNameForClassName)
      {
        if  (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }

      else if  ((origFV->MLClass ()->UnDefined ())  &&  (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
    }
    else
    {
      // We either  DON'T have an original image    or    versions are not the same.

      KKStr  fullFileName = osAddSlash (_dirName) + (*imageFileName);
      FeatureVectorPtr fv = NULL;
      try
      {
        RasterPtr image = ReadImage (fullFileName);
        if  (image)
          fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
        delete image;
        image = NULL;
        if  (fv)
          featureVectorCoputaionSuccessful = true;
        else
          featureVectorCoputaionSuccessful = false;
      }
      catch  (...)
      {
        _log.Level (-1) << endl << endl
          << "FeatureDataReSink   ***ERROR***"  << endl
          << "       Exception occurred calling 'ComputeFeatureVector'." << endl
          << endl;
        featureVectorCoputaionSuccessful = false;
        fv = NULL;
      }

      if  (!featureVectorCoputaionSuccessful)
      {
        _log.Level (-1) << " FeatureFileIO::FeatureDataReSink  *** ERROR ***, Processing Image File["
                       << *imageFileName << "]."
                       << endl;
        delete  fv;
        fv = NULL;
      }

      else
      {
        _changesMade = true;
        fv->ExampleFileName (*imageFileName);
        _log.Level (30) << fv->ExampleFileName () << "  " << fv->OrigSize () << endl;
        extractedFeatures->PushOnBack (fv);
        numOfNewFeatureExtractions++;

        if  ((numOfNewFeatureExtractions % 100) == 0)
          cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
      }
    }
  }

  if  (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;
  
  extractedFeatures->Version (fvProducer->Version ());

  if  ((_changesMade)  &&  (!_cancelFlag))
  {
    //extractedFeatures->WriteImageFeaturesToFile (fullFeatureFileName, RawFormat, FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()));

    kkuint32  numExamplesWritten = 0;

    SaveFeatureFile (fullFeatureFileName,  
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     successful,
                     _log
                    );

    _timeStamp = osGetLocalDateTime ();
  }

  delete fvProducer;       fvProducer      = NULL;
  delete fileNameList;     fileNameList    = NULL;
  delete origFeatureData;  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink  Exiting  Dir: "  << _dirName << endl;

  return  extractedFeatures;
}  /* FeatureDataReSink */
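
A call-site sketch; everything other than the FeatureDataReSink signature itself ('driver', 'fvProducerFactory', 'cancelFlag', 'log', and the path names) is illustrative:

// Illustrative usage only; the named objects are assumed to exist elsewhere.
MLClassList    classes;
bool           changesMade = false;
KKB::DateTime  timeStamp;

FeatureVectorListPtr  examples = driver->FeatureDataReSink (fvProducerFactory,
                                                            "C:\\Data\\Copepod",  // directory to resync
                                                            "Copepod.data",       // feature file in that directory
                                                            NULL,                 // NULL => the global unknown class is used
                                                            true,                 // take the class name from the directory
                                                            classes,
                                                            cancelFlag,
                                                            changesMade,
                                                            timeStamp,
                                                            log
                                                           );

// The returned list owns its contents; 'changesMade' reports whether features
// were recomputed and the on-disk feature file rewritten.
delete  examples;  examples = NULL;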
Example #15
void   FeatureFileConverter::ConvertData ()
{
  cout << endl
       << "Saving [" << data->QueueSize () << "] records to data file[" << destFileName << "]" << endl
       << endl;

  bool  successful = false;

  int  numOfFeatures = data->NumOfFeatures ();
  int  numWithAllZeros = 0;

  {
    FeatureVectorListPtr  newData  = new FeatureVectorList (srcFileDesc, true, log);

    // Examples whose features are all zero are collected in the "zeroData"
    // container so they can be deleted later rather than leak; they are not
    // placed into "newData", which becomes the owner of all remaining examples.
    FeatureVectorListPtr  zeroData = new FeatureVectorList (srcFileDesc, true, log);

    // How many have all 0's for feature data.
    FeatureVectorList::iterator  idx;

    for  (idx = data->begin ();  idx != data->end ();  idx++)
    {
      FeatureVectorPtr  i = *idx;
      bool  allZeros = true;
      for  (int featureNum = 0;  featureNum < numOfFeatures;  featureNum++)
      {
        allZeros = (i->FeatureData (featureNum) == 0.0f);
        if  (!allZeros)
          break;
      }

      if  (allZeros)
      {
        numWithAllZeros++;
        zeroData->PushOnBack (i);
      }
      else
      {
        newData->PushOnBack (i);
      }
    }

    data->Owner (false);
    delete data;
    data = newData;
    delete  zeroData;
  }

  *report << endl
          << endl
          << "Num of data items with all zero feature data [" << numWithAllZeros << "]" << endl
          << endl;

  *report << data->ClassStatisticsStr ();
  *report << endl << endl << endl;

  if  (statistics)
  {
    *report << "Class Statistics:"  << endl;
    data->PrintClassStatistics (*report);
    *report << endl << endl;

    *report << "Feature Statistics:"  << endl;
    data->PrintFeatureStatisticsByClass (*report);
  }


  if  (enumerateClasses)
  {
    // We are going to change the names of the classes to numbers enumerated by class name.

    MLClassConstListPtr  mlClasses = data->ExtractMLClassConstList ();
    mlClasses->SortByName ();

    MLClassConstListPtr  newClassNames = new MLClassConstList ();

    int classIdx = 0;
    MLClassConstList::iterator idx;
    for  (idx = mlClasses->begin ();  idx !=  mlClasses->end ();  idx++)
    {
      KKStr  newName = StrFormatInt (classIdx, "zzz0");
      newClassNames->GetMLClassPtr (newName);  // Creates the renamed class entry as a side effect.
      classIdx++;
    }

    FeatureVectorList::iterator  idx2;
    for  (idx2 = data->begin ();  idx2 != data->end ();  idx2++)
    {
      MLClassConstPtr  c = (*idx2)->MLClass ();
      int  classIndex = mlClasses->PtrToIdx (c);
      (*idx2)->MLClass (newClassNames->IdxToPtr (classIndex));
    }

    delete  mlClasses; mlClasses = NULL;
    delete  newClassNames;  newClassNames = NULL;
  }

  if  (encodeFeatureData)
  {
    EncodeFeatureData ();
  }
  else
  {
    uint  numExamplesWritten = 0;
    destFileFormat->SaveFeatureFile (destFileName,
                                     *features,
                                     *data,
                                     numExamplesWritten,
                                     cancelFlag,
                                     successful,
                                     log
                                    );
  }

}  /* ConvertData */
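
As an illustration of the enumerateClasses path: a data set whose sorted class names are Copepod, Diatom, and Larvacean is rewritten with the class names "0", "1", and "2", assuming the 'z' positions of the "zzz0" mask passed to StrFormatInt suppress leading zeros.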
Example #16
void  SplitForestCoverFile ()
{
  RunLog  log;

  MLClassConstList  mlClasses;  
  bool  cancelFlag  = false;
  bool  successful;
  bool  changesMade = false;

  FeatureVectorListPtr  images = FeatureFileIOC45::Driver ()->LoadFeatureFile 
                    ("covtype_alpha.data", mlClasses, -1, cancelFlag, successful, changesMade, log);

  FileDescPtr  fileDesc = images->FileDesc ();

  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();
  images->RandomizeOrder ();

  MLClassConstPtr  lodgepolePine = mlClasses.GetMLClassPtr ("Lodgepole_Pine");
  MLClassConstPtr  spruceFir     = mlClasses.GetMLClassPtr ("Spruce_Fir");

  int  lodgepolePineTrainCount = 0;
  int  spruceFirTrainCount     = 0;
  FeatureVectorList::iterator  idx;

  FeatureVectorListPtr  trainData = new FeatureVectorList (fileDesc, false, log, 10000);
  FeatureVectorListPtr  testData  = new FeatureVectorList (fileDesc, false, log, 10000);

  int  c = 0;

  for  (idx = images->begin ();  idx != images->end ();  idx++)
  {
    FeatureVectorPtr i = *idx;

    if  ((c % 5000) == 0)
      cout << c << endl;

    if  (i->MLClass () == lodgepolePine)
    {
      if  (lodgepolePineTrainCount < 56404)
      {
        trainData->PushOnBack (i);
        lodgepolePineTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }
    else if  (i->MLClass () == spruceFir)
    {
      if  (spruceFirTrainCount < 42480)
      {
        trainData->PushOnBack (i);
        spruceFirTrainCount++;
      }
      else
      {
        testData->PushOnBack (i);
      }
    }

    c++;
  }

  KKU::uint  numExamplesWritten = 0;
  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.data", 
                   trainData->AllFeatures (),
                   *trainData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  FeatureFileIOC45::Driver ()->SaveFeatureFile 
                  ("CovType_TwoClass.test", 
                   testData->AllFeatures (),
                   *testData, 
                   numExamplesWritten,
                   cancelFlag,
                   successful,
                   log
                  );

  delete  trainData;
  delete  testData;
  delete  images;
}  /* SplitForestCoverFile */
Example #17
void   JobValidation::EvaluateNode ()
{
  log.Level (9) << "  " << endl;
  log.Level (9) << "JobValidation::EvaluteNode JobId[" << jobId << "]" << endl;
  status = BinaryJobStatus::Started;

  bool  configFileFormatGood = true;
  
  TrainingConfiguration2Ptr  config = new TrainingConfiguration2 ();
  config->Load (configFileName, false, log);
  if  (!config->FormatGood ())
    configFileFormatGood = false;

  config->SetFeatureNums (features);
  config->C_Param (cParm);
  config->Gamma   (gammaParm);
  config->A_Param (aParm);
  config->SelectionMethod (processor->SelectionMethod ());
  
  switch  (processor->ResultType ())
  {
  case  FinalResultType::MfsFeaturesSel:
  case  FinalResultType::NoTuningAllFeatures:
  case  FinalResultType::MfsParmsTuned: 
  case  FinalResultType::MfsParmsTunedFeaturesSel: 
           config->MachineType (SVM_MachineType::OneVsOne);
           break;
    
  case  FinalResultType::BfsFeaturesSel:
  case  FinalResultType::BfsParmsTuned:
  case  FinalResultType::BfsFeaturesSelParmsTuned:
           config->MachineType (SVM_MachineType::BinaryCombos);
           break;
  }

  bool  cancelFlag = false;

  FeatureVectorListPtr  trainData       = processor->TrainingData ();
  FeatureVectorListPtr  validationData  = processor->ValidationData ();

  VectorDouble  trainDataMeans      = trainData->ExtractMeanFeatureValues ();
  VectorDouble  validationDataMeans = validationData->ExtractMeanFeatureValues ();


  CrossValidationPtr  crossValidation = new CrossValidation  
                                           (config,
                                            trainData,
                                            processor->MLClasses (),
                                            processor->NumOfFolds (),
                                            processor->AlreadyNormalized (),
                                            processor->FileDesc (),
                                            log,
                                            cancelFlag
                                           );

  delete[]  classedCorrectly;
  classedCorrectlySize = validationData->QueueSize ();
  classedCorrectly = new bool[classedCorrectlySize];

  crossValidation->RunValidationOnly (validationData, classedCorrectly, log);

  testAccuracy      = crossValidation->Accuracy ();
  testAccuracyNorm  = crossValidation->AccuracyNorm ();
  testAvgPredProb   = (float)crossValidation->AvgPredProb () * 100.0f;
  testFMeasure      = (float)crossValidation->ConfussionMatrix ()->FMeasure (processor->PositiveClass (), log);

  if  (processor->GradingMethod () == GradingMethodType::Accuracy)
    testGrade = testAccuracy;

  else if  (processor->GradingMethod () == GradingMethodType::AccuracyNorm)
    testGrade = testAccuracyNorm;

  else if  (processor->GradingMethod () == GradingMethodType::FMeasure)
    testGrade = testFMeasure;

  else
    testGrade = testAccuracy;

  testNumSVs  = crossValidation->NumOfSupportVectors ();

  {
    // Save results of this Split in Results file.
    processor->Block ();

    {
      uint  fn = 0;
      ofstream rl ("FinalResults.log", ios_base::app);
      rl << endl << endl
         << "ConfigFileName"          << "\t" << configFileName  << "\t" << "Format Good[" << (configFileFormatGood ? "Yes" : "No") << endl
         << "SummaryResultsFileName"  << "\t" << processor->SummaryResultsFileName () << endl
         << "Configuration CmdLine"   << "\t" << config->SVMparamREF (log).ToString ()   << endl
         << "ImagesPerClass"          << "\t" << config->ImagesPerClass ()            << endl
         << endl;

      rl << endl << endl
         << "Training Data Status" << endl
         << endl;
      trainData->PrintClassStatistics (rl);
      rl << endl << endl;


      rl << "TrainingDataMeans";
      for  (fn = 0;  fn < trainDataMeans.size ();  fn++)
        rl << "\t" << trainDataMeans[fn];
      rl << endl;

      rl << "ValidationDataMeans";
      for  (fn = 0;  fn < validationDataMeans.size ();  fn++)
        rl << "\t" << validationDataMeans[fn];
      rl << endl
         << endl;

      crossValidation->ConfussionMatrix ()->PrintConfusionMatrixTabDelimited (rl);
      rl << endl << endl << endl << endl;
      rl.close ();
    }

    {
      ofstream  f (processor->SummaryResultsFileName ().Str (), ios_base::app);
      ValidationResults r (processor->ResultType (), 
                           config, 
                           crossValidation,
                           trainData,
                           osGetHostName ().value_or ("*** unknown ***"),
                           classedCorrectlySize,
                           classedCorrectly,
                           this,
                           log
                          );
      r.Write (f);
      f.close ();
    }
    processor->EndBlock ();
  }

  delete  crossValidation;     crossValidation    = NULL;
  delete  config;              config = NULL;
  status = BinaryJobStatus::Done;
}  /* EvaluateNode */
Example #18
void  GradeClassification::GradeUsingTrainingConfiguration ()
{
  log.Level (10) << "GradeClassification::GradeUsingTrainingConfiguration" << endl;

  delete  mlClasses;
  mlClasses = config->ExtractClassList ();

  bool  changesMadeToTrainingLibraries = false;

  KKU::DateTime  latestImageTimeStamp;

  log.Level (10) << "GradeUsingTrainingConfiguration  Loading Training Data." << endl;

  FeatureVectorListPtr  trainingData = config->LoadFeatureDataFromTrainingLibraries (latestImageTimeStamp, changesMadeToTrainingLibraries, cancelFlag);
  if  (!trainingData)
  {
    log.Level (-1) << endl << endl << endl
                   << "GradedlClassification::GradeUsingTrainingConfiguration      ***ERROR***" << endl
                   << endl
                   << "               Could not load training data file Configuration File[" << configFileName << "]" << endl
                   << endl
                   << endl;
    Abort (true);
    return;
  }

  uint  maxLevelsOfHierarchy = config->NumHierarchialLevels ();
  uint  hierarchyLevel = 0;

  while  (hierarchyLevel < maxLevelsOfHierarchy)
  {
    log.Level (10) << "GradeUsingTrainingConfiguration    Hierarchy Level[" << hierarchyLevel << "]" << endl;

    TrainingConfiguration2Ptr  configThisLevel = config->GenerateAConfiguraionForAHierarchialLevel (hierarchyLevel);

    FeatureVectorListPtr  trainingDataThisLevel = trainingData->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevel  = groundTruth->ExtractExamplesForHierarchyLevel (hierarchyLevel);
    FeatureVectorListPtr  groundTruthThisLevelClassified  = new FeatureVectorList (*groundTruthThisLevel, true);

    KKStr  statusMessage;

    TrainingProcess2 trainer (configThisLevel,
                              trainingDataThisLevel,
                              NULL,                               // No report file,
                              trainingDataThisLevel->FileDesc (),
                              log,
                              false,                              // false = features are not already normalized.
                              cancelFlag,
                              statusMessage
                             );
    trainer.CreateModelsFromTrainingData ();

    {
      Classifier2 classifier (&trainer, log);
      FeatureVectorList::iterator  idx;

      for  (idx = groundTruthThisLevelClassified->begin ();  idx != groundTruthThisLevelClassified->end ();  idx++)
      {
        FeatureVectorPtr  fv = *idx;
        MLClassConstPtr  ic = classifier.ClassifyAImage (*fv);
        fv->MLClass (ic);
      }
    }

    GradeExamplesAgainstGroundTruth (groundTruthThisLevelClassified, groundTruthThisLevel);

    delete  groundTruthThisLevelClassified;  groundTruthThisLevelClassified = NULL;
    delete  groundTruthThisLevel;            groundTruthThisLevel           = NULL;
    delete  trainingDataThisLevel;           trainingDataThisLevel          = NULL;

    hierarchyLevel++;
  }

  ReportResults ();

  delete  trainingData;
}  /* GradeUsingTrainingConfiguration */
Example #19
/**
 * @brief Will run M number of Train then Test passes.
 *
 * @param[in] numExamplsToUseForTraining The number of examples in each ordering (group)
 *            that are to be used for training; the remaining examples will be
 *            used as test data.
 */
void  CrossValidationMxN::RunTrainAndTest (kkuint32  numExamplsToUseForTraining,
                                           RunLog&   log
                                          )
{ 
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  kkuint32  cvIDX = 0;

  MLClassListPtr  mlClasses = orderings->MLClasses ();

  for  (cvIDX = 0;  cvIDX < numOfOrderings;  cvIDX++)
  {
    FeatureVectorListPtr  data = orderings->Ordering (cvIDX);

    FeatureVectorList  trainingData (fileDesc, false);
    FeatureVectorList  testData     (fileDesc, false);

    FeatureVectorList::iterator  fvIDX;

    for  (fvIDX = data->begin ();  fvIDX != data->end ();  fvIDX++)
    {
      FeatureVectorPtr example = *fvIDX;

      if  (trainingData.QueueSize () < numExamplsToUseForTraining)
        trainingData.PushOnBack (example);
      else
        testData.PushOnBack (example);
    }

    CrossValidationPtr  cv = new CrossValidation (config,
                                                  &trainingData,
                                                  mlClasses,
                                                  numOfFolds,
                                                  false,
                                                  fileDesc,
                                                  log,
                                                  cancelFlag
                                                 );

    cv->RunValidationOnly (&testData, 
                           NULL,        // No McNemars test going to be performed.
                           log
                          );

    accuracies.push_back    (cv->Accuracy       ());
    supportPoints.push_back ((float)cv->NumOfSupportVectors ());
    trainingTimes.push_back (cv->TrainTimeTotal ());
    testTimes.push_back     (cv->TestTimeTotal  ());

    meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);

    delete  cv;
  }

  CalcMeanAndStdDev (accuracies,      accuracyMean,       accuracyStdDev);
  CalcMeanAndStdDev (supportPoints,   supportPointsMean,  supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes,   trainingTimeMean,   trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes,       testTimeMean,       testTimeStdDev);

  double  factor = 1.0 / (double)numOfOrderings;

  meanConfusionMatrix->FactorCounts (factor);
}  /* RunTrainAndTest */
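
For instance, with numOfOrderings = 5 orderings of 1,000 examples each and numExamplsToUseForTraining = 800, each pass trains on the first 800 examples of an ordering and tests on the remaining 200; after the five passes the accumulated confusion matrix is scaled by factor = 1/5, so meanConfusionMatrix holds the per-pass average counts.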
Example #20
void  GradeClassification::GradeExamplesAgainstGroundTruth (FeatureVectorListPtr  examplesToGrade,
                                                            FeatureVectorListPtr  groundTruth
                                                           )
{
  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth" << endl;

  groundTruth->SortByRootName ();

  MLClassConstPtr  unknownClass = mlClasses->GetUnKnownClass ();

  MLClassConstListPtr classes = NULL;
  {
    MLClassConstListPtr examplesToGradeClasses = examplesToGrade->ExtractMLClassConstList ();
    MLClassConstListPtr groundTruthClasses     = groundTruth->ExtractMLClassConstList ();
    classes = MLClassConstList::MergeClassList (*examplesToGradeClasses, *groundTruthClasses);
    delete  examplesToGradeClasses;
    delete  groundTruthClasses;
  }

  uint16  maxHierarchialLevel = 0;
  {
    MLClassConstList::iterator  idx;
    for  (idx = classes->begin ();  idx != classes->end ();  idx++)
    {
      MLClassConstPtr  c = *idx;
      maxHierarchialLevel = Max (maxHierarchialLevel, c->NumHierarchialLevels ());
    }
  }

  // Create ConfusionMatrix objects for each possible level of hierarchy.  The 'resultsSummary' vector will
  // end up owning the instances of 'ConfusionMatrix2', and the destructor will be responsible for deleting them.
  uint  curLevel = 0;
  vector<ConfusionMatrix2Ptr>  cmByLevel;
  for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
  {
    MLClassConstListPtr  classesThisLevel = classes->ExtractListOfClassesForAGivenHierarchialLevel (curLevel);
    ConfusionMatrix2Ptr  cm = new ConfusionMatrix2 (*classesThisLevel);
    cmByLevel.push_back (cm);
  }

  ConfusionMatrix2  cm (*classes);


  ImageFeaturesList::iterator  idx;

  for  (idx = examplesToGrade->begin ();  idx !=  examplesToGrade->end ();  idx++)
  {
    ImageFeaturesPtr  exampleToGrade = *idx;
    MLClassConstPtr  predictedClass = exampleToGrade->MLClass ();
    float          origSize       = exampleToGrade->OrigSize ();
    float          probability    = exampleToGrade->Probability ();

    KKStr  rootName = osGetRootName (exampleToGrade->ImageFileName ());
    FeatureVectorPtr  groundTruthExample = groundTruth->LookUpByRootName (rootName);
    MLClassConstPtr  groundTruthClass = unknownClass;
    if  (groundTruthExample)
      groundTruthClass = groundTruthExample->MLClass ();

    cm.Increment (groundTruthClass, predictedClass, (int)origSize, probability, log);

    for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
    {
      MLClassConstPtr  groundTruthClasssThisLevel = groundTruthClass->MLClassForGivenHierarchialLevel (curLevel);
      MLClassConstPtr  predictedClassThisLevel    = predictedClass->MLClassForGivenHierarchialLevel   (curLevel);

      cmByLevel[curLevel]->Increment (groundTruthClasssThisLevel, predictedClassThisLevel, (int)origSize, probability, log);
    }
  }    


  //cm.PrintTrueFalsePositivesTabDelimited (*report);

  {
    // report Hierarchial results
    for  (curLevel = 0;  curLevel < maxHierarchialLevel;  curLevel++)
    {
      log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth   Printing Level[" << curLevel << "]" << endl;
      *report << endl << endl << endl
              << "Confusion Matrix   Training Level[" << maxHierarchialLevel << "]       Preduction Level[" << (curLevel + 1) << "]" << endl
              << endl;
      cmByLevel[curLevel]->PrintConfusionMatrixTabDelimited (*report);
      resultsSummary.push_back (SummaryRec (maxHierarchialLevel, curLevel + 1, cmByLevel[curLevel]));
    }

    *report << endl << endl << endl;
  }

  log.Level (10) << "GradeClassification::GradeExamplesAgainstGroundTruth     Exiting"  << endl;
}  /* GradeExamplesAgainstGroundTruth */
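
A hedged illustration of the level handling, assuming class names encode their hierarchy such that NumHierarchialLevels () reports 3 for a class like Crustacea_Copepod_Calanoid: an example predicted as Crustacea_Copepod_Cyclopoid against ground truth Crustacea_Copepod_Calanoid is counted correct in the level-1 and level-2 confusion matrices but as an error in the level-3 matrix.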
Example #21
void  FeatureEncoder::EncodeIntoSparseMatrix
                               (FeatureVectorListPtr   src,
                                ClassAssignments&      assignments,
                                XSpacePtr&             xSpace,          
                                kkint32&               totalxSpaceUsed,
                                struct svm_problem&    prob,
                                RunLog&                log
                               )

{
  FeatureVectorListPtr  compressedExamples    = NULL;
  FeatureVectorListPtr  examplesToUseFoXSpace = NULL;
  kkint32               xSpaceUsed            = 0;

  totalxSpaceUsed = 0;

  examplesToUseFoXSpace = src;

  kkint32  numOfExamples = examplesToUseFoXSpace->QueueSize ();
  //kkint32  elements      = numOfExamples * xSpaceNeededPerExample;

  prob.l     = numOfExamples;
  prob.y     = (double*)malloc  (prob.l * sizeof (double));
  prob.x     = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
  prob.index = new kkint32[prob.l];
  prob.exampleNames.clear ();

  kkint32  numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);

  kkint32  totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node);  // I added '10' to elements because I am paranoid

  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
  if  (xSpace == NULL)
  {
    log.Level (-1) << endl << endl << endl
                   << " FeatureEncoder::EncodeIntoSparseMatrix   *** Failed to allocate space for 'xSpace' ****" << endl
                   << endl
                   << "     Space needed          [" << totalBytesForxSpaceNeeded << "]" << endl
                   << "     Num of Examples       [" << numOfExamples             << "]" << endl
                   << "     Num XSpaceNodesNeeded [" << numNeededXspaceNodes      << "]" << endl
                   << endl;
    // We will have to allocate space for each individual training example separately.
    //throw "FeatureEncoder::EncodeIntoSparseMatrix     Allocation of memory for xSpace Failed.";
  }

  prob.W = NULL;

  kkint32 i = 0;
 
  FeatureVectorPtr  example      = NULL;
  MLClassPtr        lastMlClass  = NULL;
  kkint16           lastClassNum = -1;

  kkint32  bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);

  for (i = 0;  i < prob.l;  i++)
  {
    if  (totalxSpaceUsed > numNeededXspaceNodes)
    {
      log.Level (-1) << endl << endl
        << "FeatureEncoder::EncodeIntoSparseMatrix   ***ERROR***   We have exceeded the number of XSpace nodes allocated." << endl
        << endl;
    }

    example = examplesToUseFoXSpace->IdxToPtr (i);

    if  (example->MLClass () != lastMlClass)
    {
      lastMlClass  = example->MLClass ();
      lastClassNum = assignments.GetNumForClass (lastMlClass);
    }

    prob.y[i]     = lastClassNum;
    prob.index[i] = i;
    prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));

    if  (prob.W)
    {
      prob.W[i] = example->TrainWeight () * c_Param;
      if  (example->TrainWeight () <= 0.0f)
      {
        log.Level (-1) << endl 
                       << "FeatureEncoder::EncodeIntoSparseMatrix    ***ERROR***   Example[" << example->ExampleFileName () << "]" << endl
                       << "      has a TrainWeight value of 0 or less defaulting to 1.0" << endl
                       << endl;
        prob.W[i] = 1.0 * c_Param;
      }
    }

    if  (xSpace == NULL)
    {
      struct svm_node*  xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
      prob.x[i] = xSpaceThisExample;
      EncodeAExample (example, prob.x[i], xSpaceUsed);
      if  (xSpaceUsed < xSpaceNeededPerExample)
      {
        kkint32  bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
        struct svm_node*  smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
        memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
        free  (xSpaceThisExample);
        prob.x[i] = smallerXSpaceThisExample;
      }
    }
    else
    {
      prob.x[i] = &xSpace[totalxSpaceUsed];
      EncodeAExample (example, prob.x[i], xSpaceUsed);
    }
    totalxSpaceUsed += xSpaceUsed;
  }

  delete  compressedExamples;
  return;
}  /* EncodeIntoSparseMatrix */
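
The xSpace layout follows the terminator convention that the sentinel loop in Example #1 also depends on: each example is a contiguous run of (index, value) nodes for its non-zero features, closed by a node whose index is -1. A small illustration (the concrete indices are illustrative; real ones come from the encoder's destination-feature mapping):

// A feature vector (0.0, 2.5, 0.0, 1.0) might encode into xSpace as:
//   { index = 1, value = 2.5 }
//   { index = 3, value = 1.0 }
//   { index = -1 }              // terminator; marks the end of this example
// prob.x[i] points at the first of these nodes within the shared xSpace block.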
Example #22
void  RandomSampleJob::EvaluteNode (FeatureVectorListPtr  validationData,
                                    MLClassListPtr     classes
                                   )
{
  log.Level (9) << "  " << endl;
  log.Level (9) << "  " << endl;
  log.Level (9) << "RandomSampleJob::EvaluteNode JobId[" << jobId << "] Ordering[" << orderingNum << "]" << endl;

  status = rjStarted;

  config->CompressionMethod (BRnoCompression);
  config->KernalType        (kernelType);
  config->EncodingMethod    (encodingMethod);
  config->C_Param           (c);
  config->Gamma             (gamma);

  FileDescPtr fileDesc = config->FileDesc ();


  const FeatureVectorListPtr  srcExamples = orderings->Ordering (orderingNum);

  if  (numExamplesToKeep > srcExamples->QueueSize ())
  {
    log.Level (-1) << endl << endl << endl
                   << "RandomSampleJob::EvaluteNode     *** ERROR ***    RandomExamples to large" << endl
                   << endl
                   << "                     RandomExamples > num in Training set." << endl
                   << endl;
    osWaitForEnter ();
    exit (-1);
  }



  FeatureVectorListPtr  trainingData = new FeatureVectorList (srcExamples->FileDesc (), false, log, 10000);
  for  (int x = 0;  x < numExamplesToKeep;  x++)
  {
    trainingData->PushOnBack (srcExamples->IdxToPtr (x));
  }

  bool  allClassesRepresented = true;
  {
    MLClassListPtr  classesInRandomSample = trainingData->ExtractListOfClasses ();
    if  (*classesInRandomSample != (*classes))
    {
      log.Level (-1) << endl << endl
                     << "RandomSampling    *** ERROR ***" << endl
                     << endl
                     << "                  Missing Classes From Random Sample." << endl
                     << endl
                     << "MLClasses[" << classes->ToCommaDelimitedStr               () << "]" << endl
                     << "Found       [" << classesInRandomSample->ToCommaDelimitedStr () << "]" << endl
                     << endl;

       allClassesRepresented = false;

    }

    delete  classesInRandomSample;  classesInRandomSample = NULL;
  }


  //if  (!allClassesRepresented)
  //{
  //  accuracy  = 0.0;
  //  trainTime = 0.0;
  //  testTime  = 0.0;
  //}
  //else
  {
    delete  crossValidation;  crossValidation = NULL;

    compMethod = config->CompressionMethod ();

    bool  cancelFlag = false;

    crossValidation = new CrossValidation 
                              (config,
                               trainingData,
                               classes,
                               10,
                               false,   //  False = Features are not normalized already.
                               trainingData->FileDesc (),
                               log,
                               cancelFlag
                              );

    crossValidation->RunValidationOnly (validationData, NULL);

    accuracy  = crossValidation->Accuracy ();
    trainTime = crossValidation->TrainTimeMean ();
    testTime  = crossValidation->TestTimeMean ();
    supportVectors = crossValidation->SupportPointsMean ();
  }

  delete  trainingData;

  status = rjDone;
}  /* EvaluteNode */
Example #23
FeatureVectorListPtr  FeatureFileIO::LoadInSubDirectoryTree 
                         (FactoryFVProducerPtr  _fvProducerFactory,
                          KKStr                 _rootDir,
                          MLClassList&          _mlClasses,
                          bool                  _useDirectoryNameForClassName,
                          VolConstBool&         _cancelFlag, 
                          bool                  _rewriteRootFeatureFile,
                          RunLog&               _log
                         )
{
  _log.Level (10) << "FeatureFileIO::LoadInSubDirectoryTree    rootDir[" << _rootDir << "]." << endl;

  osAddLastSlash (_rootDir);

  KKStr  featureFileName ("");
  KKStr  fullFeatureFileName ("");

  if  (!_rootDir.Empty ())
  {
    featureFileName = osGetRootNameOfDirectory (_rootDir) + ".data";
    fullFeatureFileName = _rootDir + featureFileName;
  }
  else
  {
    featureFileName     = "Root.data";
    fullFeatureFileName = "Root.data";
  }

  MLClassPtr  unKnownClass = _mlClasses.GetUnKnownClass ();
  if  (_useDirectoryNameForClassName)
  {
    KKStr className = MLClass::GetClassNameFromDirName (_rootDir);
    unKnownClass    = _mlClasses.GetMLClassPtr (className);
  }

  bool  changesMade = false;

  FeatureVectorListPtr  dirImages = NULL;

  if  (_rewriteRootFeatureFile)
  {
    DateTime  timeStamp;
    dirImages = FeatureDataReSink (_fvProducerFactory,
                                   _rootDir,
                                   featureFileName,
                                   unKnownClass,
                                   _useDirectoryNameForClassName,
                                   _mlClasses,
                                   _cancelFlag,
                                   changesMade,
                                   timeStamp,
                                   _log
                                  );
    if  (_useDirectoryNameForClassName)
    {
      FeatureVectorList::iterator  idx;
      for  (idx = dirImages->begin ();  idx != dirImages->end ();  idx++)
      {
        if  ((*idx)->MLClass () != unKnownClass)
        {
          (*idx)->MLClass (unKnownClass);
          changesMade = true;
        }
      }

      if  (changesMade)
      {
        KKStr  fullFileName = osAddSlash (_rootDir) + featureFileName;
        kkuint32  numExamplesWritten = 0;
        bool  cancel     = false;
        bool  successful = false;
        SaveFeatureFile (fullFileName, 
                         dirImages->AllFeatures (), 
                         *dirImages, 
                         numExamplesWritten,
                         cancel,
                         successful,
                         _log
                        );
      }
    }
  }
  else
  {
    dirImages =  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // Now that we have processed all image files in "rootDir",
  // lets process any sub-directories.

  KKStr  dirSearchPath = osAddSlash (_rootDir) + "*.*";

  KKStrListPtr  subDirectories = osGetListOfDirectories (dirSearchPath);
  if  (subDirectories)
  {
    KKStrList::iterator  idx;

    for  (idx = subDirectories->begin ();  (idx != subDirectories->end ()  &&  (!_cancelFlag));   idx++)
    {
      KKStr  subDirName (**idx);
      if  (subDirName == "BorderImages")
      {
        // We ignore this director
        continue;
      }

      KKStr  newDirPath = osAddSlash (_rootDir) + subDirName;

      FeatureVectorListPtr  subDirExamples = LoadInSubDirectoryTree (_fvProducerFactory,
                                                                     newDirPath, 
                                                                     _mlClasses, 
                                                                     _useDirectoryNameForClassName, 
                                                                     _cancelFlag,
                                                                     true,     // true = ReWriteRootFeatureFile
                                                                     _log
                                                                    );
      osAddLastSlash (subDirName);

      // We want to add the directory path to the ExampleFileName so that we can later locate the source image.
      for  (auto fv: *subDirExamples)
      {
        KKStr  newImageFileName = subDirName + fv->ExampleFileName ();
        fv->ExampleFileName (newImageFileName);
      }

      dirImages->AddQueue (*subDirExamples);
      subDirExamples->Owner (false);
      delete  subDirExamples;
      subDirExamples = NULL;
    }

    delete  subDirectories;  subDirectories = NULL;
  }

  _log.Level (10) << "LoadInSubDirectoryTree - Done" << endl;

  return  dirImages;
}  /* LoadInSubDirectoryTree */
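
For example, assuming the os* helpers insert the platform path separator: loading rootDir "D:\\Plankton" whose subdirectory "Copepod" holds "img001.bmp" returns a vector whose ExampleFileName is "Copepod\\img001.bmp", which lets the source image be located again later; and since the recursive call passes the subdirectory path, with _useDirectoryNameForClassName set that vector is labeled with the class derived from "Copepod".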
Example #24
void   MergeFeatureFiles::Main ()
{
  if  (Abort ())
    return;

  {
    // Will first load source files.
    uint  srcIdx = 0;
    for  (srcIdx = 0;  srcIdx < srcFileNames.size ();  srcIdx++)
    {
      bool  cancelFlag  = false;
      bool  successful  = false;
      bool  changesMade = false;

      KKStr             srcFileName = srcFileNames[srcIdx];
      FeatureFileIOPtr  srcFormat   = srcFormats  [srcIdx];
      
      MLClassList   classes;
      
      FeatureVectorListPtr  s = NULL;

      s = srcFormat->LoadFeatureFile (srcFileName, classes, -1, cancelFlag, successful, changesMade, log);

      if  ((s == NULL)  ||  (!successful))
      {
        log.Level (-1) << endl << endl
                       << "MergeFeatureFiles::Main   ***ERROR***    Could not load file[" << srcFileName << "]" << endl
                       << endl;
        delete  s;  s = NULL;
        Abort (true);
        return;
      }

      if  (!srcData)
        srcData = new FeatureVectorList (s->FileDesc (), true);

      if  ((*(s->FileDesc ())) != (*(srcData->FileDesc ())))
      {
        // The last source file read does not have the same attribute sets as previous feature files already read.
        log.Level (-1) << endl << endl
                       << "MergeFeatureFiles::Main   ***ERROR***    Feature File[" << srcFileName << "] does not have the same Attributes as previous feature files already read in." << endl
                       << endl;
        Abort (true);
        delete  s;  s = NULL;
        return;
      }
      
      srcData->AddQueue (*s);
      s->Owner (false);
      delete  s;
    }
  }


  if  (!Abort ())
  {
    if  (randomize)
    {
      DateTime  dt = osGetLocalDateTime ();
      srcData->RandomizeOrder (dt.Seconds ());
      srcData->RandomizeOrder ();
    }

    if  (stratify)
    {
      FeatureVectorListPtr  stratifiedSrc = srcData->StratifyAmoungstClasses (numOfFolds, log);
      srcData->Owner (false);
      stratifiedSrc->Owner (true);
      delete  srcData;
      srcData = stratifiedSrc;
      stratifiedSrc = NULL;
    }

    bool  cancelFlag  = false;
    bool  successful  = false;

    uint  numExamplesWritten = 0;
    destFormat->SaveFeatureFile (destFileName, srcData->AllFeatures (), *srcData, numExamplesWritten, cancelFlag, successful, log);
    if  (!successful)
    {
      log.Level (-1) << endl << endl
                     << "MergeFeatureFiles::Main   ***ERROR***    Could not save to file[" << destFileName << "]" << endl
                     << endl;
      Abort (true);
    }
  }
}  /* Main */
Example #25
void   AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces ()
{
  log.Level (10) << "AbundanceCorrectionStatsBuilder::CreateInitialThreadInstaces"  << endl;
  
  FeatureVectorListPtr  stratifiedTrainData = trainLibData->StratifyAmoungstClasses (numOfFolds);
  FeatureVectorListPtr  stratifiedOtherData = otherClassData->StratifyAmoungstClasses (numOfFolds);

  int32  numTrainExamples = stratifiedTrainData->QueueSize ();
  int32  numOtherExamples = stratifiedOtherData->QueueSize ();

  msgQueue = new MsgQueue ("AbundanceCorrectionStatsBuilder");

  int32  lastFvInFold = -1;
  int32  firstFvInFold = 0;

  int32  firstOtherFvInFold = 0;
  int32  lastOtherFvInFold = -1;

  for  (int32  foldNum = 0;  foldNum < numOfFolds;  ++foldNum)
  {
    firstFvInFold = lastFvInFold + 1;
    lastFvInFold  = (numTrainExamples * (foldNum + 1) / numOfFolds) - 1;

    firstOtherFvInFold = lastOtherFvInFold + 1;
    lastOtherFvInFold  = (numOtherExamples * (foldNum + 1) / numOfFolds) - 1;

    FeatureVectorListPtr  trainData = new FeatureVectorList (fileDesc, false, log);
    FeatureVectorListPtr  testData  = new FeatureVectorList (fileDesc, false, log);

    for  (int32 idx = 0;  idx < numTrainExamples;  ++idx)
    {
      FeatureVectorPtr fv = stratifiedTrainData->IdxToPtr (idx);
      if  ((idx >= firstFvInFold)  &&  (idx <= lastFvInFold))
        testData->PushOnBack (fv);
      else
        trainData->PushOnBack (fv);
    }

    // Add OtherClass examples to the test data.
    for  (int32 idx = firstOtherFvInFold;  idx <= lastOtherFvInFold;  ++idx)
    {
      FeatureVectorPtr fv = stratifiedOtherData->IdxToPtr (idx);
      testData->PushOnBack (fv);
    }

    RunLogPtr  threadRunLog = new RunLog ();
    threadRunLog->AttachMsgQueue (msgQueue);

    KKStr  threadName = "AbundanceCorrFold" + StrFormatInt (foldNum, "00");
    TrainTestThreadPtr  thread = new TrainTestThread 
               ("Fold_" + StrFormatInt (foldNum, "00"),
                this,
                config,
                allClasses,
                trainData,            // Will take ownership and delete in its destructor.
                trainLibDataClasses,
                testData,             // Will take ownership and delete in its destructor.
                otherClass,
                threadName,
                msgQueue,             // Will take ownership and delete in its destructor.
                threadRunLog
               );

    queueReady->PushOnBack (thread);
  }

  delete  stratifiedOtherData;  stratifiedOtherData = NULL;
  delete  stratifiedTrainData;  stratifiedTrainData = NULL;
}  /* CreateInitialThreadInstaces */