Example #1
0
/**
 * @brief  Builds a new list of feature vectors in which symbolic/nominal fields have been
 *         encoded into numeric features.
 * @param  srcData  Source examples; not modified (each example's class label is copied over).
 * @return New FeatureVectorList that owns its contents; caller is responsible for deleting it.
 *         If all fields are already numeric a plain deep copy of 'srcData' is returned.
 */
FeatureVectorListPtr  FeatureEncoder::CreateEncodedFeatureVector (FeatureVectorList&  srcData)
{
  // Nothing to encode when every field is numeric; return a deep copy.
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  encodedFeatureVectorList = new FeatureVectorList (destFileDesc, true);

  FeatureVectorList::iterator  idx;
  for  (idx = srcData.begin ();   idx != srcData.end ();  idx++)
  {
    FeatureVectorPtr  srcExample = *idx;

    // 'encodedData' is a sentinel-terminated array of XSpace entries; index == -1 marks the end.
    XSpacePtr  encodedData = EncodeAExample (srcExample);

    kkint32  zed = 0;
    FeatureVectorPtr  encodedFeatureVector = new FeatureVector (codedNumOfFeatures);
    while  (encodedData[zed].index != -1)
    {
      encodedFeatureVector->AddFeatureData (encodedData[zed].index, (float)encodedData[zed].value);
      zed++;
    }

    encodedFeatureVector->MLClass (srcExample->MLClass ());
    encodedFeatureVectorList->PushOnBack (encodedFeatureVector);

    // BUG FIX: 'encodedData' is indexed as an array above, so assuming 'EncodeAExample'
    // allocates it with new[], scalar 'delete' was undefined behavior; use array delete.
    // TODO(review): confirm against EncodeAExample's allocation.
    delete[]  encodedData;
    encodedData = NULL;
  }

  return  encodedFeatureVectorList;
}  /* CreateEncodedFeatureVector */
/**
 * @brief  Base-class stub for drivers that do not support writing; derived classes that
 *         can write feature files override this method.
 *         Always fails: reports an error through '_errorMessage' and '_log', sets
 *         '_successful' to false and '_numExamplesWritten' to 0.
 */
void  FeatureFileIO::SaveFile (FeatureVectorList&    _data,
                               const KKStr&          _fileName,
                               FeatureNumListConst&  _selFeatures,
                               std::ostream&         _out,
                               kkuint32&             _numExamplesWritten,
                               VolConstBool&         _cancelFlag,
                               bool&                 _successful,
                               KKStr&                _errorMessage,
                               RunLog&               _log
                              )
{
  // Report failure through all the output parameters.
  _successful         = false;
  _numExamplesWritten = 0;

  _errorMessage = "Driver: '" + this->DriverName () + "' does not implement 'SaveFile' method.";

  ostream&  o = _log.Level (-1);
  o << endl;
  o << "FeatureFileIO::SaveFile    ***ERROR***   " << _errorMessage << endl;
  o << "     _data.size   : " << _data.size () << endl;
  o << "     _fileName    : " << _fileName << endl;
  o << "     _selFeatures : " << _selFeatures.ToCommaDelStr () << endl;
  o << "     _out.fail    : " << _out.fail () << endl;
  o << "     _cancelFlag  : " << _cancelFlag << endl;
  o << endl;
}
Example #3
0
/**
 * @brief  Returns a new list containing encoded versions of every example in 'srcData';
 *         each encoded example keeps the class label of its source example.
 * @param  srcData  Examples to encode; not modified.
 * @return New FeatureVectorList that owns its contents; caller must delete it.  When all
 *         fields are already numeric a deep copy of 'srcData' is returned instead.
 */
FeatureVectorListPtr  FeatureEncoder2::EncodedFeatureVectorList (const FeatureVectorList&  srcData)  const
{
  // Already fully numeric; no encoding required.
  if  (srcData.AllFieldsAreNumeric ())
    return  srcData.DuplicateListAndContents ();

  FeatureVectorListPtr  result = new FeatureVectorList (encodedFileDesc, true);

  for  (FeatureVectorList::const_iterator  it = srcData.begin ();  it != srcData.end ();  ++it)
  {
    FeatureVectorPtr  original = *it;
    FeatureVectorPtr  encoded  = EncodeAExample (original);
    encoded->MLClass (original->MLClass ());
    result->PushOnBack (encoded);
  }

  return  result;
}  /* EncodedFeatureVectorList */
void  FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr&          _fileName, 
                                                   FeatureNumListConst&  _selFeatures,
                                                   FeatureVectorList&    _examples,
                                                   VolConstBool&         _cancelFlag,
                                                   bool&                 _successful,
                                                   RunLog&               _log
                                                  )
{
  kkuint32  numExamplesWritten = 0;
  SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log);

  if  (_cancelFlag  ||  (!_successful))
    return;

  if  (_examples.QueueSize () > 64000)
  {
    kkint32  numPartsNeeded = (_examples.QueueSize () / 64000);
    if  ((_examples.QueueSize () % 64000) > 0)
      numPartsNeeded++;

    kkuint32  maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1;

    kkint32  partNum = 0;
    FeatureVectorList::const_iterator idx = _examples.begin ();

    while  ((idx != _examples.end ())  &&  (_successful)  &&  (!_cancelFlag))
    {
      FeatureVectorListPtr  part = _examples.ManufactureEmptyList (false);

      while  ((idx != _examples.end ())  &&  (part->QueueSize () < maxPartSize))
      {
        part->PushOnBack (*idx);
        idx++;
      }

      KKStr  partFileName = osRemoveExtension (_fileName) + "-" + 
                            StrFormatInt (partNum, "00") + "." +
                            osGetFileExtension (_fileName);

      SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log);

      partNum++;
      delete  part; part = NULL;
    }
  }
}  /* SaveFeatureFileMultipleParts */
/**
 * @brief  Normalizes 'examples' in place.  If a normalization-parameters file already exists
 *         it is loaded; otherwise parameters are derived from 'param'/'examples' and saved
 *         to 'nornParmsFileName' for later runs.
 * @param  param     Model parameters used to derive normalization parameters when no saved
 *                   file exists.
 * @param  examples  Examples to normalize; modified in place.
 * @throws KKStrException if the parameters file can not be loaded or saved.
 */
void   FeatureFileConverter::NormalizeExamples (ModelParam&         param,
                                                FeatureVectorList&  examples
                                               )
{
  // NOTE: removed unused local 'parameterStr' (param.ToCmdLineStr () result was never read).
  NormalizationParmsPtr  normParms = NULL;

  if  (osFileExists (nornParmsFileName))
  {
    // Reuse previously saved normalization parameters.
    bool  successful = false;

    normParms = new NormalizationParms (examples.FileDesc (), nornParmsFileName, successful, log);
    if  (!successful)
    {
      KKStr  errMsg = "Could not load normalization parameters file[" + nornParmsFileName + "]";
      log.Level (-1) << endl << "NormalizeExamples    *** ERROR ***    " << errMsg << endl << endl;
      delete  normParms;   // BUG FIX: was leaked on the throw path.
      normParms = NULL;
      throw KKStrException (errMsg);
    }
  }
  else
  {
    // Derive parameters from the data and persist them for future runs.
    bool  successful = false;
    normParms = new NormalizationParms (param, examples, log);
    normParms->Save (nornParmsFileName, successful);
    if  (!successful)
    {
      KKStr  errMsg = "Could not save normalization parameters file[" + nornParmsFileName + "]";
      log.Level (-1) << endl << "NormalizeExamples    *** ERROR ***      " << errMsg << endl << endl;
      delete  normParms;   // BUG FIX: was leaked on the throw path.
      normParms = NULL;
      throw  KKStrException (errMsg);
    }
  }

  normParms->NormalizeImages (&examples);

  delete  normParms;
  normParms = NULL;
}  /* NormalizeExamples */
/**
 *@details
 *  Determines if the other FeatureVectorList has the same underlying layout; that is each
 *  field is of the same type and meaning. This way we can determine if one list contains
 *  Apples while the other contains Oranges.
 *@param otherData  List whose field layout is compared against this list's.
 *@param log        Logger passed through to the FileDesc comparison.
 *@return Result of FileDesc::SameExceptForSymbolicData on the two lists' file descriptions.
 */
bool  FeatureVectorList::SameExceptForSymbolicData (const FeatureVectorList&  otherData,
                                                    RunLog&                   log
                                                   )  const
{
  return  fileDesc->SameExceptForSymbolicData (*(otherData.FileDesc ()), log);
}
/**
 * @brief Will run M number of Train then Test passes.
 *
 * @param[in] numExamplsToUseForTraining The number of examples in each ordering (group)
 *            that are to be used for training; the remaining examples will be
 *            used as test data.
 * @param[in] log Logger used by the cross-validation runs.
 */
void  CrossValidationMxN::RunTrainAndTest (kkuint32  numExamplsToUseForTraining,
                                           RunLog&   log
                                          )
{ 
  // Discard results from any previous run before accumulating new ones.
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  kkuint32  cvIDX = 0;

  MLClassListPtr  mlClasses = orderings->MLClasses ();

  // One Train/Test pass per ordering.
  for  (cvIDX = 0;  cvIDX < numOfOrderings;  cvIDX++)
  {
    FeatureVectorListPtr  data = orderings->Ordering (cvIDX);

    // Non-owning lists: they only reference examples owned by 'data'.
    FeatureVectorList  trainingData (fileDesc, false);
    FeatureVectorList  testData     (fileDesc, false);

    FeatureVectorList::iterator  fvIDX;

    // First 'numExamplsToUseForTraining' examples train; the rest test.
    for  (fvIDX = data->begin ();  fvIDX != data->end ();  fvIDX++)
    {
      FeatureVectorPtr example = *fvIDX;

      if  (trainingData.QueueSize () < numExamplsToUseForTraining)
        trainingData.PushOnBack (example);
      else
        testData.PushOnBack (example);
    }

    CrossValidationPtr  cv = new CrossValidation (config,
                                                  &trainingData,
                                                  mlClasses,
                                                  numOfFolds,
                                                  false,
                                                  fileDesc,
                                                  log,
                                                  cancelFlag
                                                 );

    cv->RunValidationOnly (&testData, 
                           NULL,        // No McNemars test going to be performed.
                           log
                          );

    // Collect per-ordering statistics for the mean/std-dev summaries below.
    accuracies.push_back    (cv->Accuracy       ());
    supportPoints.push_back ((float)cv->NumOfSupportVectors ());
    trainingTimes.push_back (cv->TrainTimeTotal ());
    testTimes.push_back     (cv->TestTimeTotal  ());

    meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);

    delete  cv;
  }

  CalcMeanAndStdDev (accuracies,      accuracyMean,       accuracyStdDev);
  CalcMeanAndStdDev (supportPoints,   supportPointsMean,  supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes,   trainingTimeMean,   trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes,       testTimeMean,       testTimeStdDev);

  // Scale the summed confusion matrix down to a per-ordering mean.
  double  factor = 1.0 / (double)numOfOrderings;

  meanConfusionMatrix->FactorCounts (factor);
}  /* RunTrainAndTest */
/**
 * @brief  Verifies that 'groundTruth' and 'examplesToGrade' contain the same examples,
 *         matching entries by the root name of their image file names, and writes any
 *         missing entries to '*report'.
 * @param[in,out] groundTruth      Sorted by root name as a side effect.
 * @param[in,out] examplesToGrade  Sorted by root name as a side effect.
 * @param[out]    theyAreTheSame   Set false if either list has an entry the other lacks.
 */
void  GradeClassification::ValidateThatBothListsHaveSameEntries (FeatureVectorList&  groundTruth, 
                                                                 FeatureVectorList&  examplesToGrade,
                                                                 bool&               theyAreTheSame
                                                                )
{
  theyAreTheSame = true;  // We will assume that they are the same to start with.

  int  missingExamplesToGrade = 0;
  int  missingGroundTruthExamples = 0;

  // Sorting makes LookUpByRootName lookups and the report ordering deterministic.
  groundTruth.SortByRootName ();
  examplesToGrade.SortByRootName ();

  *report << endl << endl << endl
          << "Missing Examples To Grade" << endl
          << "=========================" << endl;

  // CONSISTENCY FIX: iterator type now matches the container being iterated
  // (was ImageFeaturesList::iterator while both lists are FeatureVectorList).
  FeatureVectorList::iterator  idx;
  for  (idx = groundTruth.begin ();  idx !=  groundTruth.end ();  idx++)
  {
    FeatureVectorPtr  groundTruthExample = *idx;

    KKStr  rootName = osGetRootName (groundTruthExample->ImageFileName ());

    FeatureVectorPtr  exampleToGrade = examplesToGrade.LookUpByRootName (rootName);

    if  (!exampleToGrade)
    {
      theyAreTheSame = false;
      *report << rootName << "\t" << "*** MISSING ***" << "\t" << groundTruthExample->ImageFileName () << endl;
      missingExamplesToGrade++;
    }
  }

  if  (missingExamplesToGrade == 0)
  {
    *report  << "    *** None ***" << endl;
  }


  *report << endl << endl << endl
          << "Missing Ground Truth Examples" << endl
          << "=============================" << endl;

  // Reverse direction: every example to grade must also exist in the ground truth.
  for  (idx = examplesToGrade.begin ();  idx !=  examplesToGrade.end ();  idx++)
  {
    FeatureVectorPtr  exampleToGrade = *idx;

    KKStr  rootName = osGetRootName (exampleToGrade->ImageFileName ());

    FeatureVectorPtr  groundTruthExample = groundTruth.LookUpByRootName (rootName);

    if  (!groundTruthExample)
    {
      theyAreTheSame = false;
      *report << rootName << "\t" << "*** MISSING ***" << "\t" << exampleToGrade->ImageFileName () << "\t" << endl;
      missingGroundTruthExamples++;
    }
  }

  if  (missingGroundTruthExamples == 0)
  {
    *report << "   *** None ***" << endl;
  }
} /* ValidateThatBothListsHaveSameEntries */