void  Strip ()
{
  bool  cancelFlag  = false;
  bool  successful  = false;
  bool  changesMade = false;

  RunLog  log;

  FeatureFileIOPtr driver =  FeatureFileIO::FileFormatFromStr ("C45");

  MLClassList  mlClasses;
  FeatureVectorListPtr  data = 
        driver->LoadFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedDataNorm.data",
                                 mlClasses,
                                 -1,
                                 cancelFlag,
                                 successful,
                                 changesMade,
                                 log
                               );

  FeatureVectorListPtr  stripped = new FeatureVectorList (data->FileDesc (), false);

  FeatureVectorList::const_iterator  idx;
  for  (idx = data->begin ();  idx != data->end ();  ++idx)
  {
    FeatureVectorPtr  fv = *idx;
    KKStr  fn = fv->ExampleFileName ();
    if  (fn.StartsWith ("SML")  ||  (fn.StartsWith ("SMP")))
    {
    }
    else
    {
      stripped->PushOnBack (fv);
    }
  }


  kkuint32  numExamplesWritten = 90;
  driver->SaveFeatureFile ("D:\\Pices\\Reports\\FeatureDataFiles\\AllValidatedImages_ForJonathon\\AllValidatedData1209.data",
                           data->AllFeatures (), 
                           *stripped, 
                           numExamplesWritten,
                           cancelFlag,
                           successful,
                           log
                          );



}
// Example #2
// 0
/**
 * Encodes every example in 'src' into svm_node form and populates 'prob' to describe them.
 *
 * @param src              Examples to encode; one entry of 'prob' is produced per example, in list order.
 * @param assignments      Supplies the class number stored in prob.y for each example's class.
 * @param xSpace           Receives one malloc'ed buffer holding the nodes of all examples.  It is left
 *                         NULL if that single allocation fails, in which case each prob.x[i] points to
 *                         its own separately malloc'ed buffer instead.
 * @param totalxSpaceUsed  Receives the sum of the node counts reported by EncodeAExample.
 * @param prob             l, y, x, index, exampleNames and W are overwritten; W is set to NULL.
 * @param log              Destination for error messages.
 *
 * NOTE(review): the malloc results for prob.y, prob.x and the per-example fallback buffer are still
 * used unchecked; handling them needs a decision on how this method should report failure.
 */
void  FeatureEncoder::EncodeIntoSparseMatrix
                               (FeatureVectorListPtr   src,
                                ClassAssignments&      assignments,
                                XSpacePtr&             xSpace,          
                                kkint32&               totalxSpaceUsed,
                                struct svm_problem&    prob,
                                RunLog&                log
                               )

{
  kkint32  xSpaceUsed = 0;

  totalxSpaceUsed = 0;

  const kkint32  numOfExamples = src->QueueSize ();

  prob.l     = numOfExamples;
  prob.y     = (double*)malloc  (prob.l * sizeof (double));
  prob.x     = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
  prob.index = new kkint32[prob.l];
  prob.exampleNames.clear ();

  const kkint32  numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (src);

  // Ten spare nodes are kept as a safety margin, as the original author did.
  const kkint32  numXspaceNodesToAllocate = numNeededXspaceNodes + 10;
  const kkint32  bytesPerXspaceNode       = (kkint32)sizeof (struct svm_node);

  // The byte count stays in size_t all the way to malloc.  Storing it in a 32 bit integer could
  // truncate it for large training sets and silently allocate a buffer that is too small.
  const size_t  totalBytesForxSpaceNeeded = (size_t)numXspaceNodesToAllocate * sizeof (struct svm_node);

  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
  if  (xSpace == NULL)
  {
    log.Level (-1) << endl << endl << endl
                   << " FeatureEncoder::EncodeIntoSparseMatrix   *** Failed to allocates space for 'xSpace' ****" << endl
                   << endl
                   << "     Space needed          [" << numXspaceNodesToAllocate << " nodes x " << bytesPerXspaceNode << " bytes]" << endl
                   << "     Num of Examples       [" << numOfExamples             << "]" << endl
                   << "     Num XSpaceNodesNeeded [" << numNeededXspaceNodes      << "]" << endl
                   << endl;
    // Not fatal: the loop below falls back to allocating space for each training example separately.
  }

  prob.W = NULL;

  FeatureVectorPtr  example      = NULL;
  MLClassPtr        lastMlClass  = NULL;
  kkint16           lastClassNum = -1;

  const size_t  bytesOfxSpacePerExample = (size_t)xSpaceNeededPerExample * sizeof (struct svm_node);

  for  (kkint32 i = 0;  i < prob.l;  i++)
  {
    if  (totalxSpaceUsed > numNeededXspaceNodes)
    {
      log.Level (-1) << endl << endl
        << "FeatureEncoder::EncodeIntoSparseMatrix   ***ERROR***   We have exceeded the number of XSpace nodes allocated." << endl
        << endl;
    }

    example = src->IdxToPtr (i);

    // Only look the class number up again when the class differs from the previous example's.
    if  (example->MLClass () != lastMlClass)
    {
      lastMlClass  = example->MLClass ();
      lastClassNum = assignments.GetNumForClass (lastMlClass);
    }

    prob.y[i]     = lastClassNum;
    prob.index[i] = i;
    prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));

    // prob.W was set to NULL above, so this branch is currently never taken inside this method.
    if  (prob.W)
    {
      prob.W[i] = example->TrainWeight () * c_Param;
      if  (example->TrainWeight () <= 0.0f)
      {
        log.Level (-1) << endl 
                       << "FeatureEncoder::EncodeIntoSparseMatrix    ***ERROR***   Example[" << example->ExampleFileName () << "]" << endl
                       << "      has a TrainWeight value of 0 or less defaulting to 1.0" << endl
                       << endl;
        prob.W[i] = 1.0 * c_Param;
      }
    }

    if  (xSpace == NULL)
    {
      // Fallback: the single large allocation failed, so this example gets a buffer of its own.
      struct svm_node*  xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
      prob.x[i] = xSpaceThisExample;
      EncodeAExample (example, prob.x[i], xSpaceUsed);
      if  (xSpaceUsed < xSpaceNeededPerExample)
      {
        // Move the nodes into a buffer of exactly the size used.  If that smaller allocation
        // fails the full sized buffer is kept rather than copying into a NULL pointer.
        const size_t  bytesNeededForThisExample = (size_t)xSpaceUsed * sizeof (struct svm_node);
        struct svm_node*  smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
        if  (smallerXSpaceThisExample != NULL)
        {
          memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
          free  (xSpaceThisExample);
          prob.x[i] = smallerXSpaceThisExample;
        }
      }
    }
    else
    {
      // Normal case: the example's nodes are written at the next free position of the shared buffer.
      prob.x[i] = &xSpace[totalxSpaceUsed];
      EncodeAExample (example, prob.x[i], xSpaceUsed);
    }
    totalxSpaceUsed += xSpaceUsed;
  }

  return;
}  /* EncodeIntoSparseMatrix */
/**
 * Brings the feature file of a directory up to date with the image files found in that directory.
 *
 * The existing feature file is loaded; for every supported image file in '_dirName' the stored
 * feature vector is reused when one exists and the file's version equals the producer's version,
 * otherwise the feature vector is computed from the image.  If anything changed, and the operation
 * was not cancelled, the feature file is rewritten.
 *
 * @param _fvProducerFactory             Manufactures the feature vector lists and the producer used to compute features.
 * @param _dirName                       Directory holding the images; when empty '_fileName' is used as given.
 * @param _fileName                      Name of the feature file.
 * @param _unknownClass                  Class given to newly computed examples; NULL selects MLClass::GetUnKnownClassStatic ().
 * @param _useDirectoryNameForClassName  When true every reused example is forced to '_unknownClass'; otherwise only
 *                                       reused examples whose class reports UnDefined () are.
 * @param _mlClasses                     Passed through to LoadFeatureFile.
 * @param _cancelFlag                    Polled; when set the work stops early and no file is written.
 * @param _changesMade                   Set to true when the returned data differs from what was on disk.
 * @param _timeStamp                     Feature file's date/time when reused with a matching version, or the
 *                                       local time of the rewrite.
 * @param _log                           Destination for log messages.
 * @return A newly manufactured list holding the feature vectors; the caller receives the pointer.
 */
FeatureVectorListPtr  FeatureFileIO::FeatureDataReSink (FactoryFVProducerPtr  _fvProducerFactory,
                                                        const KKStr&          _dirName,
                                                        const KKStr&          _fileName, 
                                                        MLClassPtr            _unknownClass,
                                                        bool                  _useDirectoryNameForClassName,
                                                        MLClassList&          _mlClasses,
                                                        VolConstBool&         _cancelFlag,
                                                        bool&                 _changesMade,
                                                        KKB::DateTime&        _timeStamp,
                                                        RunLog&               _log
                                                      )
{
  _changesMade = false;
  _timeStamp = DateTime ();

  if  (_unknownClass == NULL)
    _unknownClass = MLClass::GetUnKnownClassStatic ();

  KKStr  className = _unknownClass->Name ();

  _log.Level (10) << "FeatureFileIO::FeatureDataReSink  dirName: " << _dirName << endl
                  << "               fileName: " << _fileName << "  UnKnownClass: " << className << endl;

  // The same path is used for loading, time stamping and saving, so that an empty '_dirName'
  // is treated identically in all three places.
  KKStr  featureFileName;
  if  (_dirName.Empty ())
    featureFileName = _fileName;
  else
    featureFileName = osAddSlash (_dirName) + _fileName;

  bool  successful      = true;
  bool  versionsAreSame = false;

  FeatureVectorListPtr  loadedFeatureData 
        = LoadFeatureFile (featureFileName, _mlClasses, -1, _cancelFlag, successful, _changesMade, _log);

  if  (loadedFeatureData == NULL)
    successful = false;

  if  (_cancelFlag)
  {
    delete  loadedFeatureData;  loadedFeatureData = NULL;
    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  // The loaded data is only reused when it has the list type and file description this factory
  // produces; in every other case we start from an empty list and recompute everything.
  FeatureVectorListPtr  origFeatureData = NULL;

  if  (successful  &&
       (&typeid (*loadedFeatureData) == _fvProducerFactory->FeatureVectorListTypeId ())  &&
       ((*(loadedFeatureData->FileDesc ())) ==  (*(_fvProducerFactory->FileDesc ())))
      )
  {
    origFeatureData = loadedFeatureData;
  }
  else
  {
    origFeatureData = _fvProducerFactory->ManufacturFeatureVectorList (true);
    delete  loadedFeatureData;
  }
  loadedFeatureData = NULL;

  KKStr  fileSpec = osAddSlash (_dirName) + "*.*";
  KKStrListPtr   fileNameList = osGetListOfFiles (fileSpec);

  if  (!fileNameList)
  {
    // There are no image files, so we need to return an empty list of feature vectors.

    if  (origFeatureData->QueueSize () > 0)
      _changesMade = true;

    delete  origFeatureData;  origFeatureData = NULL;

    return  _fvProducerFactory->ManufacturFeatureVectorList (true);
  }

  FeatureVectorProducerPtr  fvProducer = _fvProducerFactory->ManufactureInstance (_log);

  // When the load failed 'origFeatureData' is already the empty list manufactured above, so
  // there is nothing to replace and only the successful case needs the version comparison.
  if  (successful)
  {
    if  (origFeatureData->Version () == fvProducer->Version ())
    {
      versionsAreSame = true;
      _timeStamp = osGetFileDateTime (featureFileName);
    }
    else
    {
      _changesMade = true;
    }
  }

  // Sorted so that the 'BinarySearchByName' lookups below can be used.
  origFeatureData->SortByRootName (false);

  FeatureVectorListPtr  extractedFeatures = _fvProducerFactory->ManufacturFeatureVectorList (true);
  extractedFeatures->Version (fvProducer->Version ());

  fileNameList->Sort (false);

  kkuint32  numImagesFoundInOrigFeatureData = 0;
  kkuint32  numOfNewFeatureExtractions = 0;

  KKStrList::iterator  fnIDX;
  for  (fnIDX = fileNameList->begin ();  (fnIDX != fileNameList->end ())  &&  (!_cancelFlag);  ++fnIDX)
  {
    KKStrPtr  imageFileName = *fnIDX;

    if  (!SupportedImageFileFormat (*imageFileName))
      continue;

    FeatureVectorPtr  origFV = origFeatureData->BinarySearchByName (*imageFileName);
    if  (origFV)
      numImagesFoundInOrigFeatureData++;

    if  (origFV  &&  versionsAreSame)
    {
      // Reuse the stored feature vector, correcting its class assignment where required.
      if  (_useDirectoryNameForClassName)
      {
        if  (origFV->MLClass () != _unknownClass)
        {
          _changesMade = true;
          origFV->MLClass (_unknownClass);
        }
      }

      else if  ((origFV->MLClass ()->UnDefined ())  &&  (origFV->MLClass () != _unknownClass))
      {
        _changesMade = true;
        origFV->MLClass (_unknownClass);
      }

      // Hand the example over to the result list and take it out of the original list.
      extractedFeatures->PushOnBack (origFV);
      origFeatureData->DeleteEntry (origFV);
      continue;
    }

    // We either  DON'T have an original feature vector    or    versions are not the same.

    KKStr  fullFileName = osAddSlash (_dirName) + (*imageFileName);
    FeatureVectorPtr  fv    = NULL;
    RasterPtr         image = NULL;
    try
    {
      image = ReadImage (fullFileName);
      if  (image)
        fv = fvProducer->ComputeFeatureVector (*image, _unknownClass, NULL, 1.0f, _log);
    }
    catch  (...)
    {
      _log.Level (-1) << endl << endl
        << "FeatureDataReSink   ***ERROR***"  << endl
        << "       Exception occurred calling constructor 'ComputeFeatureVector'." << endl
        << endl;
      fv = NULL;
    }

    // Released outside of the 'try' so that the image is also freed when an exception was thrown.
    delete  image;
    image = NULL;

    if  (fv == NULL)
    {
      // The name is streamed as a string ('*imageFileName'), not as the pointer itself.
      _log.Level (-1) << " FeatureFileIOKK::FeatureDataReSink  *** ERROR ***, Processing Image File["
                     << (*imageFileName) << "]."
                     << endl;
      continue;
    }

    _changesMade = true;
    fv->ExampleFileName (*imageFileName);
    _log.Level (30) << fv->ExampleFileName () << "  " << fv->OrigSize () << endl;
    extractedFeatures->PushOnBack (fv);
    numOfNewFeatureExtractions++;

    if  ((numOfNewFeatureExtractions % 100) == 0)
      cout << numOfNewFeatureExtractions << " Images Extracted." << endl;
  }

  // A different count means the result does not hold exactly the examples found in the original data.
  if  (numImagesFoundInOrigFeatureData != extractedFeatures->QueueSize ())
    _changesMade = true;
  
  extractedFeatures->Version (fvProducer->Version ());

  if  ((_changesMade)  &&  (!_cancelFlag))
  {
    kkuint32  numExamplesWritten = 0;
    bool      saveSuccessful     = true;

    SaveFeatureFile (featureFileName,  
                     FeatureNumList::AllFeatures (extractedFeatures->FileDesc ()),
                     *extractedFeatures,
                     numExamplesWritten,
                     _cancelFlag,
                     saveSuccessful,
                     _log
                    );

    if  (!saveSuccessful)
    {
      _log.Level (-1) << endl
        << "FeatureFileIO::FeatureDataReSink   ***ERROR***   Saving feature file[" << featureFileName << "] was not successful." << endl
        << endl;
    }

    _timeStamp = osGetLocalDateTime ();
  }

  delete fvProducer;       fvProducer      = NULL;
  delete fileNameList;     fileNameList    = NULL;
  delete origFeatureData;  origFeatureData = NULL;

  _log.Level (10) << "FeatureDataReSink  Exiting  Dir: "  << _dirName << endl;

  return  extractedFeatures;
}  /* FeatureDataReSink */