Пример #1
0
/**
 * @brief Creates a new FeatureVector containing the encoded form of 'src'.
 * @details The class, predicted class and training weight are copied from 'src'; the
 *  'Version' attribute is not copied.  Each selected source feature is then written to
 *  its destination slot(s) according to 'destWhatToDo':
 *   - FeAsIs   : the value is copied unchanged.
 *   - FeBinary : one slot per possible value; the slot whose offset equals the integer
 *                part of the value gets 1.0, all others get 0.0.
 *   - FeScale  : the value is divided by the feature's cardinality.
 * @param[in] encodedFileDesc  Not referenced by this method.
 * @param[in] src              Example to encode; only its accessors are called.
 * @return Encoded example allocated with 'new'; it is not released by this method.
 */
FeatureVectorPtr  FeatureEncoder::EncodeAExample (FileDescConstPtr  encodedFileDesc,
                                                  FeatureVectorPtr  src
                                                 )
{
  FeatureVectorPtr  result = new FeatureVector (numEncodedFeatures);
  result->MLClass        (src->MLClass        ());
  result->PredictedClass (src->PredictedClass ());
  result->TrainWeight    (src->TrainWeight    ());

  const float*  srcValues = src->FeatureData ();

  for  (kkint32 featIdx = 0;  featIdx < numOfFeatures;  ++featIdx)
  {
    const float    rawValue = srcValues[srcFeatureNums[featIdx]];
    const kkint32  destIdx  = destFeatureNums[featIdx];

    if  (destWhatToDo[featIdx] == FeWhatToDo::FeAsIs)
    {
      result->AddFeatureData (destIdx, rawValue);
    }
    else if  (destWhatToDo[featIdx] == FeWhatToDo::FeBinary)
    {
      // Dense one-hot expansion: every slot is written, including the zero ones.
      for  (kkint32 offset = 0;  offset < cardinalityDest[featIdx];  ++offset)
      {
        const float  indicator = (static_cast<kkint32> (rawValue) == offset) ? 1.0f : 0.0f;
        result->AddFeatureData (destIdx + offset, indicator);
      }
    }
    else if  (destWhatToDo[featIdx] == FeWhatToDo::FeScale)
    {
      result->AddFeatureData (destIdx, rawValue / static_cast<float> (cardinalityDest[featIdx]));
    }
  }

  return  result;
}  /* EncodeAExample */
Пример #2
0
/**
 * @brief Counts how many sparse nodes are needed to hold the encoded form of every example in 'src'.
 * @details For each example one node is counted per encoded feature that would be non-zero:
 *   - FeAsIs / FeScale : counted when the source value is non-zero.
 *   - FeBinary         : counted when the integer part of the source value falls inside
 *                        [0, cardinality); only that single indicator would be non-zero.
 *  One additional node is counted per example (presumably for the 'index = -1' terminator
 *  that the svm_node encoder appends - confirm against the caller).
 * @param[in] src  List of examples to be sized; only read from.
 * @return Total number of nodes required for all examples in 'src'.
 */
kkint32  FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr   src)  const
{
  kkint32  xSpaceNodesNeeded = 0;
  FeatureVectorList::const_iterator  idx;
  for  (idx = src->begin ();  idx != src->end ();  ++idx)
  {
    FeatureVectorPtr fv = *idx;
    const float*  featureData = fv->FeatureData ();

    for  (kkint32 x = 0;  x < numOfFeatures;  ++x)
    {
      const float  featureVal = featureData [srcFeatureNums[x]];

      switch (destWhatToDo[x])
      {
      case  FeWhatToDo::FeAsIs:
      case  FeWhatToDo::FeScale:
        // Scaling divides by the cardinality, so the result is non-zero exactly when the source is.
        if  (featureVal != 0.0f)
          xSpaceNodesNeeded++;
        break;

      case  FeWhatToDo::FeBinary:
        {
          // At most one indicator is set, so a range check replaces scanning every possible value.
          const kkint32  category = static_cast<kkint32> (featureVal);
          if  ((category >= 0)  &&  (category < cardinalityDest[x]))
            xSpaceNodesNeeded++;
        }
        break;
      }
    }

    // One extra node per example.
    xSpaceNodesNeeded++;
  }

  return xSpaceNodesNeeded;
}  /* DetermineNumberOfNeededXspaceNodes */
Пример #3
0
void   FeatureFileConverter::ConvertData ()
{
  cout << endl
       << "Saving [" << data->QueueSize () << "] records to data file[" << destFileName << "]" << endl
       << endl;

  bool  successful = false;

  int  numOfFeatures = data->NumOfFeatures ();
  int  numWithAllZeros = 0;

  {
    FeatureVectorListPtr  newData  = new FeatureVectorList (srcFileDesc, true, log);

    // Will store examples that have all zero's for all features in "zeroData"
    // container.  This way they can be deleted from memory later and not result
    // in a memory leak.  This has to be done because they are not going to
    // be placed into newData which is going to become the owner of all the
    // examples.
    FeatureVectorListPtr  zeroData = new FeatureVectorList (srcFileDesc, true, log);

    // How many have all 0's for feature data.
    FeatureVectorList::iterator  idx;

    for  (idx = data->begin ();  idx != data->end ();  idx++)
    {
      FeatureVectorPtr  i = *idx;
      bool  allZeros = true;
      for  (int featureNum = 0;  featureNum < numOfFeatures;  featureNum++)
      {
        allZeros = (i->FeatureData (featureNum) == 0.0f);
        if  (!allZeros)
          break;
      }

      if  (allZeros)
      {
        numWithAllZeros++;
        zeroData->PushOnBack (i);
      }
      else
      {
        newData->PushOnBack (i);
      }
    }

    data->Owner (false);
    delete data;
    data = newData;
    delete  zeroData;
  }

  *report << endl
          << endl
          << "Num of data items with all zero feature data [" << numWithAllZeros << "]" << endl
          << endl;

  *report << data->ClassStatisticsStr ();
  *report << endl << endl << endl;

  if  (statistics)
  {
    *report << "Class Statistics:"  << endl;
    data->PrintClassStatistics (*report);
    *report << endl << endl;

    *report << "Feature Statistics:"  << endl;
    data->PrintFeatureStatisticsByClass (*report);
  }


  if  (enumerateClasses)
  {
    // We are going to change the name of the classes to numbers enumberated by className 

    MLClassConstListPtr  mlClasses = data->ExtractMLClassConstList ();
    mlClasses->SortByName ();

    MLClassConstListPtr  newClassNames = new MLClassConstList ();

    int classIdx = 0;
    MLClassConstList::iterator idx;
    for  (idx = mlClasses->begin ();  idx !=  mlClasses->end ();  idx++)
    {
      KKStr  newName = StrFormatInt (classIdx, "zzz0");
      MLClassConstPtr  mlClass = newClassNames->GetMLClassPtr (newName);
      classIdx++;
    }

    FeatureVectorList::iterator  idx2;
    for  (idx2 = data->begin ();  idx2 != data->end ();  idx2++)
    {
      MLClassConstPtr  c = (*idx2)->MLClass ();
      int  classIndex = mlClasses->PtrToIdx (c);
      (*idx2)->MLClass (newClassNames->IdxToPtr (classIndex));
    }

    delete  mlClasses; mlClasses = NULL;
    delete  newClassNames;  newClassNames = NULL;
  }

  if  (encodeFeatureData)
  {
    EncodeFeatureData ();
  }
  else
  {
    uint  numExamplesWritten = 0;
    destFileFormat->SaveFeatureFile (destFileName,
                                     *features,
                                     *data,
                                     numExamplesWritten,
                                     cancelFlag,
                                     successful,
                                     log
                                    );
  }

}  /* ConvertData */
Пример #4
0
/**
 * @brief Converts a single example into the sparse svm_node format.
 * @details Only non-zero encoded values produce a node.  How each source feature is
 *  encoded depends on 'destWhatToDo':
 *   - FeAsIs   : the value is stored unchanged.
 *   - FeBinary : a single node with value 1 is stored at 'destination index + integer
 *                part of the value', provided that integer lies inside [0, cardinality).
 *   - FeScale  : the value divided by the feature's cardinality is stored.
 *  A terminating node with index = -1 and value = -1 is always appended.
 * @param[in]  example     The example that is being converted.
 * @param[out] xSpace      Array that receives the encoded nodes; its capacity is not
 *                         checked here, so the caller must provide enough room.
 * @param[out] xSpaceUsed  Number of nodes written to 'xSpace', terminator included.
 * @throws KKException  When a destination feature index is not less than 'xSpaceNeededPerExample'.
 */
void  FeatureEncoder::EncodeAExample (FeatureVectorPtr  example,
                                      svm_node*         xSpace,
                                      kkint32&          xSpaceUsed
                                     )
{
  const float*  featureData = example->FeatureData ();

  xSpaceUsed = 0;

  for  (kkint32 x = 0;  x < numOfFeatures;  ++x)
  {
    const float    featureVal = featureData [srcFeatureNums[x]];
    const kkint32  y = destFeatureNums[x];

    if  (y >= xSpaceNeededPerExample)
    {
      KKStr  errMsg (128);
      errMsg << "FeatureEncoder::EncodeAExample  ***ERROR***   xSpaceNeededPerExample[" << xSpaceNeededPerExample << "].";
      cerr << endl
           << "FeatureEncoder::EncodeAExample     *** ERROR ***"  << endl
           << "             " << errMsg                           << endl
           << endl;
      throw KKException (errMsg);
    }

    switch (destWhatToDo[x])
    {
    case  FeWhatToDo::FeAsIs:
      {
        if  (featureVal != 0.0f)
        {
          xSpace[xSpaceUsed].index = y;
          xSpace[xSpaceUsed].value = featureVal;
          xSpaceUsed++;
        }
      }
      break;

    case  FeWhatToDo::FeBinary:
      {
        // Only the indicator matching the integer part of the value is non-zero, so it
        // is located directly instead of scanning every possible value.
        const kkint32  category = static_cast<kkint32> (featureVal);
        if  ((category >= 0)  &&  (category < cardinalityDest[x]))
        {
          xSpace[xSpaceUsed].index = y + category;
          xSpace[xSpaceUsed].value = 1.0f;
          xSpaceUsed++;
        }
      }
      break;

    case  FeWhatToDo::FeScale:
      {
        if  (featureVal != 0.0f)
        {
          xSpace[xSpaceUsed].index = y;
          xSpace[xSpaceUsed].value = featureVal / static_cast<float> (cardinalityDest[x]);
          xSpaceUsed++;
        }
      }
      break;
    }
  }

  // Terminator node marking the end of this example's nodes.
  xSpace[xSpaceUsed].index = -1;
  xSpace[xSpaceUsed].value = -1;
  xSpaceUsed++;
}  /* EncodeAExample */