Example #1
FeatureVectorPtr  FeatureEncoder::EncodeAExample (FileDescConstPtr  encodedFileDesc,
                                                  FeatureVectorPtr  src
                                                 )
{
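  // Allocate the encoded example with the destination feature layout and carry the source example's metadata over.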
  FeatureVectorPtr  encodedExample = new FeatureVector (numEncodedFeatures);
  encodedExample->MLClass     (src->MLClass     ());
  encodedExample->PredictedClass (src->PredictedClass ());
  //encodedExample->Version        (src->Version        ());
  encodedExample->TrainWeight    (src->TrainWeight    ());

  const float*  featureData = src->FeatureData ();
  kkint32  x;

  for  (x = 0;  x < numOfFeatures; x++)
  {
    float  featureVal = featureData [srcFeatureNums[x]];
    kkint32  y = destFeatureNums[x];

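    // Route the source value into one or more destination slots according to this feature's encoding policy.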
    switch (destWhatToDo[x])
    {
    case  FeWhatToDo::FeAsIs:
      {
        encodedExample->AddFeatureData (y, featureVal);
      }
      break;

    case  FeWhatToDo::FeBinary:
      {
        for  (kkint32 z = 0; z < cardinalityDest[x]; z++)
        {
          float  bVal = ((kkint32)featureVal == z);
          encodedExample->AddFeatureData (y, bVal);
          y++;
        }
      }
      break;

    case  FeWhatToDo::FeScale:
      {
        encodedExample->AddFeatureData (y, (featureVal / (float)cardinalityDest[x]));
      }
      break;
    }
  }

  return  encodedExample;
}  /* EncodeAExample */
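For context, here is a self-contained sketch of the three per-feature policies applied above (FeAsIs, FeBinary, FeScale), rewritten against a plain std::vector so it compiles outside the library; Policy and EncodeOneFeature are hypothetical names, not part of FeatureEncoder.

// Hypothetical illustration of the three encoding policies used by EncodeAExample:
// copy the value as-is, expand a categorical value into one-hot binary columns,
// or scale the value by the feature's cardinality.
#include <vector>

enum class Policy { AsIs, Binary, Scale };

static void  EncodeOneFeature (std::vector<float>&  dest,
                               float                featureVal,
                               Policy               policy,
                               int                  cardinality)
{
  switch (policy)
  {
  case Policy::AsIs:                    // pass the raw value through
    dest.push_back (featureVal);
    break;

  case Policy::Binary:                  // one column per category, 1.0 where it matches
    for (int z = 0; z < cardinality; ++z)
      dest.push_back ((int)featureVal == z ? 1.0f : 0.0f);
    break;

  case Policy::Scale:                   // normalize by the number of categories
    dest.push_back (featureVal / (float)cardinality);
    break;
  }
}

The Binary case is why y advances inside the inner loop above: a categorical feature with cardinality N occupies N consecutive destination slots in the encoded vector.
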
Example #2
void  FeatureEncoder::EncodeIntoSparseMatrix
                               (FeatureVectorListPtr   src,
                                ClassAssignments&      assignments,
                                XSpacePtr&             xSpace,          
                                kkint32&               totalxSpaceUsed,
                                struct svm_problem&    prob,
                                RunLog&                log
                               )

{
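  // Pack every example in 'src' into libsvm's sparse svm_problem / svm_node representation,
  // preferring one large xSpace block and falling back to per-example allocation if that block cannot be obtained.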
  FeatureVectorListPtr  compressedExamples    = NULL;
  FeatureVectorListPtr  examplesToUseFoXSpace = NULL;
  kkint32               xSpaceUsed            = 0;

  totalxSpaceUsed = 0;

  examplesToUseFoXSpace = src;

  kkint32  numOfExamples = examplesToUseFoXSpace->QueueSize ();
  //kkint32  elements      = numOfExamples * xSpaceNeededPerExample;

  prob.l     = numOfExamples;
  prob.y     = (double*)malloc  (prob.l * sizeof (double));
  prob.x     = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
  prob.index = new kkint32[prob.l];
  prob.exampleNames.clear ();

  kkint32  numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);

  kkint32  totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node);  // add 10 extra nodes as a safety margin

  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
  if  (xSpace == NULL)
  {
    log.Level (-1) << endl << endl << endl
                   << " FeatureEncoder::Compress   *** Failed to allocates space for 'xSpace' ****" << endl
                   << endl
                   << "     Space needed          [" << totalBytesForxSpaceNeeded << "]" << endl
                   << "     Num of Examples       [" << numOfExamples             << "]" << endl
                   << "     Num XSpaceNodesNeeded [" << numNeededXspaceNodes      << "]" << endl
                   << endl;
    // We will still have to allocate space for each individual training example separately.
    //throw "FeatureEncoder::Compress     Allocation of memory for xSpace Failed.";
  }

  prob.W = NULL;  // per-example weights are not allocated here, so the weighting block below never runs in this function

  kkint32 i = 0;
 
  FeatureVectorPtr  example      = NULL;
  MLClassPtr        lastMlClass  = NULL;
  kkint16           lastClassNum = -1;

  kkint32  bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);

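  // Encode each example: record its class number, index, and root file name, then write its sparse feature nodes.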
  for (i = 0;  i < prob.l;  i++)
  {
    if  (totalxSpaceUsed > numNeededXspaceNodes)
    {
      log.Level (-1) << endl << endl
        << "FeatureEncoder::Compress   ***ERROR***   We have exceeded the number of XSpace nodes allocated." << endl
        << endl;
    }

    example = examplesToUseFoXSpace->IdxToPtr (i);

    if  (example->MLClass () != lastMlClass)
    {
      lastMlClass  = example->MLClass ();
      lastClassNum = assignments.GetNumForClass (lastMlClass);
    }

    prob.y[i]     = lastClassNum;
    prob.index[i] = i;
    prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));

    if  (prob.W)
    {
      prob.W[i] = example->TrainWeight () * c_Param;
      if  (example->TrainWeight () <= 0.0f)
      {
        log.Level (-1) << endl 
                       << "FeatureEncoder::EncodeIntoSparseMatrix    ***ERROR***   Example[" << example->ExampleFileName () << "]" << endl
                       << "      has a TrainWeight value of 0 or less defaulting to 1.0" << endl
                       << endl;
        prob.W[i] = 1.0 * c_Param;
      }
    }

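    // If the single xSpace block could not be allocated, give this example its own buffer,
    // then shrink that buffer to the number of nodes actually used.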
    if  (xSpace == NULL)
    {
      struct svm_node*  xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
      prob.x[i] = xSpaceThisExample;
      EncodeAExample (example, prob.x[i], xSpaceUsed);
      if  (xSpaceUsed < xSpaceNeededPerExample)
      {
        kkint32  bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
        struct svm_node*  smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
        memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
        free  (xSpaceThisExample);
        prob.x[i] = smallerXSpaceThisExample;
      }
    }
    else
    {
      prob.x[i] = &xSpace[totalxSpaceUsed];
      EncodeAExample (example, prob.x[i], xSpaceUsed);
    }
    totalxSpaceUsed += xSpaceUsed;
  }

  delete  compressedExamples;
  return;
}  /* EncodeIntoSparseMatrix */
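
The routine hands ownership of everything it allocates back to the caller. Below is a minimal cleanup sketch, assuming the extended svm_problem fields used above (index, W, exampleNames) and that xSpace is either the shared block returned through the reference parameter or NULL when the per-example fallback was taken; FreeEncodedProblem is a hypothetical helper, not part of the library.

// Hypothetical cleanup matching the allocations made by EncodeIntoSparseMatrix.
// When the shared xSpace block exists, every prob.x[i] points into it and must not
// be freed individually; otherwise each row was malloc'ed on its own.
void  FreeEncodedProblem (struct svm_problem&  prob,
                          struct svm_node*&    xSpace)
{
  if  (xSpace != NULL)
  {
    free (xSpace);                 // releases all rows at once
    xSpace = NULL;
  }
  else
  {
    for  (kkint32 i = 0;  i < prob.l;  ++i)
      free (prob.x[i]);            // rows were allocated one at a time
  }

  free (prob.x);        prob.x = NULL;
  free (prob.y);        prob.y = NULL;
  delete[] prob.index;  prob.index = NULL;
  free (prob.W);        prob.W = NULL;   // free(NULL) is a no-op; W was never allocated above
  prob.exampleNames.clear ();
  prob.l = 0;
}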