/**
 * Builds a new FeatureVector holding the encoded form of 'src'.
 *
 * The result is created with 'numEncodedFeatures' slots and receives the MLClass,
 * PredictedClass and TrainWeight of 'src'.  Each of the 'numOfFeatures' selected source
 * features is then written starting at its destination index according to its
 * 'destWhatToDo' entry:
 *   - FeAsIs   : the source value is stored unchanged.
 *   - FeBinary : 'cardinalityDest' consecutive slots are written; a slot gets 1.0 when
 *                its offset equals the source value truncated to an integer, else 0.0.
 *   - FeScale  : the source value divided by 'cardinalityDest' is stored.
 * Any other 'destWhatToDo' value writes nothing for that feature.
 *
 * @param encodedFileDesc  Not referenced by this body.
 * @param src              Example to encode; it is only read.
 * @return  Pointer to a FeatureVector allocated here with 'new'; this function never deletes it.
 */
FeatureVectorPtr  FeatureEncoder::EncodeAExample (FileDescConstPtr  encodedFileDesc,
                                                  FeatureVectorPtr  src
                                                 )
{
  FeatureVectorPtr  result = new FeatureVector (numEncodedFeatures);

  // Carry the labelling and weighting information across unchanged.
  result->MLClass        (src->MLClass        ());
  result->PredictedClass (src->PredictedClass ());
  result->TrainWeight    (src->TrainWeight    ());

  const float*  srcValues = src->FeatureData ();

  for  (kkint32 featIdx = 0;  featIdx < numOfFeatures;  featIdx++)
  {
    const float       srcValue  = srcValues[srcFeatureNums[featIdx]];
    const kkint32     destStart = destFeatureNums[featIdx];
    const FeWhatToDo  action    = destWhatToDo[featIdx];

    if  (action == FeWhatToDo::FeAsIs)
    {
      result->AddFeatureData (destStart, srcValue);
    }
    else if  (action == FeWhatToDo::FeBinary)
    {
      // One slot per possible value; only the slot matching the truncated source value is set.
      const kkint32  truncated = static_cast<kkint32> (srcValue);
      for  (kkint32 offset = 0;  offset < cardinalityDest[featIdx];  offset++)
      {
        const float  flag = (truncated == offset) ? 1.0f : 0.0f;
        result->AddFeatureData (destStart + offset, flag);
      }
    }
    else if  (action == FeWhatToDo::FeScale)
    {
      result->AddFeatureData (destStart, srcValue / static_cast<float> (cardinalityDest[featIdx]));
    }
  }

  return  result;
}  /* EncodeAExample */
void FeatureEncoder::EncodeIntoSparseMatrix (FeatureVectorListPtr src, ClassAssignments& assignments, XSpacePtr& xSpace, kkint32& totalxSpaceUsed, struct svm_problem& prob, RunLog& log ) { FeatureVectorListPtr compressedExamples = NULL; FeatureVectorListPtr examplesToUseFoXSpace = NULL; kkint32 xSpaceUsed = 0; totalxSpaceUsed = 0; examplesToUseFoXSpace = src; kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize (); //kkint32 elements = numOfExamples * xSpaceNeededPerExample; prob.l = numOfExamples; prob.y = (double*)malloc (prob.l * sizeof (double)); prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*)); prob.index = new kkint32[prob.l]; prob.exampleNames.clear (); kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace); kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node); // I added '10' to elements because I am paranoid xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded); if (xSpace == NULL) { log.Level (-1) << endl << endl << endl << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl << endl << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl << " Num of Examples [" << numOfExamples << "]" << endl << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl << endl; // we sill have to allocate space for each individual training example separately. //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed."; } prob.W = NULL; kkint32 i = 0; FeatureVectorPtr example = NULL; MLClassPtr lastMlClass = NULL; kkint16 lastClassNum = -1; kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node); for (i = 0; i < prob.l; i++) { if (totalxSpaceUsed > numNeededXspaceNodes) { log.Level (-1) << endl << endl << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated." 
<< endl << endl; } example = examplesToUseFoXSpace->IdxToPtr (i); if (example->MLClass () != lastMlClass) { lastMlClass = example->MLClass (); lastClassNum = assignments.GetNumForClass (lastMlClass); } prob.y[i] = lastClassNum; prob.index[i] = i; prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ())); if (prob.W) { prob.W[i] = example->TrainWeight () * c_Param; if (example->TrainWeight () <= 0.0f) { log.Level (-1) << endl << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl << endl; prob.W[i] = 1.0 * c_Param; } } if (xSpace == NULL) { struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample); prob.x[i] = xSpaceThisExample; EncodeAExample (example, prob.x[i], xSpaceUsed); if (xSpaceUsed < xSpaceNeededPerExample) { kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node); struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample); memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample); free (xSpaceThisExample); prob.x[i] = smallerXSpaceThisExample; } } else { prob.x[i] = &xSpace[totalxSpaceUsed]; EncodeAExample (example, prob.x[i], xSpaceUsed); } totalxSpaceUsed += xSpaceUsed; } delete compressedExamples; return; } /* Compress */