void FeatureFileIO::SaveFeatureFileMultipleParts (const KKStr& _fileName, FeatureNumListConst& _selFeatures, FeatureVectorList& _examples, VolConstBool& _cancelFlag, bool& _successful, RunLog& _log ) { kkuint32 numExamplesWritten = 0; SaveFeatureFile (_fileName, _selFeatures, _examples, numExamplesWritten, _cancelFlag, _successful, _log); if (_cancelFlag || (!_successful)) return; if (_examples.QueueSize () > 64000) { kkint32 numPartsNeeded = (_examples.QueueSize () / 64000); if ((_examples.QueueSize () % 64000) > 0) numPartsNeeded++; kkuint32 maxPartSize = (_examples.QueueSize () / numPartsNeeded) + 1; kkint32 partNum = 0; FeatureVectorList::const_iterator idx = _examples.begin (); while ((idx != _examples.end ()) && (_successful) && (!_cancelFlag)) { FeatureVectorListPtr part = _examples.ManufactureEmptyList (false); while ((idx != _examples.end ()) && (part->QueueSize () < maxPartSize)) { part->PushOnBack (*idx); idx++; } KKStr partFileName = osRemoveExtension (_fileName) + "-" + StrFormatInt (partNum, "00") + "." + osGetFileExtension (_fileName); SaveFeatureFile (partFileName, _selFeatures, *part, numExamplesWritten, _cancelFlag, _successful, _log); partNum++; delete part; part = NULL; } } } /* SaveFeatureFileMultipleParts */
/**
 * @brief Will run M number of Train then Test passes, one for each ordering.
 *
 * @details For every ordering the leading 'numExamplsToUseForTraining' examples form the
 *  training set and all remaining examples form the test set.  A 'CrossValidation'
 *  instance is built from the training set and run against the test set; its accuracy,
 *  number of support vectors, training time and test time are appended to the
 *  corresponding result vectors and its confusion matrix is added into
 *  'meanConfusionMatrix'.  After all passes the mean and standard deviation of each
 *  result vector are computed and the accumulated confusion matrix is scaled by
 *  1 / numOfOrderings.
 *
 *  When 'numOfOrderings' is zero no passes are run and the confusion matrix is left
 *  unscaled, rather than being scaled by the result of a division by zero.
 *
 * @param[in] numExamplsToUseForTraining The number of examples in each ordering(group)
 *            that are to be used for training; the remaining examples will be
 *            used as test data.
 * @param[in,out] log Log passed to the 'CrossValidation' instances and to
 *            'ConfusionMatrix2::AddIn'.
 */
void  CrossValidationMxN::RunTrainAndTest (kkuint32  numExamplsToUseForTraining,
                                           RunLog&   log
                                          )
{
  CleanUpMemory ();

  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));

  kkuint32  cvIDX = 0;

  MLClassListPtr  mlClasses = orderings->MLClasses ();

  for  (cvIDX = 0;  cvIDX < numOfOrderings;  cvIDX++)
  {
    FeatureVectorListPtr  data = orderings->Ordering (cvIDX);

    FeatureVectorList  trainingData (fileDesc, false);
    FeatureVectorList  testData     (fileDesc, false);

    // The first 'numExamplsToUseForTraining' examples go to training; the rest to testing.
    FeatureVectorList::iterator  fvIDX;
    for  (fvIDX = data->begin ();  fvIDX != data->end ();  fvIDX++)
    {
      FeatureVectorPtr  example = *fvIDX;

      if  (trainingData.QueueSize () < numExamplsToUseForTraining)
        trainingData.PushOnBack (example);
      else
        testData.PushOnBack (example);
    }

    CrossValidationPtr  cv = new CrossValidation (config,
                                                  &trainingData,
                                                  mlClasses,
                                                  numOfFolds,
                                                  false,
                                                  fileDesc,
                                                  log,
                                                  cancelFlag
                                                 );

    // No McNemars test going to be performed, hence the NULL second argument.
    cv->RunValidationOnly (&testData, NULL, log);

    accuracies.push_back    (cv->Accuracy ());
    supportPoints.push_back (static_cast<float> (cv->NumOfSupportVectors ()));
    trainingTimes.push_back (cv->TrainTimeTotal ());
    testTimes.push_back     (cv->TestTimeTotal ());

    meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);

    delete  cv;
  }

  CalcMeanAndStdDev (accuracies,    accuracyMean,      accuracyStdDev);
  CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
  CalcMeanAndStdDev (trainingTimes, trainingTimeMean,  trainingTimeStdDev);
  CalcMeanAndStdDev (testTimes,     testTimeMean,      testTimeStdDev);

  // Turn the summed confusion matrix into a mean.  Skipped when there were no orderings so
  // that we never divide by zero and hand a non-finite factor to 'FactorCounts'.
  if  (numOfOrderings > 0)
  {
    double  factor = 1.0 / static_cast<double> (numOfOrderings);
    meanConfusionMatrix->FactorCounts (factor);
  }
}  /* RunTrainAndTest */