/**
 * Compute, after each boosting iteration, the mean log-likelihood of the
 * correct labels under the softmax of the accumulated votes, and write one
 * "<iteration> <tab> <log-likelihood>" line per iteration to the output file.
 */
void MDDAGClassifier::saveLikelihoods(const string& dataFileName, const string& shypFileName,
                                      const string& outFileName, int numIterations)
{
    InputData* pData = loadInputData(dataFileName, shypFileName);

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<BaseLearner*> weakHypotheses;

    // loads them
    us.loadHypotheses(shypFileName, weakHypotheses, pData);

    // where the results go
    vector< ExampleResults* > results;

    if (_verbose > 0)
        cout << "Classifying..." << flush;

    const int numClasses = pData->getNumClasses();
    const int numExamples = pData->getNumExamples();

    ofstream outFile(outFileName.c_str());
    string exampleName; // used only by the commented-out posterior dump below

    if (_verbose > 0)
        cout << "Output likelihoods..." << flush;

    // get the results
    /////////////////////////////////////////////////////////////////////
    // computeResults( pData, weakHypotheses, results, numIterations );
    assert( !weakHypotheses.empty() );

    // Initialize the output info
    OutputInfo* pOutInfo = NULL;
    if ( !_outputInfoFile.empty() )
        pOutInfo = new OutputInfo(_outputInfoFile, "err");

    // Creating the results structures. See file Structures.h for the
    // PointResults structure
    results.clear();
    results.reserve(numExamples);
    for (int i = 0; i < numExamples; ++i)
        results.push_back( new ExampleResults(i, numClasses) );

    // sum of votes for each class
    vector< AlphaReal > votesForExamples( numClasses );
    vector< AlphaReal > expVotesForExamples( numClasses );

    // iterator over all the weak hypotheses
    vector<BaseLearner*>::const_iterator whyIt;
    int t;

    if ( pOutInfo )
        pOutInfo->initialize( pData );

    // for every weak hypothesis: 1..T
    for (whyIt = weakHypotheses.begin(), t = 0;
         whyIt != weakHypotheses.end() && t < numIterations; ++whyIt, ++t)
    {
        BaseLearner* currWeakHyp = *whyIt;
        AlphaReal alpha = currWeakHyp->getAlpha();

        // for every point
        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();

            // for every class
            for (int l = 0; l < numClasses; ++l)
                currVotesVector[l] += alpha * currWeakHyp->classify(pData, i, l);
        }

        // if needed, output the step-by-step information
        if ( pOutInfo )
        {
            pOutInfo->outputIteration(t);
            pOutInfo->outputCustom(pData, currWeakHyp);

            // Margins and edge require an update of the weights,
            // therefore they are left out for the moment
            //outInfo.outputMargins(pData, currWeakHyp);
            //outInfo.outputEdge(pData, currWeakHyp);
            pOutInfo->endLine();
        }

        // calculate likelihoods from votes
        fill( votesForExamples.begin(), votesForExamples.end(), 0.0 );
        AlphaReal lLambda = 0.0;

        for (int i = 0; i < numExamples; ++i)
        {
            // a reference for clarity and speed
            vector<AlphaReal>& currVotesVector = results[i]->getVotesVector();
            AlphaReal sumExp = 0.0;

            // exponentiate the votes (softmax numerator) for every class
            for (int l = 0; l < numClasses; ++l)
            {
                expVotesForExamples[l] = exp( currVotesVector[l] );
                sumExp += expVotesForExamples[l];
            }

            // normalize into posterior probabilities
            if ( sumExp > numeric_limits<AlphaReal>::epsilon() )
            {
                for (int l = 0; l < numClasses; ++l)
                    expVotesForExamples[l] /= sumExp;
            }

            Example ex = pData->getExample( results[i]->getIdx() );
            vector<Label> labs = ex.getLabels();

            // accumulate the average log-likelihood of the positive labels
            AlphaReal m = numeric_limits<AlphaReal>::infinity();
            for (int l = 0; l < numClasses; ++l)
            {
                if ( labs[l].y > 0 &&
                     expVotesForExamples[l] > numeric_limits<AlphaReal>::epsilon() )
                {
                    AlphaReal logVal = log( expVotesForExamples[l] );

                    // logVal is always finite here thanks to the epsilon check above
                    if ( logVal != m )
                        lLambda += ( ( 1.0 / (AlphaReal)numExamples ) * logVal );
                }
            }
        }

        outFile << t << "\t" << lLambda;
        outFile << '\n';
        outFile.flush();
    }

    if (pOutInfo)
        delete pOutInfo;
    // computeResults( pData, weakHypotheses, results, numIterations );
    ///////////////////////////////////////////////////////////////////////////////////

    /*
     for (int i = 0; i < numExamples; ++i)
     {
         // output the name if it exists, otherwise the number of the example
         exampleName = pData->getExampleName(i);
         if ( !exampleName.empty() )
             outFile << exampleName << ',';

         // output the posteriors
         outFile << results[i]->getVotesVector()[0];
         for (int l = 1; l < numClasses; ++l)
             outFile << ',' << results[i]->getVotesVector()[l];
         outFile << '\n';
     }
     */

    if (_verbose > 0)
        cout << "Done!" << endl;

    if (_verbose > 1)
    {
        cout << "\nClass order (You can change it in the header of the data file):" << endl;
        for (int l = 0; l < numClasses; ++l)
            cout << "- " << pData->getClassMap().getNameFromIdx(l) << endl;
    }

    // delete the input data file
    if (pData)
        delete pData;

    vector<ExampleResults*>::iterator it;
    for (it = results.begin(); it != results.end(); ++it)
        delete (*it);
}
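/**
 * Run the trained Viola-Jones cascade on the given dataset, stage by stage,
 * and print an error summary (confusion matrix and accuracy) for the
 * positive class at the end.
 * \param dataFileName The file containing the examples to classify.
 * \param shypFileName The file storing the cascade (strong hypothesis).
 * \param numIterations Unused here: all stages stored in the model are evaluated.
 * \param outResFileName If non-empty, the per-instance forecasts are written to this file.
 */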
void VJCascadeClassifier::run(const string& dataFileName, const string& shypFileName,
                              int numIterations, const string& outResFileName)
{
    // loading data
    InputData* pData = loadInputData(dataFileName, shypFileName);
    const int numOfExamples = pData->getNumExamples();

    // get the index of the positive label
    const NameMap& namemap = pData->getClassMap();
    _positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<vector<BaseLearner*> > weakHypotheses;

    // For stagewise thresholds
    vector<AlphaReal> thresholds(0);

    // loads them
    //us.loadHypotheses(shypFileName, weakHypotheses, pData);
    us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);

    // store results
    vector<CascadeOutputInformation> cascadeData(0);
    vector<CascadeOutputInformation>::iterator it;

    cascadeData.resize(numOfExamples);
    for (it = cascadeData.begin(); it != cascadeData.end(); ++it)
        it->active = true;

    if (!_outputInfoFile.empty())
        outputHeader();

    for (int stagei = 0; stagei < (int)weakHypotheses.size(); ++stagei)
    {
        // for posteriors
        vector<AlphaReal> posteriors(0);

        // calculate the posteriors after this stage
        VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );

        // update the data (posteriors, active element index, etc.)
        updateCascadeData(pData, weakHypotheses, stagei, posteriors,
                          thresholds, _positiveLabelIndex, cascadeData);

        if (!_outputInfoFile.empty())
        {
            _output << stagei + 1 << "\t";
            _output << weakHypotheses[stagei].size() << "\t";
            outputCascadeResult( pData, cascadeData );
        }

        int numberOfActiveInstance = 0;
        for (int i = 0; i < numOfExamples; ++i)
            if (cascadeData[i].active)
                numberOfActiveInstance++;

        if (_verbose > 0)
            cout << "Number of active instances: " << numberOfActiveInstance
                 << "(" << numOfExamples << ")" << endl;
    }

    // 2x2 confusion matrix: rows are true labels, columns are forecasts
    vector<vector<int> > confMatrix(2);
    confMatrix[0].resize(2);
    fill( confMatrix[0].begin(), confMatrix[0].end(), 0 );
    confMatrix[1].resize(2);
    fill( confMatrix[1].begin(), confMatrix[1].end(), 0 );

    // print accuracy
    for (int i = 0; i < numOfExamples; ++i)
    {
        const Example& example = pData->getExample(i);
        int labelY = example.getLabelY(_positiveLabelIndex);

        if (labelY > 0) // positive label
        {
            if (cascadeData[i].forecast == 1)
                confMatrix[1][1]++;
            else
                confMatrix[1][0]++;
        }
        else // negative label
        {
            if (cascadeData[i].forecast == 0)
                confMatrix[0][0]++;
            else
                confMatrix[0][1]++;
        }
    }

    double acc = 100.0 * (confMatrix[0][0] + confMatrix[1][1]) / ((double) numOfExamples);

    // output it
    cout << endl;
    cout << "Error Summary" << endl;
    cout << "=============" << endl;
    cout << "Accuracy: " << setprecision(4) << acc << endl;
    cout << setw(10) << "\t" << setw(10) << namemap.getNameFromIdx(1 - _positiveLabelIndex)
         << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex) << endl;
    cout << setw(10) << namemap.getNameFromIdx(1 - _positiveLabelIndex)
         << setw(10) << confMatrix[0][0] << setw(10) << confMatrix[0][1] << endl;
    cout << setw(10) << namemap.getNameFromIdx(_positiveLabelIndex)
         << setw(10) << confMatrix[1][0] << setw(10) << confMatrix[1][1] << endl;

    // output forecast
    if (!outResFileName.empty())
        outputForecast(pData, outResFileName, cascadeData);

    // free memory allocation
    vector<vector<BaseLearner*> >::iterator bvIt;
    for (bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt)
    {
        vector<BaseLearner*>::iterator bIt;
        for (bIt = bvIt->begin(); bIt != bvIt->end(); ++bIt)
            delete *bIt;
    }
}
/**
 * The main function. Everything starts here!
 * \param argc The number of arguments.
 * \param argv The arguments.
 * \date 11/11/2005
 */
int main(int argc, const char* argv[])
{
    // initializing the random number generator
    srand ( time(NULL) );

    // no need to synchronize with C style streams
    std::ios_base::sync_with_stdio(false);

#if STABLE_SORT
    cerr << "WARNING: Stable sort active! It might be slower!!" << endl;
#endif

    //////////////////////////////////////////////////////////////////////////
    // Standard arguments
    nor_utils::Args args;

    args.setArgumentDiscriminator("--");

    args.declareArgument("help");
    args.declareArgument("static");

    args.declareArgument("h", "Help", 1, "<optiongroup>");

    //////////////////////////////////////////////////////////////////////////
    // Basic Arguments

    args.setGroup("Parameters");

    args.declareArgument("train", "Performs training.",
                         2, "<dataFile> <nIterations>");
    args.declareArgument("traintest", "Performs training and testing at the same time.",
                         3, "<trainingDataFile> <testDataFile> <nIterations>");
    args.declareArgument("trainvalidtest", "Performs training, validation, and testing at the same time.",
                         4, "<trainingDataFile> <validDataFile> <testDataFile> <nIterations>");
    args.declareArgument("test", "Test the model.",
                         3, "<dataFile> <shypFile> <numIters>");
    args.declareArgument("test", "Test the model and output the results.",
                         4, "<dataFile> <shypFile> <numIters> <outFile>");
    args.declareArgument("cmatrix", "Print the confusion matrix for the given model.",
                         2, "<dataFile> <shypFile>");
    args.declareArgument("cmatrixfile", "Print the confusion matrix with the class names to a file.",
                         3, "<dataFile> <shypFile> <outFile>");
    args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued "
                         "discriminant function for the given dataset and model.",
                         4, "<dataFile> <shypFile> <outFile> <numIters>");
    args.declareArgument("posteriors", "Output the posteriors for each class, that is the vector-valued "
                         "discriminant function for the given dataset and model, periodically.",
                         5, "<dataFile> <shypFile> <outFile> <numIters> <period>");
    args.declareArgument("encode", "Save the coefficient vector of boosting individually for each point "
                         "using ParasiteLearner.",
                         6, "<inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>");
    args.declareArgument("ssfeatures", "Print matrix data for SingleStump-based weak learners "
                         "(numIters=0 means all of them).",
                         4, "<dataFile> <shypFile> <outFile> <numIters>");

    args.declareArgument("fileformat", "Defines the type of input file. Available types are:\n"
                         "* simple: each line has attributes separated by whitespace and the class at the end (DEFAULT!)\n"
                         "* arff: arff filetype. The header file can be specified using the --headerfile option\n"
                         "* arffbzip: bzipped arff filetype. The header file can be specified using the --headerfile option\n"
                         "* svmlight: \n"
                         "(Example: --fileformat simple)",
                         1, "<fileFormat>");

    args.declareArgument("headerfile", "The header file for the arff and SVMLight formats.", 1, "header.txt");

    args.declareArgument("constant", "Check the constant learner in each iteration.", 0, "");
    args.declareArgument("timelimit", "Time limit in minutes.", 1, "<minutes>");
    args.declareArgument("stronglearner", "Available strong learners:\n"
                         "AdaBoost (default)\n"
                         "FilterBoost\n"
                         "SoftCascade\n"
                         "VJcascade\n", 1, "<stronglearner>");
    args.declareArgument("slowresumeprocess", "Computes every statistic in each iteration (slow resume).\n"
                         "By default only the statistics of the last iteration are computed (fast resume).\n", 0, "");
    args.declareArgument("weights", "Outputs the weights of the instances at the end of the learning process.", 1, "<filename>");
    args.declareArgument("Cn", "Resampling size for FilterBoost (default=300).", 1, "<value>");
    args.declareArgument("onlinetraining", "The weak learner will be trained online.\n", 0, "");

    //// ignored for the moment!
    //args.declareArgument("arffheader", "Specify the arff header.", 1, "<arffHeaderFile>");

    // for MDDAG
    //args.setGroup("MDDAG");
    args.declareArgument("traintestmddag", "Performs training and testing at the same time using MDDAG.",
                         5, "<trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>");
    args.declareArgument("policytrainingiter", "The number of iterations the policy learner takes.", 1, "<iternum>");
    args.declareArgument("rollouts", "The number of rollouts.", 1, "<num>");
    args.declareArgument("rollouttype", "Rollout type (montecarlo or szatymaz).", 1, "<rollouttype>");
    args.declareArgument("beta", "Trade-off parameter.", 1, "<beta>");
    args.declareArgument("outdir", "Output directory.", 1, "<outdir>");
    args.declareArgument("policyalpha", "Alpha for the policy array.", 1, "<alpha>");
    args.declareArgument("succrewardtype", "Reward type (e01 or hamming).", 1, "<reward_type>");
    args.declareArgument("outtrainingerror", "Output the training error.", 0, "");
    args.declareArgument("epsilon", "Exploration term.", 1, "<epsilon>");
    args.declareArgument("updateperc", "Percentage of the policy components that are updated.", 1, "<perc>");

    // for VJ cascade
    VJCascadeLearner::declareBaseArguments(args);

    // for SoftCascade
    SoftCascadeLearner::declareBaseArguments(args);

    //////////////////////////////////////////////////////////////////////////
    // Options

    args.setGroup("I/O Options");

    /////////////////////////////////////////////
    // these are valid only for .txt input!
    // they might be removed!
    args.declareArgument("d", "The separation characters between the fields (default: whitespaces).\n"
                         "Example: -d \"\\t,.-\"\nNote: new-line is always included!",
                         1, "<separators>");
    args.declareArgument("classend", "The class is the last column instead of the first (or second if -examplelabel is active).");
    args.declareArgument("examplename", "The data file has an additional column (the very first) which contains the 'name' of the example.");
    /////////////////////////////////////////////

    args.setGroup("Basic Algorithm Options");
    args.declareArgument("weightpolicy", "Specify the type of weight initialization. The user-specified weights "
                         "(if available) are used inside the policy, which can be:\n"
                         "* sharepoints Share the weight equally among data points and between positive and negative labels (DEFAULT)\n"
                         "* sharelabels Share the weight equally among data points\n"
                         "* proportional Share the weights freely",
                         1, "<weightType>");

    args.setGroup("General Options");

    args.declareArgument("verbose", "Set the verbose level 0, 1 or 2 (0=no messages, 1=default, 2=all messages).", 1, "<val>");
    args.declareArgument("outputinfo", "Output information on the algorithm performance during training, on file <filename>.", 1, "<filename>");
    args.declareArgument("outputinfo", "Output specific information on the algorithm performance during training, "
                         "on file <filename> <outputlist>. <outputlist> must be a concatenated list of three-character "
                         "abbreviations (e.g. err for error, fpr for false positive rate).",
                         2, "<filename> <outputlist>");
    args.declareArgument("seed", "Defines the seed for the random operations.", 1, "<seedval>");

    //////////////////////////////////////////////////////////////////////////
    // Shows the list of available learners
    string learnersComment = "Available learners are:";

    vector<string> learnersList;
    BaseLearner::RegisteredLearners().getList(learnersList);
    vector<string>::const_iterator it;
    for (it = learnersList.begin(); it != learnersList.end(); ++it)
    {
        learnersComment += "\n ** " + *it;
        // defaultLearner is defined in Defaults.h
        if ( *it == defaultLearner )
            learnersComment += " (DEFAULT)";
    }

    args.declareArgument("learnertype", "Change the type of weak learner. " + learnersComment,
                         1, "<learner>");

    //////////////////////////////////////////////////////////////////////////
    //// Declare arguments that belong to all weak learners
    BaseLearner::declareBaseArguments(args);

    ////////////////////////////////////////////////////////////////////////////
    //// Weak learner (and input data) arguments
    for (it = learnersList.begin(); it != learnersList.end(); ++it)
    {
        args.setGroup(*it + " Options");
        // add weaklearner-specific options
        BaseLearner::RegisteredLearners().getLearner(*it)->declareArguments(args);
    }

    //////////////////////////////////////////////////////////////////////////
    //// Declare arguments that belong to all bandit learners
    GenericBanditAlgorithm::declareBaseArguments(args);

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    switch ( args.readArguments(argc, argv) )
    {
        case nor_utils::AOT_NO_ARGUMENTS:
            showBase();
            break;
        case nor_utils::AOT_UNKOWN_ARGUMENT:
            exit(1);
            break;
        case nor_utils::AOT_INCORRECT_VALUES_NUMBER:
            exit(1);
            break;
        case nor_utils::AOT_OK:
            break;
    }

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    if ( args.hasArgument("help") )
        showHelp(args, learnersList);
    if ( args.hasArgument("static") )
        showStaticConfig();

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    if ( args.hasArgument("h") )
        showOptionalHelp(args);

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    int verbose = 1;

    if ( args.hasArgument("verbose") )
        args.getValue("verbose", 0, verbose);
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    // defines the seed
    if (args.hasArgument("seed"))
    {
        unsigned int seed = args.getValue<unsigned int>("seed", 0);
        srand(seed);
    }

    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////

    GenericStrongLearner* pModel = NULL;

    if ( args.hasArgument("train") ||
         args.hasArgument("traintest") ||
         args.hasArgument("trainvalidtest") ) // for Viola-Jones Cascade
    {
        // get the name of the learner
        string baseLearnerName = defaultLearner;
        if ( args.hasArgument("learnertype") )
            args.getValue("learnertype", 0, baseLearnerName);

        checkBaseLearner(baseLearnerName);
        if (verbose > 1)
            cout << "--> Using learner: " << baseLearnerName << endl;

        // This should be changed: the user decides the strong learner
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);
        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->run(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("traintestmddag") )
    {
        // -traintestmddag <trainingDataFile> <testDataFile> <modelFile> <nIterations> <baseIter>
        string shypFileName = args.getValue<string>("traintestmddag", 2);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->run(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("test") )
    {
        // -test <dataFile> <shypFile> <numIters>
        string shypFileName = args.getValue<string>("test", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->classify(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("cmatrix") )
    {
        // -cmatrix <dataFile> <shypFile>
        string shypFileName = args.getValue<string>("cmatrix", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->doConfusionMatrix(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("posteriors") )
    {
        // -posteriors <dataFile> <shypFile> <outFile> <numIters>
        string shypFileName = args.getValue<string>("posteriors", 1);

        string baseLearnerName = UnSerialization::getWeakLearnerName(shypFileName);
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner(baseLearnerName);

        pModel = pWeakHypothesisSource->createGenericStrongLearner( args );

        pModel->doPosteriors(args);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("ssfeatures") )
    {
        // ONLY for AdaBoostMH classifiers
        // -ssfeatures <dataFile> <shypFile> <outFile> <numIters>
        string testFileName = args.getValue<string>("ssfeatures", 0);
        string shypFileName = args.getValue<string>("ssfeatures", 1);
        string outFileName = args.getValue<string>("ssfeatures", 2);
        int numIterations = args.getValue<int>("ssfeatures", 3);

        cerr << "ERROR: ssfeatures has been deactivated for the moment!" << endl;
        //classifier.saveSingleStumpFeatureData(testFileName, shypFileName, outFileName, numIterations);
    }
    //////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////////////////////////
    else if ( args.hasArgument("encode") )
    {
        // --encode <inputDataFile> <autoassociativeDataFile> <outputDataFile> <nIterations> <poolFile> <nBaseLearners>
        string labelsFileName = args.getValue<string>("encode", 0);
        string autoassociativeFileName = args.getValue<string>("encode", 1);
        string outputFileName = args.getValue<string>("encode", 2);
        int numIterations = args.getValue<int>("encode", 3);
        string poolFileName = args.getValue<string>("encode", 4);
        int numBaseLearners = args.getValue<int>("encode", 5);
        string outputInfoFile;

        // keep the argument strings alive in named variables: the c_str()
        // pointers must stay valid until readArguments() has parsed them
        string numBaseLearnersString = args.getValue<string>("encode", 5);
        const char* tmpArgv1[] = { "bla", // dummy program name, for ParasiteLearner
                                   "--pool",
                                   poolFileName.c_str(),
                                   numBaseLearnersString.c_str() };
        args.readArguments(4, tmpArgv1);

        InputData* pAutoassociativeData = new InputData();
        pAutoassociativeData->initOptions(args);
        pAutoassociativeData->load(autoassociativeFileName, IT_TRAIN, verbose);

        // for the original labels
        InputData* pLabelsData = new InputData();
        pLabelsData->initOptions(args);
        pLabelsData->load(labelsFileName, IT_TRAIN, verbose);

        // set up all the InputData members identically to pAutoassociativeData
        EncodeData* pOnePoint = new EncodeData();
        pOnePoint->initOptions(args);
        pOnePoint->load(autoassociativeFileName, IT_TRAIN, verbose);

        const int numExamples = pAutoassociativeData->getNumExamples();
        BaseLearner* pWeakHypothesisSource = BaseLearner::RegisteredLearners().getLearner("ParasiteLearner");
        pWeakHypothesisSource->declareArguments(args);

        ParasiteLearner* pWeakHypothesis;

        ofstream outFile(outputFileName.c_str());
        if (!outFile.is_open())
        {
            cerr << "ERROR: Cannot open strong hypothesis file <" << outputFileName << ">!" << endl;
            exit(1);
        }

        for (int i = 0; i < numExamples; ++i)
        {
            vector<float> alphas;
            alphas.resize(numBaseLearners);
            fill(alphas.begin(), alphas.end(), 0);

            if (verbose >= 1)
                cout << "--> Encoding example no " << (i+1) << endl;

            pOnePoint->resetData();
            pOnePoint->addExample( pAutoassociativeData->getExample(i) );

            AlphaReal energy = 1;

            OutputInfo* pOutInfo = NULL;
            if ( args.hasArgument("outputinfo") )
            {
                args.getValue("outputinfo", 0, outputInfoFile);
                pOutInfo = new OutputInfo(args);
                pOutInfo->initialize(pOnePoint);
            }

            for (int t = 0; t < numIterations; ++t)
            {
                pWeakHypothesis = (ParasiteLearner*)pWeakHypothesisSource->create();
                pWeakHypothesis->initLearningOptions(args);
                pWeakHypothesis->setTrainingData(pOnePoint);
                energy *= pWeakHypothesis->run();

                // if (verbose >= 2)
                //     cout << "energy = " << energy << endl << flush;

                AdaBoostMHLearner adaBoostMHLearner;

                // write the arff header before the very first data row
                if (i == 0 && t == 0)
                {
                    if ( (int)pWeakHypothesis->getBaseLearners().size() < numBaseLearners )
                        numBaseLearners = (int)pWeakHypothesis->getBaseLearners().size();

                    outFile << "%Hidden representation using autoassociative boosting" << endl << endl;
                    outFile << "@RELATION " << outputFileName << endl << endl;
                    outFile << "% numBaseLearners" << endl;
                    for (int j = 0; j < numBaseLearners; ++j)
                        outFile << "@ATTRIBUTE " << j << "_"
                                << pWeakHypothesis->getBaseLearners()[j]->getId() << " NUMERIC" << endl;
                    outFile << "@ATTRIBUTE class {" << pLabelsData->getClassMap().getNameFromIdx(0);
                    for (int l = 1; l < pLabelsData->getClassMap().getNumNames(); ++l)
                        outFile << ", " << pLabelsData->getClassMap().getNameFromIdx(l);
                    outFile << "}" << endl << endl << "@DATA" << endl;
                }

                alphas[pWeakHypothesis->getSelectedIndex()] +=
                    pWeakHypothesis->getAlpha() * pWeakHypothesis->getSignOfAlpha();

                if ( pOutInfo )
                    adaBoostMHLearner.printOutputInfo(pOutInfo, t, pOnePoint, NULL, pWeakHypothesis);
                adaBoostMHLearner.updateWeights(pOnePoint, pWeakHypothesis);
            }

            // output the normalized coefficient vector of this example
            float sumAlphas = 0;
            for (int j = 0; j < numBaseLearners; ++j)
                sumAlphas += alphas[j];

            for (int j = 0; j < numBaseLearners; ++j)
                outFile << alphas[j] / sumAlphas << ",";

            // output the original label of this example
            const vector<Label>& labels = pLabelsData->getLabels(i);
            for (int l = 0; l < (int)labels.size(); ++l)
                if (labels[l].y > 0)
                    outFile << pLabelsData->getClassMap().getNameFromIdx(labels[l].idx) << endl;

            delete pOutInfo;
        }
        outFile.close();
    }

    if (pModel)
        delete pModel;

    return 0;
}
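/**
 * Run the trained Viola-Jones cascade on the given dataset and write the
 * per-stage results (forecast, active flag, and posterior score of every
 * example) to the output file, preceded by the number of stages and the
 * true labels.
 * \param dataFileName The file containing the examples to classify.
 * \param shypFileName The file storing the cascade (strong hypothesis).
 * \param outFileName The file where the per-stage results are written.
 * \param numIterations Unused here: all stages stored in the model are evaluated.
 */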
void VJCascadeClassifier::savePosteriors(const string& dataFileName, const string& shypFileName,
                                         const string& outFileName, int numIterations)
{
    // loading data
    InputData* pData = loadInputData(dataFileName, shypFileName);
    const int numOfExamples = pData->getNumExamples();

    // get the index of the positive label
    const NameMap& namemap = pData->getClassMap();
    _positiveLabelIndex = namemap.getIdxFromName( _positiveLabelName );

    if (_verbose > 0)
        cout << "Loading strong hypothesis..." << flush;

    // open the output file
    ofstream outRes(outFileName.c_str());
    if (!outRes.is_open())
    {
        cerr << "ERROR: Cannot open output file <" << outFileName << ">!" << endl;
        return;
    }

    // The class that loads the weak hypotheses
    UnSerialization us;

    // Where to put the weak hypotheses
    vector<vector<BaseLearner*> > weakHypotheses;

    // For stagewise thresholds
    vector<AlphaReal> thresholds(0);

    // loads them
    //us.loadHypotheses(shypFileName, weakHypotheses, pData);
    us.loadCascadeHypotheses(shypFileName, weakHypotheses, thresholds, pData);

    // output the number of stages
    outRes << "StageNum " << weakHypotheses.size() << endl;

    // output the original labels
    outRes << "Labels";
    for (int i = 0; i < numOfExamples; ++i)
    {
        const Example& example = pData->getExample(i);
        int labelY = example.getLabelY(_positiveLabelIndex);

        if (labelY > 0) // positive label
            outRes << " 1";
        else
            outRes << " 0";
    }
    outRes << endl;

    // store results
    vector<CascadeOutputInformation> cascadeData(0);
    vector<CascadeOutputInformation>::iterator it;

    cascadeData.resize(numOfExamples);
    for (it = cascadeData.begin(); it != cascadeData.end(); ++it)
        it->active = true;

    for (int stagei = 0; stagei < (int)weakHypotheses.size(); ++stagei)
    {
        // for posteriors
        vector<AlphaReal> posteriors(0);

        // calculate the posteriors after this stage
        VJCascadeLearner::calculatePosteriors( pData, weakHypotheses[stagei], posteriors, _positiveLabelIndex );

        // update the data (posteriors, active element index, etc.)
        //VJCascadeLearner::forecastOverAllCascade( pData, posteriors, activeInstances, thresholds[stagei] );
        updateCascadeData(pData, weakHypotheses, stagei, posteriors,
                          thresholds, _positiveLabelIndex, cascadeData);

        int numberOfActiveInstance = 0;
        for (int i = 0; i < numOfExamples; ++i)
            if (cascadeData[i].active)
                numberOfActiveInstance++;

        if (_verbose > 0)
            cout << "Number of active instances: " << numberOfActiveInstance
                 << "(" << numOfExamples << ")" << endl;

        // output per-stage statistics
        outRes << "Stage " << stagei << " " << weakHypotheses[stagei].size() << endl;

        outRes << "Forecast";
        for (int i = 0; i < numOfExamples; ++i)
            outRes << " " << cascadeData[i].forecast;
        outRes << endl;

        outRes << "Active";
        for (int i = 0; i < numOfExamples; ++i)
        {
            if (cascadeData[i].active)
                outRes << " 1";
            else
                outRes << " 0";
        }
        outRes << endl;

        outRes << "Posteriors";
        for (int i = 0; i < numOfExamples; ++i)
            outRes << " " << cascadeData[i].score;
        outRes << endl;
    }

    outRes.close();

    // free memory allocation
    vector<vector<BaseLearner*> >::iterator bvIt;
    for (bvIt = weakHypotheses.begin(); bvIt != weakHypotheses.end(); ++bvIt)
    {
        vector<BaseLearner*>::iterator bIt;
        for (bIt = bvIt->begin(); bIt != bvIt->end(); ++bIt)
            delete *bIt;
    }
}