/**
 * Build the BMRM inner solver named by the "BMRM.innerSolverType" entry of
 * the configuration.
 *
 * @param model  [read] Model object; only consulted (for the length of W)
 *               when the L1N1_Clp solver is compiled in and requested
 * @param lambda [read] Value forwarded to the constructor of the chosen solver
 * @return newly allocated inner solver (allocated with new; not freed here)
 * @throws CBMRMException if no solver type is configured or the configured
 *         name is not one of the supported solvers
 */
static CBMRMInnerSolver* GetBMRMInnerSolver(CModel &model, double lambda)
{
    Configuration &cfg = Configuration::GetInstance();

    // Nothing can be selected without an explicit solver name.
    if(!cfg.IsSet("BMRM.innerSolverType"))
    {
        throw CBMRMException("No BMRM inner solver specified",
                             "CBMRMInnerSolverFactory::GetBMRMInnerSolver()");
    }

    const std::string solverName = cfg.GetString("BMRM.innerSolverType");

    // Dispatch on the user-supplied solver name.
    if(solverName == "L2N2_DaiFletcherPGM")
        return new CL2N2_DaiFletcherPGM(lambda);

    if(solverName == "L2N2_prLOQO")
        return new CL2N2_prLOQO(lambda);

    if(solverName == "L2N2_LineSearch")
        return new CL2N2_LineSearch(lambda);

#ifdef HAVE_L1N1_INNER_SOLVER
    if(solverName == "L1N1_Clp")
    {
        int wLength = model.GetW().Length();
        return new CL1N1_Clp(lambda, wLength);
    }
#endif

    if(solverName == "L2N2_qld")
        return new CL2N2_qld(lambda);

    throw CBMRMException("unknown innerSolverType <" + solverName + ">",
                         "CBMRMInnerSolverFactory::GetBMRMInnerSolver()");
}
/** Return an instance of a line-search loss function based on the
 *  "Loss.lossFunctionType" entry of the configuration file.
 *
 *  @param model [read] Pointer to model object
 *  @param data  [read] Pointer to data object; must be a CVecData for
 *               LINESEARCH_HINGE and a CMultilabelVecData for
 *               LINESEARCH_MULTI_LABEL_CLASSIFICATION
 *  @return newly allocated loss object (allocated with new; not freed here)
 *  @throws CBMRMException if no loss type is configured, the type is not
 *          recognised, or data is not of the type the chosen loss requires
 */
static CLoss* GetLoss(CModel* &model, CData* &data)
{
    Configuration &config = Configuration::GetInstance();

    if(!config.IsSet("Loss.lossFunctionType"))
    {
        throw CBMRMException("ERROR: no loss function specified!\n",
                             "CLineSearchLossFactory::GetLoss()");
    }

    // select the loss function specified by user (in configuration file)
    std::string lossFunctionType = config.GetString("Loss.lossFunctionType");
    CLoss* loss = 0;

    if(lossFunctionType == "LINESEARCH_HINGE")
    {
        CVecData *vecdata = dynamic_cast<CVecData*>(data);
        if(!vecdata)
        {
            throw CBMRMException("unable to cast data into CVecData",
                                 "CLineSearchLossFactory::GetLoss()");
        }
        loss = new CLinesearch_HingeLoss(model, vecdata);
    }
    else if(lossFunctionType == "LINESEARCH_MULTI_LABEL_CLASSIFICATION")
    {
        CMultilabelVecData *mlvecdata = dynamic_cast<CMultilabelVecData*>(data);
        if(!mlvecdata)
        {
            // Name the type that was actually requested by the cast.
            throw CBMRMException("unable to cast data into CMultilabelVecData",
                                 "CLineSearchLossFactory::GetLoss()");
        }
        loss = new CLinesearch_MultilabelLoss(model, mlvecdata);
    }
    else
    {
        throw CBMRMException("ERROR: unrecognised loss function ("+lossFunctionType+")\n",
                             "CLineSearchLossFactory::GetLoss()");
    }

    return loss;
}
/** Construct the data set and load its constraint data.
 *
 *  @param start  [read] Not used by this constructor body
 *  @param nparts [read] Number of parts the data set is split into; only 1
 *                is accepted
 *  @throws CBMRMException if nparts is not 1
 */
CConsVecData(unsigned int start = 0, unsigned int nparts = 1)
{
    // Only the undivided (single part) data set is handled here.
    const bool distributed = (nparts != 1);
    if (distributed)
    {
        throw CBMRMException(string("CConsVecData does not support distribution of data (yet)"),
                             "CConsVecData::CConsVecData");
    }

    LoadConstraintData(NumOfLabel());
}
/** Create the data object selected by the configuration file.
 *
 *  The format is taken from "Data.format" when that key is set. Otherwise it
 *  is CONSTRAINTS_LABEL_VECTOR_FEATURE when "Data.constraintsFile" is set,
 *  and VECTOR_LABEL_VECTOR_FEATURE when neither key is set.
 *
 *  For serial computation, start and nparts are dummies.
 *  For distributed/parallel computation, the dataset is divided into
 *  "nparts" parts and only the "start"-th part is loaded.
 *
 *  @param start  [read] The part of dataset (divided into nparts) this machine should load
 *  @param nparts [read] The number of parts the original dataset will be divided into
 *  @return newly allocated data object (allocated with new; not freed here)
 *  @throws CBMRMException if the resulting format name is not recognised
 */
static CData* GetData(unsigned int start=0, unsigned int nparts=1)
{
    Configuration &cfg = Configuration::GetInstance();

    const bool hasConstraintsFile = cfg.IsSet("Data.constraintsFile");
    const bool hasExplicitFormat  = cfg.IsSet("Data.format");

    // An explicit format wins; a constraints file only changes the default.
    std::string format;
    if(hasExplicitFormat)
        format = cfg.GetString("Data.format");
    else if(hasConstraintsFile)
        format = "CONSTRAINTS_LABEL_VECTOR_FEATURE";
    else
        format = "VECTOR_LABEL_VECTOR_FEATURE";

    if(format == "VECTOR_LABEL_VECTOR_FEATURE")
        return new CVecData(start,nparts);

    if(format == "VARIABLE_LENGTH_VECTOR_LABEL_VECTOR_FEATURE")
        return new CMultilabelVecData(start,nparts);

    if(format == "CONSTRAINTS_LABEL_VECTOR_FEATURE")
        return new CConsVecData(start,nparts);

    // To support another format, add a branch here, e.g.
    //   if(format == "YOUR_DATA_FORMAT")
    //       return new CYourDataFormat();

    throw CBMRMException("ERROR: unrecognised data format ("+format+")\n",
                         "CDataFactory::GetData()");
}
int main(int argc, char** argv) { // sanity check if(argc < 2) { std::cout << "Usage: ramp-bmrm-predict config.file" << std::endl; std::cout << "Check the configfiles directory for examples" << std::endl; std::cout << "ERROR: No configuration file given!" << std::endl; exit(EXIT_FAILURE); } // the very first thing to do! Configuration &config = Configuration::GetInstance(); config.ReadFromFile(argv[1]); CData* data = 0; CLoss* loss_vex = 0; CLoss* loss_cav = 0; CModel* model = 0; try { // serial computation with centralised data config.SetString("Computation.mode", "SERIAL_CENTRALISED_DS"); std::string modelFilename = config.GetString("Model.modelFile"); std::string programMode = config.GetString("Program.mode"); data = CDataFactory::GetData(); model = CModelFactory::GetModel(); model->Initialize(modelFilename, data->dim()); CRampLossFactory::GetRampLoss(model,data, loss_vex, loss_cav); if(programMode == "PREDICTION") loss_vex->Predict(model); else if(programMode == "EVALUATION") loss_vex->Evaluate(model); else throw CBMRMException("unknown program mode <" + programMode +">","main()"); // compute ramp loss function value Scalar lossVal_vex = 0.0; Scalar lossVal_cav = 0.0; loss_vex->ComputeLoss(lossVal_vex); loss_cav->ComputeLoss(lossVal_cav); std::cout << "a) Convex loss function value: " << lossVal_vex << std::endl; std::cout << "b) Concave loss function linearization value: " << lossVal_cav << std::endl; std::cout << "c) Ramp loss function value (a-b) : " << lossVal_vex - lossVal_cav << std::endl; // cleaning up delete model; delete loss_vex; delete loss_cav; delete data; } catch(CBMRMException e) { cout << e.Report() << endl; } return EXIT_SUCCESS; }
/** Loss computation is not available for this loss; every call throws.
 *
 *  @param loss [write] Never written: the function throws before touching it
 *  @throws CBMRMException always
 */
void ComputeLoss(Scalar& loss)
{
    const char* const what  = "ERROR: not implemented!\n";
    const char* const where = "CGraphMatchLoss::ComputeLoss()";
    throw CBMRMException(what, where);
}
/** Instantiate one convex loss function and another linearization of the concave loss * based on user's argument in configuration file * * @param model [read] Pointer to model object * @param data [read] Pointer to data object * @param loss_vex [write] Convex loss function * @param loss_cav [write] Linearization of concave loss function corresponding to loss_vex */ static void GetRampLoss(CModel* &model, CData* &data, CLoss* &loss_vex, CLoss* &loss_cav) { Configuration &config = Configuration::GetInstance(); // select the loss function specified by user (in configuration file) if(config.IsSet("Loss.lossFunctionType")) { std::string lossFunctionType = config.GetString("Loss.lossFunctionType"); if(lossFunctionType == "WTA_MULTICLASS") { CVecData *vecdata = 0; if(not (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss_vex = new CWTAMulticlassLoss(model, vecdata); // in the ramp losses, users must know which loss_cav to use for their specific loss_vex! loss_cav = new CWTAMulticlassLoss(model, vecdata, false); // with additive label loss switched off } else if(lossFunctionType == "ROC_SCORE") { CVecData *vecdata = 0; if(not (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss_vex = new CROCScoreLoss(model, vecdata); // in the ramp losses, users must know which loss_cav to use for their specific loss_vex! loss_cav = new CROCScoreLoss(model, vecdata, false); // with additive label loss switched off } else if(lossFunctionType == "NDCG_RANK") { CVecData *vecdata = 0; if(not (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss_vex = new CNDCGRankLoss(model, vecdata); // in the ramp losses, users must know which loss_cav to use for their specific loss_vex! 
loss_cav = new CRampNDCGRankLoss(model, vecdata); } //else if(lossFunctionType == "YOUR_LOSS_FUNCTION") //{ // loss = new CYourLoss(w, data); //} else { throw CBMRMException("ERROR: unrecognised loss function ("+lossFunctionType+")\n", "CLossFactory::GetLoss()"); } } else { throw CBMRMException("ERROR: no loss function specified!\n", "CLossFactory::GetLoss()"); } }
/** Return an instance of loss function based on user's argument in configuration file * * @param data [read] Pointer to data object * @return loss object */ static CLoss* GetLoss(CModel* &model, CData* &data) { CLoss* loss = 0; Configuration &config = Configuration::GetInstance(); // select the loss function specified by user (in configuration file) if(config.IsSet("Loss.lossFunctionType")) { std::string lossFunctionType = config.GetString("Loss.lossFunctionType"); if(lossFunctionType == "HINGE") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CHingeLoss(model, vecdata); } else if(lossFunctionType == "SQUARED_HINGE") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CSquaredHingeLoss(model, vecdata); } else if(lossFunctionType == "HUBER_HINGE") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CHuberHingeLoss(model, vecdata); } else if(lossFunctionType == "LOGISTIC") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CLogisticLoss(model, vecdata); } else if(lossFunctionType == "EXPONENTIAL") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CExponentialLoss(model, vecdata); } #ifndef PARALLEL_BMRM else if(lossFunctionType == "ROC_SCORE") { CVecData *vecdata = 0; if(! 
(vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CROCScoreLoss(model, vecdata); } else if(lossFunctionType == "F_BETA") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CFBetaLoss(model, vecdata); } #endif else if(lossFunctionType == "EPSILON_INSENSITIVE") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CEpsilonInsensitiveLoss(model, vecdata); } else if(lossFunctionType == "LEAST_SQUARES") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CLeastSquaresLoss(model, vecdata); } else if(lossFunctionType == "LEAST_ABSOLUTE_DEVIATION") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CLeastAbsDevLoss(model, vecdata); } else if(lossFunctionType == "QUANTILE_REGRESSION") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CQuantileLoss(model, vecdata); } else if(lossFunctionType == "POISSON_REGRESSION") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CPoissonLoss(model, vecdata); } else if(lossFunctionType == "HUBER_ROBUST_REGRESSION") { CVecData *vecdata = 0; if(! 
(vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CHuberRobustLoss(model, vecdata); } else if(lossFunctionType == "NOVELTY_DETECTION") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CNoveltyLoss(model, vecdata); } else if(lossFunctionType == "WTA_MULTICLASS") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CWTAMulticlassLoss(model, vecdata); } else if(lossFunctionType == "MULTI_LABEL_CLASSIFICATION") { CMultilabelVecData *mlvecdata = 0; if(! (mlvecdata = dynamic_cast<CMultilabelVecData*>(data))) { throw CBMRMException("unable to cast data into CMultilabelVecData", "CLossFactory::GetLoss()"); } loss = new CMultilabelLoss(model, mlvecdata); } else if(lossFunctionType == "NDCG_RANK") { CVecData *vecdata = 0; if(! (vecdata = dynamic_cast<CVecData*>(data))) { throw CBMRMException("unable to cast data into CVecData", "CLossFactory::GetLoss()"); } loss = new CNDCGRankLoss(model, vecdata); } else if(lossFunctionType == "SOFT_MARGIN") { CConsVecData *consvecdata = 0; if(! (consvecdata = dynamic_cast<CConsVecData*>(data))) { throw CBMRMException("unable to cast data into CConsVecData", "CLossFactory::GetLoss()"); } loss = new SoftMarginLoss(model, consvecdata); } else { throw CBMRMException("ERROR: unrecognised loss function ("+lossFunctionType+")\n", "CLossFactory::GetLoss()"); } } else { throw CBMRMException("ERROR: no loss function specified!\n", "CLossFactory::GetLoss()"); } return loss; }
/** Read examples into memory */ void CSeqFeature::LoadFeatures() { unsigned int tmpFidx = 0; Scalar tmpFval = 0; unsigned int featureCnt = 0; unsigned int seqNum = 0; unsigned int phiNum = 0; unsigned int posNum1 = 0, posNum2 = 0; std::string line = ""; std::string token = ""; std::ifstream featureFp; featureFp.open(featureFile.c_str()); if(!featureFp.good()) { string msg = "Cannot open feature file <" + featureFile + ">!"; throw CBMRMException(msg, "CSeqFeature::ScanFeatureFile()"); } // read header information int headerInfoCnt = 3; // min duration, max duration, feature dimension do { getline(featureFp, line); trim(line); if(IsBlankLine(line)) continue; // blank line if(line[0] == '#') continue; // comment line if(sscanf(line.c_str(),"maxDuration:%d",&maxDuration)==1) headerInfoCnt--; if(sscanf(line.c_str(),"minDuration:%d",&minDuration)==1) headerInfoCnt--; if(sscanf(line.c_str(),"globalFeatureDim:%d",&featureDimension)==1) headerInfoCnt--; } while(!featureFp.eof() && (headerInfoCnt != 0)); assert(maxDuration >= minDuration); assert(featureDimension < (1<<30)); // featureDimension is normally less then 1 billion if(featureFp.eof()) throw CBMRMException("Feature file does not contain valid examples","CSeqFeature::LoadFeatures()"); // read sequences nnz = 0; while(!featureFp.eof()) { // read sequence number do { getline(featureFp, line); trim(line); if(IsBlankLine(line)) continue; // blank line if(line[0] == '#') continue; // comment line if(sscanf(line.c_str(),"sequence:%d",&seqNum)==1) break; } while(!featureFp.eof()); if(featureFp.eof()) throw CBMRMException("Feature file does not contain valid phi:*","CSeqFeature::LoadFeatures()"); // read phi:1 tag phiNum = 0; do { getline(featureFp, line); trim(line); if(IsBlankLine(line)) continue; // blank line if(line[0] == '#') continue; // comment line if(sscanf(line.c_str(),"phi:%d",&phiNum)==1) break; } while(!featureFp.eof()); if(featureFp.eof() || (phiNum != 1)) throw CBMRMException("Feature file does not contain 
valid phi:1","CSeqFeature::LoadFeatures()"); // read phi:1 sparse vectors do { getline(featureFp, line); trim(line); if(IsBlankLine(line)) continue; // blank line if(line[0] == '#') continue; // comment line if(sscanf(line.c_str(),"phi:%d",&phiNum) == 1) break; istringstream iss(line); iss >> token; if((sscanf(token.c_str(),"pos:%d",&posNum1) != 1)) throw CBMRMException("Feature file does not contain valid pos tag in phi:1","CSeqFeature::LoadFeatures()"); TheMatrix svec(1,featureDimension,SML::SPARSE); featureCnt = 0; while(!iss.eof()) { iss >> token; if(sscanf(token.c_str(),svec_feature_index_and_value_format.c_str(),&tmpFidx, &tmpFval) != 2) { ostringstream msg; msg << "Invalid #" << featureCnt + 1 << " sparse vector element in phi:"<< phiNum << " seq:" << seqNum << " pos:" << posNum1; throw CBMRMException(msg.str(),"CSeqFeature::LoadFeatures()"); } svec.Set(0,tmpFidx,tmpFval); nnz++; } if(featureCnt == 0) throw CBMRMException("Feature file does not contain valid phi:2 sparse vector","CSeqFeature::LoadFeatures()"); phi_1.push_back(svec); } while(!featureFp.eof()); if(phi_1.size() < 1) throw CBMRMException("Feature file does not contain valid phi:1","CSeqFeature::LoadFeatures()"); numOfSeq = phi_1.size(); if(featureFp.eof() || (phiNum != 2)) throw CBMRMException("Feature file does not contain valid phi:2","CSeqFeature::LoadFeatures()"); // read phi:2 sparse vectors unsigned int prevPosNum1 = 0, prevPosNum2 = 0; vector<TheMatrix> tmp_phi_2_svecs; featureCnt = 0; do { getline(featureFp, line); trim(line); if(IsBlankLine(line)) continue; // blank line if(line[0] == '#') continue; // comment line if((sscanf(line.c_str(),"phi:%d",&phiNum) == 1)) break; istringstream iss(line); iss >> token; if((sscanf(token.c_str(),"pos:%d,%d",&posNum1,&posNum2) != 2)) throw CBMRMException("Feature file does not containt valid pos tag in phi:2","CSeqFeature::LoadFeatures()"); if(prevPosNum2 >= posNum2) { ostringstream msg; msg << "previous posNum2 must be > current posNum2 in phi:2 
(phi:2 pos:" << posNum1 << "," << posNum2; throw CBMRMException(msg.str(),"CSeqFeature::LoadFeatures()"); } if(prevPosNum1 >= posNum1) { ostringstream msg; msg << "previous posNum1 must be > current posNum1 in phi:2 (phi:2 pos:" << posNum1 << "," << posNum2; throw CBMRMException(msg.str(),"CSeqFeature::LoadFeatures()"); } if(posNum1 != prevPosNum1) { phi_2.push_back(tmp_phi_2_svecs); tmp_phi_2_svecs.clear(); } TheMatrix svec(1,featureDimension,SML::SPARSE); featureCnt = 0; while(!iss.eof()) { iss >> token; if(sscanf(token.c_str(),svec_feature_index_and_value_format.c_str(),&tmpFidx, &tmpFval) != 2) { ostringstream msg; msg << "Invalid #" << featureCnt + 1 << " sparse vector element in phi:"<< phiNum << " seq:" << seqNum << " pos:" << posNum1; throw CBMRMException(msg.str(),"CSeqFeature::LoadFeatures()"); } svec.Set(0,tmpFidx,tmpFval); nnz++; } if(featureCnt == 0) throw CBMRMException("Feature file does not containt valid phi:2 sparse vector","CSeqFeature::LoadFeatures()"); tmp_phi_2_svecs.push_back(svec); } while(!featureFp.eof()); if(phi_2.size() < 1) throw CBMRMException("Feature file does not contain phi:2","CSeqFeature::LoadFeatures()"); } // data matrix density density = ((double)nnz/featureDimension)/numOfSeq; featureFp.close(); }