template <typename Tp1, typename Alloc1, typename Tp2, typename Alloc2>
inline Tp1 dot_product(const WeightVector<Tp1, Alloc1>& x, const WeightVector<Tp2, Alloc2>& y)
{
  // dot product over the overlapping prefix of the two vectors
  const size_t size = utils::bithack::min(x.size(), y.size());
  return std::inner_product(x.begin(), x.begin() + size, y.begin(), Tp1());
}
int main(int argc, char** argv) {
  // handle parameters
  po::variables_map cfg;
  if (!init_params(argc, argv, &cfg)) exit(1); // something is wrong

  // init weights
  DenseWeightVector dense_weights;
  WeightVector weights;
  if (cfg.count("weights")) Weights::InitFromFile(cfg["weights"].as<string>(), &dense_weights);
  Weights::InitSparseVector(dense_weights, &weights);

  cerr << "Current Weight Vector:\n";
  for (WeightVector::iterator i=weights.begin(); i!=weights.end(); ++i)
    cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
  /*cerr << "\nDense Weights:\n";
  for (int i = 0; i < dense_weights.size(); ++i)
    cerr << i << " " << dense_weights[i] << endl;*/
  cerr << "# of features: " << FD::NumFeats() << " (-1 dummy feature @ idx 0)\n\n";

  // load instances
  vector<TrainingInstance> instances;
  loadInstances(cfg["input"].as<string>(), instances);

  // setup output directory
  //MkDirP(cfg["output"].as<string>());
  //stringstream outss;
  //outss << cfg["output"].as<string>() << "/";
  //const string out_path = outss.str();

  // setup loss function
  ListwiseLossFunction* lossfunc = set_loss(&cfg);
  cerr << "listwise loss function: " << cfg["loss"].as<string>() << "\n";

  // run AdaRank optimizer
  AdaRank adarank(
    instances,
    instances.size(),
    cfg["iterations"].as<int>(),
    cfg["epsilon"].as<double>(),
    dense_weights,
    lossfunc,
    cfg.count("verbose")
  );
  adarank.run();

  // write output weight vector
  DenseWeightVector new_dense_weights = adarank.GetWeightVector();
  WeightVector new_weights;
  Weights::InitSparseVector(new_dense_weights, &new_weights);
  cerr << "Final Weight Vector:\n";
  for (WeightVector::iterator i=new_weights.begin(); i!=new_weights.end(); ++i)
    cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
  Weights::WriteToFile(cfg["output"].as<string>(), new_dense_weights, true, NULL);
}
// loads a (sparse) relevance weight vector from a file of whitespace-separated
// "<feature-name> <value>" pairs
void loadRelevanceWeights(const string& fname, WeightVector& rw) {
  rw.clear();
  ReadFile in_file(fname);
  istream& in = *in_file.stream();
  assert(in);
  string f;
  double v;
  while (in >> f) {
    in >> v;
    if (f.empty()) continue;
    rw.set_value(FD::Convert(f), v);
  }
}
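// Illustrative input for loadRelevanceWeights (the feature names below are
// hypothetical, chosen only to show the format consumed by the loop above):
//   RelevanceBLEU  1.0
//   RelevanceTER  -0.5
// Each "<feature-name> <value>" pair is mapped through FD::Convert and stored as one
// entry of the sparse relevance weight vector.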
int main(int argc, char** argv) {
  if (argc < 2 || argc > 3) {
    cerr << "USAGE: view-instances <binary instance file> [<weights>]\n";
    exit(1);
  }
  bool has_w = (argc==3);

  // load instances
  vector<Instance> instances;
  loadInstances(string(argv[1]), instances);

  // weights
  WeightVector weights;
  if (has_w) {
    DenseWeightVector dense_weights;
    Weights::InitFromFile(string(argv[2]), &dense_weights);
    Weights::InitSparseVector(dense_weights, &weights);
    cerr << "Current Weight Vector:\n";
    for (WeightVector::iterator i=weights.begin(); i!=weights.end(); ++i)
      cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
  }

  double likelihood = 0.0;
  double likelihood_i = 0.0;
  for (int i=0; i<instances.size(); ++i) {
    if (instances[i].ir_sorted) {
      if (has_w) likelihood_i = PlackettLuce::pl_likelihood(instances[i], weights);
      else       likelihood_i = PlackettLuce::pl_likelihood(instances[i]);
      cout << "P(y|x;";
      if (has_w) cout << "w)=";
      else       cout << "D)=";
      cout << likelihood_i << "\n";
    }
    if (has_w) cout << instances[i].AsString(weights) << endl;
    else       cout << instances[i].AsString() << endl;
    likelihood += likelihood_i;
  }
  cerr << "Likelihood=" << likelihood << "\n";
}
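// Background note on pl_likelihood (assumption: the standard Plackett-Luce
// parameterization with exponentiated model scores; the actual PlackettLuce
// implementation may differ in details): for a ranking y = (y_1, ..., y_n) over the
// hypotheses of an instance with scores s_j,
//   P(y | x; w) = prod_{i=1..n} exp(s_{y_i}) / sum_{j=i..n} exp(s_{y_j}),
// i.e. items are drawn without replacement, each time in proportion to the
// exponentiated score among the items not yet ranked.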
boost::shared_ptr<HypothesisInfo> MakeHypothesisInfo(Hypergraph& hg) {
  /*
   * create a HypothesisInfo with feature vector, translation and its relevance;
   * relevance feature values are removed (and optionally any frozen features)
   */
  boost::shared_ptr<HypothesisInfo> h(new HypothesisInfo);
  h->features = ViterbiFeatures(hg);
  h->rel = h->features.dot(relw);
  // clean relevance weights from feature vector
  for (WeightVector::iterator it=relw.begin(); it!=relw.end(); ++it) {
    h->features.set_value(it->first, .0);
  }
  ViterbiESentence(hg, &(h->hyp));
  if (freeze) {
    for (unsigned x=0; x<frozen_features.size(); ++x) {
      h->features.set_value(frozen_features[x], .0);
    }
  }
  // for rel scaling:
  if (h->rel > MAX_REL) MAX_REL = h->rel;
  if (h->rel < MIN_REL) MIN_REL = h->rel;
  return h;
}
template <typename Tp1, typename Alloc1, typename Tp2, typename Alloc2, typename BinaryOp>
inline Tp1 dot_product(const WeightVector<Tp1, Alloc1>& x, const WeightVector<Tp2, Alloc2>& y, BinaryOp op)
{
  typedef WeightVector<Tp1, Alloc1> weight_vector1_type;
  typedef WeightVector<Tp2, Alloc2> weight_vector2_type;

  const size_t size = utils::bithack::min(x.size(), y.size());

  // combine the overlapping prefix with op, accumulating with +
  Tp1 __dot = std::inner_product(x.begin(), x.begin() + size, y.begin(), Tp1(), std::plus<Tp1>(), op);

  // fold the tail of the longer vector against a default-constructed value
  if (x.size() > y.size()) {
    typename weight_vector1_type::const_iterator iter_end = x.end();
    for (typename weight_vector1_type::const_iterator iter = x.begin() + size; iter != iter_end; ++iter)
      __dot += op(*iter, Tp2());
  } else {
    typename weight_vector2_type::const_iterator iter_end = y.end();
    for (typename weight_vector2_type::const_iterator iter = y.begin() + size; iter != iter_end; ++iter)
      __dot += op(Tp1(), *iter);
  }
  return __dot;
}
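// Self-contained sketch of the prefix/tail behaviour of the two dot_product
// overloads above, written with std::vector since WeightVector and utils::bithack
// are defined elsewhere; this is illustration only, not part of the library.
#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> x = {1.0, 2.0, 3.0};
  std::vector<double> y = {4.0, 5.0};

  // first overload: dot product over the overlapping prefix only -> 1*4 + 2*5 = 14
  const size_t size = std::min(x.size(), y.size());
  double dot = std::inner_product(x.begin(), x.begin() + size, y.begin(), 0.0);

  // BinaryOp overload with op = std::multiplies<double>(): the tail of the longer
  // vector is combined with a default-constructed value (0.0), contributing nothing
  for (size_t i = size; i < x.size(); ++i)
    dot += std::multiplies<double>()(x[i], 0.0);

  std::cout << dot << std::endl; // prints 14
}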
/// Function that runs one iteration of the ParEGO algorithm.
void universe::iterate_ParEGO()
{
  int prior_it=0;
  int stopcounter=0;

  weights->changeWeights(iter, space->fWeightVectors);
  //fprintf(stdout, "%.2lf %.2lf weightvectors \n", space->fWeightVectors[0], space->fWeightVectors[1]);

  //fprintf(stdout, "fMeasuredFit\n");
  for(int i=1;i<=iter;i++) {
    space->fMeasuredFit[i] = space->Tcheby(&space->fCostVectors[i][1]);
    //fprintf(stdout,"%lg ", space->fMeasuredFit[i]);
    if(space->fMeasuredFit[i]<model->ymin) {
      model->ymin=space->fMeasuredFit[i];
      best_ever=i;
    }
  }
  //fprintf(stdout,"\n ymin: %lg\n", model->ymin);
  //fprintf(stdout,"best_ever: %d\n", best_ever);

  // choose the solutions to use to update the DACE model
  if(iter>11*space->fSearchSpaceDim+24) {
    model->fCorrelationSize = 11*space->fSearchSpaceDim+24;
    space->chooseUpdateSolutions(iter, model->fCorrelationSize);
    model->pax=&space->fSelectedXVectors;
    model->pay=&space->fSelectedMeasuredFit;
  } else {
    model->fCorrelationSize=iter;
    model->pax=&space->fXVectors;
    model->pay=&space->fMeasuredFit;
  }

  model->buildDACE(weights->change, iter);

  start = clock();
  // BEGIN GA code
  double best_imp=INFTY;
  double* best_x;
  best_x = (double*)calloc(space->fSearchSpaceDim+1, sizeof(double));

  // could change the GA not to be an object; have to think about advantages and disadvantages
  // pop size init 20
  GeneticAlgorithm ga(20, space->fSearchSpaceDim);
  ga.run(space, model, iter, best_x, &best_imp);
  // END GA code
  end = clock();
  cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
  // timestr << iter << " " << cpu_time_used << endl;

  //fprintf(stdout, "ax\n");
  for(int d=1;d<=space->fSearchSpaceDim;d++) {
    space->fXVectors[iter+1][d]=best_x[d];
    //fprintf(stdout, "%lg ", space->fXVectors[iter+1][d]);
  }
  //fprintf(stdout, "\n");
  free(best_x); // best_x has been copied into fXVectors above; release the GA result buffer

  space->fMeasuredFit[iter+1]=space->myfit(iter+1, iter+1);

  fprintf(stdout, "%d ", iter+1+prior_it);
  for(int d=1; d<=space->fSearchSpaceDim; d++)
    fprintf(stdout, "%lg ", space->fXVectors[iter+1][d]);
  fprintf(stdout, "decision\n");

  fprintf(stdout,"%d ", iter+1);
  for(int i=1;i<=space->fNoObjectives;i++) {
    fprintf(stdout, "%lg ", space->fCostVectors[iter+1][i]);
    fprintf(plotfile, "%lg ", space->fCostVectors[iter+1][i]);
    //fprintf(plotfile, "%.5lf ", ff[iter+1][i]);
  }
  fprintf(plotfile, "\n");
  fprintf(stdout, "objective\n");
  //cout<<"ymin"<<model->ymin<<"\n";

  improvements[iter+1]=improvements[iter];
  if (space->fMeasuredFit[iter+1]>=model->ymin) {
    // fprintf(stdout,"No actual improver found\n");
    stopcounter++;
  } else {
    improvements[iter+1]++;
    model->ymin = space->fMeasuredFit[iter+1];
    stopcounter=0;
    best_ever=iter+1;
  }
  iter++;
}
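// Background note (assumption, following the published ParEGO description rather
// than code shown here): space->Tcheby() is expected to implement the augmented
// Tchebycheff scalarization of a cost vector f(x) under the current weights lambda,
//   f_lambda(x) = max_i( lambda_i * f_i(x) ) + rho * sum_i( lambda_i * f_i(x) ),
// with a small rho > 0, turning the multi-objective cost into the scalar
// fMeasuredFit values that the DACE model is fit to.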
int main(int argc, char** argv) {
  po::variables_map cfg;
  if (!init_params(argc,argv,&cfg)) return 1;
  if (cfg.count("random_seed"))
    rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>()));
  else
    rng.reset(new MT19937);

  // set variables
  lr = cfg["learningrate"].as<double>();
  hope_select = cfg["hope"].as<int>();
  fear_select = cfg["fear"].as<int>();
  optimizer = cfg["optimizer"].as<int>();
  freeze = cfg.count("freeze");
  if (freeze) {
    const vector<string>& ffstrs = cfg["freeze"].as<vector<string> >();
    stringstream ffss;
    ffss << "frozen features: ";
    for (vector<string>::const_iterator ffit=ffstrs.begin(); ffit!=ffstrs.end(); ++ffit) {
      frozen_features.push_back(FD::Convert(*ffit));
      ffss << *ffit << " ";
    }
    cerr << ffss.str() << endl;
  }
  scaling = cfg["scaling"].as<int>();
  scalingfactor = cfg["scalingfactor"].as<double>();
  cerr << "scaling=" << scaling << " scalingfactor=" << scalingfactor << endl;

  // setup decoder
  Decoder* decoder = setupDecoder(cfg);
  if (!decoder) {
    cerr << "error while loading decoder with " << cfg["decoder_config"].as<string>() << "!\n";
    return 1;
  }
  TrainingObserver observer;

  // get reference to decoder weights
  vector<weight_t>& decoder_weights = decoder->CurrentWeightVector();
  // the SMT weights (to be optimized)
  if (cfg.count("weights")) {
    Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights);
    Weights::InitSparseVector(decoder_weights, &w);
  } else {
    cerr << "starting with EMPTY weights!\n";
  }
  // the weight vector that gives the oracle
  loadRelevanceWeights(cfg["rweights"].as<string>(), relw);
  negrelw -= relw;
  relw_scaled = relw;
  // initial scaling
  if (scaling != 0) scaleRelevanceWeights(scalingfactor);

  // output some vector stats
  cerr << "W_REL=" << relw << endl;
  cerr << "W_REL_SCALED=" << relw_scaled << endl;
  cerr << "|W_REL|=" << relw_scaled.size() << endl;
  cerr << "|W_SMT|=" << w.size() << endl;
  cerr << "hope selection: " << hope_select << endl;

  const string input = decoder->GetConf()["input"].as<string>();
  cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
"STDIN" : input.c_str()) << endl; ReadFile in_read(input); istream *in = in_read.stream(); assert(*in); string id, sentence; int cur_sent = 0; unsigned lc = 0; // line count double objective=0; double tot_loss = 0; WeightVector avg_w = w; //SparseVector<double> tot; //SparseVector<double> oldw = w; //tot.clear(); //tot += w; while(*in >> id) { in->ignore(1, '\t'); getline(*in, sentence); if (sentence.empty() || id.empty()) continue; cerr << "\nID="<<id << endl; decoder->SetId(cur_sent); decoder->Decode(sentence, &observer); // decode with decoder_weights cur_sent = observer.GetCurrentSent(); Hypergraph hg = observer.GetCurrentForest(); vector<boost::shared_ptr<HypothesisInfo> > S; MAX_REL = std::numeric_limits<double>::lowest(); MIN_REL = std::numeric_limits<double>::max(); // get viterbi boost::shared_ptr<HypothesisInfo> viterbi = MakeHypothesisInfo(hg); // get the true oracle (sets max_rel) hg.Reweight(relw); boost::shared_ptr<HypothesisInfo> oracle = MakeHypothesisInfo(hg); oracle->oracle = oracle; oracle->computeCost(); // get the worst derivation (to get min_rel) hg.Reweight(negrelw); boost::shared_ptr<HypothesisInfo> worst = MakeHypothesisInfo(hg); worst->oracle = oracle; worst->computeCost(); if (hope_select == 1) { // hope hg.Reweight(w + relw_scaled); S.push_back(MakeHypothesisInfo(hg)); S[0]->oracle = oracle; S[0]->computeCost(); } else { // true oracle S.push_back(oracle); } // S contains now ONE (hope/oracle) hypothesis S[0]->computeLoss(); boost::shared_ptr<HypothesisInfo> good = S[0]; viterbi->oracle = oracle; viterbi->computeCost(); viterbi->computeLoss(); cerr << "min_rel=" << MIN_REL << " max_rel=" << MAX_REL << endl; cerr << "S[0]=" << S[0] << endl; boost::shared_ptr<HypothesisInfo> fear; if (optimizer == 4) { // PA update (single dual coordinate step) cerr << "PA MIRA (single dual coordinate step)\n"; hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); cerr << "LOSS: " << fear->loss; if (fear->loss > 0.0) { double diffsqnorm = (good->features - fear->features).l2norm_sq(); double delta; if (diffsqnorm > 0) { delta = fear->loss / (diffsqnorm); if (delta > lr) delta = lr; w += good->features * delta; w -= fear->features * delta; } } } else if (optimizer == 1) {// sgd - nonadapted step size cerr << "SGD\n"; if (fear_select == 1) { hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); } else if (fear_select == 2) { fear = worst; } else if (fear_select == 3) { fear = viterbi; } w += good->features * lr; w -= fear->features * lr; } else if (optimizer == 2) { // PA MIRA with selection from cutting plane cerr << "PA MIRA with Selection from Cutting Plane\n"; hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); if (fear->loss < 0) { cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n"; abort(); } if (fear->loss > good->loss + SMO_EPS) { S.push_back(fear); OptimizeSet(S, 1); // only one iteration with a set of two constraints } else { cerr << "constraint not violated. 
fear loss:" << fear->loss << "\n"; } } else if (optimizer == 3) { // Cutting Plane MIRA cerr << "Cutting Plane MIRA\n"; unsigned cp_iter=0; // Cutting Plane Iteration bool again = true; while (again && cp_iter<CP_ITER) { again = false; cerr << "CuttingPlane: " << cp_iter << endl; // find a fear derivation hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); if (fear->loss < 0) { cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n"; //abort(); } // find max loss hypothesis double max_loss_in_set = (*std::max_element(S.begin(), S.end(), lossComp))->loss; if (fear->loss > max_loss_in_set + SMO_EPS) { cerr << "Adding new fear " << fear << " to S\n"; S.push_back(fear); OptimizeSet(S); again = true; } else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; } cp_iter++; // update losses //for(unsigned i=0;i<S.size();i++) S[i]->computeLoss(); } } cerr << "|W|=" << w.size() << endl; tot_loss += relscale(viterbi->rel); //print objective after this sentence //double w_change = (w - oldw).l2norm_sq(); //double temp_objective = 0.5 * w_change;// + max_step_size * max_fear; for(int u=0;u!=S.size();u++) { cerr << "alpha=" << S[u]->alpha << " loss=" << S[u]->loss << endl; //temp_objective += S[u]->alpha * S[u]->loss; } //objective += temp_objective; //cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; //tot += w; ++lc; avg_w *= lc; avg_w = (w + avg_w) / (lc+1); // set decoder weights for next sentence decoder_weights.clear(); w.init_vector(&decoder_weights); // rescale relevance weights to balance with new model after the update if (scaling == 2) { scaleRelevanceWeights(scalingfactor); cerr << "W_REL_SCALED=" << relw_scaled << endl; } // viterbi 2 for debugging //hg.Reweight(w); //boost::shared_ptr<HypothesisInfo> viterbi2 = MakeHypothesisInfo(hg); //viterbi2->oracle = oracle; //viterbi2->computeCost(); //viterbi2->computeLoss(); //fear->computeLoss(); //viterbi->computeLoss(); //good->computeLoss(); cerr << "FEAR : " << fear << " \n" << TD::GetString(fear->hyp) << endl; cerr << "BEST : " << viterbi << " \n" << TD::GetString(viterbi->hyp) << endl; //cerr << "BEST2: " << viterbi2 << " \n" << TD::GetString(viterbi2->hyp) << endl; cerr << "HOPE : " << good << " \n" << TD::GetString(good->hyp) << endl; cout << id << " ||| " << TD::GetString(fear->hyp) << " ||| " << TD::GetString(viterbi->hyp) << " ||| " << TD::GetString(good->hyp) << endl; S.clear(); fear.reset(); viterbi.reset(); //viterbi2.reset(); good.reset(); worst.reset(); oracle.reset(); } //cerr << "FINAL OBJECTIVE: "<< objective << endl; cerr << "Translated " << lc << " sentences\n"; cerr << " [AVG METRIC LAST PASS="******"]\n"; //tot_loss = 0; decoder_weights.clear(); w.init_vector(&decoder_weights); //Weights::ShowLargestFeatures(decoder_weights); // write weights int node_id = rng->next() * 100000; cerr << " Writing model to " << node_id << endl; ostringstream os; os << cfg["weights_output"].as<string>() << "/last." << node_id; string msg = "HGMIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc); Weights::WriteToFile(os.str(), decoder_weights, true, &msg); //SparseVector<double> x = tot; //x /= lc+1; ostringstream sa; string msga = "HGMIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc); sa << cfg["weights_output"].as<string>() << "/avg." 
<< node_id; avg_w.init_vector(&decoder_weights); Weights::WriteToFile(sa.str(), decoder_weights, true, &msga); delete decoder; cerr << "\ndone.\n"; return 0; }
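// Note restating the PA update used for optimizer == 4 above (no new behaviour,
// just the closed form the code computes):
//   delta = min( lr, loss(fear) / || f(hope) - f(fear) ||^2 )
//   w    <- w + delta * ( f(hope) - f(fear) )
// a single passive-aggressive / dual-coordinate step whose size is the loss divided
// by the squared L2 norm of the feature difference, clipped at the learning rate lr.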
// updates relw_scaled by scaling it to the current norm of w, multiplied by the scalingfactor
void scaleRelevanceWeights(const double scalingfactor) {
  relw_scaled /= relw_scaled.l2norm();        // to length 1: relw / ||relw||
  const double cur_w_norm = w.l2norm();       // ||w||
  if (cur_w_norm == 0) return;
  relw_scaled *= cur_w_norm * scalingfactor;  // scale by ||w||*scalingfactor
}
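// Net effect, assuming ||w|| > 0 (otherwise the early return leaves relw_scaled
// normalized to unit length):
//   relw_scaled <- ( relw_scaled / ||relw_scaled|| ) * ||w|| * scalingfactor
// so after each call ||relw_scaled|| = scalingfactor * ||w||, keeping the relevance
// weights at a fixed ratio to the norm of the current model weights.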