int main(int argc, char** argv) { po::variables_map cfg; if (!init_params(argc,argv,&cfg)) return 1; if (cfg.count("random_seed")) rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>())); else rng.reset(new MT19937); // setup decoder Decoder* decoder = setupDecoder(cfg); if (!decoder) { cerr << "error while loading decoder with" << cfg["decoder_config"].as<string>() << "!\n"; return 1; } TrainingObserver observer; // get reference to decoder weights vector<weight_t>& decoder_weights = decoder->CurrentWeightVector(); // setup weights WeightVector w, w_hope, w_fear; // the SMT weights (to be optimized) Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights); Weights::InitSparseVector(decoder_weights, &w); loadWeights(cfg["rweights"].as<string>(), w_hope); WeightVector w_inv = w*-1; WeightVector w_hope_inv = w_hope*-1; //cerr << "W " << w << endl; //cerr << "WINV " << w_inv << endl; //cerr << "R " << w_hope << endl; //cerr << "RINV " << w_hope_inv << endl; const string input = decoder->GetConf()["input"].as<string>(); //cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl << endl; ReadFile in_read(input); istream *in = in_read.stream(); assert(*in); string id, sentence; std::vector<HypergraphSampler::Hypothesis> samples; while(*in >> id) { in->ignore(1, '\t'); getline(*in, sentence); if (sentence.empty() || id.empty()) continue; //decoder->SetId(id); decoder->Decode(sentence, &observer); // decode with decoder_weights Hypergraph hg = observer.GetCurrentForest(); // get max model score double max_tscore = ViterbiFeatures(hg).dot(w); // get min model score hg.Reweight(w_inv); double min_tscore = -ViterbiFeatures(hg).dot(w_inv); // get max rel score hg.Reweight(w_hope); double max_rscore = ViterbiFeatures(hg).dot(w_hope); // get min rel_score hg.Reweight(w_hope_inv); double min_rscore = -ViterbiFeatures(hg).dot(w_hope_inv); //cerr << max_tscore << " " << min_tscore << " " << max_rscore << " " << min_rscore << endl; if (cfg.count("sample")) { HypergraphSampler::sample_hypotheses(hg, cfg["sample"].as<int>(), &(*rng), &samples); for (unsigned s=0;s<samples.size();++s) { const HypergraphSampler::Hypothesis& h = samples[s]; cout << id << "\t" << "S\t" << vscale(h.fmap.dot(w), min_tscore, max_tscore) << "\t" << vscale(h.fmap.dot(w_hope), min_rscore, max_rscore) << "\t" << TD::GetString(h.words) << endl; } } else if (cfg.count("kbest")) { typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,KBest::FilterUnique> K; // get kbest model score derivations hg.Reweight(w); K kbest2(hg,cfg["kbest"].as<int>()); for (int i = 0; i < cfg["kbest"].as<int>(); ++i) { typename K::Derivation *d = kbest2.LazyKthBest(hg.nodes_.size() - 1, i); if (!d) break; cout << id << "\t" << "KBT\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) << "\t" << vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) << "\t" << TD::GetString(d->yield) << endl; } // get kworst model score derivations hg.Reweight(w_inv); K kbest3(hg,cfg["kbest"].as<int>()); for (int i = 0; i < cfg["kbest"].as<int>(); ++i) { typename K::Derivation *d = kbest3.LazyKthBest(hg.nodes_.size() - 1, i); if (!d) break; cout << id << "\t" << "KWT\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) << "\t" << vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) << "\t" << TD::GetString(d->yield) << endl; } // get kbest rel score derivations hg.Reweight(w_hope); K kbest4(hg,cfg["kbest"].as<int>()); for (int i = 0; i < cfg["kbest"].as<int>(); ++i) { typename K::Derivation *d = kbest4.LazyKthBest(hg.nodes_.size() - 1, i); if (!d) break; cout << id << "\t" << "KBR\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) << "\t" << vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) << "\t" << TD::GetString(d->yield) << endl; } // get kbest model score derivations hg.Reweight(w_hope_inv); K kbest(hg,cfg["kbest"].as<int>()); for (int i = 0; i < cfg["kbest"].as<int>(); ++i) { typename K::Derivation *d = kbest.LazyKthBest(hg.nodes_.size() - 1, i); if (!d) break; cout << id << "\t" << "KWR\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) << "\t" << vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) << "\t" << TD::GetString(d->yield) << endl; } } } delete decoder; return 0; }
int main(int argc, char** argv) { po::variables_map cfg; if (!init_params(argc,argv,&cfg)) return 1; if (cfg.count("random_seed")) rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>())); else rng.reset(new MT19937); // set variables lr = cfg["learningrate"].as<double>(); hope_select = cfg["hope"].as<int>(); fear_select = cfg["fear"].as<int>(); optimizer = cfg["optimizer"].as<int>(); freeze = cfg.count("freeze"); if (freeze) { const vector<string>& ffstrs = cfg["freeze"].as<vector<string> >(); stringstream ffss; ffss << "frozen features: "; for (vector<string>::const_iterator ffit=ffstrs.begin();ffit!=ffstrs.end();++ffit) { frozen_features.push_back(FD::Convert(*ffit)); ffss << *ffit << " "; } cerr << ffss.str() << endl; } scaling = cfg["scaling"].as<int>(); scalingfactor = cfg["scalingfactor"].as<double>(); cerr << "scaling="<< scaling << " scalingfactor=" << scalingfactor << endl; // setup decoder Decoder* decoder = setupDecoder(cfg); if (!decoder) { cerr << "error while loading decoder with" << cfg["decoder_config"].as<string>() << "!\n"; return 1; } TrainingObserver observer; // get reference to decoder weights vector<weight_t>& decoder_weights = decoder->CurrentWeightVector(); // the SMT weights (to be optimized) if (cfg.count("weights")) { Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights); Weights::InitSparseVector(decoder_weights, &w); } else { cerr << "starting with EMPTY weights!\n"; } // the weight vector that gives the oracle loadRelevanceWeights(cfg["rweights"].as<string>(), relw); negrelw -= relw; relw_scaled = relw; // initial scaling if (scaling != 0) scaleRelevanceWeights(scalingfactor); // output some vector stats cerr << "W_REL=" << relw << endl; cerr << "W_REL_SCALED=" << relw_scaled << endl; cerr << "|W_REL|=" << relw_scaled.size() << endl; cerr << "|W_SMT|=" << w.size() << endl; cerr << "hope selection: " << hope_select << endl; const string input = decoder->GetConf()["input"].as<string>(); cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl; ReadFile in_read(input); istream *in = in_read.stream(); assert(*in); string id, sentence; int cur_sent = 0; unsigned lc = 0; // line count double objective=0; double tot_loss = 0; WeightVector avg_w = w; //SparseVector<double> tot; //SparseVector<double> oldw = w; //tot.clear(); //tot += w; while(*in >> id) { in->ignore(1, '\t'); getline(*in, sentence); if (sentence.empty() || id.empty()) continue; cerr << "\nID="<<id << endl; decoder->SetId(cur_sent); decoder->Decode(sentence, &observer); // decode with decoder_weights cur_sent = observer.GetCurrentSent(); Hypergraph hg = observer.GetCurrentForest(); vector<boost::shared_ptr<HypothesisInfo> > S; MAX_REL = std::numeric_limits<double>::lowest(); MIN_REL = std::numeric_limits<double>::max(); // get viterbi boost::shared_ptr<HypothesisInfo> viterbi = MakeHypothesisInfo(hg); // get the true oracle (sets max_rel) hg.Reweight(relw); boost::shared_ptr<HypothesisInfo> oracle = MakeHypothesisInfo(hg); oracle->oracle = oracle; oracle->computeCost(); // get the worst derivation (to get min_rel) hg.Reweight(negrelw); boost::shared_ptr<HypothesisInfo> worst = MakeHypothesisInfo(hg); worst->oracle = oracle; worst->computeCost(); if (hope_select == 1) { // hope hg.Reweight(w + relw_scaled); S.push_back(MakeHypothesisInfo(hg)); S[0]->oracle = oracle; S[0]->computeCost(); } else { // true oracle S.push_back(oracle); } // S contains now ONE (hope/oracle) hypothesis S[0]->computeLoss(); boost::shared_ptr<HypothesisInfo> good = S[0]; viterbi->oracle = oracle; viterbi->computeCost(); viterbi->computeLoss(); cerr << "min_rel=" << MIN_REL << " max_rel=" << MAX_REL << endl; cerr << "S[0]=" << S[0] << endl; boost::shared_ptr<HypothesisInfo> fear; if (optimizer == 4) { // PA update (single dual coordinate step) cerr << "PA MIRA (single dual coordinate step)\n"; hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); cerr << "LOSS: " << fear->loss; if (fear->loss > 0.0) { double diffsqnorm = (good->features - fear->features).l2norm_sq(); double delta; if (diffsqnorm > 0) { delta = fear->loss / (diffsqnorm); if (delta > lr) delta = lr; w += good->features * delta; w -= fear->features * delta; } } } else if (optimizer == 1) {// sgd - nonadapted step size cerr << "SGD\n"; if (fear_select == 1) { hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); } else if (fear_select == 2) { fear = worst; } else if (fear_select == 3) { fear = viterbi; } w += good->features * lr; w -= fear->features * lr; } else if (optimizer == 2) { // PA MIRA with selection from cutting plane cerr << "PA MIRA with Selection from Cutting Plane\n"; hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); if (fear->loss < 0) { cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n"; abort(); } if (fear->loss > good->loss + SMO_EPS) { S.push_back(fear); OptimizeSet(S, 1); // only one iteration with a set of two constraints } else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; } } else if (optimizer == 3) { // Cutting Plane MIRA cerr << "Cutting Plane MIRA\n"; unsigned cp_iter=0; // Cutting Plane Iteration bool again = true; while (again && cp_iter<CP_ITER) { again = false; cerr << "CuttingPlane: " << cp_iter << endl; // find a fear derivation hg.Reweight(w - relw_scaled); fear = MakeHypothesisInfo(hg); fear->oracle = oracle; fear->computeCost(); fear->computeLoss(); if (fear->loss < 0) { cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n"; //abort(); } // find max loss hypothesis double max_loss_in_set = (*std::max_element(S.begin(), S.end(), lossComp))->loss; if (fear->loss > max_loss_in_set + SMO_EPS) { cerr << "Adding new fear " << fear << " to S\n"; S.push_back(fear); OptimizeSet(S); again = true; } else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; } cp_iter++; // update losses //for(unsigned i=0;i<S.size();i++) S[i]->computeLoss(); } } cerr << "|W|=" << w.size() << endl; tot_loss += relscale(viterbi->rel); //print objective after this sentence //double w_change = (w - oldw).l2norm_sq(); //double temp_objective = 0.5 * w_change;// + max_step_size * max_fear; for(int u=0;u!=S.size();u++) { cerr << "alpha=" << S[u]->alpha << " loss=" << S[u]->loss << endl; //temp_objective += S[u]->alpha * S[u]->loss; } //objective += temp_objective; //cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; //tot += w; ++lc; avg_w *= lc; avg_w = (w + avg_w) / (lc+1); // set decoder weights for next sentence decoder_weights.clear(); w.init_vector(&decoder_weights); // rescale relevance weights to balance with new model after the update if (scaling == 2) { scaleRelevanceWeights(scalingfactor); cerr << "W_REL_SCALED=" << relw_scaled << endl; } // viterbi 2 for debugging //hg.Reweight(w); //boost::shared_ptr<HypothesisInfo> viterbi2 = MakeHypothesisInfo(hg); //viterbi2->oracle = oracle; //viterbi2->computeCost(); //viterbi2->computeLoss(); //fear->computeLoss(); //viterbi->computeLoss(); //good->computeLoss(); cerr << "FEAR : " << fear << " \n" << TD::GetString(fear->hyp) << endl; cerr << "BEST : " << viterbi << " \n" << TD::GetString(viterbi->hyp) << endl; //cerr << "BEST2: " << viterbi2 << " \n" << TD::GetString(viterbi2->hyp) << endl; cerr << "HOPE : " << good << " \n" << TD::GetString(good->hyp) << endl; cout << id << " ||| " << TD::GetString(fear->hyp) << " ||| " << TD::GetString(viterbi->hyp) << " ||| " << TD::GetString(good->hyp) << endl; S.clear(); fear.reset(); viterbi.reset(); //viterbi2.reset(); good.reset(); worst.reset(); oracle.reset(); } //cerr << "FINAL OBJECTIVE: "<< objective << endl; cerr << "Translated " << lc << " sentences\n"; cerr << " [AVG METRIC LAST PASS="******"]\n"; //tot_loss = 0; decoder_weights.clear(); w.init_vector(&decoder_weights); //Weights::ShowLargestFeatures(decoder_weights); // write weights int node_id = rng->next() * 100000; cerr << " Writing model to " << node_id << endl; ostringstream os; os << cfg["weights_output"].as<string>() << "/last." << node_id; string msg = "HGMIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc); Weights::WriteToFile(os.str(), decoder_weights, true, &msg); //SparseVector<double> x = tot; //x /= lc+1; ostringstream sa; string msga = "HGMIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc); sa << cfg["weights_output"].as<string>() << "/avg." << node_id; avg_w.init_vector(&decoder_weights); Weights::WriteToFile(sa.str(), decoder_weights, true, &msga); delete decoder; cerr << "\ndone.\n"; return 0; }