Пример #1
0
int main(int argc, char** argv) {
	po::variables_map cfg;
	if (!init_params(argc,argv,&cfg)) return 1;

	if (cfg.count("random_seed"))
		rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>()));
	else
		rng.reset(new MT19937);


	// setup decoder
	Decoder* decoder = setupDecoder(cfg);
	if (!decoder) {
		cerr << "error while loading decoder with" << cfg["decoder_config"].as<string>() << "!\n";
		return 1;
	}
	TrainingObserver observer;
	// get reference to decoder weights
	vector<weight_t>& decoder_weights = decoder->CurrentWeightVector();
	// setup weights
	WeightVector w, w_hope, w_fear;
	// the SMT weights (to be optimized)
	Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights);
	Weights::InitSparseVector(decoder_weights, &w);
	loadWeights(cfg["rweights"].as<string>(), w_hope);
	WeightVector w_inv = w*-1;
	WeightVector w_hope_inv = w_hope*-1;

	//cerr << "W    " << w << endl;
	//cerr << "WINV " << w_inv << endl;
	//cerr << "R    " << w_hope << endl;
	//cerr << "RINV " << w_hope_inv << endl;

	const string input = decoder->GetConf()["input"].as<string>();
	//cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl << endl;
	ReadFile in_read(input);
	istream *in = in_read.stream();
	assert(*in);
	string id, sentence;
	std::vector<HypergraphSampler::Hypothesis> samples;

	while(*in >> id) {

		in->ignore(1, '\t');
		getline(*in, sentence);
		if (sentence.empty() || id.empty()) continue;

		//decoder->SetId(id);
		decoder->Decode(sentence, &observer); // decode with decoder_weights
		Hypergraph hg = observer.GetCurrentForest();

		// get max model score
		double max_tscore = ViterbiFeatures(hg).dot(w);
		// get min model score
		hg.Reweight(w_inv);
		double min_tscore = -ViterbiFeatures(hg).dot(w_inv);
		// get max rel score
		hg.Reweight(w_hope);
		double max_rscore = ViterbiFeatures(hg).dot(w_hope);
		// get min rel_score
		hg.Reweight(w_hope_inv);
		double min_rscore = -ViterbiFeatures(hg).dot(w_hope_inv);

		//cerr << max_tscore << " " << min_tscore << " " << max_rscore << " " << min_rscore << endl;

		if (cfg.count("sample")) {

			HypergraphSampler::sample_hypotheses(hg, cfg["sample"].as<int>(), &(*rng), &samples);
			for (unsigned s=0;s<samples.size();++s) {
				const HypergraphSampler::Hypothesis& h = samples[s];
				cout << id << "\t" << "S\t" << vscale(h.fmap.dot(w), min_tscore, max_tscore) <<
						"\t" <<  vscale(h.fmap.dot(w_hope), min_rscore, max_rscore) <<
						"\t" << TD::GetString(h.words) << endl;
			}

		} else if (cfg.count("kbest")) {
			typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,KBest::FilterUnique> K;
			// get kbest model score derivations
			hg.Reweight(w);
			K kbest2(hg,cfg["kbest"].as<int>());
			for (int i = 0; i < cfg["kbest"].as<int>(); ++i) {
			      typename K::Derivation *d = kbest2.LazyKthBest(hg.nodes_.size() - 1, i);
			      if (!d) break;
			      cout << id << "\t" << "KBT\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) <<
						"\t" <<  vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) <<
						"\t" << TD::GetString(d->yield) << endl;
			}

			// get kworst model score derivations
			hg.Reweight(w_inv);
			K kbest3(hg,cfg["kbest"].as<int>());
			for (int i = 0; i < cfg["kbest"].as<int>(); ++i) {
			      typename K::Derivation *d = kbest3.LazyKthBest(hg.nodes_.size() - 1, i);
			      if (!d) break;
			      cout << id << "\t" << "KWT\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) <<
						"\t" <<  vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) <<
						"\t" << TD::GetString(d->yield) << endl;
			}

			// get kbest rel score derivations
			hg.Reweight(w_hope);
			K kbest4(hg,cfg["kbest"].as<int>());
			for (int i = 0; i < cfg["kbest"].as<int>(); ++i) {
			      typename K::Derivation *d = kbest4.LazyKthBest(hg.nodes_.size() - 1, i);
			      if (!d) break;
			      cout << id << "\t" << "KBR\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) <<
						"\t" <<  vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) <<
						"\t" << TD::GetString(d->yield) << endl;
			}

			// get kbest model score derivations
			hg.Reweight(w_hope_inv);
			K kbest(hg,cfg["kbest"].as<int>());
			for (int i = 0; i < cfg["kbest"].as<int>(); ++i) {
			      typename K::Derivation *d = kbest.LazyKthBest(hg.nodes_.size() - 1, i);
			      if (!d) break;
			      cout << id << "\t" << "KWR\t" << vscale(d->feature_values.dot(w), min_tscore, max_tscore) <<
						"\t" <<  vscale(d->feature_values.dot(w_hope), min_rscore, max_rscore) <<
						"\t" << TD::GetString(d->yield) << endl;
			}

		}


	}

	delete decoder;
	return 0;

}
Пример #2
0
/**
 * Online tuning of the SMT weight vector w against a relevance weight vector.
 *
 * Steps, as performed below:
 *  - parse the command line via init_params(), seed the global rng and copy
 *    the options (learning rate, hope/fear selection, optimizer, frozen
 *    features, scaling) into the corresponding globals;
 *  - set up the decoder, optionally load initial weights ("weights") and load
 *    the relevance weights ("rweights") into relw (negrelw = -relw);
 *  - for every "id<TAB>sentence" line of the decoder's "input": decode, build
 *    the Viterbi, oracle (best under relw) and worst (best under -relw)
 *    hypotheses, pick a hope and a fear hypothesis and update w with the
 *    selected optimizer:
 *        1 = SGD, 2 = PA MIRA with selection from cutting plane,
 *        3 = cutting plane MIRA, 4 = PA update (single dual coordinate step);
 *  - after each sentence push w back into the decoder and fold it into the
 *    running average avg_w;
 *  - finally write the last and the averaged weights to
 *    "<weights_output>/last.<node_id>" and "<weights_output>/avg.<node_id>".
 *
 * Per sentence, "id ||| fear ||| viterbi ||| hope" is printed to stdout;
 * diagnostics go to stderr.
 *
 * @return 0 on success, 1 if the parameters or the decoder could not be set
 *         up, or if an unsupported optimizer / fear selection was requested.
 */
int main(int argc, char** argv) {
	po::variables_map cfg;
	if (!init_params(argc,argv,&cfg)) return 1;

	// seed the global random number generator, explicitly if requested
	if (cfg.count("random_seed"))
		rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>()));
	else
		rng.reset(new MT19937);

	// set variables
	lr = cfg["learningrate"].as<double>();
	hope_select = cfg["hope"].as<int>();
	fear_select = cfg["fear"].as<int>();
	optimizer = cfg["optimizer"].as<int>();
	freeze = cfg.count("freeze");
	if (freeze) {
		const vector<string>& ffstrs = cfg["freeze"].as<vector<string> >();
		stringstream ffss;
		ffss << "frozen features: ";
		for (vector<string>::const_iterator ffit=ffstrs.begin();ffit!=ffstrs.end();++ffit) {
			frozen_features.push_back(FD::Convert(*ffit));
			ffss << *ffit << " ";
		}
		cerr << ffss.str() << endl;
	}
	scaling = cfg["scaling"].as<int>();
	scalingfactor = cfg["scalingfactor"].as<double>();
	cerr << "scaling="<< scaling << " scalingfactor=" << scalingfactor << endl;

	// setup decoder
	Decoder* decoder = setupDecoder(cfg);
	if (!decoder) {
		cerr << "error while loading decoder with " << cfg["decoder_config"].as<string>() << "!\n";
		return 1;
	}
	TrainingObserver observer;
	// get reference to decoder weights
	vector<weight_t>& decoder_weights = decoder->CurrentWeightVector();
	// the SMT weights (to be optimized)
	if (cfg.count("weights")) {
		Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights);
		Weights::InitSparseVector(decoder_weights, &w);
	} else {
		cerr << "starting with EMPTY weights!\n";
	}
	// the weight vector that gives the oracle
	loadRelevanceWeights(cfg["rweights"].as<string>(), relw);
	// assumes negrelw starts out empty, so that it becomes -relw -- TODO confirm
	negrelw -= relw;
	relw_scaled = relw;
	// initial scaling
	if (scaling != 0) scaleRelevanceWeights(scalingfactor);

	// output some vector stats
	cerr << "W_REL=" << relw << endl;
	cerr << "W_REL_SCALED=" << relw_scaled << endl;
	cerr << "|W_REL|=" << relw_scaled.size() << endl;
	cerr << "|W_SMT|=" << w.size() << endl;

	cerr << "hope selection: " << hope_select << endl;
	const string input = decoder->GetConf()["input"].as<string>();
	cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
	ReadFile in_read(input);
	istream *in = in_read.stream();
	assert(*in);
	string id, sentence;
	int cur_sent = 0;
	unsigned lc = 0; // number of processed sentences

	double tot_loss = 0;
	// running average of w; starts with the initial weights as first sample
	WeightVector avg_w = w;

	while(*in >> id) {

		// skip the single tab between id and sentence, then take the rest of the line
		in->ignore(1, '\t');
		getline(*in, sentence);
		if (sentence.empty() || id.empty()) continue;

		cerr << "\nID="<<id << endl;
		decoder->SetId(cur_sent);
		decoder->Decode(sentence, &observer); // decode with decoder_weights
		cur_sent = observer.GetCurrentSent();
		Hypergraph hg = observer.GetCurrentForest();

		vector<boost::shared_ptr<HypothesisInfo> > S;
		// reset the global relevance range for this sentence
		MAX_REL = std::numeric_limits<double>::lowest();
		MIN_REL = std::numeric_limits<double>::max();

		// get viterbi
		boost::shared_ptr<HypothesisInfo> viterbi = MakeHypothesisInfo(hg);

		// get the true oracle (sets max_rel)
		hg.Reweight(relw);
		boost::shared_ptr<HypothesisInfo> oracle = MakeHypothesisInfo(hg);
		// NOTE(review): if HypothesisInfo::oracle is an owning shared_ptr this
		// self-reference is a cycle that is never released -- confirm the member type.
		oracle->oracle = oracle;
		oracle->computeCost();

		// get the worst derivation (to get min_rel)
		hg.Reweight(negrelw);
		boost::shared_ptr<HypothesisInfo> worst = MakeHypothesisInfo(hg);
		worst->oracle = oracle;
		worst->computeCost();

		if (hope_select == 1) { // hope
			hg.Reweight(w + relw_scaled);
			S.push_back(MakeHypothesisInfo(hg));
			S[0]->oracle = oracle;
			S[0]->computeCost();
		} else { // true oracle
			S.push_back(oracle);
		}
		// S contains now ONE (hope/oracle) hypothesis
		S[0]->computeLoss();
		boost::shared_ptr<HypothesisInfo> good = S[0];

		viterbi->oracle = oracle;
		viterbi->computeCost();
		viterbi->computeLoss();

		cerr << "min_rel=" << MIN_REL << " max_rel=" << MAX_REL << endl;
		cerr << "S[0]=" << S[0] << endl;

		boost::shared_ptr<HypothesisInfo> fear;

		if (optimizer == 4) { // PA update (single dual coordinate step)
			cerr << "PA MIRA (single dual coordinate step)\n";

			hg.Reweight(w - relw_scaled);
			fear = MakeHypothesisInfo(hg);
			fear->oracle = oracle;
			fear->computeCost();
			fear->computeLoss();
			cerr << "LOSS: " << fear->loss << "\n";
			if (fear->loss > 0.0) {
				const double diffsqnorm = (good->features - fear->features).l2norm_sq();
				// identical feature vectors give no update direction
				if (diffsqnorm > 0) {
					// step size loss / |diff|^2, capped at the learning rate
					double delta = fear->loss / (diffsqnorm);
					if (delta > lr) delta = lr;
					w += good->features * delta;
					w -= fear->features * delta;
				}
			}

		} else if (optimizer == 1) {// sgd - nonadapted step size
			cerr << "SGD\n";

			if (fear_select == 1) {
				// NOTE(review): unlike the other optimizers, no oracle/cost/loss
				// is computed for this fear hypothesis.
				hg.Reweight(w - relw_scaled);
				fear = MakeHypothesisInfo(hg);
			} else if (fear_select == 2) {
				fear = worst;
			} else if (fear_select == 3) {
				fear = viterbi;
			} else {
				// without a fear hypothesis the update below would dereference null
				cerr << "unsupported fear selection: " << fear_select << "\n";
				delete decoder;
				return 1;
			}
			w += good->features * lr;
			w -= fear->features * lr;

		} else if (optimizer == 2) { // PA MIRA with selection from  cutting plane
			cerr << "PA MIRA with Selection from Cutting Plane\n";

			hg.Reweight(w - relw_scaled);
			fear = MakeHypothesisInfo(hg);
			fear->oracle = oracle;
			fear->computeCost();
			fear->computeLoss();
			if (fear->loss < 0) {
				cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n";
				abort();
			}
			if (fear->loss > good->loss + SMO_EPS) {
				S.push_back(fear);
				OptimizeSet(S, 1); // only one iteration with a set of two constraints
			} else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; }

		} else if (optimizer == 3) { // Cutting Plane MIRA
			cerr << "Cutting Plane MIRA\n";

			unsigned cp_iter=0; // Cutting Plane Iteration
			bool again = true;
			// keep adding violated constraints until none is found or CP_ITER is reached
			while (again && cp_iter<CP_ITER) {
				again = false;
				cerr << "CuttingPlane: " << cp_iter << endl;
				// find a fear derivation
				hg.Reweight(w - relw_scaled);
				fear = MakeHypothesisInfo(hg);
				fear->oracle = oracle;
				fear->computeCost();
				fear->computeLoss();
				if (fear->loss < 0) {
					// only reported here; the optimizer == 2 branch aborts instead
					cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n";
				}
				// find max loss hypothesis
				double max_loss_in_set = (*std::max_element(S.begin(), S.end(), lossComp))->loss;
				if (fear->loss > max_loss_in_set + SMO_EPS) {
					cerr << "Adding new fear " << fear << " to S\n";
					S.push_back(fear);
					OptimizeSet(S);
					again = true;
				} else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; }
				cp_iter++;
			}
		} else {
			// no branch produced a fear hypothesis; the reporting below needs one
			cerr << "unsupported optimizer: " << optimizer << "\n";
			delete decoder;
			return 1;
		}

		cerr << "|W|=" << w.size() << endl;
		tot_loss += relscale(viterbi->rel);
		for (size_t u = 0; u != S.size(); ++u) {
			cerr << "alpha=" << S[u]->alpha << " loss=" << S[u]->loss << endl;
		}

		// running average over the initial weights plus one vector per sentence:
		// avg_w held the mean of lc vectors, now it becomes the mean of lc+1
		++lc;
		avg_w *= lc;
		avg_w = (w + avg_w) / (lc+1);

		// set decoder weights for next sentence
		decoder_weights.clear();
		w.init_vector(&decoder_weights);
		// rescale relevance weights to balance with new model after the update
		if (scaling == 2) {
			scaleRelevanceWeights(scalingfactor);
			cerr << "W_REL_SCALED=" << relw_scaled << endl;
		}

		cerr << "FEAR : " << fear << " \n" << TD::GetString(fear->hyp) << endl;
		cerr << "BEST : " << viterbi << " \n" << TD::GetString(viterbi->hyp) << endl;
		cerr << "HOPE : " << good << " \n" << TD::GetString(good->hyp) << endl;

		cout << id << " ||| " << TD::GetString(fear->hyp) << " ||| " << TD::GetString(viterbi->hyp) << " ||| " << TD::GetString(good->hyp) << endl;

		S.clear();
		fear.reset();
		viterbi.reset();
		good.reset();
		worst.reset();
		oracle.reset();

	}

	cerr << "Translated " << lc << " sentences\n";
	// average of the per-sentence relscale(viterbi->rel) values; 0 if nothing was read
	cerr << " [AVG METRIC LAST PASS=" << (lc > 0 ? tot_loss / lc : 0.0) << "]\n";

	decoder_weights.clear();
	w.init_vector(&decoder_weights);
	// write weights; the random node id keeps output files of separate runs apart
	int node_id = rng->next() * 100000;
	cerr << " Writing model to " << node_id << endl;
	ostringstream os;
	os << cfg["weights_output"].as<string>() << "/last." << node_id;
	string msg = "HGMIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc);
	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);

	// write the averaged weights; decoder_weights is reused as the dense buffer
	// NOTE(review): decoder_weights is not cleared before this init_vector call,
	// unlike the two other uses above -- confirm init_vector overwrites all entries.
	ostringstream sa;
	string msga = "HGMIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc);
	sa << cfg["weights_output"].as<string>() << "/avg." << node_id;
	avg_w.init_vector(&decoder_weights);
	Weights::WriteToFile(sa.str(), decoder_weights, true, &msga);


	delete decoder;
	cerr << "\ndone.\n";
	return 0;

}