Example #1
 template <typename Tp1, typename Alloc1, typename Tp2, typename Alloc2>
 inline
 Tp1 dot_product(const WeightVector<Tp1, Alloc1>& x, const WeightVector<Tp2, Alloc2>& y)
 {
   const size_t size = utils::bithack::min(x.size(), y.size());
   
   return std::inner_product(x.begin(), x.begin() + size, y.begin(), Tp1());
 }
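A usage sketch for the function above, under the assumption that WeightVector is a std::vector-like dense container and utils::bithack::min behaves like std::min; only the common prefix of the two vectors contributes, since the longer tail would multiply against implicit zeros:

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> x(3); x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
  std::vector<double> y(2); y[0] = 0.5; y[1] = 0.5;
  const size_t size = std::min(x.size(), y.size());
  // same reduction as dot_product above: 1.0*0.5 + 2.0*0.5 = 1.5
  std::cout << std::inner_product(x.begin(), x.begin() + size, y.begin(), 0.0)
            << std::endl;
  return 0;
}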
Example #2
int main(int argc, char** argv) {
	// handle parameters
	po::variables_map cfg;
	if (!init_params(argc,argv, &cfg)) exit(1); // something is wrong

	// init weights
	DenseWeightVector dense_weights;
	WeightVector weights;
	if (cfg.count("weights")) Weights::InitFromFile(cfg["weights"].as<string>(), &dense_weights);
	Weights::InitSparseVector(dense_weights, &weights);
	cerr << "Current Weight Vector:\n";
	for (WeightVector::iterator i=weights.begin(); i!=weights.end(); ++i)
		cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
	/*cerr << "\nDense Weights:\n";
	for (int i = 0 ; i < dense_weights.size(); ++i)
		cerr << i << " " << dense_weights[i] << endl;*/
	cerr << "# of features: " << FD::NumFeats() << " (-1 dummy feature @ idx 0)\n\n";

	// load instances
	vector<TrainingInstance> instances;
	loadInstances(cfg["input"].as<string>(), instances);

 	// setup output directory
	//MkDirP(cfg["output"].as<string>());
	//stringstream outss;
	//outss << cfg["output"].as<string>() << "/";
	//const string out_path = outss.str();

	// setup loss function
	ListwiseLossFunction* lossfunc = set_loss(&cfg);
	cerr << "listwise loss function: " << cfg["loss"].as<string>() << "\n";

	// run AdaRank optimizer
	AdaRank adarank(
			instances,
			instances.size(),
			cfg["iterations"].as<int>(),
			cfg["epsilon"].as<double>(),
			dense_weights,
			lossfunc,
			cfg.count("verbose")
			);

	adarank.run();

	// write output weight vector
	DenseWeightVector new_dense_weights = adarank.GetWeightVector();
	WeightVector new_weights;
	Weights::InitSparseVector(new_dense_weights, &new_weights);

	cerr << "Final Weight Vector:\n";
	for (WeightVector::iterator i=new_weights.begin(); i!=new_weights.end(); ++i)
		cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
	Weights::WriteToFile(cfg["output"].as<string>(), new_dense_weights, true, NULL);

}
Example #3
void loadRelevanceWeights(const string& fname, WeightVector& rw) {
	rw.clear();
	ReadFile in_file(fname);
	istream& in = *in_file.stream();
	assert(in);
	string f;
	double v;
	// read whitespace-separated feature-name/weight pairs; reading both
	// fields in the loop condition avoids reusing a stale value on a
	// truncated last line
	while (in >> f >> v)
		rw.set_value(FD::Convert(f), v);
}
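The read loop above implies a plain-text format of whitespace-separated feature-name/weight pairs, one pair per line; the feature names below are invented for illustration:

Glue -0.3
LanguageModel 0.25
WordPenalty -1.5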
Example #4
int main(int argc, char** argv) {

	if (argc < 2 || argc > 3) {
		cerr << "USAGE: view-instances <binary instance file> [<weights>]\n";
		exit(1);
	}

	bool has_w = (argc==3);
		
	// load instances
	vector<Instance> instances;
	loadInstances(string(argv[1]), instances);
	
	// weights
	WeightVector weights;
	if (has_w) {
		DenseWeightVector dense_weights;
		Weights::InitFromFile(string(argv[2]), &dense_weights);
		Weights::InitSparseVector(dense_weights, &weights);
		cerr << "Current Weight Vector:\n";
		for (WeightVector::iterator i=weights.begin(); i!=weights.end(); ++i)
			cerr << i->first << " " << FD::Convert(i->first) << "=" << i->second << endl;
	}

	double likelihood = 0.0;
	for (size_t i = 0; i < instances.size(); ++i) {
		if (instances[i].ir_sorted) {
			const double likelihood_i = has_w
				? PlackettLuce::pl_likelihood(instances[i], weights)
				: PlackettLuce::pl_likelihood(instances[i]);
			cout << "P(y|x;" << (has_w ? "w" : "D") << ")=" << likelihood_i << "\n";
			likelihood += likelihood_i; // accumulate only freshly computed values
		}

		if (has_w)
			cout << instances[i].AsString(weights) << endl;
		else
			cout << instances[i].AsString() << endl;
	}
	
	cerr << "Likelihood=" << likelihood << "\n";

}
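PlackettLuce::pl_likelihood itself is not shown in these examples. For reference, a self-contained sketch of the standard Plackett-Luce probability of a ranking, assuming per-item model scores listed in observed rank order (the function name and score handling are assumptions, not the actual API):

#include <cmath>
#include <vector>

// P(ranking) = prod_k exp(s_k) / sum_{j >= k} exp(s_j), for scores s_1..s_n
// given in the observed rank order.
double pl_likelihood_sketch(const std::vector<double>& scores) {
  double p = 1.0;
  for (size_t k = 0; k < scores.size(); ++k) {
    double denom = 0.0;
    for (size_t j = k; j < scores.size(); ++j)
      denom += std::exp(scores[j]);
    p *= std::exp(scores[k]) / denom;
  }
  return p;
}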
Example #5
boost::shared_ptr<HypothesisInfo> MakeHypothesisInfo(Hypergraph& hg) {
	/*
	 * create a HypothesisInfo holding the feature vector, the translation,
	 * and its relevance score; relevance feature values are then removed
	 * from the feature vector (and optionally any frozen features)
	 */
	boost::shared_ptr<HypothesisInfo> h(new HypothesisInfo);
	h->features = ViterbiFeatures(hg);
	h->rel = h->features.dot(relw);
	// clean relevance weights from feature vector
	for (WeightVector::iterator it=relw.begin(); it!=relw.end(); ++it) { h->features.set_value(it->first, .0); }
	ViterbiESentence(hg, &(h->hyp));
	if (freeze) { for (unsigned x=0;x<frozen_features.size();++x) { h->features.set_value(frozen_features[x], .0); } }
	// for rel scaling:
	if (h->rel > MAX_REL) MAX_REL = h->rel;
	if (h->rel < MIN_REL) MIN_REL = h->rel;
	return h;
}
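The relscale helper referenced in Example #8 is not shown either; given that MakeHypothesisInfo tracks MIN_REL and MAX_REL "for rel scaling", a plausible (purely hypothetical) implementation is a min-max normalization:

// Hypothetical sketch: map a raw relevance score into [0,1] using the
// extremes tracked per sentence in MakeHypothesisInfo.
double relscale(const double rel) {
	if (MAX_REL == MIN_REL) return 0.0; // degenerate: all hypotheses score the same
	return (rel - MIN_REL) / (MAX_REL - MIN_REL);
}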
Example #6
  template <typename Tp1, typename Alloc1, typename Tp2, typename Alloc2, typename BinaryOp>
  inline
  Tp1 dot_product(const WeightVector<Tp1, Alloc1>& x, const WeightVector<Tp2, Alloc2>& y, BinaryOp op)
  {
    typedef WeightVector<Tp1, Alloc1> weight_vector1_type;
    typedef WeightVector<Tp2, Alloc2> weight_vector2_type;
    
    const size_t size = utils::bithack::min(x.size(), y.size());
    
    Tp1 __dot =  std::inner_product(x.begin(), x.begin() + size, y.begin(), Tp1(), std::plus<Tp1>(), op);
    
    if (x.size() > y.size()) {
      typename weight_vector1_type::const_iterator iter_end = x.end();
      for (typename weight_vector1_type::const_iterator iter = x.begin() + size; iter != iter_end; ++ iter)
	__dot += op(*iter, Tp2());
    } else {
      typename weight_vector2_type::const_iterator iter_end = y.end();
      for (typename weight_vector2_type::const_iterator iter = y.begin() + size; iter != iter_end; ++ iter)
	__dot += op(Tp1(), *iter);
    }
    return __dot;
  }
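The same sketch idea for the BinaryOp overload (again treating WeightVector as std::vector-like): with op = std::minus the tail loops matter, because leftover entries of the longer vector are combined with a default-constructed zero rather than dropped:

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> x(3); x[0] = 1.0; x[1] = 2.0; x[2] = 3.0;
  std::vector<double> y(2); y[0] = 1.0; y[1] = 1.0;
  const size_t size = 2; // min(x.size(), y.size())
  // common prefix: (1-1) + (2-1), accumulated with std::plus
  double dot = std::inner_product(x.begin(), x.begin() + size, y.begin(), 0.0,
                                  std::plus<double>(), std::minus<double>());
  // tail of the longer vector, combined with a default-constructed 0.0
  for (size_t i = size; i < x.size(); ++i) dot += x[i] - 0.0;
  std::cout << dot << std::endl; // (1-1) + (2-1) + (3-0) = 4
  return 0;
}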
Example #7
/// Function that runs one iteration of the ParEGO algorithm.
void universe::iterate_ParEGO()
{
    
    int prior_it=0;
    int stopcounter=0;
    
    weights->changeWeights(iter, space->fWeightVectors);
    //fprintf(stdout, "%.2lf %.2lf weightvectors \n", space->fWeightVectors[0], space->fWeightVectors[1]);
    //fprintf(stdout, "fMEasureFIt\n");
    for(int i=1;i<=iter;i++)
    {
        space->fMeasuredFit[i] = space->Tcheby(&space->fCostVectors[i][1]);
        //fprintf(stdout,"%lg ", space->fMeasuredFit[i]);
        if(space->fMeasuredFit[i]<model->ymin)
        {
            model->ymin=space->fMeasuredFit[i];
            best_ever=i;
        }
    }
    //fprintf(stdout,"\n ymin: %lg\n", model->ymin);
    //fprintf(stdout,"best_ever: %d\n", best_ever);
    // choose the solutions used to update the DACE model
    if(iter>11*space->fSearchSpaceDim+24)
    {
        model->fCorrelationSize = 11*space->fSearchSpaceDim+24;

        space->chooseUpdateSolutions(iter, model->fCorrelationSize);
        model->pax=&space->fSelectedXVectors;
        model->pay=&space->fSelectedMeasuredFit;
    }
    else
    {
        model->fCorrelationSize=iter;
        model->pax=&space->fXVectors;
        model->pay=&space->fMeasuredFit;
    }
    
    model->buildDACE(weights->change, iter);
    
    start = clock();
    
    // BEGIN GA code
    double best_imp=INFTY;
    double* best_x = (double*)calloc(space->fSearchSpaceDim+1, sizeof(double));
    
    // could change the GA not to be an object; have to think about advantages and disadvantages
    // initial population size: 20
    GeneticAlgorithm ga(20, space->fSearchSpaceDim);
    ga.run(space, model, iter, best_x, &best_imp);
    
       // END GA code
    
    end = clock();
    cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
    // timestr << iter << " " << cpu_time_used << endl;
    
    //fprintf(stdout, "ax\n");
    for(int d=1;d<=space->fSearchSpaceDim;d++)
    {
        space->fXVectors[iter+1][d]=best_x[d];
        //fprintf(stdout, "%lg ", space->fXVectors[iter+1][d]);
    }
    free(best_x); // release the buffer allocated for the GA result
    //fprintf(stdout, "\n");
    space->fMeasuredFit[iter+1]=space->myfit(iter+1, iter+1);
    
    
    fprintf(stdout, "%d ", iter+1+prior_it);
    for(int d=1; d <=space->fSearchSpaceDim; d++)
        fprintf(stdout, "%lg ", space->fXVectors[iter+1][d]);
    fprintf(stdout, "decision\n");
    fprintf(stdout,"%d ", iter+1);
    for(int i=1;i<=space->fNoObjectives;i++)
    {
        fprintf(stdout, "%lg ", space->fCostVectors[iter+1][i]);
        fprintf(plotfile, "%lg ", space->fCostVectors[iter+1][i]);

        //fprintf(plotfile, "%.5lf ", ff[iter+1][i]);
    }
    fprintf(plotfile, "\n");
    fprintf(stdout, "objective\n");
    
    //cout<<"ymin"<<model->ymin<<"\n";

    improvements[iter+1]=improvements[iter];
    if (space->fMeasuredFit[iter+1]>=model->ymin)
    {
        // fprintf(stdout,"No actual improver found\n");
        stopcounter++;
    }
    
    else
    {
        improvements[iter+1]++;
        model->ymin = space->fMeasuredFit[iter+1];
        stopcounter=0;
        best_ever=iter+1;
    }
    
    iter++;
    
}
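For context, space->Tcheby scalarizes a multi-objective cost vector into the single fitness fMeasuredFit; in the ParEGO paper this is the augmented Tchebycheff function. A minimal 0-based sketch under that assumption (the code above uses 1-based arrays; rho is a small positive constant, 0.05 in the paper):

#include <algorithm>
#include <vector>

// Augmented Tchebycheff: f(y) = max_i(w_i * y_i) + rho * sum_i(w_i * y_i)
double tcheby_sketch(const std::vector<double>& w,
                     const std::vector<double>& y,
                     const double rho = 0.05) {
  double maxterm = 0.0, sum = 0.0;
  for (size_t i = 0; i < w.size(); ++i) {
    maxterm = std::max(maxterm, w[i] * y[i]);
    sum += w[i] * y[i];
  }
  return maxterm + rho * sum;
}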
Example #8
int main(int argc, char** argv) {
	po::variables_map cfg;
	if (!init_params(argc,argv,&cfg)) return 1;

	if (cfg.count("random_seed"))
		rng.reset(new MT19937(cfg["random_seed"].as<uint32_t>()));
	else
		rng.reset(new MT19937);

	// set variables
	lr = cfg["learningrate"].as<double>();
	hope_select = cfg["hope"].as<int>();
	fear_select = cfg["fear"].as<int>();
	optimizer = cfg["optimizer"].as<int>();
	freeze = cfg.count("freeze");
	if (freeze) {
		const vector<string>& ffstrs = cfg["freeze"].as<vector<string> >();
		stringstream ffss;
		ffss << "frozen features: ";
		for (vector<string>::const_iterator ffit=ffstrs.begin();ffit!=ffstrs.end();++ffit) {
			frozen_features.push_back(FD::Convert(*ffit));
			ffss << *ffit << " ";
		}
		cerr << ffss.str() << endl;
	}
	scaling = cfg["scaling"].as<int>();
	scalingfactor = cfg["scalingfactor"].as<double>();
	cerr << "scaling="<< scaling << " scalingfactor=" << scalingfactor << endl;

	// setup decoder
	Decoder* decoder = setupDecoder(cfg);
	if (!decoder) {
		cerr << "error while loading decoder with" << cfg["decoder_config"].as<string>() << "!\n";
		return 1;
	}
	TrainingObserver observer;
	// get reference to decoder weights
	vector<weight_t>& decoder_weights = decoder->CurrentWeightVector();
	// the SMT weights (to be optimized)
	if (cfg.count("weights")) {
		Weights::InitFromFile(cfg["weights"].as<string>(), &decoder_weights);
		Weights::InitSparseVector(decoder_weights, &w);
	} else {
		cerr << "starting with EMPTY weights!\n";
	}
	// the weight vector that gives the oracle
	loadRelevanceWeights(cfg["rweights"].as<string>(), relw);
	negrelw -= relw; // negrelw starts empty, so this sets negrelw = -relw
	relw_scaled = relw;
	// initial scaling
	if (scaling != 0) scaleRelevanceWeights(scalingfactor);

	// output some vector stats
	cerr << "W_REL=" << relw << endl;
	cerr << "W_REL_SCALED=" << relw_scaled << endl;
	cerr << "|W_REL|=" << relw_scaled.size() << endl;
	cerr << "|W_SMT|=" << w.size() << endl;

	cerr << "hope selection: " << hope_select << endl;
	const string input = decoder->GetConf()["input"].as<string>();
	cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
	ReadFile in_read(input);
	istream *in = in_read.stream();
	assert(*in);
	string id, sentence;
	int cur_sent = 0;
	unsigned lc = 0; // line count

	double objective=0;
	double tot_loss = 0;
	WeightVector avg_w = w;
	//SparseVector<double> tot;
	//SparseVector<double> oldw = w;
	//tot.clear();
	//tot += w;

	while(*in >> id) {

		in->ignore(1, '\t');
		getline(*in, sentence);
		if (sentence.empty() || id.empty()) continue;

		cerr << "\nID="<<id << endl;
		decoder->SetId(cur_sent);
		decoder->Decode(sentence, &observer); // decode with decoder_weights
		cur_sent = observer.GetCurrentSent();
		Hypergraph hg = observer.GetCurrentForest();

		vector<boost::shared_ptr<HypothesisInfo> > S;
		MAX_REL = std::numeric_limits<double>::lowest();
		MIN_REL = std::numeric_limits<double>::max();

		// get viterbi
		boost::shared_ptr<HypothesisInfo> viterbi = MakeHypothesisInfo(hg);

		// get the true oracle (sets max_rel)
		hg.Reweight(relw);
		boost::shared_ptr<HypothesisInfo> oracle = MakeHypothesisInfo(hg);
		oracle->oracle = oracle;
		oracle->computeCost();

		// get the worst derivation (to get min_rel)
		hg.Reweight(negrelw);
		boost::shared_ptr<HypothesisInfo> worst = MakeHypothesisInfo(hg);
		worst->oracle = oracle;
		worst->computeCost();

		if (hope_select == 1) { // hope
			hg.Reweight(w + relw_scaled);
			S.push_back(MakeHypothesisInfo(hg));
			S[0]->oracle = oracle;
			S[0]->computeCost();
		} else { // true oracle
			S.push_back(oracle);
		}
		// S contains now ONE (hope/oracle) hypothesis
		S[0]->computeLoss();
		boost::shared_ptr<HypothesisInfo> good = S[0];

		viterbi->oracle = oracle;
		viterbi->computeCost();
		viterbi->computeLoss();

		cerr << "min_rel=" << MIN_REL << " max_rel=" << MAX_REL << endl;
		cerr << "S[0]=" << S[0] << endl;

		boost::shared_ptr<HypothesisInfo> fear;

		if (optimizer == 4) { // PA update (single dual coordinate step)
			cerr << "PA MIRA (single dual coordinate step)\n";

			hg.Reweight(w - relw_scaled);
			fear = MakeHypothesisInfo(hg);
			fear->oracle = oracle;
			fear->computeCost();
			fear->computeLoss();
			cerr << "LOSS: " << fear->loss;
			if (fear->loss > 0.0) {
				double diffsqnorm = (good->features - fear->features).l2norm_sq();
				double delta;
				if (diffsqnorm > 0) {
					delta = fear->loss / (diffsqnorm);
					if (delta > lr) delta = lr;
					w += good->features * delta;
					w -= fear->features * delta;
				}
			}

		} else if (optimizer == 1) {// sgd - nonadapted step size
			cerr << "SGD\n";

			if (fear_select == 1) {
				hg.Reweight(w - relw_scaled);
				fear = MakeHypothesisInfo(hg);
			} else if (fear_select == 2) {
				fear = worst;
			} else if (fear_select == 3) {
				fear = viterbi;
			}
			w += good->features * lr;
			w -= fear->features * lr;

		} else if (optimizer == 2) { // PA MIRA with selection from  cutting plane
			cerr << "PA MIRA with Selection from Cutting Plane\n";

			hg.Reweight(w - relw_scaled);
			fear = MakeHypothesisInfo(hg);
			fear->oracle = oracle;
			fear->computeCost();
			fear->computeLoss();
			if (fear->loss < 0) {
				cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n";
				abort();
			}
			if (fear->loss > good->loss + SMO_EPS) {
				S.push_back(fear);
				OptimizeSet(S, 1); // only one iteration with a set of two constraints
			} else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; }

		} else if (optimizer == 3) { // Cutting Plane MIRA
			cerr << "Cutting Plane MIRA\n";

			unsigned cp_iter=0; // Cutting Plane Iteration
			bool again = true;
			while (again && cp_iter<CP_ITER) {
				again = false;
				cerr << "CuttingPlane: " << cp_iter << endl;
				// find a fear derivation
				hg.Reweight(w - relw_scaled);
				fear = MakeHypothesisInfo(hg);
				fear->oracle = oracle;
				fear->computeCost();
				fear->computeLoss();
				if (fear->loss < 0) {
					cerr << "FEAR LOSS < 0! THIS SHOULD NOT HAPPEN!\n";
					//abort();
				}
				// find max loss hypothesis
				double max_loss_in_set = (*std::max_element(S.begin(), S.end(), lossComp))->loss;
				if (fear->loss > max_loss_in_set + SMO_EPS) {
					cerr << "Adding new fear " << fear << " to S\n";
					S.push_back(fear);
					OptimizeSet(S);
					again = true;
				} else { cerr << "constraint not violated. fear loss:" << fear->loss << "\n"; }
				cp_iter++;
				// update losses
				//for(unsigned i=0;i<S.size();i++) S[i]->computeLoss();
			}
		}

		cerr << "|W|=" << w.size() << endl;
		tot_loss += relscale(viterbi->rel);
		//print objective after this sentence
		//double w_change = (w - oldw).l2norm_sq();
		//double temp_objective = 0.5 * w_change;// + max_step_size * max_fear;
		for (unsigned u = 0; u != S.size(); ++u) {
			cerr << "alpha=" << S[u]->alpha << " loss=" << S[u]->loss << endl;
			//temp_objective += S[u]->alpha * S[u]->loss;
		}
		//objective += temp_objective;
		//cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl;

		//tot += w;
		++lc;
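		// running average of all weight vectors seen so far:
		// avg_w <- (lc * avg_w + w) / (lc + 1)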
		avg_w *= lc;
		avg_w = (w + avg_w) / (lc+1);

		// set decoder weights for next sentence
		decoder_weights.clear();
		w.init_vector(&decoder_weights);
		// rescale relevance weights to balance with new model after the update
		if (scaling == 2) {
			scaleRelevanceWeights(scalingfactor);
			cerr << "W_REL_SCALED=" << relw_scaled << endl;
		}

		// viterbi 2 for debugging
		//hg.Reweight(w);
		//boost::shared_ptr<HypothesisInfo> viterbi2 = MakeHypothesisInfo(hg);
		//viterbi2->oracle = oracle;
		//viterbi2->computeCost();
		//viterbi2->computeLoss();
		//fear->computeLoss();
		//viterbi->computeLoss();
		//good->computeLoss();
		cerr << "FEAR : " << fear << " \n" << TD::GetString(fear->hyp) << endl;
		cerr << "BEST : " << viterbi << " \n" << TD::GetString(viterbi->hyp) << endl;
		//cerr << "BEST2: " << viterbi2 << " \n" << TD::GetString(viterbi2->hyp) << endl;
		cerr << "HOPE : " << good << " \n" << TD::GetString(good->hyp) << endl;

		cout << id << " ||| " << TD::GetString(fear->hyp) << " ||| " << TD::GetString(viterbi->hyp) << " ||| " << TD::GetString(good->hyp) << endl;

		S.clear();
		fear.reset();
		viterbi.reset();
		//viterbi2.reset();
		good.reset();
		worst.reset();
		oracle.reset();

	}

    //cerr << "FINAL OBJECTIVE: "<< objective << endl;
    cerr << "Translated " << lc << " sentences\n";
    cerr << " [AVG METRIC LAST PASS="******"]\n";
    //tot_loss = 0;

	decoder_weights.clear();
	w.init_vector(&decoder_weights);
	//Weights::ShowLargestFeatures(decoder_weights);
	// write weights
	int node_id = rng->next() * 100000;
	cerr << " Writing model to " << node_id << endl;
	ostringstream os;
	os << cfg["weights_output"].as<string>() << "/last." << node_id;
	string msg = "HGMIRA tuned weights ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc);
	Weights::WriteToFile(os.str(), decoder_weights, true, &msg);

	//SparseVector<double> x = tot;
	//x /= lc+1;
	ostringstream sa;
	string msga = "HGMIRA tuned weights AVERAGED ||| " + boost::lexical_cast<std::string>(node_id) + " ||| " + boost::lexical_cast<std::string>(lc);
	sa << cfg["weights_output"].as<string>() << "/avg." << node_id;
	avg_w.init_vector(&decoder_weights);
	Weights::WriteToFile(sa.str(), decoder_weights, true, &msga);


	delete decoder;
	cerr << "\ndone.\n";
	return 0;

}
Example #9
// rescales relw_scaled to the current norm of w, multiplied by the scalingfactor
void scaleRelevanceWeights(const double scalingfactor) {
	const double rel_norm = relw_scaled.l2norm();
	if (rel_norm == 0) return;            // nothing to scale
	relw_scaled /= rel_norm;              // to length 1: relw / ||relw||
	const double cur_w_norm = w.l2norm(); // ||w||
	if (cur_w_norm == 0) return;
	relw_scaled *= cur_w_norm * scalingfactor; // scale to length ||w|| * scalingfactor
}
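A worked sketch of the arithmetic with invented numbers: for relw_scaled = (3, 4) we have ||relw_scaled|| = 5, so normalization yields (0.6, 0.8); with ||w|| = 10 and scalingfactor = 0.5 the result is (0.6, 0.8) * 10 * 0.5 = (3, 4), i.e. a vector of length ||w|| * scalingfactor = 5.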