Example #1
// Score a single value with a freshly initialized Scorer.
float score_value(
        const Shared & shared,
        const Value & value,
        rng_t & rng) const {
    Scorer scorer;
    scorer.init(shared, *this, rng);
    return scorer.eval(shared, value, rng);
}
Example #2
// Score the board by scanning words horizontally, then vertically.
// A word is committed whenever a non-letter cell (or the end of a
// row/column) is reached.
int BoardScore() {
    int finalScore = 0;
    Scorer s;
    int row;
    int col;

    // Horizontal pass: walk each row left to right.
    for( row = 0; row < 15; ++row ) {
        for( col = 0; col < 15; ++col ) {
            int cell = row * 15 + col;
            char c = board_state[cell];
            if( isalpha( static_cast<unsigned char>(c) ) ) {
                s.AddLetter( c, cell );
            } else {
                finalScore += s.Commit();
            }
        }
        finalScore += s.Commit(); // a word may end at the row boundary
    }
    // Vertical pass: walk each column top to bottom.
    for( col = 0; col < 15; ++col ) {
        for( row = 0; row < 15; ++row ) {
            int cell = row * 15 + col;
            char c = board_state[cell];
            if( isalpha( static_cast<unsigned char>(c) ) ) {
                s.AddLetter( c, cell );
            } else {
                finalScore += s.Commit();
            }
        }
        finalScore += s.Commit(); // a word may end at the column boundary
    }
    return finalScore;
}
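BoardScore assumes a global 15×15 board_state and a Scorer that buffers letters with AddLetter and scores-and-resets the buffered word on Commit. A minimal hypothetical sketch of that contract, with a toy scoring rule (one point per letter, words of two or more letters only) invented purely for illustration:

#include <string>

// Hypothetical board: 225 cells, non-letters mark empty cells.
char board_state[15 * 15];

class Scorer {
public:
    // Buffer one letter of the word currently being scanned.
    void AddLetter(char c, int /*cell*/) { word_ += c; }

    // Score and reset the buffered word. Toy rule: one point per letter,
    // counting only words of length >= 2.
    int Commit() {
        int score = (word_.size() >= 2) ? static_cast<int>(word_.size()) : 0;
        word_.clear();
        return score;
    }

private:
    std::string word_;
};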
Example #3
typename Kernel::Model MaxConsensus
(
  const Kernel &kernel,
  const Scorer &scorer,
  std::vector<uint32_t> *best_inliers = nullptr,
  uint32_t max_iteration = 1024
)
{
  const uint32_t min_samples = Kernel::MINIMUM_SAMPLES;
  const uint32_t total_samples = kernel.NumSamples();

  size_t best_num_inliers = 0;
  typename Kernel::Model best_model;

  // Test if we have sufficient points for the kernel.
  if (total_samples < min_samples) {
    if (best_inliers) {
      best_inliers->resize(0);
    }
    return best_model;
  }

  // In this robust estimator, the scorer always works on all the data points
  // at once. So precompute the list ahead of time.
  std::vector<uint32_t> all_samples(total_samples);
  std::iota(all_samples.begin(), all_samples.end(), 0);

  // Random number generator configuration
  std::mt19937 random_generator(std::mt19937::default_seed);

  std::vector<uint32_t> sample;
  for (uint32_t iteration = 0; iteration < max_iteration; ++iteration) {
    UniformSample(min_samples, random_generator, &all_samples, &sample);

    std::vector<typename Kernel::Model> models;
    kernel.Fit(sample, &models);

    // Compute costs for each fit.
    for (const auto& model_it : models) {
      std::vector<uint32_t> inliers;
      scorer.Score(kernel, model_it, all_samples, &inliers);

      if (best_num_inliers < inliers.size()) {
        best_num_inliers = inliers.size();
        best_model = model_it;
        if (best_inliers) {
          best_inliers->swap(inliers);
        }
      }
    }
  }
  return best_model;
}
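A usage sketch for MaxConsensus: the LineKernel/PointScorer pair below fits a 2D line y = a*x + b from two sampled points and counts points within a residual threshold as inliers, and the UniformSample helper is written to match the call above. All three are hypothetical stand-ins (assuming MaxConsensus is declared with a template <typename Kernel, typename Scorer> header), not the real library types:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <random>
#include <utility>
#include <vector>

// Hypothetical sampler matching the call above: shuffle the index list
// and copy the first num_samples indices into *sample.
void UniformSample(uint32_t num_samples, std::mt19937 &rng,
                   std::vector<uint32_t> *all_samples,
                   std::vector<uint32_t> *sample) {
  std::shuffle(all_samples->begin(), all_samples->end(), rng);
  sample->assign(all_samples->begin(), all_samples->begin() + num_samples);
}

struct LineKernel {
  using Model = std::pair<double, double>;     // (slope a, intercept b)
  static constexpr uint32_t MINIMUM_SAMPLES = 2;

  std::vector<double> xs, ys;                  // the data points
  uint32_t NumSamples() const { return static_cast<uint32_t>(xs.size()); }

  // Fit a line through the two sampled points.
  void Fit(const std::vector<uint32_t> &sample,
           std::vector<Model> *models) const {
    const double x0 = xs[sample[0]], y0 = ys[sample[0]];
    const double x1 = xs[sample[1]], y1 = ys[sample[1]];
    if (x0 == x1) return;                      // degenerate pair: no model
    const double a = (y1 - y0) / (x1 - x0);
    models->push_back(Model(a, y0 - a * x0));
  }
};

struct PointScorer {
  double threshold = 1.0;                      // max residual for an inlier
  void Score(const LineKernel &kernel, const LineKernel::Model &model,
             const std::vector<uint32_t> &samples,
             std::vector<uint32_t> *inliers) const {
    for (uint32_t i : samples) {
      const double r =
          std::abs(kernel.ys[i] - (model.first * kernel.xs[i] + model.second));
      if (r < threshold) inliers->push_back(i);
    }
  }
};

With those pieces, auto model = MaxConsensus(kernel, scorer, &inliers); should recover the dominant line even with gross outliers in xs/ys.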
Example #4
/*
 * calculates the weighted term vector according to the given scoring method
 */
void Patent::calc_target_wvec(DfTable& dft, Scorer& scorer) {

	target_wvec.clear();
	const double len = vutils::vector_values_sum(target_tfvec).as_float();
	for(TermVector::iterator it = target_tfvec.begin(); it != target_tfvec.end(); ++it) {
		if (dft[it->first] == 0.0) {
			cerr << ucid << ": suspicious df for '" << TD::Convert(it->first)
				 << "': " << dft.mTable[it->first]
				 << ". Skipping this term!" << endl;
			continue; // actually skip the term, as the warning says
		}
		target_wvec[it->first] = scorer.computeWeight(it->second, dft[it->first], len);
	}
	// normalize weighted term vector
	vutils::normalize_vector(target_wvec);

}
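calc_target_wvec delegates the weighting to Scorer::computeWeight(tf, df, len). A minimal sketch of a tf-idf style implementation, assuming the scorer carries the collection size N; the formula is one standard variant, not necessarily the one this project uses:

#include <cmath>

struct TfIdfScorer {
	double N; // assumed collection size, set elsewhere

	// Length-normalized term frequency times a smoothed inverse
	// document frequency.
	double computeWeight(double tf, double df, double len) const {
		if (df <= 0.0 || len <= 0.0) return 0.0; // guard against bad input
		return (tf / len) * std::log((N + 1.0) / (df + 0.5));
	}
};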
Example #5
typename Kernel::Model RANSAC(
  const Kernel &kernel,
  const Scorer &scorer,
  std::vector<size_t> *best_inliers = nullptr,
  size_t *best_score = nullptr, // Found number of inliers
  double outliers_probability = 1e-2)
{
  assert(outliers_probability < 1.0);
  assert(outliers_probability > 0.0);
  size_t iteration = 0;
  const size_t min_samples = Kernel::MINIMUM_SAMPLES;
  const size_t total_samples = kernel.NumSamples();

  size_t max_iterations = 100;
  const size_t really_max_iterations = 4096;

  size_t best_num_inliers = 0;
  double best_inlier_ratio = 0.0;
  typename Kernel::Model best_model;

  // Test if we have sufficient points for the kernel.
  if (total_samples < min_samples) {
    if (best_inliers) {
      best_inliers->resize(0);
    }
    return best_model;
  }

  // In this robust estimator, the scorer always works on all the data points
  // at once. So precompute the list ahead of time [0,..,total_samples-1].
  std::vector<size_t> all_samples(total_samples);
  std::iota(all_samples.begin(), all_samples.end(), 0);

  std::vector<size_t> sample;
  for (iteration = 0;
       iteration < max_iterations &&
       iteration < really_max_iterations; ++iteration) {
    UniformSample(min_samples, &all_samples, &sample);

    std::vector<typename Kernel::Model> models;
    kernel.Fit(sample, &models);

    // Compute the inlier list for each fit.
    for (size_t i = 0; i < models.size(); ++i) {
      std::vector<size_t> inliers;
      scorer.Score(kernel, models[i], all_samples, &inliers);

      if (best_num_inliers < inliers.size()) {
        best_num_inliers = inliers.size();
        best_inlier_ratio = inliers.size() / double(total_samples);
        best_model = models[i];
        if (best_inliers) {
          best_inliers->swap(inliers);
        }
      }
      if (best_inlier_ratio) {
        max_iterations = IterationsRequired(min_samples,
                                            outliers_probability,
                                            best_inlier_ratio);
      }
    }
  }
  if (best_score)
    *best_score = best_num_inliers;
  return best_model;
}
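RANSAC adapts max_iterations via IterationsRequired, which the snippet does not include. The standard reasoning: with inlier ratio w and n samples per draw, one draw is all-inlier with probability w^n, so k draws all fail with probability (1 - w^n)^k; solving (1 - w^n)^k <= outliers_probability for k gives the sketch below, a common implementation assumed here rather than copied from this codebase:

#include <cmath>
#include <cstddef>

// Smallest k with (1 - w^n)^k <= p_fail, i.e. enough random draws that
// the chance of never sampling an all-inlier subset drops below p_fail.
static size_t IterationsRequired(std::size_t min_samples,
                                 double outliers_probability,
                                 double inlier_ratio) {
  const double w_n = std::pow(inlier_ratio, double(min_samples));
  if (w_n >= 1.0) return 1;                  // every draw succeeds
  return static_cast<size_t>(
      std::ceil(std::log(outliers_probability) / std::log(1.0 - w_n)));
}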
Example #6
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Convert alignments to the common format used by maligner dp.
// maligner_ix handles reverse alignments by reversing the reference (since we index the reverse of the reference)
// maligner_dp handles reverse alignments by reversing the query and aligning it to the forward of the reference.
// Therefore, we must carefully convert indices and orientation in this function to end up with a maligner_dp::Alignment.
maligner_dp::AlignmentVec convert_alignments(
  const RefAlignmentVec& ref_alignments,
  const MapWrapper& query,
  MapDB& ref_map_db,
  const Scorer& scorer) {

  using maligner_dp::Score;
  using maligner_dp::MatchedChunk;
  using maligner_dp::MatchedChunkVec;
  using maligner_dp::Chunk;
  using maligner_dp::ChunkVec;
  using maligner_dp::MapData;
  using maligner_dp::AlignmentRescaledScoreComp;

  maligner_dp::AlignmentVec alns;

  const MapData& query_map_data = query.map_data_;
  const Map& query_map = query.map_;

  bool is_circular = false;
  bool is_bounded = false;

  Score zero_score;

  //////////////////////////////////////////////////////////////////////
  // Our approach is to represent the query_chunks the same way the query map
  // would be represented in maligner_dp for forward and reverse alignments.
  // This means that query_chunks_forward and query_chunks_reverse both must have increasing indices,
  // but query_chunks_reverse has the chunk sizes in the *reverse* direction.
  ChunkVec query_chunks_forward;
  ChunkVec query_chunks_reverse;
  const size_t num_query_frags = query_map.frags_.size();
  // Note: with these bounds (1 .. num_query_frags - 2) only interior
  // fragments are converted, so is_boundary_chunk_query is always false here.
  for(size_t i = 1; i < num_query_frags - 1; i++) {

    const bool is_boundary_chunk_query = (i == 0) || (i == num_query_frags - 1);

    query_chunks_forward.emplace_back(i, i + 1, query_map.frags_[i], is_boundary_chunk_query);

    query_chunks_reverse.emplace_back(i, i + 1, query_map.frags_[i], is_boundary_chunk_query);
    query_chunks_reverse.back().reverse_coords(num_query_frags);

  }
  std::reverse(query_chunks_reverse.begin(), query_chunks_reverse.end());

  
  ///////////////////////////////////////////////////////////////////////////////////
  // In maligner_ix, ref alignments are oriented forward with respect to the query.
  // In maligner_dp, MatchedChunks must be constructed, oriented with respect to forward strand of reference.
  // This means if the alignment is reverse, we need to reverse the reference chunks,
  // and use the reverse representation of the query chunks.
  for(auto i = ref_alignments.begin(); i != ref_alignments.end(); i++) {
    
    const ReferenceAlignment& ref_alignment = *i;
    const Map * p_ref_map = ref_alignment.get_map();
    const MapWrapper& ref = ref_map_db.find(p_ref_map->name_)->second;
    const MapData& ref_map_data = ref.map_data_;
    const bool ref_is_circular = ref.is_circular();

    /////////////////////////////////////////////////////////////////////////////////////////
    // Extract the reference chunks as maligner_dp chunks.
    ChunkVec ref_chunks;
    for(auto rc = ref_alignment.chunks_.begin(); rc != ref_alignment.chunks_.end(); rc++) {

      const MapChunk* p_chunk = *rc;

      bool ref_chunk_is_boundary = ref_is_circular && !opt::ref_is_bounded && 
        ( (p_chunk->start_ == 0) || (p_chunk->end_ == ref.num_frags()) );

      ref_chunks.emplace_back(p_chunk->start_, p_chunk->end_, p_chunk->size_, ref_chunk_is_boundary);

    }

    // Orient the reference chunks forward if necessary
    ChunkVec * p_query_chunks = &query_chunks_forward;
    if(ref_alignment.is_reverse()) {
      std::reverse(ref_chunks.begin(), ref_chunks.end());
      p_query_chunks = &query_chunks_reverse;
    }


    // std::cerr << "query:\n\t" << *p_query_chunks << "\n"
    //           << "ref:\n\t" << ref_chunks << "\n"
    //           << "is_forward: " << ref_alignment.is_forward()
    //           << "\n";

    /////////////////////////
    // Build matched chunks
    const ChunkVec& query_chunks = *p_query_chunks;
    if (query_chunks.size() != ref_chunks.size()) {
      std::cerr << "query_frags: " << query_map.frags_.size() << " query_chunks: " << query_chunks.size() << " ref_chunks: " << ref_chunks.size() << std::endl;
      throw std::runtime_error("query_chunks size does not match ref_chunks size.");
    }

    MatchedChunkVec matched_chunks;
    Score total_score;
    const size_t num_matched_chunks = query_chunks.size();
    for(size_t ci = 0; ci < num_matched_chunks; ci++) { // ci: avoid shadowing the outer iterator i
      Score score = scorer.compute_score(query_chunks[ci], ref_chunks[ci]);
      total_score += score;
      matched_chunks.emplace_back(query_chunks[ci], ref_chunks[ci], score);
    }

    /////////////////////////
    // Construct Alignment
    bool aln_is_forward = ref_alignment.is_forward();
    maligner_dp::Alignment aln(matched_chunks, total_score, query_map_data, ref_map_data, aln_is_forward);
    if (!aln_is_forward) aln.flip_query_coords();
    aln.add_alignment_locs(query.ix_to_locs_, ref.ix_to_locs_);
    alns.push_back(std::move(aln));

  }

  // Sort the alignments by total rescaled score
  std::sort(alns.begin(), alns.end(), AlignmentRescaledScoreComp());

  alns = sift_alignments(alns, ref_map_db);

  return alns;

}
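The orientation bookkeeping above hinges on reverse_coords: a chunk spanning fragments [start, end) of a map with n fragments corresponds, on the reversed map, to the span [n - end, n - start). A hypothetical sketch of that transform on a minimal Chunk (the real maligner_dp::Chunk carries more state; only the index flip is shown):

#include <cstddef>

struct Chunk {
  std::size_t start_, end_;  // fragment span [start_, end_)
  int size_;                 // total length of the spanned fragments
  bool is_boundary_;

  // Re-express the span in the coordinates of the reversed map with n
  // fragments: forward position i maps to n - 1 - i, so the half-open
  // span [start_, end_) becomes [n - end_, n - start_).
  void reverse_coords(std::size_t n) {
    const std::size_t new_start = n - end_;
    end_ = n - start_;
    start_ = new_start;
  }
};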
Example #7
int main(int argc, char** argv) {

	// handle parameters
	po::variables_map cfg;
	if (!init_params(argc,argv, &cfg)) exit(1); // something is wrong

	// set number of jobs
	#ifdef _OPENMP
	if (cfg.count("jobs")) {
		omp_set_num_threads(cfg["jobs"].as<int>());
		cerr << "CLIR::Using " << cfg["jobs"].as<int>() << " threads.\n";
	}
	#endif

	const bool no_qtf = cfg.count("no-qtf");
	string run_id = cfg["run-id"].as<string>();
	const int K = cfg["K"].as<int>();

	// load queries
	vector<Query> queries;
	CLIR::loadQueries(cfg["queries"].as<string>(), queries, cfg.count("psq"), cfg.count("text-input"));
	size_t queries_size = queries.size();
	size_t done = queries_size >= 10 ? queries_size / 10 : 1; // progress step; avoid modulo by zero
	cerr.precision(1);

	// setup result vector
	vector<vector<Score> > results(queries_size);

	if (cfg.count("index")) {

		Index::Index idx;
		Index::LoadIndex(cfg["index"].as<string>(), idx);
		IndexScorer* ranker = CLIR::setupIndexScorer(cfg["model"].as<string>(), &idx);
		cerr << "CLIR::Retrieving from in-memory inverted index ...";
		# pragma omp parallel for
		for (int i = 0; i < (int)queries_size; ++i) {
			vector<double> scores(idx.NumberOfDocuments(), .0);
			ranker->score(queries[i], scores, no_qtf);
			Scores kbest(K);
			for (int d = 0; d < idx.NumberOfDocuments(); ++d) {
				if (scores[d] > 0) {
					kbest.update( Score(idx.GetDocID(d), prob_t(scores[d])) );
				}
			}
			results[i] = kbest.k_largest();
			if (i % done == 0) cerr << (100 * i / queries_size) << "%..";
		}
		delete ranker;
		cerr << "ok.\n";

	} else if (cfg.count("documents")) {

		// load DfTable
		DfTable dft(cfg["dftable"].as<string>());
		cerr << "CLIR::DF table loaded (" << dft.size() << " entries)." << endl;
		// load document collection
		vector<CLIR::Document> documents;
		double avgDocLen = CLIR::loadDocuments(cfg["documents"].as<string>(), documents);
		size_t N = documents.size();
		if (N < dft.mMaxDf) {
			cerr << "CLIR::maxDf=" << dft.mMaxDf << " > N=" << N << ", setting N to maxDf.\n";
			N = dft.mMaxDf;
		}

		Scorer* ranker = CLIR::setupScorer(cfg["model"].as<string>(), N, avgDocLen, &dft);
		cerr << "CLIR::Retrieving from in-memory documents ...";
		# pragma omp parallel for
		for (int i = 0; i < (int)queries_size; ++i) {
			const Query& query = queries[i];
			Scores scores(K);
			for (size_t d = 0; d < N; ++d) {
				Document& doc = documents[d];
				scores.update( Score(doc.id_, ranker->score(query, doc, no_qtf)) );
			}
			results[i] = scores.k_largest();
			if (i % done == 0) cerr << (100 * i / queries_size) << "%..";
		}
		delete ranker;
		cerr << "ok.\n";

	} else {

		// load DfTable
		DfTable dft(cfg["dftable"].as<string>());
		cerr << "CLIR::DF table loaded (" << dft.size() << " entries)." << endl;
		size_t N = cfg["N"].as<int>();
		if (N < dft.mMaxDf) {
			cerr << "CLIR::maxDf=" << dft.mMaxDf << " > N=" << N << ", setting N to maxDf.\n";
			N = dft.mMaxDf;
		}
		double avgDocLen = cfg["avg_len"].as<double>();
		cerr << "CLIR::Retrieving from STDIN documents ...";
		
		Scorer* ranker = CLIR::setupScorer(cfg["model"].as<string>(), N, avgDocLen, &dft);
		vector<Scores> scores (queries_size, Scores(K)); // score vectors for each query
		string docid, raw;
		int len, c = 0;
		cerr << "reporter:status:scanned=" << c << "\n";
		while (cin >> docid) {
			cin.ignore(1,'\t');
			cin >> len;
			cin.ignore(1,'\t');
			getline(cin, raw);

			if (docid.size() == 0 || len <= 0 || raw.size() == 0)
				continue;

			Document doc(vutils::read_vector(raw), docid, len);

			// for each query compute score between current document and query
			# pragma omp parallel for
			for ( size_t i = 0 ; i < queries_size ; ++i ) {
				scores[i].update( Score(doc.id_, ranker->score(queries[i], doc, no_qtf) ) );
			}

			c++;
			if (c % 10 == 0) cerr << "reporter:status:scanned=" << c << "\n";

		}

		delete ranker;

		// create kbest lists
		for ( size_t i = 0; i < scores.size(); ++i ) {
			results[i] = scores[i].k_largest();
		}
	}

	// retrieval done; output results
	WriteFile out(cfg["output"].as<string>());
	if (cfg.count("qrels")) {
		cerr << "CLIR::Evaluating results ...";
		CLIR::IRScorer irs(queries_size, cfg["qrels"].as<string>());
		# pragma omp parallel for
		for (int i = 0; i < (int)queries_size; ++i) {
			irs.evaluateIthSegment(i, queries[i].id(), results[i]);
		}
		*out << "num_q\t" << irs.N() << "\tnum_rel_ret\t" << irs.NUMRELRET() << "\tMAP\t" << irs.MAP() << "\tNDCG\t" << irs.NDCG() << endl;
	} else {
		cerr << "CLIR::Writing results ...";
		for (int i = 0; i < (int)queries_size; ++i) {
			CLIR::writeResult(*out, queries[i], results[i], run_id);
		}
	}

	cerr << "ok.\nCLIR::done.\n";

}
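The retrieval loops above depend on a bounded k-best container: Scores kbest(K) retains only the K highest-scoring entries passed to update(), and k_largest() returns them best-first. A hypothetical sketch using a min-heap (the project's real Score/Scores types are assumed, not shown; only the bounded-heap pattern is illustrated):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <string>
#include <vector>

struct Score {
	std::string docid;
	double score;
	bool operator>(const Score& o) const { return score > o.score; }
};

class Scores {
public:
	explicit Scores(size_t k) : k_(k) {}

	// Keep the candidate only if it beats the current k-th best.
	// heap_ is a min-heap, so heap_.front() is the smallest retained score.
	void update(const Score& s) {
		if (k_ == 0) return;
		if (heap_.size() < k_) {
			heap_.push_back(s);
			std::push_heap(heap_.begin(), heap_.end(), std::greater<Score>());
		} else if (s > heap_.front()) {
			std::pop_heap(heap_.begin(), heap_.end(), std::greater<Score>());
			heap_.back() = s;
			std::push_heap(heap_.begin(), heap_.end(), std::greater<Score>());
		}
	}

	// Return the retained scores, best first.
	std::vector<Score> k_largest() const {
		std::vector<Score> out(heap_);
		std::sort(out.begin(), out.end(), std::greater<Score>());
		return out;
	}

private:
	size_t k_;
	std::vector<Score> heap_;
};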
Example #8
File: main.cpp Project: CCJY/coliru
 virtual void CloseAfterRunEnd(){
     scorer.CloseAfterRunEnd();
 }
Example #9
File: main.cpp Project: CCJY/coliru
 virtual void SetEvent(size_t new_incident){
     scorer.SetEvent(new_incident);
 }
Example #10
int main()
{
// #############  set some environment variables here  ####################

	// set the project path here
	string project_path = string("/home/panos/Desktop/LINUX_BACKUP/Opinion_MIning_and_Sentiment_Analysis/project/");

	// set the path for the judgment files	
	string judge_path = project_path+"dataset/cars/judgments/2008/";

	// set the reviews' source directory
	string reviews_src_path = project_path+"dataset/cars/data/2008/";

	// set the directory for the parsed reviews
	string reviews_path = project_path+"bin/reviews/";

	// set the path to read the reviews' inverted index
	string ii_path = project_path+"bin/invertedIndexes/"+"test2008ii";

	// set the WordNet path
	string wordnet_path = "/home/panos/Desktop/LINUX_BACKUP/Opinion_MIning_and_Sentiment_Analysis/project/bin/WordNet/dict/";

	// set the number of judgment cases
	int jud_numbers = 100;

	// set the results filename
	string results_file = "results.txt";

// #######################################################################

	// help functions
	Utils* utils = new Utils();

	// parse reviews
	utils->parse_reviews(reviews_src_path, reviews_path);
	cout << "parse reviews ....... [ok]" << endl;

	// get judgment files
	vector<string> files;
	utils->getFilesList(judge_path, files, true);

	// open file to write the results
	ofstream results (results_file.c_str());
	if (results.is_open())
	{
		int result_counter = 1;

		// load WordNet
		WordNet* wordnet = new WordNet(wordnet_path);
		
		// load Pos Tagger
		Tagger* pos_tagger = new Tagger();

		// create an invertedIndexer instance
		InvertedIndexer* invertedIndexer = new InvertedIndexer();

		// load the invertedIndex from the disk to memory
		invertedIndexer->fileToInvertedIndex(ii_path);
		cout << "load invertedIndex of reviews ...... [ok]" << endl;

		// for every judgment
		for(int it_j=0; it_j<jud_numbers; it_j++)
		{
			string filename = string(files[it_j]);

			typedef vector< boost::tuple<string, string> > judge_list;
			typedef vector< boost::tuple<string, string> > query_list;

			Judgment* my_judgment = new Judgment(filename.c_str());

			// parse judgment and keep it in memory
			query_list query = my_judgment->get_query();
			judge_list judge = my_judgment->get_judge();
			
			vector<string> category = my_judgment->get_cat();
			string category_str = utils->getQueryStr(category);
	
			// aspect expansion using WordNet
			vector<string> aspects;
			for(size_t i=0; i<category.size(); i++)
			{
				string aspect = category.at(i);
				vector<string> synonyms = wordnet->getSynonyms(aspect);

				aspects.push_back(aspect);
				for(size_t j=0; j<synonyms.size(); j++)
				{
					aspects.push_back(synonyms.at(j));
				}
			}

			// score entities using BM25 value of review file on query aspects
			Scorer* scorer = new Scorer();
			vector<boost::tuple<string, double> > scores;

			for(judge_list::const_iterator i = judge.begin(); i != judge.end(); ++i)
			{
				string entity = i->get<0>();
				double score = 0.0;

				string entityPath = reviews_path + entity;

				// calculate the BM25 value of the review file (entityPath) to the aspects query
				score = invertedIndexer->getBM25(entityPath, aspects);

				scores.push_back( boost::tuple<std::string, double>( entity, score ) );
			}

			// get ideal scores
			vector<boost::tuple<string, double> > i_scores = my_judgment->getIdealScores();

			// calculate the nDCG value of the ranking list, on the top 10 results
			double nDCG = scorer->getNDCG(scores, i_scores);

			cout << " nDCG= " << nDCG << endl;

			// write results to file
			results << result_counter << "\t" << nDCG << "\n";
			result_counter++;

			// release per-judgment allocations
			delete scorer;
			delete my_judgment;
		}

		results.close();

		// release the helpers allocated above
		delete invertedIndexer;
		delete pos_tagger;
		delete wordnet;
	}
	else
	{
		cout << "Unable to open file to write results" << endl;
	}

	utils->deleteFiles("reviews");
	delete utils;
}
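The judgment loop evaluates each BM25 ranking with scorer->getNDCG(scores, i_scores) over the top 10. A sketch of one common nDCG formulation, assuming the double in each tuple is a graded relevance score: rank entities by system score, accumulate the judged gain rel / log2(rank + 1), and divide by the same sum for the ideal ordering (the real getNDCG may differ, e.g. in its gain function):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <map>
#include <string>
#include <vector>
#include <boost/tuple/tuple.hpp>

typedef std::vector<boost::tuple<std::string, double> > ScoreList;

struct ByScoreDesc {
	bool operator()(const boost::tuple<std::string, double>& a,
	                const boost::tuple<std::string, double>& b) const {
		return a.get<1>() > b.get<1>();
	}
};

// DCG@k of a ranking: each ranked entity contributes its judged gain
// discounted by log2(rank + 1).
static double dcg_at_k(const ScoreList& ranking,
                       const std::map<std::string, double>& rel, size_t k) {
	double dcg = 0.0;
	for (size_t i = 0; i < ranking.size() && i < k; ++i) {
		std::map<std::string, double>::const_iterator it = rel.find(ranking[i].get<0>());
		const double gain = (it != rel.end()) ? it->second : 0.0;
		dcg += gain / (std::log(double(i + 2)) / std::log(2.0)); // rank is i + 1
	}
	return dcg;
}

double getNDCG(ScoreList scores, ScoreList ideal, size_t k = 10) {
	// gain lookup from the ideal judgments
	std::map<std::string, double> rel;
	for (size_t i = 0; i < ideal.size(); ++i)
		rel[ideal[i].get<0>()] = ideal[i].get<1>();

	std::sort(scores.begin(), scores.end(), ByScoreDesc()); // system ranking
	std::sort(ideal.begin(), ideal.end(), ByScoreDesc());   // ideal ranking

	const double idcg = dcg_at_k(ideal, rel, k);
	return idcg > 0.0 ? dcg_at_k(scores, rel, k) / idcg : 0.0;
}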
Example #11
int main(int argc, char** argv) {
    //defaults
    string scorerType("BLEU");
    string scorerConfig("");
    string referenceFile("");
    string nbestFile("");
    string scoreDataFile("statscore.data");
    string featureDataFile("features.data");
    string prevScoreDataFile("");
    string prevFeatureDataFile("");
    bool binmode = false;
    int verbosity = 0;
    int c;
    while ((c=getopt_long (argc,argv, "s:c:r:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
        switch(c) {
            case 's':
                scorerType = string(optarg);
                break;
            case 'c':
                scorerConfig = string(optarg);
                break;
            case 'r':
                referenceFile = string(optarg);
                break;
            case 'b':
                binmode = true;
                break;
            case 'n':
                nbestFile = string(optarg);
                break;
            case 'S':
                scoreDataFile = string(optarg);
                break;
            case 'F':
                featureDataFile = string(optarg);
                break;
            case 'E':
                prevFeatureDataFile = string(optarg);
                break;
            case 'R':
                prevScoreDataFile = string(optarg);
                break;
            case 'v':
                verbosity = atoi(optarg);
                break;
            default:
                usage();
        }
    }
    try {

        // check whether score statistics file is specified
        if (scoreDataFile.length() == 0) {
            throw runtime_error("Error: output score statistics file is not specified");
        }

        // check whether feature file is specified
        if (featureDataFile.length() == 0) {
            throw runtime_error("Error: output feature file is not specified");
        }

        // check whether reference file is specified when nbest is specified
        if (nbestFile.length() > 0 && referenceFile.length() == 0) {
            throw runtime_error("Error: reference file is not specified; you cannot score the nbest");
        }

        // the n-best, reference, and previous data options each accept a
        // comma-separated list of files
        vector<string> nbestFiles;
        if (nbestFile.length() > 0) {
            std::string substring;
            while (!nbestFile.empty()) {
                getNextPound(nbestFile, substring, ",");
                nbestFiles.push_back(substring);
            }
        }

        vector<string> referenceFiles;
        if (referenceFile.length() > 0) {
            std::string substring;
            while (!referenceFile.empty()) {
                getNextPound(referenceFile, substring, ",");
                referenceFiles.push_back(substring);
            }
        }

        vector<string> prevScoreDataFiles;
        if (prevScoreDataFile.length() > 0) {
            std::string substring;
            while (!prevScoreDataFile.empty()) {
                getNextPound(prevScoreDataFile, substring, ",");
                prevScoreDataFiles.push_back(substring);
            }
        }

        vector<string> prevFeatureDataFiles;
        if (prevFeatureDataFile.length() > 0) {
            std::string substring;
            while (!prevFeatureDataFile.empty()) {
                getNextPound(prevFeatureDataFile, substring, ",");
                prevFeatureDataFiles.push_back(substring);
            }
        }

        if (prevScoreDataFiles.size() != prevFeatureDataFiles.size()) {
            throw runtime_error("Error: there is a different number of previous score and feature files");
        }

        if (binmode) cerr << "Binary write mode is selected" << endl;
        else cerr << "Binary write mode is NOT selected" << endl;

        TRACE_ERR("Scorer type: " << scorerType << endl);
        ScorerFactory sfactory;
        Scorer* scorer = sfactory.getScorer(scorerType, scorerConfig);

        Timer timer;
        timer.start("Starting...");

        // load references
        if (referenceFiles.size() > 0)
            scorer->setReferenceFiles(referenceFiles);

        Data data(*scorer);

        // load old data
        for (size_t i = 0; i < prevScoreDataFiles.size(); i++) {
            data.load(prevFeatureDataFiles.at(i), prevScoreDataFiles.at(i));
        }

        // compute score statistics of each nbest file
        for (size_t i = 0; i < nbestFiles.size(); i++) {
            data.loadnbest(nbestFiles.at(i));
        }

        data.save(featureDataFile, scoreDataFile, binmode);
        timer.stop("Stopping...");
        delete scorer; // release the scorer created by the factory
        return EXIT_SUCCESS;
    } catch (const exception& e) {
        cerr << "Exception: " << e.what() << endl;
        return EXIT_FAILURE;
    }

}
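The comma-separated file lists above are split with getNextPound, which pops the next delimiter-separated token off the front of a string. A minimal sketch of that behavior (the real helper in the mert tools may handle edge cases differently):

#include <string>

// Remove and return (via substring) the text before the first
// occurrence of delimiter in str; str keeps only the remainder.
// If the delimiter is absent, the whole string is consumed.
void getNextPound(std::string &str, std::string &substring,
                  const std::string &delimiter = " ") {
    const std::string::size_type pos = str.find(delimiter);
    if (pos == std::string::npos) {
        substring = str;
        str.clear();
    } else {
        substring = str.substr(0, pos);
        str.erase(0, pos + delimiter.size());
    }
}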
Example #12
int main(int argc, char** argv) {

	// handle parameters
	po::variables_map cfg;
	if (!init_params(argc,argv, &cfg)) exit(1); // something is wrong

	DfTable dft (cfg["dftable"].as<string>());
	cerr << "DF table loaded (" << dft.size() << " entries)." << endl;

	std::string metric;
	bool normalize = false;
	Scorer* scorer = setup_metric(cfg, metric, normalize);
	if (cfg.count("N"))
		scorer->setDocCount(cfg["N"].as<double>());
	else
		scorer->setDocCount(dft.mMaxDf);

	cerr << "Term weighting metric: " << metric << " " << scorer->print() << " [normalization=" << normalize << "]" << endl;

	bool verbose = false;
	if (cfg.count("verbose"))
		verbose = true;


	string docid;
	int len = -1;
	string v_str;
	TermVector v;

	int c = 0;
	int skipped = 0;
	int empty_input = 0;
	int empty_output = 0;

	while (cin >> docid) {
		cin.ignore(1, '\t');
		cin >> len;
		cin.ignore(1, '\t');
		getline(cin, v_str);

		if (docid.size() == 0 || len <= 0)
			continue;

		if (v_str.size() == 0) {
			cerr << "WARNING: input vector (id=" << docid << ",len=" << len << ") empty!" << endl;
			cout << docid << "\t" << len << "\t" << endl;
			empty_input++;
			continue;
		}

		v = vutils::read_vector(v_str);

		Document d(v, scorer, docid, len);
		skipped += d.calc_wvec(dft, normalize, verbose);

		if (d.wvec_.size() == 0) {
			if (verbose)
				cerr << "WARNING! " << d.docid_ << " output vector is empty!" << endl;
			empty_output++;
		}

		// write to output
		cout << d.docid_ << "\t" << d.len_ << "\t";
		vutils::write_vector(d.wvec_, cout);
		cout << endl;

		c++;
		if (c % 1000 == 0) cerr << "." << c << ".";
	}

	delete scorer;

	cerr << "done." << endl << endl
		 << c << "\tlines read/written." << endl
		 << empty_input << "\tempty input vectors." << endl
		 << empty_output << "\tempty output vectors." << endl
		 << skipped << "\tterms skipped due to missing df value." << endl;

}
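Like Example #4, this tool normalizes each weighted vector (vutils::normalize_vector there; calc_wvec with normalize=true here). A hypothetical sketch, assuming TermVector maps term ids to weights and that normalization means the usual L2 scaling:

#include <cmath>
#include <map>

typedef std::map<int, double> TermVector; // assumed term id -> weight

// Scale the vector to unit L2 norm (no-op for an all-zero vector).
void normalize_vector(TermVector& v) {
	double sq = 0.0;
	for (TermVector::const_iterator it = v.begin(); it != v.end(); ++it)
		sq += it->second * it->second;
	if (sq <= 0.0) return;
	const double norm = std::sqrt(sq);
	for (TermVector::iterator it = v.begin(); it != v.end(); ++it)
		it->second /= norm;
}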
Example #13
typename Kernel::Model RANSAC(
  const Kernel &kernel,
  const Scorer &scorer,
  std::vector<size_t> *best_inliers = NULL,
  double *best_score = NULL,
  double outliers_probability = 1e-2)
{
  assert(outliers_probability < 1.0);
  assert(outliers_probability > 0.0);
  size_t iteration = 0;
  const size_t min_samples = Kernel::MINIMUM_SAMPLES;
  const size_t total_samples = kernel.NumSamples();

  size_t max_iterations = 100;
  const size_t really_max_iterations = 4096;

  size_t best_num_inliers = 0;
  double best_cost = std::numeric_limits<double>::infinity();
  double best_inlier_ratio = 0.0;
  typename Kernel::Model best_model;

  // Test if we have sufficient points for the kernel.
  if (total_samples < min_samples) {
    if (best_inliers) {
      best_inliers->resize(0);
    }
    return best_model;
  }

  // In this robust estimator, the scorer always works on all the data points
  // at once. So precompute the list ahead of time.
  std::vector<size_t> all_samples(total_samples);
  std::iota(all_samples.begin(), all_samples.end(), 0);

  std::vector<size_t> sample;
  for (iteration = 0;
       iteration < max_iterations &&
       iteration < really_max_iterations; ++iteration) {
    UniformSample(min_samples, total_samples, &sample);

    std::vector<typename Kernel::Model> models;
    kernel.Fit(sample, &models);

    // Compute costs for each fit.
    for (size_t i = 0; i < models.size(); ++i) {
      std::vector<size_t> inliers;
      double cost = scorer.Score(kernel, models[i], all_samples, &inliers);

      if (cost < best_cost) {
        best_cost = cost;
        best_inlier_ratio = inliers.size() / double(total_samples);
        best_num_inliers = inliers.size();
        best_model = models[i];
        if (best_inliers) {
          best_inliers->swap(inliers);
        }
      }
      if (best_inlier_ratio) {
        max_iterations = IterationsRequired(min_samples,
                                            outliers_probability,
                                            best_inlier_ratio);
      }
    }
  }
  if (best_score)
    *best_score = best_cost;
  return best_model;
}