ScorerPtr DisjunctionMaxWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer)
 {
     Collection<ScorerPtr> scorers(Collection<ScorerPtr>::newInstance(weights.size()));
     int32_t idx = 0;
     for (Collection<WeightPtr>::iterator wt = weights.begin(); wt != weights.end(); ++wt)
     {
         ScorerPtr subScorer((*wt)->scorer(reader, true, false));
         if (subScorer && subScorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS)
             scorers[idx++] = subScorer;
     }
     if (idx == 0)
         return ScorerPtr(); // all scorers did not have documents
     DisjunctionMaxScorerPtr result(newLucene<DisjunctionMaxScorer>(query->tieBreakerMultiplier, similarity, scorers, idx));
     return result;
 }
示例#2
0
int main(int argc, char ** argv)
{
  try {
    options(argc, argv);

    cicada::optimize::LineSearch::value_min = value_lower;
    cicada::optimize::LineSearch::value_max = value_upper;

    if (scorer_list) {
      std::cout << cicada::eval::Scorer::lists();
      return 0;
    }    
    
    if (regularize_l1 && regularize_l2)
      throw std::runtime_error("you cannot use both of L1 and L2...");
    
    if (regularize_l1 || regularize_l2) {
      if (C <= 0.0)
	throw std::runtime_error("the scaling for L1/L2 must be positive");
    }
    
    if (weight_normalize_l1 && weight_normalize_l2)
      throw std::runtime_error("you cannot use both of L1 and L2 for weight normalization...");


    threads = utils::bithack::max(threads, 1);
    
    // read reference set
    scorer_document_type scorers(scorer_name);
    read_refset(refset_files, scorers);

    const size_t scorers_size = scorers.size();
    
    if (iterative && tstset_files.size() > 1) {
      scorer_document_type scorers_iterative(scorer_name);
      scorers_iterative.resize(scorers.size() * tstset_files.size());
      
      for (size_t i = 0; i != tstset_files.size(); ++ i)
	std::copy(scorers.begin(), scorers.end(), scorers_iterative.begin() + scorers.size() * i);
      
      scorers.swap(scorers_iterative);
    }

    if (debug)
      std::cerr << "# of references: " << scorers.size() << std::endl;


    if (debug)
      std::cerr << "reading kbests" << std::endl;

    hypothesis_map_type kbests(scorers.size());
    
    read_tstset(tstset_files, kbests, scorers_size);

    initialize_score(kbests, scorers);

    // collect initial weights
    weight_set_collection_type weights;
    
    if (! feature_weights_files.empty()) {

      for (path_set_type::const_iterator fiter = feature_weights_files.begin(); fiter != feature_weights_files.end(); ++ fiter) {
	if (*fiter != "-" && ! boost::filesystem::exists(*fiter))
	  throw std::runtime_error("no file? " + fiter->string());
	
	utils::compress_istream is(*fiter);
	
	weights.push_back(weight_set_type());
	is >> weights.back();
      }
      
      if (initial_average && weights.size() > 1) {
	weight_set_type weight;
	
	weight_set_collection_type::const_iterator witer_end = weights.end();
	for (weight_set_collection_type::const_iterator witer = weights.begin(); witer != witer_end; ++ witer)
	  weight += *witer;
	
	weight *= (1.0 / weights.size());
	
	weights.push_back(weight);
      }
      
      std::set<weight_set_type, std::less<weight_set_type>, std::allocator<weight_set_type> > uniques;
      uniques.insert(weights.begin(), weights.end());
      weights.clear();
      weights.insert(weights.end(), uniques.begin(), uniques.end());
    } else {
      weights.push_back(weight_set_type());
      
      // all one weight...
      for (feature_type::id_type id = 0; id < feature_type::allocated(); ++ id)
	if (! feature_type(id).empty())
	  weights.back()[feature_type(id)] = 1.0;
    }
    
    // collect lower/upper bounds
    weight_set_type bound_lower;
    weight_set_type bound_upper;
    
    if (! bound_lower_file.empty()) {
      if (bound_lower_file == "-" || boost::filesystem::exists(bound_lower_file)) {
	typedef cicada::FeatureVector<double> feature_vector_type;
	
	feature_vector_type bounds;
	  
	utils::compress_istream is(bound_lower_file);
	is >> bounds;
	
	bound_lower.allocate(cicada::optimize::LineSearch::value_min);
	for (feature_vector_type::const_iterator biter = bounds.begin(); biter != bounds.end(); ++ biter)
	  bound_lower[biter->first] = biter->second;
      } else
	throw std::runtime_error("no lower-bound file?" + bound_lower_file.string());
    }
int main(int argc, char ** argv)
{
  utils::mpi_world mpi_world(argc, argv);
  
  const int mpi_rank = MPI::COMM_WORLD.Get_rank();
  const int mpi_size = MPI::COMM_WORLD.Get_size();
  
  try {
    options(argc, argv);

    cicada::optimize::LineSearch::value_min = value_lower;
    cicada::optimize::LineSearch::value_max = value_upper;
    
    if (scorer_list) {
      std::cout << cicada::eval::Scorer::lists();
      return 0;
    }
    
    if (int(yield_sentence) + yield_alignment + yield_span > 1)
      throw std::runtime_error("specify either sentence|alignment|span yield");
    if (int(yield_sentence) + yield_alignment + yield_span == 0)
      yield_sentence = true;

    if (weights_file.empty() || ! boost::filesystem::exists(weights_file))
      throw std::runtime_error("no weight file? " + weights_file.string());
    if (direction_name.empty())
      throw std::runtime_error("no direction?");
    
    // read reference set
    scorer_document_type scorers(scorer_name);
    
    read_refset(refset_files, scorers);
    
    if (mpi_rank == 0 && debug)
      std::cerr << "# of references: " << scorers.size() << std::endl;

    // read test set
    
    if (mpi_rank == 0 && debug)
      std::cerr << "reading hypergraphs" << std::endl;

    hypergraph_set_type graphs(scorers.size());
    
    read_tstset(tstset_files, graphs);
    
    weight_set_type weights;
    {
      utils::compress_istream is(weights_file, 1024 * 1024);
      is >> weights;
    }
    
    weight_set_type direction;
    direction[direction_name] = 1.0;
    
    segment_document_type segments(graphs.size());
    
    compute_envelope(scorers, graphs, weights, direction, segments);

    if (mpi_rank == 0) {
      line_search_type line_search(debug);
      
      utils::compress_ostream os(output_file, 1024 * 1024);
      
      line_search(segments, value_lower, value_upper, OutputIterator(os, weights[direction_name]));
    }
  }
  catch (const std::exception& err) {
    std::cerr << "error: " << err.what() << std::endl;
    return 1;
  }
  return 0;
}