コード例 #1
0
ファイル: evalb_main.cpp プロジェクト: tarowatanabe/trance
// Evaluate parse trees: read a gold tree and a test tree in lock-step and
// accumulate EVALB bracketing statistics, then print the aggregate score.
int main(int argc, char** argv)
{
  if (argc != 3) {
    std::cerr << argv[0] << " [gold file] [test file]" << std::endl;
    return 1;
  }

  utils::compress_istream ig(argv[1]);
  utils::compress_istream it(argv[2]);

  trance::Tree gold;
  trance::Tree test;

  trance::EvalbScorer scorer;
  trance::Evalb       evalb;

  for (;;) {
    // Attempt both reads unconditionally so that a length mismatch between
    // the two files is detectable after the loop.
    const bool has_gold = static_cast<bool>(ig >> gold);
    const bool has_test = static_cast<bool>(it >> test);
    if (!has_gold || !has_test) break;

    scorer.assign(gold);
    evalb += scorer(test);
  }

  // One stream still has trees left: the files are not parallel.
  if (ig || it)
    throw std::runtime_error("# of trees does not match");

  std::cout << "scor: " << evalb() << " match: " << evalb.match_ << " gold: " << evalb.gold_ << " test: " << evalb.test_<< std::endl;
}
コード例 #2
0
 double ScoreCachingWrappingScorer::score()
 {
     ScorerPtr scorer(_scorer);
     int32_t doc = scorer->docID();
     if (doc != curDoc)
     {
         curScore = scorer->score();
         curDoc = doc;
     }
     return curScore;
 }
コード例 #3
0
 /// Build a scorer that intersects the wrapped query's scorer with the
 /// filter's doc-id set. Returns a null ScorerPtr if any piece is missing.
 ScorerPtr FilteredQueryWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer)
 {
     ScorerPtr subScorer(weight->scorer(reader, true, false));
     if (!subScorer)
         return ScorerPtr();

     DocIdSetPtr filterSet(query->filter->getDocIdSet(reader));
     if (!filterSet)
         return ScorerPtr();

     DocIdSetIteratorPtr filterIterator(filterSet->iterator());
     if (!filterIterator)
         return ScorerPtr();

     return newLucene<FilteredQueryWeightScorer>(shared_from_this(), subScorer, filterIterator, similarity);
 }
コード例 #4
0
ファイル: KENLM.cpp プロジェクト: a455bcd9/mosesdecoder
// Pre-compute language-model scores for a phrase without sentence context.
//
// Output parameters:
//   fullScore  - transformed sum of all n-gram log-probs for the phrase.
//   ngramScore - transformed sum of only the scores accumulated after the
//                first Order()-1 words (the full-order contribution).
//   oovCount   - number of terminals not in the LM vocabulary.
//
// Fix: this is a member of the class template KENLM<Model>, so the
// out-of-class definition requires the `template <class Model>` header
// (missing in the original).
template <class Model>
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  // The chart state is not needed here; only the accumulated score matters.
  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  size_t position;
  if (m_bos == phrase[0][m_factorType]) {
    // Leading <s>: prime the scorer with begin-of-sentence context.
    scorer.BeginSentence();
    position = 1;
  } else {
    position = 0;
  }

  size_t ngramBoundary = m_ngram->Order() - 1;

  // Words before the n-gram boundary are scored with incomplete context.
  size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
  for (; position < end_loop; ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      // A non-terminal breaks the n-gram context: flush and restart.
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;  // index 0 == <unk>
    }
  }
  // Snapshot of everything scored with less-than-full-order context.
  float before_boundary = fullScore + scorer.Finish();

  // Remaining words are scored with full n-gram context available.
  for (; position < phrase.GetSize(); ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }
  fullScore += scorer.Finish();

  // ngramScore keeps only the contribution added after the boundary snapshot.
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
コード例 #5
0
ファイル: Backward.cpp プロジェクト: akartbayev/mosesdecoder
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase are stored in fullScore.
 *
 * The value stored in ngramScore is similar, but only the portion accumulated
 * after the boundary snapshot (see `boundary` below) is included.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for only the terms
 * accumulated after the snapshot taken at position == boundary (here the
 * word "c", index ngramBoundary == 2), i.e.:
 *
 * p(b | d c)
 * p(a | c b)
 *
 * NOTE(review): the original comment listed the same seven terms for both
 * fullScore and ngramScore, which looks like a copy-paste slip; the code
 * computes ngramScore = fullScore - before_boundary, where before_boundary
 * is captured once position reaches `boundary`. Confirm against upstream
 * moses Backward.cpp.
 */
template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  // The chart state is discarded; only the accumulated score is used.
  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  float before_boundary = 0.0f;

  // A backward LM scores the phrase right-to-left. `boundary` is the index
  // at which a partial score is snapshotted; 0 when the phrase is shorter
  // than a full n-gram (everything then lands in before_boundary).
  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  int boundary = ( lastWord < ngramBoundary ) ? 0 : ngramBoundary;

  int position;
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;  // index 0 == <unk>

    // Snapshot the running total when the boundary word has been scored.
    if (position==boundary) {
      before_boundary = scorer.Finish();
    }

  }

  fullScore = scorer.Finish();

  // Only the post-snapshot contribution counts toward ngramScore.
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);

}
コード例 #6
0
ファイル: Backward.cpp プロジェクト: akartbayev/mosesdecoder
// Score the junction between the previous hypothesis and this phrase with a
// backward LM: the first few words of `phrase` now have right-context from
// the previous state. Returns the new LM state; the caller takes ownership.
//
// Fix: std::auto_ptr (deprecated in C++11, removed in C++17) replaced with
// std::unique_ptr — a drop-in here since only reset-free construction and
// release() are used, and both live in <memory>.
template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phrase &phrase, const FFState *ps, float &returnedScore) const
{

  returnedScore = 0.0f;

  const lm::ngram::ChartState &previous = static_cast<const BackwardLMState&>(*ps).state;

  std::unique_ptr<BackwardLMState> ret(new BackwardLMState());

  lm::ngram::RuleScore<Model> scorer(*m_ngram, ret->state);

  int ngramBoundary = m_ngram->Order() - 1;
  int lastWord = phrase.GetSize() - 1;

  // Get scores for words at the end of the previous phrase
  // that are now adjacent to words at the the beginning of this phrase
  for (int position=std::min( lastWord,  ngramBoundary - 1); position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
  }
  scorer.NonTerminal(previous);
  returnedScore = scorer.Finish();
  /*
  out->PlusEquals(this, score);


    UTIL_THROW_IF(
      (1==1),
      util::Exception,
      "This method (BackwardLanguageModel<Model>::Evaluate) is not yet fully implemented"
      );
  */
  // Ownership transfers to the caller.
  return ret.release();
}
コード例 #7
0
    /// Explain the constant score assigned to document `doc`.
    ///
    /// A temporary ConstantScorer is created only to consult its filter bits.
    ///
    /// Fix: `cs` was deleted twice — `_CLDELETE(cs)` immediately after use
    /// AND `_CLLDELETE(cs)` before returning — a double free. The trailing
    /// delete has been removed; the early one is kept since `cs` is not
    /// used afterwards.
    Explanation* explain(IndexReader* reader, int32_t doc) {
        ConstantScorer* cs = (ConstantScorer*)scorer(reader);
        bool exists = cs->bits->get(doc);
        _CLDELETE(cs);  // sole release of the temporary scorer

        ComplexExplanation* result = _CLNEW ComplexExplanation();

        if (exists) {
            StringBuffer buf(100);
            buf.append(_T("ConstantScoreQuery("));

            TCHAR* tmp = parentQuery->filter->toString();
            buf.append(tmp);
            _CLDELETE_LCARRAY(tmp);

            buf.append(_T("), product of:"));

            result->setDescription(buf.getBuffer());
            result->setValue(queryWeight);
            result->setMatch(true);
            result->addDetail(_CLNEW Explanation(parentQuery->getBoost(), _T("boost")));
            result->addDetail(_CLNEW Explanation(queryNorm, _T("queryNorm")));
        } else {
            StringBuffer buf(100);
            buf.append(_T("ConstantScoreQuery("));

            TCHAR* tmp = parentQuery->filter->toString();
            buf.append(tmp);
            // NOTE(review): the matching branch frees the same toString()
            // result with _CLDELETE_LCARRAY; _CLLDELETE here is likely the
            // wrong deallocation macro for a TCHAR array — confirm macro
            // semantics before changing.
            _CLLDELETE(tmp);

            buf.append(_T(") doesn't match id "));
            buf.appendInt(doc);

            result->setDescription(buf.getBuffer());
            result->setValue(0);
            // NOTE(review): setMatch(true) on a non-matching document looks
            // suspicious (upstream Lucene marks non-matches false) — verify
            // against the reference implementation before changing behavior.
            result->setMatch(true);
        }

        return result;
    }
コード例 #8
0
// Parent-process entry point: set up message queues and player processes,
// let the game run for sleep_time_seconds, score it, then tear everything
// down in reverse order.
int main(int argc, char **argv, char **envp)
{
	parse_argv(argc, argv);

	create_queues();
	create_players();

	// Allow a clean manual shutdown while the players are running.
	signal(SIGINT, parent_sig_int);
	sleep(sleep_time_seconds);
	scorer(scorer_queue);

	stop_players();
	close_queues();
	remove_queues();

	return EXIT_SUCCESS;
}
コード例 #9
0
// Parent-process entry point (threaded-player variant): create the queues
// and player threads, run for sleep_time_seconds, score, then tear down.
int main(int argc, char **argv, char **envp)
{
	parse_argv(argc, argv);

	create_queues();
	create_players();
	//size_queues();

	// i should probably install a SIGCHLD handler too.  I'd have to
	// keep track of if the child is exiting on an error or as expected.
	signal(SIGINT, parent_sig_int);
	sleep(sleep_time_seconds);
	scorer(scorer_queue);

	stop_players_threads();
	close_queues();
	remove_queues();

	return EXIT_SUCCESS;
}
コード例 #10
0
 /// Build a human-readable explanation of the phrase-query score for `doc`:
 /// the product of the query weight (boost * idf * queryNorm) and the field
 /// weight (tf * idf * fieldNorm). Returns a "no matching docs" explanation
 /// when the phrase scorer cannot be constructed.
 ExplanationPtr PhraseWeight::explain(IndexReaderPtr reader, int32_t doc)
 {
     ExplanationPtr result(newLucene<Explanation>());
     result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:");
     
     // Render the phrase terms as a quoted, space-separated string for the
     // idf description below.
     StringStream docFreqsBuffer;
     StringStream queryBuffer;
     queryBuffer << L"\"";
     docFreqsBuffer << idfExp->explain();
     for (Collection<TermPtr>::iterator term = query->terms.begin(); term != query->terms.end(); ++term)
     {
         if (term != query->terms.begin())
             queryBuffer << L" ";
         queryBuffer << (*term)->text();
     }
     queryBuffer << L"\"";
     
     ExplanationPtr idfExpl(newLucene<Explanation>(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")"));
     
     // explain query weight
     ExplanationPtr queryExpl(newLucene<Explanation>());
     queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:");
     
     // A boost of exactly 1.0 is the default and is omitted from the detail
     // list, though it still participates in the product below.
     ExplanationPtr boostExpl(newLucene<Explanation>(query->getBoost(), L"boost"));
     if (query->getBoost() != 1.0)
         queryExpl->addDetail(boostExpl);
     queryExpl->addDetail(idfExpl);
     
     ExplanationPtr queryNormExpl(newLucene<Explanation>(queryNorm, L"queryNorm"));
     queryExpl->addDetail(queryNormExpl);
     
     queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue());
     result->addDetail(queryExpl);
     
     // explain field weight
     ExplanationPtr fieldExpl(newLucene<Explanation>());
     fieldExpl->setDescription(L"fieldWeight(" +    query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:");
     
     // Without a phrase scorer there is nothing to explain for this doc.
     PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast<PhraseScorer>(scorer(reader, true, false)));
     if (!phraseScorer)
         return newLucene<Explanation>(0.0, L"no matching docs");
         
     // Advance the scorer to `doc`; a phrase frequency of 0 means the
     // phrase does not occur in this document.
     ExplanationPtr tfExplanation(newLucene<Explanation>());
     int32_t d = phraseScorer->advance(doc);
     double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0;
     tfExplanation->setValue(similarity->tf(phraseFreq));
     tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")");
     
     fieldExpl->addDetail(tfExplanation);
     fieldExpl->addDetail(idfExpl);
     
     // Field norm defaults to 1.0 when the reader carries no norms.
     ExplanationPtr fieldNormExpl(newLucene<Explanation>());
     ByteArray fieldNorms(reader->norms(query->field));
     double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0;
     fieldNormExpl->setValue(fieldNorm);
     fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")");
     fieldExpl->addDetail(fieldNormExpl);
     
     fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue());
     
     result->addDetail(fieldExpl);
     
     // combine them
     result->setValue(queryExpl->getValue() * fieldExpl->getValue());
     
     // When the query weight contributes nothing (exactly 1.0), the field
     // explanation alone is the whole story.
     if (queryExpl->getValue() == 1.0)
         return fieldExpl;
     
     return result;
 }
コード例 #11
0
ファイル: main.cpp プロジェクト: sliplove/peptideMasters
// Markov-chain Monte-Carlo peptide scoring driver.
//
// Reads a fragmentation matrix, a rule graph and an experimental spectrum
// from the files given on the command line, estimates density-of-states
// weights with a Wang-Landau simulation, then runs a Metropolis-Hastings
// hit-and-run phase with those weights.
//
// Cleanup: removed dead locals (`int i = 0;` and an unused shadowing
// `std::istringstream iss(line);`) and stale commented-out debug dumps;
// the reading loops are kept byte-for-byte so the matrix contents (incl.
// the trailing empty row, see below) are unchanged.
int main(int argc, char *argv[])
{
	srand (42);  // fixed seed: runs are reproducible
	cxxopts::Options options(argv[0], "Markov Chain Monte Carlo method");
	options.add_options("General")
		("h,help", "Print help")
		("s,spectrum", "Input spectrum", cxxopts::value<std::string>(), "FILE")
		("m,matrix", "Fragmentation Marix", cxxopts::value<std::string>(), "FILE")
		("r,rule", "Rule graph", cxxopts::value<std::string>(), "FILE")
		("precursor", "Precursor mass", cxxopts::value<double>(), "FLOAT")
		("min", "Min score", cxxopts::value<double>()->default_value("0"), "FLOAT")
		("max", "Max score", cxxopts::value<double>(), "FLOAT")
		("charge", "Spectrum parameter", cxxopts::value<unsigned>()->default_value("1"), "FLOAT");
	
	options.add_options("Advanced")
		("phi_begin", "Initial phi value (WL option)", cxxopts::value<double>()->default_value("1.822"), "FLOAT")
		("phi_end", "Final phi value (WL option)", cxxopts::value<double>()->default_value("1"), "FLOAT")
		("step", "Length of Wang-Landau iteration", cxxopts::value<unsigned>()->default_value("10000"), "N")  
		("run_iter", "Number of Monte-Carlo iterations", cxxopts::value<unsigned>()->default_value("50000"), "N")  
		("eps", "Accuracy", cxxopts::value<double>()->default_value("0.02"), "FLOAT")
		("level", "Quantile level for confident interval", cxxopts::value<double>()->default_value("0.95"), "FLOAT")
		("product_ion_thresh", "Score parameter", cxxopts::value<double>()->default_value("0.5"), "FLOAT");

	const std::vector<std::string> all_groups({"General", "Advanced"});

	// options.parse_positional(std::vector<std::string>({"spectrum", "matrix", "rule"}));
	options.parse(argc, argv);

	if (options.count("help")) {
		std::cout << options.help(all_groups) << std::endl;
		exit(0);
	}

	// NOTE(review): required options (spectrum/matrix/rule/precursor/max)
	// are not validated here; cxxopts throws on a missing value — confirm
	// that is the intended failure mode.
	std::ifstream file_mat(options["matrix"].as<std::string>());
	std::ifstream file_rule(options["rule"].as<std::string>());
	std::ifstream file_spectrum(options["spectrum"].as<std::string>());
	double NLP_MASS = options["precursor"].as<double>();
	double MIN_SCORE = options["min"].as<double>();
	double MAX_SCORE = options["max"].as<double>();
	unsigned CHARGE = options["charge"].as<unsigned>();
	double PHI_B = options["phi_begin"].as<double>();
	double PHI_E = options["phi_end"].as<double>();	
	unsigned STEP_LENGTH = options["step"].as<unsigned>();
	unsigned MIN_STEPS_RUN = options["run_iter"].as<unsigned>();
	double EPS = options["eps"].as<double>();	
	double LEVEL = options["level"].as<double>();
	double PRODUCT_ION_THRESH = options["product_ion_thresh"].as<double>();

	std::vector<std::vector<double> > mat;

	double elem;
	std::string line;

	// Read the fragmentation matrix row by row. The eof()-controlled loop
	// deliberately pushes one extra (empty) row after the final line; the
	// `mat.size() - 1` below compensates when reporting dimensions. The
	// loop is kept as-is so `mat` (passed to Peptide/Metropolis below)
	// keeps exactly the same contents as before.
	while(!file_mat.eof()) {
		getline(file_mat, line);
		std::istringstream iss(line);
		std::vector<double> row;
		while (iss >> elem) {
			row.push_back(elem);
		}
		mat.push_back(row);
	}

	int nrow = mat.size() - 1;  // ignore the trailing empty row
	int ncol = mat[0].size();

	std::cout << nrow << " " << ncol << std::endl;

	// Rule graph: whitespace-separated pairs. Values are read as double and
	// truncated into unsigned pairs (as in the original).
	std::vector<std::pair<unsigned, unsigned> > rule;
	double elem1, elem2;
	while(file_rule >> elem1 >> elem2) {
		rule.push_back(std::make_pair(elem1, elem2));
	}

	// Experimental spectrum: all whitespace-separated peak values.
	std::vector<double> exp_spectrum;
	while(!file_spectrum.eof()) {
		getline(file_spectrum, line);
		std::istringstream iss(line);
		while (iss >> elem) {
			exp_spectrum.push_back(elem);
		}
	}

	file_mat.close();
	file_rule.close();
	file_spectrum.close();

	pcg_extras::seed_seq_from<std::random_device> rd;

	// Set up the scorer and Metropolis parameters.
	Spectrum spectrum(exp_spectrum, CHARGE);
	SPCScorer scorer(PRODUCT_ION_THRESH);

	MHstate state(ncol, NLP_MASS, rd);

	Peptide peptide(mat, rule, state.get_current_state_(), NLP_MASS);
	Metropolis mh(mat, rule, NLP_MASS, MIN_SCORE, MAX_SCORE, state, peptide, spectrum, scorer);

	// Wang-Landau pass estimates the weights...
	WLsimulator wl(mh, PHI_B, PHI_E, STEP_LENGTH);
	std::vector<double> weights;
	weights = wl.wl_full(rd, false);

	// ...which the Metropolis hit-and-run phase then uses.
	mh.hit_run(rd, MIN_STEPS_RUN, EPS, LEVEL, weights);

	return 0;
}