int main(int argc, char** argv) { if (argc != 3) { std::cerr << argv[0] << " [gold file] [test file]" << std::endl; return 1; } trance::Tree gold; trance::Tree test; trance::Evalb evalb; trance::EvalbScorer scorer; utils::compress_istream ig(argv[1]); utils::compress_istream it(argv[2]); for (;;) { ig >> gold; it >> test; if (! ig || ! it) break; scorer.assign(gold); evalb += scorer(test); } if (ig || it) throw std::runtime_error("# of trees does not match"); std::cout << "scor: " << evalb() << " match: " << evalb.match_ << " gold: " << evalb.gold_ << " test: " << evalb.test_<< std::endl; }
double ScoreCachingWrappingScorer::score() { ScorerPtr scorer(_scorer); int32_t doc = scorer->docID(); if (doc != curDoc) { curScore = scorer->score(); curDoc = doc; } return curScore; }
/// Build a scorer that walks the wrapped query's scorer restricted to the
/// documents allowed by the filter.  Returns a null ScorerPtr when either
/// the inner query or the filter produces nothing for this reader.
/// Note: the inner scorer is always requested in-order and non-top-scoring,
/// regardless of the scoreDocsInOrder/topScorer arguments.
ScorerPtr FilteredQueryWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer)
{
    // Scorer for the underlying (unfiltered) query.
    ScorerPtr innerScorer(weight->scorer(reader, true, false));
    if (!innerScorer)
        return ScorerPtr();

    // Documents the filter permits for this segment.
    DocIdSetPtr permittedDocs(query->filter->getDocIdSet(reader));
    if (!permittedDocs)
        return ScorerPtr();

    DocIdSetIteratorPtr permittedIterator(permittedDocs->iterator());
    if (!permittedIterator)
        return ScorerPtr();

    // Combine the query scorer with the filter iterator.
    return newLucene<FilteredQueryWeightScorer>(shared_from_this(), innerScorer, permittedIterator, similarity);
}
// Pre-compute language-model scores for the symbols of an SCFG rule phrase.
//
// Output parameters:
//  - fullScore:  transformed sum of LM logprobs for the whole phrase.
//  - ngramScore: transformed portion of the score accumulated after the
//                n-gram boundary (order-1 words), i.e. the part scored with
//                full in-phrase context.
//  - oovCount:   number of terminals not in the LM vocabulary.
//
// Fix: the two scoring loops previously carried byte-identical duplicated
// bodies; the shared logic is now a single local lambda.
template <class Model>
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;
  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  size_t position;
  if (m_bos == phrase[0][m_factorType]) {
    // Phrase starts with <s>: tell the scorer and skip that symbol.
    scorer.BeginSentence();
    position = 1;
  } else {
    position = 0;
  }

  // Score one rule symbol: a non-terminal flushes and resets the scorer
  // (its words are unknown here); a terminal feeds the LM and counts OOVs.
  auto scoreSymbol = [&](const SCFG::Word &word) {
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  };

  size_t ngramBoundary = m_ngram->Order() - 1;
  size_t end_loop = std::min(ngramBoundary, phrase.GetSize());

  // Words before the n-gram boundary lack full left context inside the phrase.
  for (; position < end_loop; ++position) {
    scoreSymbol(phrase[position]);
  }

  float before_boundary = fullScore + scorer.Finish();

  // Remaining words are scored with full in-phrase context.
  for (; position < phrase.GetSize(); ++position) {
    scoreSymbol(phrase[position]);
  }

  fullScore += scorer.Finish();
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * When this method is called we do not yet know the context in which the
 * phrase will eventually be applied: we know the words inside the phrase,
 * but not what will come before or after it.
 *
 * fullScore, ngramScore, and oovCount are all output parameters:
 *  - fullScore:  transformed sum of the backward-LM logprob scores for every
 *                word in the phrase (scored right-to-left).
 *  - ngramScore: transformed portion of that sum accumulated AFTER the
 *                boundary word, i.e. the scores of the first (order-1) words
 *                of the phrase (zero when the phrase is shorter than the
 *                model order).
 *  - oovCount:   number of words not in the language model's vocabulary.
 *
 * Example: with a trigram backward LM and the phrase "a b c d e f g",
 * fullScore is the sum of the logprobs
 *   p(g), p(f | g), p(e | g f), p(d | f e), p(c | e d), p(b | d c), p(a | c b)
 * while ngramScore keeps only the scores added after the boundary word "c":
 *   p(b | d c), p(a | c b)
 * (NOTE(review): the previous comment listed the identical set for both
 * values, contradicting its own description; the list above follows what the
 * code computes — confirm the intended semantics.)
 */
template <class Model>
void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;
  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  // Rules beginning with <s> are not supported by the backward model.
  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  float scoreAtBoundary = 0.0f;
  const int finalWord = phrase.GetSize() - 1;
  const int contextWords = m_ngram->Order() - 1;
  // Boundary position: order-1 when the phrase is long enough, else 0
  // (in which case everything lands in the "before boundary" portion and
  // ngramScore ends up zero).
  const int boundary = (finalWord < contextWords) ? 0 : contextWords;

  // Feed words to the backward LM from last to first.
  for (int position = finalWord; position >= 0; position -= 1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );
    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;
    // Snapshot the running score once the boundary word has been consumed.
    if (position == boundary) {
      scoreAtBoundary = scorer.Finish();
    }
  }

  fullScore = scorer.Finish();
  ngramScore = TransformLMScore(fullScore - scoreAtBoundary);
  fullScore = TransformLMScore(fullScore);
}
// Score the words at the beginning of this phrase against the backward-LM
// state carried over from the previous hypothesis, and return the new state.
//
// Fix: std::auto_ptr is deprecated since C++11 and removed in C++17;
// std::unique_ptr (same <memory> header) provides the identical
// exception-safe ownership, and release() transfers it to the caller.
template <class Model>
FFState *BackwardLanguageModel<Model>::Evaluate(const Phrase &phrase, const FFState *ps, float &returnedScore) const
{
  returnedScore = 0.0f;

  const lm::ngram::ChartState &previous = static_cast<const BackwardLMState&>(*ps).state;

  std::unique_ptr<BackwardLMState> ret(new BackwardLMState());
  lm::ngram::RuleScore<Model> scorer(*m_ngram, ret->state);

  int ngramBoundary = m_ngram->Order() - 1;
  int lastWord = phrase.GetSize() - 1;

  // Get scores for words at the end of the previous phrase
  // that are now adjacent to words at the beginning of this phrase.
  // The backward LM scores right-to-left, so only the first (order-1)
  // words of this phrase can pick up new context.
  for (int position = std::min(lastWord, ngramBoundary - 1); position >= 0; position -= 1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );
    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
  }
  scorer.NonTerminal(previous);
  returnedScore = scorer.Finish();

  /*
    out->PlusEquals(this, score);
    UTIL_THROW_IF( (1==1), util::Exception, "This method (BackwardLanguageModel<Model>::Evaluate) is not yet fully implemented" );
  */
  return ret.release();
}
// Build an Explanation of the constant score this query assigns to `doc`.
//
// Fixes:
//  - `cs` was deleted twice: once immediately after reading its bits
//    (_CLDELETE) and again before returning (_CLLDELETE) — a double free.
//    The single delete at the end now covers both branches.
//  - In the non-matching branch the TCHAR* returned by filter->toString()
//    was freed with the scalar deleter (_CLLDELETE) while the matching
//    branch correctly used the array deleter; both now use _CLDELETE_LCARRAY.
Explanation* explain(IndexReader* reader, int32_t doc)
{
    ConstantScorer* cs = (ConstantScorer*)scorer(reader);
    bool exists = cs->bits->get(doc);

    ComplexExplanation* result = _CLNEW ComplexExplanation();
    if (exists) {
        StringBuffer buf(100);
        buf.append(_T("ConstantScoreQuery("));
        TCHAR* tmp = parentQuery->filter->toString();
        buf.append(tmp);
        _CLDELETE_LCARRAY(tmp);
        buf.append(_T("), product of:"));
        result->setDescription(buf.getBuffer());
        result->setValue(queryWeight);
        result->setMatch(true);
        result->addDetail(_CLNEW Explanation(parentQuery->getBoost(), _T("boost")));
        result->addDetail(_CLNEW Explanation(queryNorm, _T("queryNorm")));
    } else {
        StringBuffer buf(100);
        buf.append(_T("ConstantScoreQuery("));
        TCHAR* tmp = parentQuery->filter->toString();
        buf.append(tmp);
        _CLDELETE_LCARRAY(tmp);
        buf.append(_T(") doesn't match id "));
        buf.appendInt(doc);
        result->setDescription(buf.getBuffer());
        result->setValue(0);
        // NOTE(review): setMatch(true) on a NON-matching doc looks wrong
        // (Lucene sets match=false here) — confirm before changing behavior.
        result->setMatch(true);
    }
    _CLLDELETE(cs);
    return result;
}
// Parent process entry point: create the IPC queues and player processes,
// let the game run for a fixed duration, then score and tear everything down.
// The teardown order (stop players, close queues, remove queues) matters.
int main ( int argc , char **argv , char **envp )
{
    // Presumably configures globals such as sleep_time_seconds — confirm.
    parse_argv(argc, argv);

    create_queues( /* no args */ );
    create_players( /* no args */ );

    // Let Ctrl-C in the parent trigger an orderly shutdown.
    signal(SIGINT, parent_sig_int);

    // Let the players run for the configured duration.
    sleep(sleep_time_seconds);

    // Collect and report results from the score queue.
    scorer(scorer_queue);

    stop_players( /* no args */ );
    close_queues( /* no args */ );
    remove_queues( /* no args */ );

    return(EXIT_SUCCESS);
}
// Parent process entry point (threaded-player variant): create the IPC
// queues and players, let the game run for a fixed duration, then score
// and tear everything down in order.
int main ( int argc , char **argv , char **envp )
{
    // Presumably configures globals such as sleep_time_seconds — confirm.
    parse_argv(argc, argv);

    create_queues( /* no args */ );
    create_players( /* no args */ );
    //size_queues();

    // i should probably install a SIGCHLD handler too. I'd have to
    // keep track of if the child is exiting on an error or as expected.
    signal(SIGINT, parent_sig_int);

    // Let the players run for the configured duration.
    sleep(sleep_time_seconds);

    // Collect and report results from the score queue.
    scorer(scorer_queue);

    // Unlike the fork-based variant, players here run as threads.
    stop_players_threads( /* no args */ );
    close_queues( /* no args */ );
    remove_queues( /* no args */ );

    return(EXIT_SUCCESS);
}
// Build a human-readable Explanation of this phrase query's score for `doc`:
//   score = queryWeight (boost * idf * queryNorm) * fieldWeight (tf * idf * fieldNorm)
// Returns the fieldWeight explanation alone when queryWeight is exactly 1.0,
// and a "no matching docs" explanation when the phrase scorer cannot reach doc.
ExplanationPtr PhraseWeight::explain(IndexReaderPtr reader, int32_t doc)
{
    ExplanationPtr result(newLucene<Explanation>());
    result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:");

    StringStream docFreqsBuffer;
    StringStream queryBuffer;
    queryBuffer << L"\"";
    docFreqsBuffer << idfExp->explain();
    // Quote the phrase terms, space-separated, for the description strings.
    for (Collection<TermPtr>::iterator term = query->terms.begin(); term != query->terms.end(); ++term)
    {
        if (term != query->terms.begin())
            queryBuffer << L" ";
        queryBuffer << (*term)->text();
    }
    queryBuffer << L"\"";

    ExplanationPtr idfExpl(newLucene<Explanation>(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")"));

    // explain query weight
    ExplanationPtr queryExpl(newLucene<Explanation>());
    queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:");

    ExplanationPtr boostExpl(newLucene<Explanation>(query->getBoost(), L"boost"));
    // The boost detail is shown only when it is not the identity, but it is
    // always part of the product below.
    if (query->getBoost() != 1.0)
        queryExpl->addDetail(boostExpl);
    queryExpl->addDetail(idfExpl);

    ExplanationPtr queryNormExpl(newLucene<Explanation>(queryNorm, L"queryNorm"));
    queryExpl->addDetail(queryNormExpl);

    queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue());
    result->addDetail(queryExpl);

    // explain field weight
    ExplanationPtr fieldExpl(newLucene<Explanation>());
    fieldExpl->setDescription(L"fieldWeight(" + query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:");

    PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast<PhraseScorer>(scorer(reader, true, false)));
    if (!phraseScorer)
        return newLucene<Explanation>(0.0, L"no matching docs");

    // tf: advance the scorer to doc; frequency is 0 if doc is not reachable.
    ExplanationPtr tfExplanation(newLucene<Explanation>());
    int32_t d = phraseScorer->advance(doc);
    double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0;
    tfExplanation->setValue(similarity->tf(phraseFreq));
    tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")");

    fieldExpl->addDetail(tfExplanation);
    fieldExpl->addDetail(idfExpl);

    // fieldNorm: decoded from stored norms; defaults to 1.0 when absent.
    ExplanationPtr fieldNormExpl(newLucene<Explanation>());
    ByteArray fieldNorms(reader->norms(query->field));
    double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0;
    fieldNormExpl->setValue(fieldNorm);
    fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")");
    fieldExpl->addDetail(fieldNormExpl);

    fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue());
    result->addDetail(fieldExpl);

    // combine them
    result->setValue(queryExpl->getValue() * fieldExpl->getValue());
    if (queryExpl->getValue() == 1.0)
        return fieldExpl;
    return result;
}
int main(int argc, char *argv[]) { srand (42); cxxopts::Options options(argv[0], "Markov Chain Monte Carlo method"); options.add_options("General") ("h,help", "Print help") ("s,spectrum", "Input spectrum", cxxopts::value<std::string>(), "FILE") ("m,matrix", "Fragmentation Marix", cxxopts::value<std::string>(), "FILE") ("r,rule", "Rule graph", cxxopts::value<std::string>(), "FILE") ("precursor", "Precursor mass", cxxopts::value<double>(), "FLOAT") ("min", "Min score", cxxopts::value<double>()->default_value("0"), "FLOAT") ("max", "Max score", cxxopts::value<double>(), "FLOAT") ("charge", "Spectrum parameter", cxxopts::value<unsigned>()->default_value("1"), "FLOAT"); options.add_options("Advanced") ("phi_begin", "Initial phi value (WL option)", cxxopts::value<double>()->default_value("1.822"), "FLOAT") ("phi_end", "Final phi value (WL option)", cxxopts::value<double>()->default_value("1"), "FLOAT") ("step", "Length of Wang-Landau iteration", cxxopts::value<unsigned>()->default_value("10000"), "N") ("run_iter", "Number of Monte-Carlo iterations", cxxopts::value<unsigned>()->default_value("50000"), "N") ("eps", "Accuracy", cxxopts::value<double>()->default_value("0.02"), "FLOAT") ("level", "Quantile level for confident interval", cxxopts::value<double>()->default_value("0.95"), "FLOAT") ("product_ion_thresh", "Score parameter", cxxopts::value<double>()->default_value("0.5"), "FLOAT"); const std::vector<std::string> all_groups({"General", "Advanced"}); // options.parse_positional(std::vector<std::string>({"spectrum", "matrix", "rule"})); options.parse(argc, argv); if (options.count("help")) { std::cout << options.help(all_groups) << std::endl; exit(0); } std::ifstream file_mat(options["matrix"].as<std::string>()); std::ifstream file_rule(options["rule"].as<std::string>()); std::ifstream file_spectrum(options["spectrum"].as<std::string>()); double NLP_MASS = options["precursor"].as<double>(); double MIN_SCORE = options["min"].as<double>(); double MAX_SCORE = 
options["max"].as<double>(); unsigned CHARGE = options["charge"].as<unsigned>(); double PHI_B = options["phi_begin"].as<double>(); double PHI_E = options["phi_end"].as<double>(); unsigned STEP_LENGTH = options["step"].as<unsigned>(); unsigned MIN_STEPS_RUN = options["run_iter"].as<unsigned>(); double EPS = options["eps"].as<double>(); double LEVEL = options["level"].as<double>(); double PRODUCT_ION_THRESH = options["product_ion_thresh"].as<double>(); std::vector<std::vector<double> > mat; double elem; std::string line; while(!file_mat.eof()) { getline(file_mat, line); std::istringstream iss(line); std::vector<double> row; while (iss >> elem) { row.push_back(elem); } mat.push_back(row); } int nrow = mat.size() - 1; int ncol = mat[0].size(); std::cout << nrow << " " << ncol << std::endl; std::vector<std::pair<unsigned, unsigned> > rule; double elem1, elem2; int i = 0; std::istringstream iss(line); while(file_rule >> elem1 >> elem2) { rule.push_back(std::make_pair(elem1, elem2)); } std::vector<double> exp_spectrum; while(!file_spectrum.eof()) { getline(file_spectrum, line); std::istringstream iss(line); while (iss >> elem) { exp_spectrum.push_back(elem); } } file_mat.close(); file_rule.close(); file_spectrum.close(); pcg_extras::seed_seq_from<std::random_device> rd; // for (int i = 0; i < nrow; ++i) { // for (int j = 0; j < ncol; ++j) { // std::cout << mat[i][j] << " "; // } // std::cout << std::endl; // } // std::cout << " ----------------- " << std::endl; // for (int i = 0; i < rule.size(); ++i) { // std::cout << rule[i].first << " " << rule[i].second << std::endl; // } // std::cout << " ----------------- " << std::endl; // std::cout << MIN_SCORE << " " << MAX_SCORE << " " << PHI_B << " " << // PHI_E << " " << STEP_LENGTH << " " << NLP_MASS << std::endl; // set scorer and metropolis parameters Spectrum spectrum(exp_spectrum, CHARGE); SPCScorer scorer(PRODUCT_ION_THRESH); // std::vector<double> start_mass(st_m, st_m + sizeof(st_m) / sizeof(st_m[0])); // MHstate 
state(start_mass); MHstate state(ncol, NLP_MASS, rd); Peptide peptide(mat, rule, state.get_current_state_(), NLP_MASS); Metropolis mh(mat, rule, NLP_MASS, MIN_SCORE, MAX_SCORE, state, peptide, spectrum, scorer); // get weights WLsimulator wl(mh, PHI_B, PHI_E, STEP_LENGTH); // wl.print(); // wl.wl_step(rd, PHI_B, true); std::vector<double> weights; weights = wl.wl_full(rd, false); // std::cout << "Wang-Landau weights" << std::endl; // for (auto & w: weights) { // std::cout << w << " " ; // } // std::cout << std::endl << std::endl; // // mh step mh.hit_run(rd, MIN_STEPS_RUN, EPS, LEVEL, weights); return 0; }