/**
 * Equality comparison for two ScoreStats objects.
 *
 * The objects compare equal when both report the same size() and get(k)
 * yields matching values at every index k in [0, size()).
 *
 * @param s1  left-hand operand.
 * @param s2  right-hand operand.
 * @return true when sizes and all entries match, false otherwise.
 */
bool operator==(const ScoreStats& s1, const ScoreStats& s2)
{
  const size_t count = s1.size();
  if (count != s2.size()) {
    // Different lengths can never be equal; skip the element scan.
    return false;
  }

  size_t idx = 0;
  while (idx < count) {
    if (s1.get(idx) != s2.get(idx)) {
      return false;
    }
    ++idx;
  }
  return true;
}
// really not the right place... float sentenceLevelBleuPlusOne( ScoreStats &stats ) { float logbleu = 0.0; const unsigned int bleu_order = 4; for (unsigned int j=0; j<bleu_order; j++) { //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " "; logbleu += log(stats.get(2*j)+1) - log(stats.get(2*j+1)+1); } logbleu /= bleu_order; float brevity = 1.0 - (float)stats.get(bleu_order*2)/stats.get(1); if (brevity < 0.0) { logbleu += brevity; } //cerr << brevity << " -> " << exp(logbleu) << endl; return exp(logbleu); }
/**
 * Stores the combined score data and gives every sub-scorer its own slice.
 *
 * The statistics of each entry in `data` are treated as the concatenation of
 * the statistics of all sub-scorers in m_scorers, in order. For each
 * sub-scorer a new ScoreData is built that contains, for every (i, j) entry
 * of `data`, only the NumberOfScores() consecutive values that belong to that
 * sub-scorer. The new object is handed to the sub-scorer via setScoreData().
 *
 * @param data  combined score data. It is stored in m_score_data and is
 *              dereferenced without a null check whenever m_scorers is not
 *              empty.
 */
void InterpolatedScorer::setScoreData(ScoreData* data)
{
  m_score_data = data;

  // Offset of the first statistic that belongs to the current sub-scorer.
  size_t last = 0;
  for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
       itsc != m_scorers.end(); ++itsc) {
    int numScoresScorer = (*itsc)->NumberOfScores();
    // One past the last statistic of this sub-scorer; invariant for the
    // loops below, so compute it once.
    const size_t sliceEnd = size_t(numScoresScorer + last);

    ScoreData* newData = new ScoreData(*itsc);
    // NOTE: This class takes the ownership of the heap allocated
    // ScoreData objects to avoid the memory leak issues. Register the
    // object right away so it is not leaked if filling it throws.
    m_scorers_score_data.push_back(newData);

    for (size_t i = 0; i < data->size(); i++) {
      // The index of each new array is the decimal form of i.
      std::string istr;
      std::stringstream out;
      out << i;
      istr = out.str();

      // Only the size is needed here, so query it in place instead of
      // copying the whole array.
      const size_t numNBest = data->get(i).size();

      ScoreArray newScoreArray;
      for (size_t j = 0; j < numNBest; j++) {
        // Read-only access: bind by reference rather than copying.
        const ScoreStats& scoreStats = data->get(i, j);
        ScoreStats newScoreStats;
        for (size_t k = last; k < sliceEnd; k++) {
          ScoreStatsType score = scoreStats.get(k);
          newScoreStats.add(score);
        }
        newScoreArray.add(newScoreStats);
      }
      newScoreArray.setIndex(istr);
      newData->add(newScoreArray);
    }

    (*itsc)->setScoreData(newData);
    last += numScoresScorer;
  }
}
void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores) const { if (!m_score_data) { throw runtime_error("Score data not loaded"); } // calculate the score for the candidates if (m_score_data->size() == 0) { throw runtime_error("Score data is empty"); } if (candidates.size() == 0) { throw runtime_error("No candidates supplied"); } int numCounts = m_score_data->get(0,candidates[0]).size(); vector<int> totals(numCounts); for (size_t i = 0; i < candidates.size(); ++i) { ScoreStats stats = m_score_data->get(i,candidates[i]); if (stats.size() != totals.size()) { stringstream msg; msg << "Statistics for (" << "," << candidates[i] << ") have incorrect " << "number of fields. Found: " << stats.size() << " Expected: " << totals.size(); throw runtime_error(msg.str()); } for (size_t k = 0; k < totals.size(); ++k) { totals[k] += stats.get(k); } } scores.push_back(calculateScore(totals)); candidates_t last_candidates(candidates); // apply each of the diffs, and get new scores for (size_t i = 0; i < diffs.size(); ++i) { for (size_t j = 0; j < diffs[i].size(); ++j) { size_t sid = diffs[i][j].first; size_t nid = diffs[i][j].second; size_t last_nid = last_candidates[sid]; for (size_t k = 0; k < totals.size(); ++k) { int diff = m_score_data->get(sid,nid).get(k) - m_score_data->get(sid,last_nid).get(k); totals[k] += diff; } last_candidates[sid] = nid; } scores.push_back(calculateScore(totals)); } // Regularisation. This can either be none, or the min or average as described in // Cer, Jurafsky and Manning at WMT08. 
if (m_regularization_type == NONE || m_regularization_window <= 0) { // no regularisation return; } // window size specifies the +/- in each direction statscores_t raw_scores(scores); // copy scores for (size_t i = 0; i < scores.size(); ++i) { size_t start = 0; if (i >= m_regularization_window) { start = i - m_regularization_window; } const size_t end = min(scores.size(), i + m_regularization_window + 1); if (m_regularization_type == AVERAGE) { scores[i] = score_average(raw_scores,start,end); } else { scores[i] = score_min(raw_scores,start,end); } } }