/**
 * Equality comparison for two ScoreStats objects.
 *
 * Two objects compare equal when they report the same size() and every
 * entry returned by get(k) matches for k in [0, size()).
 */
bool operator==(const ScoreStats& s1, const ScoreStats& s2)
{
  const size_t count = s1.size();
  if (s2.size() == count) {
    // Same length: walk the entries and bail out on the first mismatch.
    for (size_t idx = 0; idx != count; ++idx) {
      const bool differs = (s1.get(idx) != s2.get(idx));
      if (differs) {
        return false;
      }
    }
    return true;
  }
  // Different lengths can never be equal.
  return false;
}
void ScoreStats::Copy(const ScoreStats &stats) { m_available_size = stats.available(); m_entries = stats.size(); m_array = new ScoreStatsType[m_available_size]; memcpy(m_array, stats.getArray(), GetArraySizeWithBytes()); }
int main(int argc, char **argv) { if (argc == 1) { cerr << "Usage: ./sentence-bleu ref1 [ref2 ...] < candidate > bleu-scores" << endl; return 1; } vector<string> refFiles(argv + 1, argv + argc); // TODO all of these are empty for now string config; string factors; string filter; BleuScorer scorer(config); scorer.setFactors(factors); scorer.setFilter(filter); // initialize reference streams vector<boost::shared_ptr<ifstream> > refStreams; for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) { TRACE_ERR("Loading reference from " << *refFile << endl); boost::shared_ptr<ifstream> ifs(new ifstream(refFile->c_str())); UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile); refStreams.push_back(ifs); } // load sentences, preparing statistics, score string hypothesisLine; size_t sid = 0; while (getline(std::cin, hypothesisLine)) { Reference ref; if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) { UTIL_THROW2("Missing references"); } ScoreStats scoreStats; scorer.CalcBleuStats(ref, hypothesisLine, scoreStats); vector<float> stats(scoreStats.getArray(), scoreStats.getArray() + scoreStats.size()); std::cout << smoothedSentenceBleu(stats) << std::endl; ++sid; } return 0; }
void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores) const { if (!m_score_data) { throw runtime_error("Score data not loaded"); } // calculate the score for the candidates if (m_score_data->size() == 0) { throw runtime_error("Score data is empty"); } if (candidates.size() == 0) { throw runtime_error("No candidates supplied"); } int numCounts = m_score_data->get(0,candidates[0]).size(); vector<int> totals(numCounts); for (size_t i = 0; i < candidates.size(); ++i) { ScoreStats stats = m_score_data->get(i,candidates[i]); if (stats.size() != totals.size()) { stringstream msg; msg << "Statistics for (" << "," << candidates[i] << ") have incorrect " << "number of fields. Found: " << stats.size() << " Expected: " << totals.size(); throw runtime_error(msg.str()); } for (size_t k = 0; k < totals.size(); ++k) { totals[k] += stats.get(k); } } scores.push_back(calculateScore(totals)); candidates_t last_candidates(candidates); // apply each of the diffs, and get new scores for (size_t i = 0; i < diffs.size(); ++i) { for (size_t j = 0; j < diffs[i].size(); ++j) { size_t sid = diffs[i][j].first; size_t nid = diffs[i][j].second; size_t last_nid = last_candidates[sid]; for (size_t k = 0; k < totals.size(); ++k) { int diff = m_score_data->get(sid,nid).get(k) - m_score_data->get(sid,last_nid).get(k); totals[k] += diff; } last_candidates[sid] = nid; } scores.push_back(calculateScore(totals)); } // Regularisation. This can either be none, or the min or average as described in // Cer, Jurafsky and Manning at WMT08. 
if (m_regularization_type == NONE || m_regularization_window <= 0) { // no regularisation return; } // window size specifies the +/- in each direction statscores_t raw_scores(scores); // copy scores for (size_t i = 0; i < scores.size(); ++i) { size_t start = 0; if (i >= m_regularization_window) { start = i - m_regularization_window; } const size_t end = min(scores.size(), i + m_regularization_window + 1); if (m_regularization_type == AVERAGE) { scores[i] = score_average(raw_scores,start,end); } else { scores[i] = score_min(raw_scores,start,end); } } }