void Data::loadNBest(const string &file) { TRACE_ERR("loading nbest from " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str; while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } inp.close(); }
void Data::loadNBest(const string &file, bool oneBest) { TRACE_ERR("loading nbest from " << file << endl); util::FilePiece in(file.c_str()); ScoreStats scoreentry; string sentence, feature_str, alignment; int sentence_index; while (true) { try { StringPiece line = in.ReadLine(); if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||")); sentence_index = ParseInt(*it); if (oneBest && m_score_data->exists(sentence_index)) continue; ++it; sentence = it->as_string(); ++it; feature_str = it->as_string(); ++it; if (it) { ++it; // skip model score. if (it) { alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment ++it; if (it) { alignment = it->as_string(); //sixth field (if present) is word alignment } } } //TODO check alignment exists if scorers need it if (m_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } catch (util::EndOfFileException &e) { PrintUserTime("Loaded N-best lists"); break; } } }
void DataAsiya::loadNBest(const string &file) { TRACE_ERR("loading nbest from DataAsiya " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str, alignment; AsiyaScorer* a_scorer = dynamic_cast<AsiyaScorer*>(m_scorer); /*todo. change this loop. instead of obtaining the score for each sentence, obtain all the scores at once!*/ while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field if (line.length() > 0) { string temp; getNextPound(line, temp, "|||"); //fourth field sentence score if (line.length() > 0) { getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer } } //TODO check alignment exists if scorers need it if (a_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } // prepare stats gets all the scores for sentence_i of sentence_index // a_scorer->addCandidateSentence(sentence_index, sentence); a_scorer->prepareStats(atoi(sentence_index.c_str()), sentence, scoreentry); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, atoi(sentence_index.c_str())); } a_scorer->doScoring(); // TRACE_ERR("before getAllScoreStats" << endl); std::vector<std::vector <ScoreStats> > allScoreStats = a_scorer->getAllScoreStats(); for (int i = 0; i < allScoreStats.size(); ++i) for(int j = 0; j < allScoreStats[i].size(); ++j) { stringstream ss; ss << i; m_score_data->add(allScoreStats[i][j], atoi(ss.str().c_str())); // TRACE_ERR("allScoreStats[" << i << "].size() " << allScoreStats[i].size() << " " << allScoreStats[i][j] << endl); } inp.close(); // a_scorer->doScoring( m_score_data ); //score each sentence //a_scorer->prepareStats(sentence_index, sentence, scoreentry); // save the score for previous sentence. Do it aling with previous function //m_score_data->add(scoreentry, sentence_index); }