void Data::loadNBest(const string &file) { TRACE_ERR("loading nbest from " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str; while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } inp.close(); }
void Data::loadNBest(const string &file, bool oneBest) { TRACE_ERR("loading nbest from " << file << endl); util::FilePiece in(file.c_str()); ScoreStats scoreentry; string sentence, feature_str, alignment; int sentence_index; while (true) { try { StringPiece line = in.ReadLine(); if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||")); sentence_index = ParseInt(*it); if (oneBest && m_score_data->exists(sentence_index)) continue; ++it; sentence = it->as_string(); ++it; feature_str = it->as_string(); ++it; if (it) { ++it; // skip model score. if (it) { alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment ++it; if (it) { alignment = it->as_string(); //sixth field (if present) is word alignment } } } //TODO check alignment exists if scorers need it if (m_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } catch (util::EndOfFileException &e) { PrintUserTime("Loaded N-best lists"); break; } } }
/**
 * Read an n-best list from @p file, filling the score data and the
 * feature data.
 *
 * Every non-empty line is split on "|||" into sentence index, hypothesis
 * text and feature string. The feature string is then consumed token by
 * token (default getNextPound delimiter): tokens whose last character is
 * not ':' are stored as dense values; tokens ending in ':' that contain
 * '_' are sparse feature names followed by their value; any other token
 * ending in ':' is a dense feature-group name.
 *
 * @param file path of the n-best list to read.
 * @throws runtime_error if the stream is not good after opening.
 */
void Data::loadnbest(const std::string &file)
{
  TRACE_ERR("loading nbest from " << file << std::endl);

  FeatureStats featentry;
  ScoreStats scoreentry;

  // The stream opens the file on construction.
  inputfilestream inp(file);
  if (!inp.good()) {
    throw runtime_error("Unable to open: " + file);
  }

  std::string record;
  std::string sentence_index;
  std::string hypothesis;
  std::string featureField;
  std::string token;

  while (getline(inp, record, '\n')) {
    if (record.empty()) {
      continue;
    }

    getNextPound(record, sentence_index, "|||");  // field 1
    getNextPound(record, hypothesis, "|||");      // field 2

    // Start from clean per-hypothesis accumulators.
    featentry.reset();
    scoreentry.clear();

    // Statistics for the error measure.
    theScorer->prepareStats(sentence_index, hypothesis, scoreentry);
    scoredata->add(scoreentry, sentence_index);

    getNextPound(record, featureField, "|||");    // field 3

    // The first line seen is used to derive the dense feature names.
    if (!existsFeatureNames()) {
      std::string remaining = featureField;
      std::string featureNames = "";
      std::string groupName = "";
      size_t indexInGroup = 0;

      while (!remaining.empty()) {
        getNextPound(remaining, token);

        // Same test as the original: "last ':' is not the final character".
        if (token.find_last_of(":") != token.length() - 1) {
          // A value: emit "<group>_<index> " for it.
          featureNames += groupName + "_" + stringify(indexInGroup) + " ";
          ++indexInGroup;
          continue;
        }

        if (token.find("_") != std::string::npos) {
          // Sparse feature name: skip it together with its value.
          getNextPound(remaining, token);
          continue;
        }

        // Dense group name: strip the trailing character, restart numbering.
        indexInGroup = 0;
        groupName = token.substr(0, token.size() - 1);
      }

      featdata->setFeatureMap(featureNames);
    }

    // Store the feature values of this hypothesis.
    while (!featureField.empty()) {
      getNextPound(featureField, token);

      if (token.find_last_of(":") != token.length() - 1) {
        // Dense value.
        featentry.add(ATOFST(token.c_str()));
        continue;
      }

      if (token.find("_") != std::string::npos) {
        // Sparse feature: the name is followed by its value.
        std::string sparseName = token;
        getNextPound(featureField, token);
        featentry.addSparse(sparseName, atof(token.c_str()));
        _sparse_flag = true;
      }
      // Dense group names carry no value and are ignored here.
    }

    featdata->add(featentry, sentence_index);
  }

  inp.close();
}
void DataAsiya::loadNBest(const string &file) { TRACE_ERR("loading nbest from DataAsiya " << file << endl); inputfilestream inp(file); // matches a stream with a file. Opens the file if (!inp.good()) throw runtime_error("Unable to open: " + file); ScoreStats scoreentry; string line, sentence_index, sentence, feature_str, alignment; AsiyaScorer* a_scorer = dynamic_cast<AsiyaScorer*>(m_scorer); /*todo. change this loop. instead of obtaining the score for each sentence, obtain all the scores at once!*/ while (getline(inp, line, '\n')) { if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); getNextPound(line, sentence_index, "|||"); // first field getNextPound(line, sentence, "|||"); // second field getNextPound(line, feature_str, "|||"); // third field if (line.length() > 0) { string temp; getNextPound(line, temp, "|||"); //fourth field sentence score if (line.length() > 0) { getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer } } //TODO check alignment exists if scorers need it if (a_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } // prepare stats gets all the scores for sentence_i of sentence_index // a_scorer->addCandidateSentence(sentence_index, sentence); a_scorer->prepareStats(atoi(sentence_index.c_str()), sentence, scoreentry); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, atoi(sentence_index.c_str())); } a_scorer->doScoring(); // TRACE_ERR("before getAllScoreStats" << endl); std::vector<std::vector <ScoreStats> > allScoreStats = a_scorer->getAllScoreStats(); for (int i = 0; i < allScoreStats.size(); ++i) for(int j = 0; j < allScoreStats[i].size(); ++j) { stringstream ss; ss << i; m_score_data->add(allScoreStats[i][j], atoi(ss.str().c_str())); // TRACE_ERR("allScoreStats[" << i << "].size() " << allScoreStats[i].size() << " " << allScoreStats[i][j] << endl); } inp.close(); // 
a_scorer->doScoring( m_score_data ); //score each sentence //a_scorer->prepareStats(sentence_index, sentence, scoreentry); // save the score for previous sentence. Do it aling with previous function //m_score_data->add(scoreentry, sentence_index); }