/**
 * Stores the decoder options and loads the word lists configured for this
 * feature.
 *
 * Nothing is loaded when m_filePathSource is empty. When m_domainTrigger is
 * set, m_filePathSource is read line by line and every line contributes one
 * new entry to m_vocabDomain holding that line's tab/space separated terms.
 *
 * NOTE(review): in the non-domain-trigger case the function returns before
 * the restricted source/target vocabularies are read, so m_vocabSource,
 * m_vocabTarget and m_unrestricted are left untouched. The loading code is
 * kept below the return but is unreachable - confirm whether this is an
 * intentional disable or a debugging leftover.
 *
 * @param opts decoder options, stored in m_options.
 */
void WordTranslationFeature::Load(AllOptions::ptr const& opts)
{
  m_options = opts;

  // No source word list configured: nothing to load.
  if (m_filePathSource.empty()) {
    return;
  }

  FEATUREVERBOSE(1, "Loading word translation word lists from " << m_filePathSource << " and " << m_filePathTarget << std::endl);

  if (m_domainTrigger) {
    // Domain trigger terms: one line (and one m_vocabDomain entry) per input document.
    std::ifstream domainFile(m_filePathSource.c_str());
    UTIL_THROW_IF2(!domainFile, "could not open file " << m_filePathSource);

    std::string documentLine;
    while (std::getline(domainFile, documentLine)) {
      std::vector<std::string> terms;
      boost::split(terms, documentLine, boost::is_any_of("\t "));

      // Append an empty entry for this document, then fill it with the terms.
      m_vocabDomain.resize(m_vocabDomain.size() + 1);
      m_vocabDomain.back().insert(terms.begin(), terms.end());
    }
    domainFile.close();
  } else if (!(m_filePathSource.empty() && m_filePathTarget.empty())) {
    // NOTE(review): unconditional return - everything below in this branch is
    // currently dead code (see the function comment).
    return;

    // Restricted source word vocabulary: one entry per line.
    std::ifstream sourceVocabFile(m_filePathSource.c_str());
    UTIL_THROW_IF2(!sourceVocabFile, "could not open file " << m_filePathSource);

    std::string entry;
    while (std::getline(sourceVocabFile, entry)) {
      m_vocabSource.insert(entry);
    }
    sourceVocabFile.close();

    // Restricted target word vocabulary: one entry per line.
    std::ifstream targetVocabFile(m_filePathTarget.c_str());
    UTIL_THROW_IF2(!targetVocabFile, "could not open file " << m_filePathTarget);

    while (std::getline(targetVocabFile, entry)) {
      m_vocabTarget.insert(entry);
    }
    targetVocabFile.close();

    m_unrestricted = false;
  }
}
void PhrasePairFeature::Load() { if (m_domainTrigger) { // domain trigger terms for each input document ifstream inFileSource(m_filePathSource.c_str()); UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource); std::string line; while (getline(inFileSource, line)) { std::set<std::string> terms; vector<string> termVector; boost::split(termVector, line, boost::is_any_of("\t ")); for (size_t i=0; i < termVector.size(); ++i) terms.insert(termVector[i]); // add term set for current document m_vocabDomain.push_back(terms); } inFileSource.close(); } else { // restricted source word vocabulary ifstream inFileSource(m_filePathSource.c_str()); UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource); std::string line; while (getline(inFileSource, line)) { m_vocabSource.insert(line); } inFileSource.close(); /* // restricted target word vocabulary ifstream inFileTarget(filePathTarget.c_str()); if (!inFileTarget) { cerr << "could not open file " << filePathTarget << endl; return false; } while (getline(inFileTarget, line)) { m_vocabTarget.insert(line); } inFileTarget.close();*/ m_unrestricted = false; } }
bool GlobalLexicalModelUnlimited::Load(const std::string &filePathSource, const std::string &filePathTarget) { // restricted source word vocabulary ifstream inFileSource(filePathSource.c_str()); if (!inFileSource) { cerr << "could not open file " << filePathSource << endl; return false; } std::string line; while (getline(inFileSource, line)) { m_vocabSource.insert(line); } inFileSource.close(); // restricted target word vocabulary ifstream inFileTarget(filePathTarget.c_str()); if (!inFileTarget) { cerr << "could not open file " << filePathTarget << endl; return false; } while (getline(inFileTarget, line)) { m_vocabTarget.insert(line); } inFileTarget.close(); m_unrestricted = false; return true; }