void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
{
  ReduceCache();

  OnDiskPt::OnDiskWrapper *obj = new OnDiskPt::OnDiskWrapper();
  obj->BeginLoad(m_filePath);

  UTIL_THROW_IF2(obj->GetMisc("Version") != OnDiskPt::OnDiskWrapper::VERSION_NUM,
                 "On-disk phrase table is version " << obj->GetMisc("Version")
                 << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);

  UTIL_THROW_IF2(obj->GetMisc("NumSourceFactors") != m_input.size(),
                 "On-disk phrase table has " << obj->GetMisc("NumSourceFactors")
                 << " source factors. The ini file specified " << m_input.size()
                 << " source factors");

  UTIL_THROW_IF2(obj->GetMisc("NumTargetFactors") != m_output.size(),
                 "On-disk phrase table has " << obj->GetMisc("NumTargetFactors")
                 << " target factors. The ini file specified " << m_output.size()
                 << " target factors");

  UTIL_THROW_IF2(obj->GetMisc("NumScores") != m_numScoreComponents,
                 "On-disk phrase table has " << obj->GetMisc("NumScores")
                 << " scores. The ini file specified " << m_numScoreComponents
                 << " scores");

  m_implementation.reset(obj);
}
bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction,
                 const FactorList& factors, bool closed)
{
  // load vocab id -> word mapping
  m_words2ids.clear(); // reset mapping
  m_ids2words.clear();

  std::string line, word_str;

  std::istream &ret = getline(*vcbin, line);
  UTIL_THROW_IF2(!ret, "Couldn't read file");

  std::istringstream first(line.c_str());
  uint32_t vcbsize(0);
  first >> vcbsize;

  uint32_t loadedsize = 0;
  while (loadedsize++ < vcbsize && getline(*vcbin, line)) {
    std::istringstream entry(line.c_str());
    entry >> word_str;
    Word word;
    word.CreateFromString(direction, factors, word_str, false); // TODO: set isNonTerminal correctly
    wordID_t id = 0;
    entry >> id; // there may be no id (i.e. the file may just be a word list)
    if (id == 0 && word != GetkOOVWord())
      id = m_ids2words.size() + 1; // assign ids sequentially starting from 1
    UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0,
                   "Vocab entry for id " << id << " already exists");
    m_ids2words[id] = word;
    m_words2ids[word] = id;
  }
  m_closed = closed; // once loaded, fix the vocab?
  std::cerr << "Loaded vocab with " << m_ids2words.size() << " words." << std::endl;
  return true;
}
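// Vocab::Load above expects a header line holding the vocabulary size,
// followed by one entry per line: a word and an optional numeric id.
// Below is a minimal standalone sketch of that layout (illustrative
// only -- readVocab and its types are invented here, not Moses code):

#include <cstdint>
#include <istream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>

std::map<uint32_t, std::string> readVocab(std::istream &in)
{
  std::map<uint32_t, std::string> ids2words;
  std::string line;
  if (!std::getline(in, line))
    throw std::runtime_error("Couldn't read file");

  std::istringstream first(line);
  uint32_t vcbsize = 0;
  first >> vcbsize;

  uint32_t loaded = 0;
  while (loaded++ < vcbsize && std::getline(in, line)) {
    std::istringstream entry(line);
    std::string word;
    uint32_t id = 0;
    entry >> word;
    entry >> id; // may be absent: the file may just be a word list
    if (id == 0)
      id = ids2words.size() + 1; // assign ids sequentially from 1
    ids2words[id] = word;
  }
  return ids2words;
}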
void Model1Vocabulary::Load(const std::string& fileName)
{
  InputFileStream inFile(fileName);
  FactorCollection &factorCollection = FactorCollection::Instance();
  std::string line;

  unsigned i = 0;
  if (getline(inFile, line)) {
    // the first line of MGIZA vocabulary files seems to be special: "1 UNK 0" -- skip it if so
    ++i;
    std::vector<std::string> tokens = Tokenize(line);
    UTIL_THROW_IF2(tokens.size() != 3,
                   "Line " << i << " in " << fileName << " has wrong number of tokens.");
    unsigned id = atoll(tokens[0].c_str());
    if (!((id == 1) && (tokens[1] == "UNK"))) {
      const Factor* factor = factorCollection.AddFactor(tokens[1], false);
      // TODO: can we assume that the vocabulary is known and filter the model on loading?
      bool stored = Store(factor, id);
      UTIL_THROW_IF2(!stored,
                     "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
    }
  }
  while (getline(inFile, line)) {
    ++i;
    std::vector<std::string> tokens = Tokenize(line);
    UTIL_THROW_IF2(tokens.size() != 3,
                   "Line " << i << " in " << fileName << " has wrong number of tokens.");
    unsigned id = atoll(tokens[0].c_str());
    const Factor* factor = factorCollection.AddFactor(tokens[1], false);
    // TODO: can we assume that the vocabulary is known and filter the model on loading?
    bool stored = Store(factor, id);
    UTIL_THROW_IF2(!stored,
                   "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
  }
  inFile.Close();
}
/***
 * print the surface factor only for the given phrase
 */
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
                                const std::vector<FactorType> &outputFactorOrder,
                                bool reportAllFactors) const
{
  UTIL_THROW_IF2(outputFactorOrder.size() == 0, "Cannot be empty phrase");
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    size_t size = phrase.GetSize();
    for (size_t pos = 0; pos < size; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
      UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);
      out << *factor;

      for (size_t i = 1; i < outputFactorOrder.size(); i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        UTIL_THROW_IF2(factor == NULL, "Empty factor " << i << " at position " << pos);
        out << "|" << *factor;
      }
      out << " ";
    }
  }
}
void WordTranslationFeature::Load(AllOptions::ptr const& opts)
{
  m_options = opts;
  // load word lists for the restricted feature set
  if (m_filePathSource.empty()) {
    return;
  }

  FEATUREVERBOSE(1, "Loading word translation word lists from " << m_filePathSource
                 << " and " << m_filePathTarget << std::endl);
  if (m_domainTrigger) {
    // domain trigger terms for each input document
    ifstream inFileSource(m_filePathSource.c_str());
    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

    std::string line;
    while (getline(inFileSource, line)) {
      m_vocabDomain.resize(m_vocabDomain.size() + 1);
      vector<string> termVector;
      boost::split(termVector, line, boost::is_any_of("\t "));
      for (size_t i = 0; i < termVector.size(); ++i)
        m_vocabDomain.back().insert(termVector[i]);
    }
    inFileSource.close();
  } else if (!m_filePathSource.empty() || !m_filePathTarget.empty()) {
    // restricted source word vocabulary
    ifstream inFileSource(m_filePathSource.c_str());
    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

    std::string line;
    while (getline(inFileSource, line)) {
      m_vocabSource.insert(line);
    }
    inFileSource.close();

    // restricted target word vocabulary
    ifstream inFileTarget(m_filePathTarget.c_str());
    UTIL_THROW_IF2(!inFileTarget, "could not open file " << m_filePathTarget);

    while (getline(inFileTarget, line)) {
      m_vocabTarget.insert(line);
    }
    inFileTarget.close();

    m_unrestricted = false;
  }
}
void PhrasePairFeature::Load()
{
  if (m_domainTrigger) {
    // domain trigger terms for each input document
    ifstream inFileSource(m_filePathSource.c_str());
    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

    std::string line;
    while (getline(inFileSource, line)) {
      std::set<std::string> terms;
      vector<string> termVector;
      boost::split(termVector, line, boost::is_any_of("\t "));
      for (size_t i = 0; i < termVector.size(); ++i)
        terms.insert(termVector[i]);

      // add the term set for the current document
      m_vocabDomain.push_back(terms);
    }
    inFileSource.close();
  } else {
    // restricted source word vocabulary
    ifstream inFileSource(m_filePathSource.c_str());
    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

    std::string line;
    while (getline(inFileSource, line)) {
      m_vocabSource.insert(line);
    }
    inFileSource.close();

    /*
    // restricted target word vocabulary
    ifstream inFileTarget(filePathTarget.c_str());
    if (!inFileTarget) {
      cerr << "could not open file " << filePathTarget << endl;
      return false;
    }
    while (getline(inFileTarget, line)) {
      m_vocabTarget.insert(line);
    }
    inFileTarget.close();
    */

    m_unrestricted = false;
  }
}
void ReformatHieroRule(int sourceTarget, string &phrase,
                       map<size_t, pair<size_t, size_t> > &ntAlign)
{
  vector<string> toks;
  Tokenize(toks, phrase, " ");

  for (size_t i = 0; i < toks.size(); ++i) {
    string &tok = toks[i];
    size_t tokLen = tok.size();
    if (tok.substr(0, 1) == "[" && tok.substr(tokLen - 1, 1) == "]") {
      // non-terminal, e.g. "[NP,1]"
      vector<string> split = Tokenize(tok, ",");
      UTIL_THROW_IF2(split.size() != 2,
                     "Incorrectly formatted non-terminal: " << tok);
      tok = "[X]" + split[0] + "]"; // "[NP,1]" becomes "[X][NP]"

      size_t coIndex = Scan<size_t>(split[1]);
      pair<size_t, size_t> &alignPoint = ntAlign[coIndex];
      if (sourceTarget == 0) {
        alignPoint.first = i;
      } else {
        alignPoint.second = i;
      }
    }
  }

  phrase = Join(" ", toks) + " [X]";
}
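// Standalone illustration (invented main(), not Moses code) of the
// non-terminal rewrite performed above: a co-indexed Hiero
// non-terminal such as "[NP,1]" is split at the comma and rewritten
// as "[X][NP]", while its co-index becomes an alignment point.

#include <iostream>
#include <string>

int main()
{
  std::string tok = "[NP,1]";
  std::string::size_type comma = tok.find(',');
  std::string label = tok.substr(0, comma);    // "[NP"
  std::string coIndex = tok.substr(comma + 1); // "1]"
  std::string rewritten = "[X]" + label + "]"; // "[X][NP]"
  std::cout << rewritten << " co-index "
            << coIndex.substr(0, coIndex.size() - 1) << "\n";
  return 0;
}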
bool SoftMatchingFeature::Load(const std::string& filePath)
{
  StaticData &SD = StaticData::InstanceNonConst();

  InputFileStream inStream(filePath);
  std::string line;
  while (getline(inStream, line)) {
    std::vector<std::string> tokens = Tokenize(line);
    UTIL_THROW_IF2(tokens.size() != 2,
                   "Error: wrong format of SoftMatching file: must have two nonterminals per line");

    // no soft matching necessary if LHS and RHS are the same
    if (tokens[0] == tokens[1]) {
      continue;
    }

    Word LHS, RHS;
    LHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[0], true);
    RHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[1], true);

    m_softMatches[RHS[0]->GetId()].push_back(LHS);
    GetOrSetFeatureName(RHS, LHS);
  }
  SD.SetSoftMatches(m_softMatches);

  return true;
}
std::vector<float> ConstrainedDecoding::DefaultWeights() const
{
  UTIL_THROW_IF2(m_numScoreComponents != 1,
                 "ConstrainedDecoding must only have 1 score");
  vector<float> ret(1, 1);
  return ret;
}
std::vector<float> ControlRecombination::DefaultWeights() const
{
  UTIL_THROW_IF2(m_numScoreComponents,
                 "ControlRecombination should not have any scores");
  vector<float> ret(0);
  return ret;
}
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
  AlignmentInfo::CollType alignTerm, alignNonTerm;
  for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t"));
       token; ++token) {
    util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-'));

    char *endptr;
    size_t sourcePos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException,
                  "Error parsing alignment " << *dash);
    ++dash;
    size_t targetPos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException,
                  "Error parsing alignment " << *dash);
    UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token);

    if (GetWord(targetPos).IsNonTerminal()) {
      alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    } else {
      alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    }
  }
  SetAlignTerm(alignTerm);
  SetAlignNonTerm(alignNonTerm);
}
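// The strtoul/endptr idiom above is easy to get wrong, so here it is
// in isolation. A minimal standalone sketch (plain C++; the function
// name parseAlignment is made up for illustration): the end pointer
// must land exactly on the '-' and then on the end of the token,
// otherwise the token contains trailing garbage.

#include <cstdlib>
#include <stdexcept>
#include <string>
#include <utility>

std::pair<size_t, size_t> parseAlignment(const std::string &token)
{
  const char *s = token.c_str();
  char *end;
  size_t src = std::strtoul(s, &end, 10);
  if (end == s || *end != '-')
    throw std::runtime_error("Error parsing alignment " + token);
  const char *t = end + 1;
  size_t tgt = std::strtoul(t, &end, 10);
  if (end == t || *end != '\0')
    throw std::runtime_error("Extra gunk in alignment " + token);
  return std::make_pair(src, tgt);
}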
void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString)
{
  AlignmentInfo::CollType alignTerm, alignNonTerm;

  vector<string> toks = Tokenize(alignString);
  for (size_t i = 0; i < toks.size(); ++i) {
    vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
    UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");

    size_t sourcePos = alignPair[0];
    size_t targetPos = alignPair[1];

    if ((*this)[targetPos].isNonTerminal) {
      alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    } else {
      alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    }
  }
  SetAlignTerm(alignTerm);
  SetAlignNonTerm(alignNonTerm);
}
ChartHypothesis *RuleCubeItem::ReleaseHypothesis()
{
  UTIL_THROW_IF2(m_hypothesis == NULL, "Hypothesis is NULL");
  ChartHypothesis *hypo = m_hypothesis;
  m_hypothesis = NULL;
  return hypo;
}
void ChartParser::CreateInputPaths(const InputType &input)
{
  size_t size = input.GetSize();
  m_inputPathMatrix.resize(size);

  UTIL_THROW_IF2(input.GetType() != SentenceInput && input.GetType() != TreeInputType,
                 "Input must be a sentence or a tree, not a lattice or confusion network");
  for (size_t phraseSize = 1; phraseSize <= size; ++phraseSize) {
    for (size_t startPos = 0; startPos < size - phraseSize + 1; ++startPos) {
      size_t endPos = startPos + phraseSize - 1;
      vector<InputPath*> &vec = m_inputPathMatrix[startPos];

      WordsRange range(startPos, endPos);
      Phrase subphrase(input.GetSubString(range));
      const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);

      InputPath *node;
      if (range.GetNumWordsCovered() == 1) {
        node = new InputPath(subphrase, labels, range, NULL, NULL);
      } else {
        const InputPath &prevNode = GetInputPath(startPos, endPos - 1);
        node = new InputPath(subphrase, labels, range, &prevNode, NULL);
      }
      vec.push_back(node);

      //m_inputPathQueue.push_back(node);
    }
  }
}
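// Minimal sketch (illustrative only) of the span enumeration used in
// CreateInputPaths: visiting spans in order of increasing length
// guarantees that the one-word-shorter prefix [startPos, endPos-1]
// already exists when [startPos, endPos] is created, so each new
// path can link back to it.

#include <cstdio>

int main()
{
  const int size = 4; // assumed sentence length
  for (int len = 1; len <= size; ++len)
    for (int start = 0; start + len <= size; ++start)
      std::printf("[%d,%d]\n", start, start + len - 1);
  return 0;
}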
const PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(long translationId) const
{
  std::map<long, PhraseDictionaryNodeMemory>::const_iterator iter = m_collection.find(translationId);
  UTIL_THROW_IF2(iter == m_collection.end(),
                 "Couldn't find root node for input: " << translationId);
  return iter->second;
}
std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
  const Phrase &sourcePhrase, const string &outDir) const
{
  std::vector<TargetPhrase*> ret;

  string outPath = outDir + "/out.txt";
  ifstream outStream(outPath.c_str());

  string line;
  while (getline(outStream, line)) {
    vector<string> toks;
    Tokenize(toks, line, "\t");
    UTIL_THROW_IF2(toks.size() != 2,
                   "Error in transliteration output file. Expecting word\tscore");

    TargetPhrase *tp = new TargetPhrase();
    Word &word = tp->AddWord();
    word.CreateFromString(Output, m_output, toks[0], false);

    float score = Scan<float>(toks[1]);
    tp->GetScoreBreakdown().PlusEquals(this, score);

    // score of all other ff when this rule is being loaded
    tp->Evaluate(sourcePhrase, GetFeaturesToApply());

    ret.push_back(tp);
  }
  outStream.close();

  return ret;
}
void
LexicalReorderingTableCompact::
Load(std::string filePath)
{
  std::FILE* pFile = std::fopen(filePath.c_str(), "r");
  UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");

  //if(m_inMemory)
  m_hash.Load(pFile);
  //else
  //m_hash.LoadIndex(pFile);

  size_t read = 0;
  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
  read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, pFile);

  if (m_multipleScoreTrees) {
    m_scoreTrees.resize(m_numScoreComponent);
    for (size_t i = 0; i < m_numScoreComponent; i++)
      m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
  } else {
    m_scoreTrees.resize(1);
    m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
  }

  if (m_inMemory)
    m_scoresMemory.load(pFile, false);
  else
    m_scoresMapped.load(pFile, true);
}
const Word &InputPath::GetLastWord() const
{
  size_t len = m_phrase.GetSize();
  UTIL_THROW_IF2(len == 0, "Input path phrase cannot be empty");
  const Word &ret = m_phrase.GetWord(len - 1);
  return ret;
}
void Manager::OutputBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  std::ostringstream out;
  FixPrecision(out);
  const SHyperedge *best = GetBestSHyperedge();
  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << "0 ";
    }
    out << '\n';
  } else {
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << best->label.score << " ";
    }
    Phrase yield = GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize() - 1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
    out << '\n';
  }
  collector->Write(m_source.GetTranslationId(), out.str());
}
Scores
LexicalReorderingTableTree::
auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
  if (m_FactorsC.empty()) {
    UTIL_THROW_IF2(cands.size() > 1,
                   "Multiple candidates found, but no context factors are used");
    return (cands.size() == 1) ? cands[0].GetScore(0) : Scores();
  } else {
    std::vector<std::string> cvec;
    for (size_t i = 0; i < context.GetSize(); ++i)
      cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));

    IPhrase c = m_Table->ConvertPhrase(cvec, TargetVocId);
    IPhrase sub_c;
    IPhrase::iterator start = c.begin();
    for (size_t j = 0; j <= context.GetSize(); ++j, ++start) {
      sub_c.assign(start, c.end());
      for (size_t cand = 0; cand < cands.size(); ++cand) {
        IPhrase p = cands[cand].GetPhrase(0);
        if (p == sub_c)
          return cands[cand].GetScore(0);
      }
    }
    return Scores();
  }
}
Scores
LexicalReorderingTableTree::
GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
{
  if ((!m_FactorsF.empty() && 0 == f.GetSize())
      || (!m_FactorsE.empty() && 0 == e.GetSize())) {
    // NOTE: no check for c, as c might be empty, e.g. at the start of a sentence
    // not a proper key
    // phi: commented out, since e may be empty (drop-unknown)
    //std::cerr << "Not a proper key!\n";
    return Scores();
  }

  CacheType::iterator i;
  if (m_UseCache) {
    std::pair<CacheType::iterator, bool> r;
    r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e), Candidates()));
    if (!r.second)
      return auxFindScoreForContext((r.first)->second, c);
    i = r.first;
  } else if ((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end()) {
    // although we might not be caching now, the cache might be non-empty!
    return auxFindScoreForContext(i->second, c);
  }

  // not in cache => go to file...
  Candidates cands;
  m_Table->GetCandidates(MakeTableKey(f,e), &cands);
  if (cands.empty())
    return Scores();
  if (m_UseCache)
    i->second = cands;

  if (m_FactorsC.empty()) {
    UTIL_THROW_IF2(1 != cands.size(),
                   "Expected exactly one candidate when no context factors are used");
    return cands[0].GetScore(0);
  } else
    return auxFindScoreForContext(cands, c);
}
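// GetScore relies on std::map::insert returning an (iterator, bool)
// pair: one traversal both reserves a cache slot and reports whether
// the key was already present. A minimal standalone sketch of the
// idiom (computeScores and the key/value types are invented here):

#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<float> Scores;

static std::map<std::string, Scores> cache;

// Hypothetical expensive lookup, standing in for the file access above.
static Scores computeScores(const std::string &key)
{
  return Scores(1, static_cast<float>(key.size()));
}

const Scores &getCached(const std::string &key)
{
  // Insert an empty placeholder; if the key already exists, insert()
  // leaves the map untouched and returns second == false, so the
  // cached value can be reused without a second lookup.
  std::pair<std::map<std::string, Scores>::iterator, bool> r =
    cache.insert(std::make_pair(key, Scores()));
  if (r.second)
    r.first->second = computeScores(key); // freshly inserted: fill it in
  return r.first->second;
}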
ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionaryMemory &ruleTable)
  : ChartRuleLookupManagerCYKPlus(parser, cellColl)
  , m_ruleTable(ruleTable)
{
  UTIL_THROW_IF2(m_dottedRuleColls.size() != 0,
                 "Dotted rule collection not correctly initialized");

  size_t sourceSize = parser.GetSize();
  m_dottedRuleColls.resize(sourceSize);

  const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode();

  for (size_t ind = 0; ind < m_dottedRuleColls.size(); ++ind) {
#ifdef USE_BOOST_POOL
    DottedRuleInMemory *initDottedRule = m_dottedRulePool.malloc();
    new (initDottedRule) DottedRuleInMemory(rootNode);
#else
    DottedRuleInMemory *initDottedRule = new DottedRuleInMemory(rootNode);
#endif
    DottedRuleColl *dottedRuleColl = new DottedRuleColl(sourceSize - ind + 1);
    dottedRuleColl->Add(0, initDottedRule); // init rule. stores the top node in the tree
    m_dottedRuleColls[ind] = dottedRuleColl;
  }
}
bool SoftMatchingFeature::Load(const std::string& filePath)
{
  StaticData &staticData = StaticData::InstanceNonConst();

  InputFileStream inStream(filePath);
  std::string line;
  while (getline(inStream, line)) {
    std::vector<std::string> tokens = Tokenize(line);
    UTIL_THROW_IF2(tokens.size() != 2,
                   "Error: wrong format of SoftMatching file: must have two nonterminals per line");

    // no soft matching necessary if LHS and RHS are the same
    if (tokens[0] == tokens[1]) {
      continue;
    }

    Word LHS, RHS;
    LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true);
    RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true);

    m_soft_matches[LHS].insert(RHS);
    m_soft_matches_reverse[RHS].insert(LHS);
  }
  staticData.Set_Soft_Matches(Get_Soft_Matches());
  staticData.Set_Soft_Matches_Reverse(Get_Soft_Matches_Reverse());

  return true;
}
InputPath &ChartParser::GetInputPath(size_t startPos, size_t endPos)
{
  size_t offset = endPos - startPos;
  UTIL_THROW_IF2(offset >= m_inputPathMatrix[startPos].size(),
                 "Out of bounds: " << offset);
  return *m_inputPathMatrix[startPos][offset];
}
ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
  const ChartParser &parser,
  const ChartCellCollectionBase &cellColl,
  const PhraseDictionaryOnDisk &dictionary,
  OnDiskPt::OnDiskWrapper &dbWrapper,
  const std::vector<FactorType> &inputFactorsVec,
  const std::vector<FactorType> &outputFactorsVec)
  : ChartRuleLookupManagerCYKPlus(parser, cellColl)
  , m_dictionary(dictionary)
  , m_dbWrapper(dbWrapper)
  , m_inputFactorsVec(inputFactorsVec)
  , m_outputFactorsVec(outputFactorsVec)
{
  UTIL_THROW_IF2(m_expandableDottedRuleListVec.size() != 0,
                 "Dotted rule collection not correctly initialized");

  size_t sourceSize = parser.GetSize();
  m_expandableDottedRuleListVec.resize(sourceSize);

  for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
    DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());

    DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
    processedStack->Add(0, initDottedRule); // init rule. stores the top node in the tree
    m_expandableDottedRuleListVec[ind] = processedStack;
  }
}
RuleTrieCYKPlus::Node *RuleTrieCYKPlus::Node::GetOrCreateNonTerminalChild(const Word &targetNonTerm)
{
  UTIL_THROW_IF2(!targetNonTerm.IsNonTerminal(),
                 "Not a non-terminal: " << targetNonTerm);
  return &m_nonTermMap[targetNonTerm];
}
const OnDiskPt::OnDiskWrapper &PhraseDictionaryOnDisk::GetImplementation() const
{
  OnDiskPt::OnDiskWrapper *dict = m_implementation.get();
  UTIL_THROW_IF2(dict == NULL, "Dictionary object not yet created for this thread");
  return *dict;
}
const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool isNonTerminal)
{
  FactorFriend to_ins;
  to_ins.in.m_string = factorString;
  to_ins.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId;
  Set &set = (isNonTerminal) ? m_setNonTerminal : m_set;
  // If we're threaded, hope a read-only lock is sufficient.
#ifdef WITH_THREADS
  {
    // read-lock scope
    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
    Set::const_iterator i = set.find(to_ins);
    if (i != set.end()) return &i->in;
  }
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif // WITH_THREADS
  std::pair<Set::iterator, bool> ret(set.insert(to_ins));
  if (ret.second) {
    ret.first->in.m_string.set(
      memcpy(m_string_backing.Allocate(factorString.size()), factorString.data(), factorString.size()),
      factorString.size());
    if (isNonTerminal) {
      m_factorIdNonTerminal++;
      UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals,
                     "Number of non-terminals exceeds the maximum size reserved. "
                     "Adjust the parameter moses_MaxNumNonterminals, then recompile");
    } else {
      m_factorId++;
    }
  }
  return &ret.first->in;
}
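// The locking pattern in AddFactor -- try a shared (read) lock first,
// and only take the exclusive lock when the factor is genuinely new --
// is worth seeing in isolation. A minimal C++17 sketch of the same
// read-mostly idiom (std::shared_mutex stands in for the
// boost::shared_mutex used above; the global set is invented):

#include <mutex>
#include <set>
#include <shared_mutex>
#include <string>

static std::set<std::string> g_set;
static std::shared_mutex g_mutex;

// Returns a pointer to the stored element, inserting it if absent.
// Most calls take only the shared lock, so readers run concurrently.
const std::string *findOrInsert(const std::string &s)
{
  {
    // Fast path: shared lock; many threads may search at once.
    std::shared_lock<std::shared_mutex> read_lock(g_mutex);
    std::set<std::string>::const_iterator i = g_set.find(s);
    if (i != g_set.end())
      return &*i;
  }
  // Slow path: exclusive lock. insert() re-checks the key, so racing
  // with another writer inserting the same string is harmless.
  std::unique_lock<std::shared_mutex> write_lock(g_mutex);
  return &*g_set.insert(s).first;
}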
PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(
  PhraseDictionaryNodeMemory &rootNode,
  const Phrase &source,
  const TargetPhrase &target,
  const Word *sourceLHS)
{
  const size_t size = source.GetSize();

  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();

  PhraseDictionaryNodeMemory *currNode = &rootNode;
  for (size_t pos = 0; pos < size; ++pos) {
    const Word& word = source.GetWord(pos);

    if (word.IsNonTerminal()) {
      // indexed by source label 1st
      const Word &sourceNonTerm = word;

      UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
                     "No alignment for non-term at position " << pos);
      UTIL_THROW_IF2(iterAlign->first != pos,
                     "Alignment info incorrect at position " << pos);

      size_t targetNonTermInd = iterAlign->second;
      ++iterAlign;
      const Word &targetNonTerm = target.GetWord(targetNonTermInd);

#if defined(UNLABELLED_SOURCE)
      currNode = currNode->GetOrCreateNonTerminalChild(targetNonTerm);
#else
      currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
#endif
    } else {
      currNode = currNode->GetOrCreateChild(word);
    }

    UTIL_THROW_IF2(currNode == NULL, "Node not found at position " << pos);
  }

  // finally, the source LHS
  //currNode = currNode->GetOrCreateChild(sourceLHS);

  return *currNode;
}
void TranslationOptionCollectionLattice::CreateTranslationOptions()
{
  GetTargetPhraseCollectionBatch();

  VERBOSE(2, "Translation Option Collection\n " << *this << endl);

  const vector<DecodeGraph*> &decodeGraphs = StaticData::Instance().GetDecodeGraphs();
  UTIL_THROW_IF2(decodeGraphs.size() != 1, "Multiple decoder graphs not supported yet");
  const DecodeGraph &decodeGraph = *decodeGraphs[0];
  UTIL_THROW_IF2(decodeGraph.GetSize() != 1, "Factored decomposition not supported yet");

  const DecodeStep &decodeStep = **decodeGraph.begin();
  const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();

  for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
    const InputPath &path = *m_inputPathQueue[i];
    const TargetPhraseCollection *tpColl = path.GetTargetPhrases(phraseDictionary);
    const WordsRange &range = path.GetWordsRange();

    if (tpColl) {
      TargetPhraseCollection::const_iterator iter;
      for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
        const TargetPhrase &tp = **iter;
        TranslationOption *transOpt = new TranslationOption(range, tp);
        transOpt->SetInputPath(path);
        transOpt->Evaluate(m_source);
        Add(transOpt);
      }
    } else if (path.GetPhrase().GetSize() == 1) {
      // unknown word processing
      ProcessOneUnknownWord(path, path.GetWordsRange().GetEndPos(), 1, path.GetInputScore());
    }
  }

  // Prune
  Prune();

  Sort();

  // future score matrix
  CalcFutureScore();

  // Cached lex reordering costs
  CacheLexReordering();
}