// Generate the target tree of the derivation d. TreePointer KBestExtractor::GetOutputTree(const Derivation &d) { const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation); if (const PhraseProperty *property = phrase.GetProperty("Tree")) { const std::string *tree = property->GetValueString(); TreePointer mytree (boost::make_shared<InternalTree>(*tree)); //get subtrees (in target order) std::vector<TreePointer> previous_trees; for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { const Word &word = phrase.GetWord(pos); if (word.IsNonTerminal()) { size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos]; const Derivation &subderivation = *d.subderivations[nonTermInd]; const TreePointer prev_tree = GetOutputTree(subderivation); previous_trees.push_back(prev_tree); } } mytree->Combine(previous_trees); return mytree; } else { UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found"); } }
// Builds the feature state for a chart hypothesis: the tree fragment of the
// applied rule (read from the "Tree" property of the target phrase) combined
// with the trees stored in the states of the previous hypotheses.
//
// cur_hypo    - hypothesis being evaluated
// featureID   - index of this feature's state in the previous hypotheses
// accumulator - not touched by this function
//
// Returns a heap-allocated TreeState holding the combined tree.
FFState* SyntaxConstraintFeature::EvaluateChart(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();

  // NOTE(review): `found` is filled in by GetProperty() but never inspected;
  // when the property is missing the tree is built from an empty string.
  std::string treeString;
  bool found = false;
  target.GetProperty("Tree", treeString, found);
  TreePointer ruleTree(new InternalTree(treeString));

  // Gather the trees of the previous hypotheses (in target order).
  std::vector<TreePointer> childTrees;
  for (size_t i = 0; i < target.GetSize(); ++i) {
    if (!target.GetWord(i).IsNonTerminal()) {
      continue;
    }
    const size_t nonTermInd = target.GetAlignNonTerm().GetNonTermIndexMap()[i];
    const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
    // NOTE(review): the cast result is dereferenced without a null check.
    const TreeState *prevState = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
    childTrees.push_back(prevState->GetTree());
  }

  ruleTree->Combine(childTrees);
  return new TreeState(ruleTree);
}
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) { if (tree->GetLength() > 0) { string head = getHead(tree); if (head.empty()) { for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) { extractHeadWordChain(*it, history, hwc); } } else { vector<string> new_history(kHwcmOrder); new_history[0] = head; hwc[0][head]++; for (size_t hist_idx = 0; hist_idx < kHwcmOrder-1; hist_idx++) { if (!history[hist_idx].empty()) { string chain = history[hist_idx] + " " + head; hwc[hist_idx+1][chain]++; if (hist_idx+2 < kHwcmOrder) { new_history[hist_idx+1] = chain; } } } for (std::vector<TreePointer>::const_iterator it = tree->GetChildren().begin(); it != tree->GetChildren().end(); ++it) { extractHeadWordChain(*it, new_history, hwc); } } } }
// Returns the head word of `tree`, or an empty string if none is found.
//
// assumption (only true for dependency parse): each constituent has a
// preterminal label, and the corresponding terminal is the head.
// If the constituent has multiple preterminals, the first one is picked;
// if it has no preterminals, an empty string is returned.
string HwcmScorer::getHead(TreePointer tree)
{
  const std::vector<TreePointer> & children = tree->GetChildren();
  for (size_t i = 0; i < children.size(); ++i) {
    const TreePointer & candidate = children[i];

    // A preterminal has exactly one child, and that child is a terminal.
    if (candidate->GetLength() != 1) {
      continue;
    }
    const TreePointer leaf = candidate->GetChildren()[0];
    if (leaf->IsTerminal()) {
      return leaf->GetLabel();
    }
  }
  return "";
}
void TBranchCollection::addBranchToTree(std::string branchLabel, std::string varType, TreePointer tree, bool isSingleValuePerEvent) { if ( isSingleValuePerEvent ) { // Make variable associated with this branch varMap_[branchLabel] = VarPointer(new float( -99 ) ); // Make branch tree->Branch( branchLabel.c_str(), varMap_[branchLabel].get(), ( branchLabel+"/"+varType).c_str() ); } else { // Make variable associated with this branch varVectorMap_[branchLabel] = VarVectorPointer(new std::vector<float>() ); // Make branch tree->Branch( branchLabel.c_str(), "std::vector<float>", varVectorMap_[branchLabel].get() ); } }
// define NT labels (ints) that are mapped from strings for quicker comparison. void TreeStructureFeature::AddNTLabels(TreePointer root) const { std::string label = root->GetLabel(); if (root->IsTerminal()) { return; } std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label); if (it != m_labelset->string_to_label.end()) { root->SetNTLabel(it->second); } std::vector<TreePointer> children = root->GetChildren(); for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) { AddNTLabels(*it2); } }
// Builds the tree state for a chart hypothesis and fires sparse features.
//
// The tree fragment of the applied rule is read from the "Tree" property of
// the target phrase. If m_labelset is set, its nodes are annotated through
// AddNTLabels(). The trees stored in the states of the previous hypotheses
// are collected in target order. If m_constraints is set, SyntacticRules()
// is evaluated on the fragment and those trees before they are combined,
// and each feature name it returns is added to `accumulator` with value 1.
//
// cur_hypo    - hypothesis being evaluated
// featureID   - index of this feature's state in the previous hypotheses
// accumulator - receives the sparse feature scores
//
// Returns a heap-allocated TreeState holding the combined tree.
// Raises an exception through UTIL_THROW2 when the "Tree" property is
// missing, when it has no string value, or when a previous hypothesis does
// not hold a TreeState for this feature.
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();

  const PhraseProperty *property = targetPhrase.GetProperty("Tree");
  if (!property) {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

  const std::string *tree = property->GetValueString();
  if (!tree) {
    // Fail with a diagnostic instead of dereferencing a null pointer.
    UTIL_THROW2("Error: TreeStructureFeature active, but the Tree property has no string value");
  }

  TreePointer mytree (new InternalTree(*tree));

  if (m_labelset) {
    AddNTLabels(mytree);
  }

  //get subtrees (in target order)
  std::vector<TreePointer> previous_trees;
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    if (!targetPhrase.GetWord(pos).IsNonTerminal()) {
      continue;
    }
    const size_t nonTermInd = targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
    const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
    const TreeState* prev = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
    if (!prev) {
      // dynamic_cast yields null when the stored state is absent or of
      // another type; report that rather than crash on the dereference.
      UTIL_THROW2("Error: TreeStructureFeature found no TreeState in a previous hypothesis");
    }
    previous_trees.push_back(prev->GetTree());
  }

  // The rules are evaluated before Combine() is called on the fragment.
  std::vector<std::string> sparse_features;
  if (m_constraints) {
    sparse_features = m_constraints->SyntacticRules(mytree, previous_trees);
  }
  mytree->Combine(previous_trees);

  //sparse scores
  for (std::vector<std::string>::const_iterator feature = sparse_features.begin(); feature != sparse_features.end(); ++feature) {
    accumulator->PlusEquals(this, *feature, 1);
  }

  return new TreeState(mytree);
}
// Builds the tree state for a chart hypothesis.
//
// The tree fragment of the applied rule is read from the "Tree" property of
// the target phrase, and the trees stored in the states of the previous
// hypotheses are collected in target order. If m_constraints is set,
// SyntacticRules() is called on the fragment and those trees (it receives
// this feature and `accumulator`) before they are combined. When
// m_binarized is set and the combined tree ends in the m_send label, either
// directly or as the last child of a final m_send_nt node, the tree is
// unbinarized.
//
// cur_hypo    - hypothesis being evaluated
// featureID   - index of this feature's state in the previous hypotheses
// accumulator - passed on to SyntacticRules()
//
// Returns a heap-allocated TreeState holding the combined tree.
// Raises an exception through UTIL_THROW2 when the "Tree" property is
// missing, when it has no string value, or when a previous hypothesis does
// not hold a TreeState for this feature.
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();

  const PhraseProperty *property = targetPhrase.GetProperty("Tree");
  if (!property) {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

  const std::string *tree = property->GetValueString();
  if (!tree) {
    // Fail with a diagnostic instead of dereferencing a null pointer.
    UTIL_THROW2("Error: TreeStructureFeature active, but the Tree property has no string value");
  }

  TreePointer mytree (boost::make_shared<InternalTree>(*tree));

  //get subtrees (in target order)
  std::vector<TreePointer> previous_trees;
  for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
    if (!targetPhrase.GetWord(pos).IsNonTerminal()) {
      continue;
    }
    const size_t nonTermInd = targetPhrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
    const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
    const TreeState* prev = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
    if (!prev) {
      // dynamic_cast yields null when the stored state is absent or of
      // another type; report that rather than crash on the dereference.
      UTIL_THROW2("Error: TreeStructureFeature found no TreeState in a previous hypothesis");
    }
    previous_trees.push_back(prev->GetTree());
  }

  // The rules are evaluated before Combine() is called on the fragment.
  if (m_constraints) {
    m_constraints->SyntacticRules(mytree, previous_trees, this, accumulator);
  }
  mytree->Combine(previous_trees);

  // Does the tree end with the sentence-end label? back() on an empty
  // vector is undefined behaviour, so both levels are checked for emptiness;
  // a tree without children is simply not a full sentence.
  bool full_sentence = false;
  const std::vector<TreePointer> &children = mytree->GetChildren();
  if (!children.empty()) {
    const TreePointer &last = children.back();
    if (last->GetLabel() == m_send) {
      full_sentence = true;
    } else if (last->GetLabel() == m_send_nt) {
      const std::vector<TreePointer> &grandchildren = last->GetChildren();
      full_sentence = !grandchildren.empty() && grandchildren.back()->GetLabel() == m_send;
    }
  }

  if (m_binarized && full_sentence) {
    mytree->Unbinarize();
  }

  return new TreeState(mytree);
}
void Manager::OutputNBestList(OutputCollector *collector, const KBestExtractor::KBestVec &nBestList, long translationId) const { const StaticData &staticData = StaticData::Instance(); const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder(); std::ostringstream out; if (collector->OutputIsCout()) { // Set precision only if we're writing the n-best list to cout. This is to // preserve existing behaviour, but should probably be done either way. FixPrecision(out); } bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest(); bool PrintNBestTrees = staticData.PrintNBestTrees(); for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); p != nBestList.end(); ++p) { const KBestExtractor::Derivation &derivation = **p; // get the derivation's target-side yield Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation); // delete <s> and </s> UTIL_THROW_IF2(outputPhrase.GetSize() < 2, "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); // print the translation ID, surface factors, and scores out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; OutputAllFeatureScores(derivation.scoreBreakdown, out); out << " ||| " << derivation.score; // optionally, print word alignments if (includeWordAlignment) { out << " ||| "; Alignments align; OutputAlignmentNBest(align, derivation, 0); for (Alignments::const_iterator q = align.begin(); q != align.end(); ++q) { out << q->first << "-" << q->second << " "; } } // optionally, print tree if (PrintNBestTrees) { TreePointer tree = KBestExtractor::GetOutputTree(derivation); out << " ||| " << tree->GetString(); } out << std::endl; } assert(collector); collector->Write(translationId, out.str()); }