コード例 #1
0
// Build the target-side tree of derivation d.
//
// The tree fragment of the rule applied at d's edge is read from the "Tree"
// property of its target phrase.  The trees of the sub-derivations are
// generated recursively, collected in target order (one per non-terminal of
// the target phrase), and passed to Combine() on the fragment.
//
// Throws via UTIL_THROW2 when the target phrase has no "Tree" property.
TreePointer KBestExtractor::GetOutputTree(const Derivation &d)
{
  const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation);

  const PhraseProperty *property = phrase.GetProperty("Tree");
  if (!property) {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

  const std::string *treeString = property->GetValueString();
  TreePointer fragment (boost::make_shared<InternalTree>(*treeString));

  // Walk the target phrase left to right; every non-terminal contributes the
  // tree of the sub-derivation it is aligned to.
  std::vector<TreePointer> subtrees;
  for (size_t i = 0; i < phrase.GetSize(); ++i) {
    if (!phrase.GetWord(i).IsNonTerminal()) {
      continue;
    }
    const size_t subIndex = phrase.GetAlignNonTerm().GetNonTermIndexMap()[i];
    subtrees.push_back(GetOutputTree(*d.subderivations[subIndex]));
  }

  fragment->Combine(subtrees);
  return fragment;
}
コード例 #2
0
// Build the tree for the current chart hypothesis and return it wrapped in a
// newly allocated TreeState.
//
// The rule's tree fragment is constructed from the "Tree" property string of
// the current target phrase.  For every non-terminal of the target phrase
// (in target order) the tree stored in the corresponding previous
// hypothesis' state is collected, and the collection is passed to Combine()
// on the fragment.
//
// featureID indexes this feature's state in the previous hypotheses.
// accumulator is not used by this function.
//
// NOTE(review): `found` is never consulted after GetProperty(), so a missing
// property presumably leaves the string empty and the tree is built from
// that -- confirm this is intended.
FFState* SyntaxConstraintFeature::EvaluateChart(const ChartHypothesis& cur_hypo
                                   , int featureID /* used to index the state in the previous hypotheses */
                                   , ScoreComponentCollection* accumulator) const
{
  std::string treeString;
  bool found = false;
  cur_hypo.GetCurrTargetPhrase().GetProperty("Tree", treeString, found);

  TreePointer fragment (new InternalTree(treeString));

  // Collect the trees of the antecedent hypotheses, left to right on the
  // target side.
  std::vector<TreePointer> childTrees;
  for (size_t i = 0; i < cur_hypo.GetCurrTargetPhrase().GetSize(); ++i) {
    if (!cur_hypo.GetCurrTargetPhrase().GetWord(i).IsNonTerminal()) {
      continue;
    }
    const size_t prevIndex = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[i];
    const ChartHypothesis *antecedent = cur_hypo.GetPrevHypo(prevIndex);
    // NOTE(review): the cast result is dereferenced without a null check.
    const TreeState *state = dynamic_cast<const TreeState*>(antecedent->GetFFState(featureID));
    childTrees.push_back(state->GetTree());
  }

  fragment->Combine(childTrees);

  return new TreeState(fragment);
}
コード例 #3
0
// Recursively count head-word chains in `tree`.
//
// history[i] holds the chain of i+1 head words that ends at the closest
// ancestor with a head (empty if there is no such chain); it must provide at
// least kHwcmOrder-1 entries.  hwc[k] maps each chain of k+1 head words
// (joined by single spaces) to its count and is updated in place.
//
// Nodes for which getHead() returns an empty string are transparent: their
// children are visited with the unchanged history.
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc) {

  if (!(tree->GetLength() > 0)) {
    return;
  }

  const string head = getHead(tree);
  const std::vector<TreePointer> &children = tree->GetChildren();

  if (head.empty()) {
    // No head at this node: pass the history through untouched.
    for (std::vector<TreePointer>::const_iterator child = children.begin(); child != children.end(); ++child) {
      extractHeadWordChain(*child, history, hwc);
    }
    return;
  }

  // The history handed to the children starts with this node's head.
  vector<string> extended(kHwcmOrder);
  extended[0] = head;
  ++hwc[0][head];

  for (size_t i = 0; i < kHwcmOrder-1; ++i) {
    const string &prefix = history[i];
    if (prefix.empty()) {
      continue;
    }
    const string chain = prefix + " " + head;
    ++hwc[i+1][chain];
    // Chains of maximal order are counted but not propagated further.
    if (i+2 < kHwcmOrder) {
      extended[i+1] = chain;
    }
  }

  for (std::vector<TreePointer>::const_iterator child = children.begin(); child != children.end(); ++child) {
    extractHeadWordChain(*child, extended, hwc);
  }
}
コード例 #4
0
// Return the head word of `tree`, or an empty string if none is found.
//
// Assumption (only true for dependency parses): each constituent has a
// preterminal child whose terminal is the head.  A child counts as a
// preterminal when its length is 1 and its first child is a terminal.  If
// several children qualify, the first one wins.
string HwcmScorer::getHead(TreePointer tree) {
  const std::vector<TreePointer> &children = tree->GetChildren();

  for (size_t i = 0; i < children.size(); ++i) {
    const TreePointer &candidate = children[i];
    if (candidate->GetLength() != 1) {
      continue;
    }

    const TreePointer leaf = candidate->GetChildren()[0];
    if (leaf->IsTerminal()) {
      return leaf->GetLabel();
    }
  }

  return "";
}
コード例 #5
0
void TBranchCollection::addBranchToTree(std::string branchLabel, std::string varType, TreePointer tree, bool isSingleValuePerEvent) {

    if ( isSingleValuePerEvent ) {
        // Make variable associated with this branch
        varMap_[branchLabel] = VarPointer(new float( -99 ) );
        // Make branch
        tree->Branch( branchLabel.c_str(), varMap_[branchLabel].get(), ( branchLabel+"/"+varType).c_str() );
    }
    else {
        // Make variable associated with this branch
        varVectorMap_[branchLabel] = VarVectorPointer(new std::vector<float>() );
        // Make branch
        tree->Branch( branchLabel.c_str(), "std::vector<float>", varVectorMap_[branchLabel].get() );    
    }
}
コード例 #6
0
// Define NT labels (ints) that are mapped from strings for quicker comparison.
//
// Walks the tree rooted at `root` depth-first.  For every non-terminal node
// whose string label is present in m_labelset->string_to_label, the mapped
// NTLabel is stored on the node via SetNTLabel().  Terminal nodes are left
// untouched and end the recursion.
//
// m_labelset is dereferenced unconditionally; callers must only invoke this
// when it is set.
void TreeStructureFeature::AddNTLabels(TreePointer root) const {
      if (root->IsTerminal()) {
          return;
      }

      // Bind by const reference: the label is only needed for the lookup, so
      // there is no reason to copy the string at every node.
      const std::string &label = root->GetLabel();

      std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
      if (it != m_labelset->string_to_label.end()) {
        root->SetNTLabel(it->second);
      }

      // Likewise iterate over the child list without copying it; the
      // recursive calls in this function do not modify this node's child list.
      const std::vector<TreePointer> &children = root->GetChildren();
      for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
          AddNTLabels(*it2);
      }
}
コード例 #7
0
// Build the tree for the current chart hypothesis, fire the sparse features
// produced by the syntactic constraints, and return the tree wrapped in a
// newly allocated TreeState.
//
// The rule's tree fragment comes from the "Tree" property of the current
// target phrase.  If m_labelset is set, the fragment is annotated with
// AddNTLabels().  The trees of the antecedent hypotheses are collected in
// target order; if m_constraints is set, SyntacticRules() is evaluated on the
// fragment and those trees BEFORE they are combined, and each feature name
// it returns is added to `accumulator` with value 1.
//
// featureID indexes this feature's state in the previous hypotheses.
// Throws via UTIL_THROW2 when the target phrase has no "Tree" property.
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
                                   , int featureID /* used to index the state in the previous hypotheses */
                                   , ScoreComponentCollection* accumulator) const
{
  const PhraseProperty *property = cur_hypo.GetCurrTargetPhrase().GetProperty("Tree");
  if (!property) {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

  const std::string *treeString = property->GetValueString();
  TreePointer fragment (new InternalTree(*treeString));

  if (m_labelset) {
      AddNTLabels(fragment);
  }

  // Collect the trees of the antecedent hypotheses, left to right on the
  // target side.
  std::vector<TreePointer> childTrees;
  for (size_t i = 0; i < cur_hypo.GetCurrTargetPhrase().GetSize(); ++i) {
    if (!cur_hypo.GetCurrTargetPhrase().GetWord(i).IsNonTerminal()) {
      continue;
    }
    const size_t prevIndex = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[i];
    const ChartHypothesis *antecedent = cur_hypo.GetPrevHypo(prevIndex);
    // NOTE(review): the cast result is dereferenced without a null check.
    const TreeState *state = dynamic_cast<const TreeState*>(antecedent->GetFFState(featureID));
    childTrees.push_back(state->GetTree());
  }

  // The constraints are evaluated on the still-separate fragment and
  // sub-trees, so this has to happen before Combine().
  std::vector<std::string> firedFeatures;
  if (m_constraints) {
    firedFeatures = m_constraints->SyntacticRules(fragment, childTrees);
  }
  fragment->Combine(childTrees);

  // Sparse scores: one count per fired feature.
  for (size_t i = 0; i < firedFeatures.size(); ++i) {
    const std::string &featureName = firedFeatures[i];
    accumulator->PlusEquals(this, featureName, 1);
  }

  return new TreeState(fragment);
}
コード例 #8
0
// Build the tree for the current chart hypothesis, let the syntactic
// constraints score it, and return the tree wrapped in a newly allocated
// TreeState.
//
// The rule's tree fragment comes from the "Tree" property of the current
// target phrase.  The trees of the antecedent hypotheses are collected in
// target order; if m_constraints is set, SyntacticRules() is evaluated on the
// fragment and those trees (writing to `accumulator`) BEFORE they are
// combined.
//
// When m_binarized is set and the combined tree covers a full sentence, the
// tree is unbinarized.  A tree counts as a full sentence when its last child
// is labelled m_send, or is labelled m_send_nt and has a last child labelled
// m_send.
//
// featureID indexes this feature's state in the previous hypotheses.
// Throws via UTIL_THROW2 when the target phrase has no "Tree" property.
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
    , int featureID /* used to index the state in the previous hypotheses */
    , ScoreComponentCollection* accumulator) const
{
  if (const PhraseProperty *property = cur_hypo.GetCurrTargetPhrase().GetProperty("Tree")) {
    const std::string *tree = property->GetValueString();
    TreePointer mytree (boost::make_shared<InternalTree>(*tree));

    //get subtrees (in target order)
    std::vector<TreePointer> previous_trees;
    for (size_t pos = 0; pos < cur_hypo.GetCurrTargetPhrase().GetSize(); ++pos) {
      const Word &word = cur_hypo.GetCurrTargetPhrase().GetWord(pos);
      if (word.IsNonTerminal()) {
        size_t nonTermInd = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
        const ChartHypothesis *prevHypo = cur_hypo.GetPrevHypo(nonTermInd);
        // NOTE(review): the cast result is dereferenced without a null check.
        const TreeState* prev = dynamic_cast<const TreeState*>(prevHypo->GetFFState(featureID));
        const TreePointer prev_tree = prev->GetTree();
        previous_trees.push_back(prev_tree);
      }
    }

    // The constraints are evaluated on the still-separate fragment and
    // sub-trees, so this has to happen before Combine().
    if (m_constraints) {
      m_constraints->SyntacticRules(mytree, previous_trees, this, accumulator);
    }
    mytree->Combine(previous_trees);

    // The full-sentence test is only needed when unbinarization is enabled.
    // Calling back() on an empty child list is undefined behaviour, so a
    // node without children is treated as "not a full sentence".
    if (m_binarized) {
      bool full_sentence = false;
      const std::vector<TreePointer> &top_children = mytree->GetChildren();
      if (!top_children.empty()) {
        const TreePointer &last_child = top_children.back();
        if (last_child->GetLabel() == m_send) {
          full_sentence = true;
        } else if (last_child->GetLabel() == m_send_nt) {
          const std::vector<TreePointer> &grand_children = last_child->GetChildren();
          full_sentence = !grand_children.empty() && grand_children.back()->GetLabel() == m_send;
        }
      }
      if (full_sentence) {
        mytree->Unbinarize();
      }
    }

    return new TreeState(mytree);
  } else {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

}
コード例 #9
0
// Format an n-best list and hand it to `collector` as a single string.
//
// One line is produced per derivation in nBestList:
//
//   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
//       [ ||| <src>-<tgt> <src>-<tgt> ... ]   (if word alignment is enabled)
//       [ ||| <tree> ]                        (if n-best trees are enabled)
//
// The first and last word of each derivation's yield (<s> and </s>) are
// removed before printing; a yield with fewer than two words raises an error
// via UTIL_THROW_IF2.
//
// collector must not be null.
void Manager::OutputNBestList(OutputCollector *collector,
                              const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  // Check the precondition before the first dereference below (the check
  // used to sit at the very end, after collector had already been used).
  assert(collector);

  const StaticData &staticData = StaticData::Instance();

  const std::vector<FactorType> &outputFactorOrder =
    staticData.GetOutputFactorOrder();

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  const bool printNBestTrees = staticData.PrintNBestTrees();

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
      p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
        "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments (each pair is followed by a space)
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
          ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // Plain newline: flushing an in-memory string stream per line is
    // pointless.
    out << '\n';
  }

  collector->Write(translationId, out.str());
}