void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
    ScoreComponentCollection* accumulator) const
{

  const TargetPhrase& target = hypo.GetCurrTargetPhrase();

  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = target.GetAlignNonTerm().GetNonTermIndexMap();

  // loop over the rule that is being applied
  for (size_t pos = 0; pos < target.GetSize(); ++pos) {
    const Word& word = target.GetWord(pos);

    // for non-terminals, trigger the feature mapping the LHS of the previous hypo to the RHS of this hypo
    if (word.IsNonTerminal()) {
      size_t nonTermInd = nonTermIndexMap[pos];

      const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
      const Word& prevLHS = prevHypo->GetTargetLHS();

      if ( (word != prevLHS) || m_scoreIdentical ) {
        const std::string &name = GetOrSetFeatureName(word, prevLHS);
        accumulator->PlusEquals(this,name,1);
      }
    }
  }
}
예제 #2
0
/**
 * Builds a trellis path directly from a hypothesis.
 *
 * The final node is a newly allocated ChartTrellisNode constructed from
 * \p hypo, the deviation point is set to NULL, and the score breakdown and
 * total score are taken from \p hypo.
 *
 * \param hypo  hypothesis used to create the final node and the scores
 */
ChartTrellisPath::ChartTrellisPath(const ChartHypothesis &hypo)
  : m_finalNode(new ChartTrellisNode(hypo))
  , m_deviationPoint(NULL)
  , m_scoreBreakdown(hypo.GetScoreBreakdown())
  , m_totalScore(hypo.GetTotalScore())
{
}
예제 #3
0
/**
 * Tidies up the arc list attached to this hypothesis.
 *
 * Steps:
 *  1. mark this hypothesis as its own winning hypothesis;
 *  2. if there is no arc list, stop;
 *  3. unless a distinct n-best list, MBR, or search-graph output is
 *     requested, keep only the first nBestSize arcs as ordered by
 *     CompareChartHypothesisTotalScore and delete the rest;
 *  4. point every remaining arc's winning hypothesis at this hypothesis.
 */
void ChartHypothesis::CleanupArcList()
{
  // point this hypo's main hypo to itself
  m_winningHypo = this;

  if (!m_arcList) return;

  /* keep only number of arcs we need to create all n-best paths.
   * However, may not be enough if only unique candidates are needed,
   * so we'll keep all of arc list if need distinct n-best list
   */
  AllOptions const& opts = StaticData::Instance().options();
  const size_t nBestSize = opts.nbest.nbest_size;
  const bool distinctNBest = (opts.nbest.only_distinct
                              || opts.mbr.enabled
                              || opts.output.NeedSearchGraph()
                              || !opts.output.SearchGraphHG.empty());

  if (!distinctNBest && m_arcList->size() > nBestSize) {
    // prune arc list only if there too many arcs.
    // When nBestSize is 0 every arc is discarded below, so no partitioning is
    // needed; skipping it also avoids forming the invalid iterator begin() - 1.
    if (nBestSize > 0) {
      NTH_ELEMENT4(m_arcList->begin()
                   , m_arcList->begin() + nBestSize - 1
                   , m_arcList->end()
                   , CompareChartHypothesisTotalScore());
    }

    // delete bad ones
    ChartArcList::iterator iter;
    for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
      ChartHypothesis *arc = *iter;
      delete arc;
    }
    m_arcList->erase(m_arcList->begin() + nBestSize
                     , m_arcList->end());
  }

  // set all arc's main hypo variable to this hypo
  ChartArcList::iterator iter = m_arcList->begin();
  for (; iter != m_arcList->end() ; ++iter) {
    ChartHypothesis *arc = *iter;
    arc->SetWinningHypo(this);
  }

  //cerr << m_arcList->size() << " ";
}
/**
 * Builds the recombination state for \p hypo under the feature \p ff.
 *
 * When the feature type is SameOutput, \p hypo is asked for its output phrase
 * via GetOutputPhrase(m_outputPhrase). Otherwise the address of \p hypo is
 * stored in m_hypo.
 *
 * \param hypo  hypothesis this state is created for
 * \param ff    owning feature; a reference to it is kept in m_ff
 */
ControlRecombinationState::ControlRecombinationState(const ChartHypothesis &hypo, const ControlRecombination &ff)
  :m_ff(ff)
{
  if (ff.GetType() == SameOutput) {
    hypo.GetOutputPhrase(m_outputPhrase);
    // Give m_hypo a defined value so the member is never left indeterminate.
    m_hypo = NULL;
  } else {
    m_hypo = &hypo;
  }
}
예제 #5
0
/**
 * Tidies up the arc list attached to this hypothesis.
 *
 * Steps:
 *  1. mark this hypothesis as its own winning hypothesis;
 *  2. if there is no arc list, stop;
 *  3. unless a distinct n-best list, MBR, or search-graph output is
 *     requested, keep only the first nBestSize arcs as ordered by
 *     CompareChartChartHypothesisTotalScore and release the rest through
 *     ChartHypothesis::Delete;
 *  4. point every remaining arc's winning hypothesis at this hypothesis.
 */
void ChartHypothesis::CleanupArcList()
{
    // point this hypo's main hypo to itself
    m_winningHypo = this;

    if (!m_arcList) return;

    /* keep only number of arcs we need to create all n-best paths.
     * However, may not be enough if only unique candidates are needed,
     * so we'll keep all of arc list if need distinct n-best list
     */
    const StaticData &staticData = StaticData::Instance();
    const size_t nBestSize = staticData.GetNBestSize();
    const bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph();

    if (!distinctNBest && m_arcList->size() > nBestSize) {
        // prune arc list only if there too many arcs.
        // When nBestSize is 0 every arc is discarded below, so no partitioning
        // is needed; skipping it also avoids forming the invalid iterator
        // begin() - 1.
        if (nBestSize > 0) {
            nth_element(m_arcList->begin()
                        , m_arcList->begin() + nBestSize - 1
                        , m_arcList->end()
                        , CompareChartChartHypothesisTotalScore());
        }

        // delete bad ones
        ChartArcList::iterator iter;
        for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
            ChartHypothesis *arc = *iter;
            ChartHypothesis::Delete(arc);
        }
        m_arcList->erase(m_arcList->begin() + nBestSize
                         , m_arcList->end());
    }

    // set all arc's main hypo variable to this hypo
    ChartArcList::iterator iter = m_arcList->begin();
    for (; iter != m_arcList->end() ; ++iter) {
        ChartHypothesis *arc = *iter;
        arc->SetWinningHypo(this);
    }

    //cerr << m_arcList->size() << " ";
}
/**
 * Describes a detour from \p basePath in which \p substitutedNode is swapped
 * for \p replacementHypo.
 *
 * The detour's total score is the base path's total score adjusted by the
 * score difference between the replacement hypothesis and the hypothesis of
 * the node being substituted.
 *
 * \param basePath         path the detour departs from (dereferenced here)
 * \param substitutedNode  node of the base path that is replaced
 * \param replacementHypo  hypothesis taking the place of that node
 */
ChartTrellisDetour::ChartTrellisDetour(
    boost::shared_ptr<const ChartTrellisPath> basePath,
    const ChartTrellisNode &substitutedNode,
    const ChartHypothesis &replacementHypo)
  : m_basePath(basePath)
  , m_substitutedNode(substitutedNode)
  , m_replacementHypo(replacementHypo)
{
  // how much the total changes by using the replacement instead of the original
  const float scoreDelta = replacementHypo.GetTotalScore()
                         - substitutedNode.GetHypothesis().GetTotalScore();

  m_totalScore = basePath->GetTotalScore() + scoreDelta;
}
void SparseHieroReorderingFeature::EvaluateChart(
  const ChartHypothesis&  cur_hypo ,
  ScoreComponentCollection* accumulator) const
{
  // get index map for underlying hypotheses
  //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
  //  cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
  
  //The Huck features. For a rule with source side:
  //   abXcdXef
  //We first have to split into blocks:
  // ab X cd X ef
  //Then we extract features based in the boundary words of the neighbouring blocks
  //For the block pair, we use the right word of the left block, and the left 
  //word of the right block.

  //Need to get blocks, and their alignment. Each block has a word range (on the 
  // on the source), a non-terminal flag, and  a set of alignment points in the target phrase

  //We need to be able to map source word position to target word position, as
  //much as possible (don't need interior of non-terminals). The alignment info
  //objects just give us the mappings between *rule* positions. So if we can 
  //map source word position to source rule position, and target rule position
  //to target word position, then we can map right through.

  size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos();
  size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();

  vector<WordsRange> sourceNTSpans;
  for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) {
    sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
  }
  //put in source order. Is this necessary?
  sort(sourceNTSpans.begin(), sourceNTSpans.end()); 
  //cerr << "Source NTs: ";
  //for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
  //cerr << endl;

  typedef pair<WordsRange,bool> Block;//flag indicates NT
  vector<Block> sourceBlocks; 
  sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
  for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin(); 
      i != sourceNTSpans.end(); ++i) {
    const WordsRange& prevHypoRange = *i;
    Block lastBlock = sourceBlocks.back();
    sourceBlocks.pop_back();
    //split this range into before NT, NT and after NT
    if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) {
      sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false));
    }
    sourceBlocks.push_back(Block(prevHypoRange,true));
    if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) {
      sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false));
    }
  }
  /*
  cerr << "Source Blocks: ";
  for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " "
      << (sourceBlocks[i].second ? "NT" : "T") << " ";
  cerr << endl;
  */

  //Mapping from source word to target rule position
  vector<size_t> sourceWordToTargetRulePos(sourceSize);
  map<size_t,size_t> alignMap;
  alignMap.insert(
    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(),
    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end());
  alignMap.insert(
    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(),
    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end());
  //vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm()
  size_t sourceRulePos = 0;
  //cerr << "SW->RP ";
  for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin(); 
    sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
    for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
      sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
      sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
   //   cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
      if (! sourceBlockIt->second) {
        //T
        ++sourceRulePos;
      }
    }
    if ( sourceBlockIt->second) {
      //NT
      ++sourceRulePos;
    }
  }
  //cerr << endl;

  //Iterate through block pairs
  const Sentence& sentence = 
    dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
  //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
    Block& leftSourceBlock = sourceBlocks[i];
    Block& rightSourceBlock = sourceBlocks[i+1];
    size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos();
    size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos();
    const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos);
    const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
    sourceLeftBoundaryPos -= sourceStart;
    sourceRightBoundaryPos -= sourceStart;
    
    // Need to figure out where these map to on the target.
    size_t targetLeftRulePos = 
      sourceWordToTargetRulePos[sourceLeftBoundaryPos];
    size_t targetRightRulePos = 
      sourceWordToTargetRulePos[sourceRightBoundaryPos];

    bool isMonotone = true;
    if ((sourceLeftBoundaryPos < sourceRightBoundaryPos  &&
          targetLeftRulePos > targetRightRulePos) ||
      ((sourceLeftBoundaryPos > sourceRightBoundaryPos  &&
          targetLeftRulePos < targetRightRulePos)))
    {
      isMonotone = false;
    }
    stringstream buf;
    buf << "h_"; //sparse reordering, Huck
    if (m_type == SourceLeft || m_type == SourceCombined) {
      buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
      buf << "_";
    }
    if (m_type == SourceRight || m_type == SourceCombined) {
    buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
      buf << "_";
    }
    buf << (isMonotone ? "M" : "S");
    accumulator->PlusEquals(this,buf.str(), 1);
  }
//  cerr << endl;
}
예제 #8
0
/**
 * Builds the constrained-decoding state for a chart hypothesis.
 *
 * Passes m_outputPhrase to hypo.GetOutputPhrase(); presumably that call
 * fills it with the hypothesis' output so far (GetOutputPhrase is defined
 * elsewhere; confirm there).
 *
 * \param hypo  hypothesis whose output phrase is requested
 */
ConstrainedDecodingState::ConstrainedDecodingState(const ChartHypothesis &hypo)
{
  hypo.GetOutputPhrase(m_outputPhrase);
}