std::string  LexicalReorderingTableCompact::MakeKey(const Phrase& f,
    const Phrase& e,
    const Phrase& c) const
{
  return MakeKey(Trim(f.GetStringRep(m_FactorsF)),
                 Trim(e.GetStringRep(m_FactorsE)),
                 Trim(c.GetStringRep(m_FactorsC)));
}
std::string
LexicalReorderingTableMemory::MakeKey(const Phrase& f,
                                      const Phrase& e,
                                      const Phrase& c) const
{
  return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
                 auxClearString(e.GetStringRep(m_FactorsE)),
                 auxClearString(c.GetStringRep(m_FactorsC)));
}
/**
 * Builds the key under which candidates for the phrase pair (f, e) are
 * stored in the cache.
 *
 * The source part is included only when m_FactorsF is non-empty and the
 * target part only when m_FactorsE is non-empty. Both parts are passed
 * through auxClearString. A "|||" separator is inserted before the target
 * part only if something has already been written to the key.
 *
 * @param f source phrase
 * @param e target phrase
 * @return the assembled cache key (empty if both factor lists are empty)
 */
std::string LexicalReorderingTableTree::MakeCacheKey(const Phrase& f,
    const Phrase& e) const
{
  std::string key;
  if (!m_FactorsF.empty()) {
    key += auxClearString(f.GetStringRep(m_FactorsF));
  }
  if (!m_FactorsE.empty()) {
    if (!key.empty()) {
      key += "|||";
    }
    key += auxClearString(e.GetStringRep(m_FactorsE));
  }
  return key;
}
Exemplo n.º 4
0
/**
 * Writes the single best translation for the current input to collector.
 *
 * Does nothing when collector is null. Otherwise one line is produced:
 *  - if a best hyperedge exists: the optional hypothesis score followed by
 *    the target yield with its first and last word removed;
 *  - if not: the optional placeholder score "0 " and an empty line.
 * The score prefix is emitted only when GetOutputHypoScore() is set.
 */
void Manager::OutputBest(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  const StaticData &staticData = StaticData::Instance();

  std::ostringstream out;
  FixPrecision(out);

  const SHyperedge *best = GetBestSHyperedge();
  if (best != NULL) {
    if (staticData.GetOutputHypoScore()) {
      out << best->label.score << " ";
    }
    Phrase yield = GetOneBestTargetYield(*best);
    // Strip the leading and trailing words; the check below expects them to
    // be the beginning- and end-of-sentence markers.
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(staticData.GetOutputFactorOrder());
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (staticData.GetOutputHypoScore()) {
      out << "0 ";
    }
  }
  // Both outcomes terminate the record with a newline.
  out << '\n';

  collector->Write(m_source.GetTranslationId(), out.str());
}
std::string  LexicalReorderingTableMemory::MakeKey(const Phrase& f, 
												   const Phrase& e,
												   const Phrase& c) const {
  /*
  std::string key;
  if(!m_FactorsF.empty()){
    key += f.GetStringRep(m_FactorsF);
  }
  if(!m_FactorsE.empty()){
    if(!key.empty()){
      key += " ||| ";
    }
    key += e.GetStringRep(m_FactorsE);
  }
  */
  return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
				 auxClearString(e.GetStringRep(m_FactorsE)),
				 auxClearString(c.GetStringRep(m_FactorsC)));
}
Exemplo n.º 6
0
/**
 * Decodes the target phrases stored for sourcePhrase.
 *
 * With PREnc coding the decoding cache is consulted first. A cached
 * collection is returned as is when it is complete (no bits left) or when
 * this is not a top-level call; an incomplete one is copied into the
 * working vector and decoding resumes bitsLeft bits before the end of the
 * encoded stream.
 *
 * @return the decoded collection, or a default-constructed
 *         TargetPhraseVectorPtr when the source phrase is not found in the
 *         dictionary hash.
 */
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
{
  // Not using TargetPhraseCollection avoiding "new" operator
  // which can introduce heavy locking with multiple threads
  TargetPhraseVectorPtr tpv(new TargetPhraseVector());
  size_t bitsLeft = 0;

  if(m_coding == PREnc) {
    std::pair<TargetPhraseVectorPtr, size_t> cached
      = m_decodingCache.Retrieve(sourcePhrase);

    if(cached.first != NULL) {
      // Cached and complete, or completion is not required at this level
      if(!topLevel || cached.second == 0)
        return cached.first;

      // Cached but incomplete: start from the phrases decoded so far
      bitsLeft = cached.second;
      tpv->resize(cached.first->size());
      std::copy(cached.first->begin(),
                cached.first->end(),
                tpv->begin());
    }
  }

  // Look up the identifier of the source phrase
  std::string sourceText = sourcePhrase.GetStringRep(*m_input);
  const size_t phraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourceText)];

  // The hash yields its own size for keys it does not contain
  if(phraseId == m_phraseDictionary.m_hash.GetSize())
    return TargetPhraseVectorPtr();

  // Fetch the compressed, encoded target phrase collection
  std::string encodedPhraseCollection;
  if(m_phraseDictionary.m_inMemory)
    encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[phraseId];
  else
    encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[phraseId];

  BitWrapper<> encodedBitStream(encodedPhraseCollection);
  if(m_coding == PREnc && bitsLeft)
    encodedBitStream.SeekFromEnd(bitsLeft);

  // Decompress and decode into the working vector
  return DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
}
/**
 * Fills m_Cache with the reordering candidates reachable from source phrase f.
 *
 * If m_FactorsE is empty, the source phrase alone makes up the key: one
 * lookup is done in m_Table and the result is stored under the matching
 * cache key.
 *
 * Otherwise m_Table is walked from its root along the words of f, then over
 * PrefixTreeMap::MagicWord, and everything below that position is traversed
 * depth first. Each non-empty candidate list found on the way is cached under
 * auxClearString(source string) + "|||" + auxClearString(target path).
 *
 * Returns without caching anything when the walk ends on a null or invalid
 * position.
 */
void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f)
{
  if(m_FactorsE.empty()) {
    //f is all of key...
    // NOTE(review): Phrase(ARRAY_SIZE_INCR) presumably stands in for an empty
    // target phrase - confirm against the Phrase constructor.
    Candidates cands;
    m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands);
    m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands;
  } else {
    // Positions created during the traversal are obtained from this pool.
    ObjectPool<PPimp>     pool;
    PPimp* pPos  = m_Table->GetRoot();
    //1) goto subtree for f
    // Stops early as soon as the position becomes null or invalid.
    for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) {
      /* old code
      pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
      */
      pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
    }
    // Step over the magic word that follows the source words in the table.
    if(0 != pPos && pPos->isValid()) {
      pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
    }
    // Source phrase (or the magic word after it) not present: nothing to cache.
    if(0 == pPos || !pPos->isValid()) {
      return;
    }
    //2) explore whole subtree depth first & cache
    // Common prefix of every cache key written below.
    std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";

    // Explicit DFS stack; each State pairs a tree position with the target
    // path accumulated so far. Seeded with the child of pPos and an empty path.
    std::vector<State> stack;
    stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
    Candidates cands;
    while(!stack.empty()) {
      if(stack.back().pos->isValid()) {
        // Label at the current entry, converted to a target-vocabulary word
        // and appended to the path of the current frame.
        LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
        std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
        //cache this
        m_Table->GetCandidates(*stack.back().pos,&cands);
        if(!cands.empty()) {
          m_Cache[cache_key + auxClearString(next_path)] = cands;
        }
        // cands is reused across iterations, so reset it after each lookup.
        cands.clear();
        // Build the child position first, then advance this frame to its next
        // entry so that it resumes there once the child has been popped.
        PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
        ++stack.back().pos->idx;
        stack.push_back(State(next_pos,next_path));
      } else {
        // Current frame has no more valid entries: backtrack.
        stack.pop_back();
      }
    }
  }
}
Exemplo n.º 8
0
//Find top n translations of source, and send them to output
static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary* phraseTable, const std::vector<FactorType> &input,  ostream& out) {
  //get list of target phrases
  Phrase sourcePhrase;
  sourcePhrase.CreateFromString(Input,input,sourcePhraseString,NULL);
  InputPath inputPath(sourcePhrase, NonTerminalSet(), WordsRange(0,sourcePhrase.GetSize()-1),NULL,NULL);
  InputPathList inputPaths;
  inputPaths.push_back(&inputPath);
  phraseTable->GetTargetPhraseCollectionBatch(inputPaths);
  const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable);




  //print phrases
  const std::vector<FactorType>& output = StaticData::Instance().GetOutputFactorOrder();
  if (targetPhrases) {
    //if (targetPhrases->GetSize() > 10) cerr << "src " << sourcePhrase << " tgt count " << targetPhrases->GetSize() << endl;
    for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) {
      const TargetPhrase* targetPhrase = *i;
      out << sourcePhrase.GetStringRep(input);
      out << " ||| ";
      out << targetPhrase->GetStringRep(output);
      out << " ||| ";
      const ScoreComponentCollection scores = targetPhrase->GetScoreBreakdown();
      vector<float> phraseScores = scores.GetScoresForProducer(phraseTable);
      for (size_t j = 0; j < phraseScores.size(); ++j) {
        out << exp(phraseScores[j]) << " ";
      }
      out << "||| ";
      const AlignmentInfo& align = targetPhrase->GetAlignTerm();
      for (AlignmentInfo::const_iterator j = align.begin(); j != align.end(); ++j) {
        out << j->first << "-" << j->second << " ";
      }
      out << endl;
    }
  }

}