Example #1
size_t Phrase::Find(const Phrase &sought, int maxUnknown) const
{
  if (GetSize() < sought.GetSize()) {
    // sought phrase too big
    return NOT_FOUND;
  }

  size_t maxStartPos = GetSize() - sought.GetSize();
  for (size_t startThisPos = 0; startThisPos <= maxStartPos; ++startThisPos) {
    size_t thisPos = startThisPos;
    int currUnknowns = 0;
    size_t soughtPos;
    for (soughtPos = 0; soughtPos < sought.GetSize(); ++soughtPos) {
      const Word &soughtWord = sought.GetWord(soughtPos);
      const Word &thisWord = GetWord(thisPos);

      if (soughtWord == thisWord) {
        ++thisPos;
      } else if (soughtWord.IsOOV() && (maxUnknown < 0 || currUnknowns < maxUnknown)) {
        // the output has an OOV word. Allow a certain number of OOVs
        ++currUnknowns;
        ++thisPos;
      } else {
        break;
      }
    }

    if (soughtPos == sought.GetSize()) {
      return startThisPos;
    }
  }

  return NOT_FOUND;
}
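To see the matching logic in isolation: a minimal standalone sketch of the same windowed scan over plain string tokens, with a hypothetical "<unk>" token standing in for IsOOV(); as above, a negative maxUnknown means the OOV budget is unlimited.

#include <cstddef>
#include <string>
#include <vector>

static const size_t NOT_FOUND = static_cast<size_t>(-1);

// Same windowed scan as Phrase::Find, over plain strings: slide a window
// across the haystack and restart on the first mismatch, unless the
// mismatched sought token is "<unk>" and the OOV budget is not yet spent.
size_t FindWithOOV(const std::vector<std::string> &haystack,
                   const std::vector<std::string> &sought, int maxUnknown)
{
  if (haystack.size() < sought.size()) return NOT_FOUND;
  size_t maxStart = haystack.size() - sought.size();
  for (size_t start = 0; start <= maxStart; ++start) {
    int unknowns = 0;
    size_t i = 0;
    for (; i < sought.size(); ++i) {
      if (sought[i] == haystack[start + i]) continue;
      if (sought[i] == "<unk>" && (maxUnknown < 0 || unknowns < maxUnknown)) {
        ++unknowns;
        continue;
      }
      break;
    }
    if (i == sought.size()) return start;  // whole window matched
  }
  return NOT_FOUND;
}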
IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
    const Phrase& e) const
{
  IPhrase key;
  std::vector<std::string> keyPart;
  if(!m_FactorsF.empty()) {
    for(size_t i = 0; i < f.GetSize(); ++i) {
      /* old code
        std::string s = f.GetWord(i).ToString(m_FactorsF);
        keyPart.push_back(s.substr(0,s.size()-1));
        */
      keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
    keyPart.clear();
  }
  if(!m_FactorsE.empty()) {
    if(!key.empty()) {
      key.push_back(PrefixTreeMap::MagicWord);
    }
    for(size_t i = 0; i < e.GetSize(); ++i) {
      /* old code
        std::string s = e.GetWord(i).ToString(m_FactorsE);
        keyPart.push_back(s.substr(0,s.size()-1));
        */
      keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
    }
    auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
    //keyPart.clear();
  }
  return key;
}
Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
  if(m_FactorsC.empty()) {
    CHECK(cands.size() <= 1);
    return (1 == cands.size())?(cands[0].GetScore(0)):(Scores());
  } else {
    std::vector<std::string> cvec;
    for(size_t i = 0; i < context.GetSize(); ++i) {
      /* old code
        std::string s = context.GetWord(i).ToString(m_FactorsC);
      cvec.push_back(s.substr(0,s.size()-1));
        */
      cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
    }
    IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
    IPhrase sub_c;
    IPhrase::iterator start = c.begin();
    for(size_t j = 0; j <= context.GetSize(); ++j, ++start) {
      sub_c.assign(start, c.end());
      for(size_t cand = 0; cand < cands.size(); ++cand) {
        if(cands[cand].GetPhrase(0) == sub_c) {
          return cands[cand].GetScore(0);
        }
      }
    }
    return Scores();
  }
}
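The outer loop above backs off by dropping the leftmost context word on each iteration (ending with the empty context) and returns the scores of the first candidate whose stored phrase equals the remaining suffix. A standalone sketch of that suffix back-off, assuming candidates are plain (token-sequence, scores) pairs:

#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::string> Tokens;

// Return the value of the first candidate whose key matches a suffix of
// `context`, trying the longest suffix first, then backing off to shorter
// ones and finally to the empty context.
std::vector<float> FindScoreForContext(
    const std::vector<std::pair<Tokens, std::vector<float> > > &cands,
    const Tokens &context)
{
  for (size_t drop = 0; drop <= context.size(); ++drop) {
    Tokens suffix(context.begin() + drop, context.end());
    for (size_t c = 0; c < cands.size(); ++c)
      if (cands[c].first == suffix) return cands[c].second;
  }
  return std::vector<float>();  // no match at any back-off level
}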
int Phrase::Compare(const Phrase &compare) const
{
  int ret = 0;
  for (size_t pos = 0; pos < GetSize(); ++pos) {
    if (pos >= compare.GetSize()) {
      // this phrase is longer than the other; sort it first
      ret = -1;
      break;
    }

    const Word &thisWord = GetWord(pos);
    const Word &compareWord = compare.GetWord(pos);
    int wordRet = thisWord.Compare(compareWord);
    if (wordRet != 0) {
      ret = wordRet;
      break;
    }
  }

  if (ret == 0) {
    CHECK(compare.GetSize() >= GetSize());
    ret = (compare.GetSize() > GetSize()) ? 1 : 0;
  }
  return ret;
}
std::vector<float>  LexicalReorderingTableMemory::GetScore(const Phrase& f,
    const Phrase& e,
    const Phrase& c)
{
  // Rather complicated: because this method is const we can't use operator[],
  // since [] may insert new entries into the std::map.
  // Also have to be careful with the words range: if c is empty,
  // c.GetSize()-1 underflows and becomes a very large value.
  TableType::const_iterator r;
  std::string key;
  if(0 == c.GetSize()) {
    key = MakeKey(f,e,c);
    r = m_Table.find(key);
    if(m_Table.end() != r) {
      return r->second;
    }
  } else {
    // try contexts from largest to smallest
    for(size_t i = 0; i <= c.GetSize(); ++i) {
      Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
      key = MakeKey(f,e,sub_c);
      r = m_Table.find(key);
      if(m_Table.end() != r) {
        return r->second;
      }
    }
  }
  return Scores();
}
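As the comment notes, std::map::operator[] is unavailable through a const reference because it inserts a default-constructed value for a missing key; a const-safe lookup must go through find(). A minimal sketch of the pattern:

#include <map>
#include <string>
#include <vector>

typedef std::vector<float> Scores;

// operator[] would insert a default Scores() for a missing key and is
// therefore not callable on a const map; find() only looks.
Scores Lookup(const std::map<std::string, Scores> &table,
              const std::string &key)
{
  std::map<std::string, Scores>::const_iterator r = table.find(key);
  return (r != table.end()) ? r->second : Scores();
}

// The words range must be guarded the same way: size() - 1 underflows when
// the container is empty, so branch on the empty case before building a
// [i, size-1] range.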
Example #6
void Manager::OutputBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }
  std::ostringstream out;
  FixPrecision(out);
  const SHyperedge *best = GetBestSHyperedge();
  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << "0 ";
    }
    out << '\n';
  } else {
    if (StaticData::Instance().GetOutputHypoScore()) {
      out << best->label.score << " ";
    }
    Phrase yield = GetOneBestTargetYield(*best);
    // remove the sentence-boundary markers (first & last words)
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
    out << '\n';
  }
  collector->Write(m_source.GetTranslationId(), out.str());
}
Scores
LexicalReorderingTableTree::
auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
  if(m_FactorsC.empty()) {
    UTIL_THROW_IF2(cands.size() > 1, "Error");
    return (cands.size() == 1) ? cands[0].GetScore(0) : Scores();
  } else {
    std::vector<std::string> cvec;
    for(size_t i = 0; i < context.GetSize(); ++i)
      cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));

    IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
    IPhrase sub_c;
    IPhrase::iterator start = c.begin();
    for(size_t j = 0; j <= context.GetSize(); ++j, ++start) {
      sub_c.assign(start, c.end());
      for(size_t cand = 0; cand < cands.size(); ++cand) {
        if(cands[cand].GetPhrase(0) == sub_c)
          return cands[cand].GetScore(0);
      }
    }
    return Scores();
  }
}
Scores
LexicalReorderingTableTree::
GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
{
  if((!m_FactorsF.empty() && 0 == f.GetSize())
      || (!m_FactorsE.empty() && 0 == e.GetSize())) {
    //NOTE: no check for c as c might be empty, e.g. start of sentence
    //not a proper key
    // phi: commented out, since e may be empty (drop-unknown)
    //std::cerr << "Not a proper key!\n";
    return Scores();
  }

  CacheType::iterator i;

  if(m_UseCache) {
    std::pair<CacheType::iterator, bool> r;
    r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
    if(!r.second) return auxFindScoreForContext((r.first)->second, c);
    i = r.first;
  } else if((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end())
    // even if we are not caching now, the cache might be non-empty!
    return auxFindScoreForContext(i->second, c);

  // not in cache => go to file...
  Candidates cands;
  m_Table->GetCandidates(MakeTableKey(f,e), &cands);
  if(cands.empty()) return Scores();
  if(m_UseCache) i->second = cands;

  if(m_FactorsC.empty()) {
    UTIL_THROW_IF2(1 != cands.size(), "Error");
    return cands[0].GetScore(0);
  } else return auxFindScoreForContext(cands, c);
}
Example #9
/**
 * Calculate real sentence Bleu score of complete translation
 */
float BleuScoreFeature::CalculateBleu(Phrase translation) const
{
    if (translation.GetSize() == 0)
        return 0.0;

    Phrase normTranslation = translation;
    // remove start and end symbol for chart decoding
    if (m_cur_source_length != m_cur_norm_source_length) {
        WordsRange range(1, translation.GetSize()-2);
        normTranslation = translation.GetSubString(range);
    }

    // get ngram matches for translation
    BleuScoreState* state = new BleuScoreState();
    GetClippedNgramMatchesAndCounts(normTranslation,
                                    m_cur_ref_ngrams,
                                    state->m_ngram_counts,
                                    state->m_ngram_matches,
                                    0); // number of words in previous states

    // set state variables
    state->m_words = normTranslation;
    state->m_source_length = m_cur_norm_source_length;
    state->m_target_length = normTranslation.GetSize();
    state->m_scaled_ref_length = m_cur_ref_length;

    // Calculate bleu.
    return CalculateBleu(state);
}
std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase& f,
    const Phrase& e,
    const Phrase& c)
{
  std::string key;
  Scores scores;
  
  if(0 == c.GetSize())
    key = MakeKey(f, e, c);
  else
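    // back off over sub-contexts; note that each iteration overwrites key,
    // so only the key from the final iteration reaches the hash lookup below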
    for(size_t i = 0; i <= c.GetSize(); ++i)
    {
      Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
      key = MakeKey(f,e,sub_c);
    }
    
  size_t index = m_hash[key];
  if(m_hash.GetSize() != index)
  {
    std::string scoresString;
    if(m_inMemory)
      scoresString = m_scoresMemory[index];
    else
      scoresString = m_scoresMapped[index];
      
    BitWrapper<> bitStream(scoresString);
    for(size_t i = 0; i < m_numScoreComponent; i++)
      scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));

    return scores;
  }

  return Scores();
}
Example #11
Phrase::Phrase(const Phrase &copy)
  :m_words(copy.GetSize())
{
  for (size_t pos = 0; pos < copy.GetSize(); ++pos) {
    const Word &oldWord = copy.GetWord(pos);
    Word *newWord = new Word(oldWord);
    m_words[pos] = newWord;
  }
}
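Because this copy constructor deep-copies heap-allocated Words, the class must pair it with a destructor and copy assignment that agree on ownership (the rule of three). A self-contained sketch of that pattern, using a hypothetical WordList in place of Phrase:

#include <cstddef>
#include <vector>

struct Word { int factor; };

// Deep-copying container of owned Word*: the copy constructor, destructor
// and copy assignment must all agree on who owns the pointers.
class WordList {
public:
  WordList() {}
  WordList(const WordList &copy) : m_words(copy.m_words.size()) {
    for (size_t pos = 0; pos < copy.m_words.size(); ++pos)
      m_words[pos] = new Word(*copy.m_words[pos]);  // deep copy each word
  }
  WordList &operator=(WordList other) {  // copy-and-swap: `other` is a copy
    m_words.swap(other.m_words);         // old words freed by other's dtor
    return *this;
  }
  ~WordList() {
    for (size_t pos = 0; pos < m_words.size(); ++pos) delete m_words[pos];
  }
private:
  std::vector<Word*> m_words;
};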
Example #12
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
                             float &ngramScore, std::size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  size_t position;
  if (m_bos == phrase[0][m_factorType]) {
    scorer.BeginSentence();
    position = 1;
  } else {
    position = 0;
  }

  size_t ngramBoundary = m_ngram->Order() - 1;

  size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
  for (; position < end_loop; ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }
  float before_boundary = fullScore + scorer.Finish();
  for (; position < phrase.GetSize(); ++position) {
    const SCFG::Word &word = phrase[position];
    if (word.isNonTerminal) {
      fullScore += scorer.Finish();
      scorer.Reset();
    } else {
      lm::WordIndex index = TranslateID(word);
      scorer.Terminal(index);
      if (!index) ++oovCount;
    }
  }
  fullScore += scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);
}
Example #13
/**
 * Pre-calculate the n-gram probabilities for the words in the specified phrase.
 *
 * Note that when this method is called, we do not have access to the context
 * in which this phrase will eventually be applied.
 *
 * In other words, we know what words are in this phrase,
 * but we do not know what words will come before or after this phrase.
 *
 * The parameters fullScore, ngramScore, and oovCount are all output parameters.
 *
 * The value stored in oovCount is the number of words in the phrase
 * that are not in the language model's vocabulary.
 *
 * The sum of the ngram scores for all words in this phrase is stored in fullScore.
 *
 * The value stored in ngramScore is similar, but only full-order ngram scores are included.
 *
 * This is best shown by example:
 *
 * Assume a trigram backward language model and a phrase "a b c d e f g"
 *
 * fullScore would represent the sum of the logprob scores for the following values:
 *
 * p(g)
 * p(f | g)
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 *
 * ngramScore would represent the sum of the logprob scores for the following values:
 *
 * p(e | g f)
 * p(d | f e)
 * p(c | e d)
 * p(b | d c)
 * p(a | c b)
 */
template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  if (!phrase.GetSize()) return;

  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  UTIL_THROW_IF(
    (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)),
    util::Exception,
    "BackwardLanguageModel does not currently support rules that include <s>"
  );

  float before_boundary = 0.0f;

  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  int boundary = ( lastWord < ngramBoundary ) ? 0 : ngramBoundary;

  int position;
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF(
      (word.IsNonTerminal()),
      util::Exception,
      "BackwardLanguageModel does not currently support rules that include non-terminals "
    );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;

    if (position==boundary) {
      before_boundary = scorer.Finish();
    }

  }

  fullScore = scorer.Finish();

  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);

}
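To make the boundary concrete: a standalone sketch that replays the trigram example from the comment above and reports which conditional probabilities carry a full two-word following context (and so count toward ngramScore):

#include <cstdio>

// For each word in "a b c d e f g", print its backward-LM conditional
// probability and whether the (order-1)-word following context is complete.
int main() {
  const char *w[] = {"a", "b", "c", "d", "e", "f", "g"};
  const int size = 7, order = 3;
  for (int pos = size - 1; pos >= 0; --pos) {
    int contextLen = size - 1 - pos;            // following words available
    if (contextLen > order - 1) contextLen = order - 1;
    std::printf("p(%s", w[pos]);
    for (int c = 0; c < contextLen; ++c)
      std::printf("%s%s", c ? " " : " | ", w[pos + 1 + c]);
    std::printf(") -> %s\n",
                contextLen == order - 1 ? "fullScore and ngramScore"
                                        : "fullScore only");
  }
  return 0;
}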
Example #14
// score ngrams around the overlap of two previously scored phrases
void BleuScoreFeature::GetNgramMatchCounts_overlap(Phrase& phrase,
        const NGrams& ref_ngram_counts,
        std::vector< size_t >& ret_counts,
        std::vector< size_t >& ret_matches,
        size_t overlap_index) const
{
    NGrams::const_iterator ref_ngram_counts_iter;
    size_t ngram_start_idx, ngram_end_idx;

    // Chiang et al (2008) use unclipped counts of ngram matches
    for (size_t end_idx = overlap_index; end_idx < phrase.GetSize(); end_idx++) {
        if (end_idx >= (overlap_index+BleuScoreState::bleu_order-1)) break;
        for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
            if (order > end_idx) break;

            ngram_end_idx = end_idx;
            ngram_start_idx = end_idx - order;
            if (ngram_start_idx >= overlap_index) continue; // only score ngrams that span the overlap point

            Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
            ret_counts[order]++;

            ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
            if (ref_ngram_counts_iter != ref_ngram_counts.end())
                ret_matches[order]++;
        }
    }
}
Example #15
/***
 * print surface factor only for the given phrase
 */
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
                                const std::vector<FactorType> &outputFactorOrder,
                                bool reportAllFactors) const
{
  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
                 "Output factor order cannot be empty");
  if (reportAllFactors) {
    out << phrase;
  } else {
    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
      UTIL_THROW_IF2(factor == NULL,
                     "Empty factor 0 at position " << pos);
      out << *factor;

      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        UTIL_THROW_IF2(factor == NULL,
                       "Empty factor " << i << " at position " << pos);

        out << "|" << *factor;
      }
      out << " ";
    }
  }
}
Example #16
void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  // get length of source and target phrase
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();

  // create feature names
  stringstream nameSource;
  nameSource << "s" << sourceLength;

  stringstream nameTarget;
  nameTarget << "t" << targetLength;

  stringstream nameBoth;
  nameBoth << sourceLength << "," << targetLength;

  // increase feature counts
  scoreBreakdown.PlusEquals(this,nameSource.str(),1);
  scoreBreakdown.PlusEquals(this,nameTarget.str(),1);
  scoreBreakdown.PlusEquals(this,nameBoth.str(),1);

  //cerr << nameSource.str() << " " << nameTarget.str() << " " << nameBoth.str() << endl;
}
Example #17
    void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo, bool reportAllFactors = false) {
        if (hypo->GetPrevHypo() != NULL) {
            outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo, reportAllFactors);
            Phrase p = hypo->GetCurrTargetPhrase();
            if(reportAllFactors) {
                out << p << " ";
            } else {
                for (size_t pos = 0 ; pos < p.GetSize() ; pos++) {
                    const Factor *factor = p.GetFactor(pos, 0);
                    out << *factor << " ";
                }
            }

            if (addAlignmentInfo) {
                /**
                 * Add the alignment info to the array. This is in target order and consists of
                 *       (tgt-start, src-start, src-end) triples.
                 **/
                map<string, xmlrpc_c::value> phraseAlignInfo;
                phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos());
                phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
                phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
                alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo));
            }
        }
    }
Example #18
/*
 * Given a phrase (current translation) calculate its ngram counts and
 * its ngram matches against the ngrams in the reference translation
 */
void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase,
        const NGrams& ref_ngram_counts,
        std::vector< size_t >& ret_counts,
        std::vector< size_t >& ret_matches,
        size_t skip_first) const
{
    NGrams::const_iterator ref_ngram_counts_iter;
    size_t ngram_start_idx, ngram_end_idx;

    // Chiang et al (2008) use unclipped counts of ngram matches
    for (size_t end_idx = skip_first; end_idx < phrase.GetSize(); end_idx++) {
        for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
            if (order > end_idx) break;

            ngram_end_idx = end_idx;
            ngram_start_idx = end_idx - order;

            Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
            ret_counts[order]++;

            ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
            if (ref_ngram_counts_iter != ref_ngram_counts.end())
                ret_matches[order]++;
        }
    }
}
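The nested loops enumerate every ngram of order 1..bleu_order ending at each position, increment the per-order count, and increment the per-order match count whenever the ngram occurs in the reference map; matches are unclipped, i.e. not capped by the reference count. A standalone sketch over plain token vectors (the caller is assumed to have sized counts and matches to maxOrder):

#include <map>
#include <string>
#include <vector>

typedef std::vector<std::string> Tokens;

// Unclipped ngram counting: every hypothesis ngram found in the reference
// map counts as a match, regardless of how often the reference contains it.
void CountNgramMatches(const Tokens &hyp,
                       const std::map<Tokens, size_t> &refCounts,
                       size_t maxOrder,
                       std::vector<size_t> &counts,   // per order, 0-based
                       std::vector<size_t> &matches)  // per order, 0-based
{
  for (size_t end = 0; end < hyp.size(); ++end) {
    for (size_t order = 0; order < maxOrder && order <= end; ++order) {
      Tokens ngram(hyp.begin() + (end - order), hyp.begin() + end + 1);
      ++counts[order];
      if (refCounts.find(ngram) != refCounts.end()) ++matches[order];
    }
  }
}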
Example #19
/** TODO: this method isn't used anywhere. Remove? */
void ChartHypothesis::GetOutputPhrase(size_t leftRightMost, size_t numWords, Phrase &outPhrase) const
{
  const TargetPhrase &tp = GetCurrTargetPhrase();

  size_t targetSize = tp.GetSize();
  for (size_t i = 0; i < targetSize; ++i) {
    size_t pos;
    if (leftRightMost == 1) {
      pos = i;
    } else if (leftRightMost == 2) {
      pos = targetSize - i - 1;
    } else {
      abort();
    }

    const Word &word = tp.GetWord(pos);

    if (word.IsNonTerminal()) {
      // non-term. fill out with prev hypo
      size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
      const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
      prevHypo->GetOutputPhrase(outPhrase);
    } else {
      outPhrase.AddWord(word);
    }

    if (outPhrase.GetSize() >= numWords) {
      return;
    }
  }
}
Example #20
void Phrase::Append(const Phrase &endPhrase)
{

  for (size_t i = 0; i < endPhrase.GetSize(); i++) {
    AddWord(endPhrase.GetWord(i));
  }
}
void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source,
    const TargetPhrase& targetPhrase,
    ScoreComponentCollection* accumulator,
    const AlignmentInfo &alignmentInfo) const
{
  // handle special case: unknown words (they have no word alignment)
  size_t targetLength = targetPhrase.GetSize();
  size_t sourceLength = source.GetSize();
  if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;

  // flag aligned words
  bool aligned[16];
  CHECK(sourceLength < 16);
  for(size_t i=0; i<sourceLength; i++)
    aligned[i] = false;
  for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++)
    aligned[ alignmentPoint->first ] = true;

  // process unaligned source words
  for(size_t i=0; i<sourceLength; i++) {
    if (!aligned[i]) {
      const Word &w = source.GetWord(i);
      if (!w.IsNonTerminal()) {
        const StringPiece word = w.GetFactor(m_factorType)->GetString();
        if (word != "<s>" && word != "</s>") {
          if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
            accumulator->PlusEquals(this, StringPiece("OTHER"),1);
          } else {
            accumulator->PlusEquals(this,word,1);
          }
        }
      }
    }
  }
}
Example #22
void RuleScope::EvaluateInIsolation(const Phrase &source
    , const TargetPhrase &targetPhrase
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection &estimatedFutureScore) const
{
  // adjacent non-terms count as 1 ambiguity, rather than 2 as in rule scope
  // source can't be empty, right?
  float score = 0;

  int count = 0;
  for (size_t i = 0; i < source.GetSize(); ++i) {
    const Word &word = source.GetWord(i);
    bool ambiguous = IsAmbiguous(word, m_sourceSyntax);
    if (ambiguous) {
      ++count;
    } else {
      if (count > 0) {
        score += count;
      }
      count = -1;
    }
  }

  // 1st & last always adjacent to ambiguity
  ++count;
  if (count > 0) {
    score += count;
  }

  scoreBreakdown.PlusEquals(this, score);
}
Example #23
void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec)
{
  CHECK(GetSize() == copy.GetSize());
  for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
    for (std::vector<FactorType>::const_iterator i = factorVec.begin();
         i != factorVec.end(); ++i) {
      SetFactor(currPos, *i, copy.GetFactor(currPos, *i));
    }
}
Example #24
void Phrase::MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec)
{
  UTIL_THROW_IF2(GetSize() != copy.GetSize(), "Both phrases need to be the same size to merge");
  for (size_t currPos = 0 ; currPos < GetSize() ; currPos++)
    for (std::vector<FactorType>::const_iterator i = factorVec.begin();
         i != factorVec.end(); ++i) {
      SetFactor(currPos, *i, copy.GetFactor(currPos, *i));
    }
}
Phrase TrellisPath::GetSurfacePhrase() const
{
  const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
  Phrase targetPhrase = GetTargetPhrase();
  Phrase ret(targetPhrase.GetSize());

  for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
    Word &newWord = ret.AddWord();
    for (size_t i = 0 ; i < outputFactor.size() ; i++) {
      FactorType factorType = outputFactor[i];
      const Factor *factor = targetPhrase.GetFactor(pos, factorType);
      CHECK(factor);
      newWord[factorType] = factor;
    }
  }

  return ret;
}
Example #26
void OpSequenceModel::Evaluate(const Phrase &source
                                , const TargetPhrase &targetPhrase
                                , ScoreComponentCollection &scoreBreakdown
                                , ScoreComponentCollection &estimatedFutureScore) const
{

  osmHypothesis obj;
  obj.setState(OSM->NullContextState());
  WordsBitmap myBitmap(source.GetSize());
  vector <string> mySourcePhrase;
  vector <string> myTargetPhrase;
  vector<float> scores(5);
  vector <int> alignments;
  int startIndex = 0;
  int endIndex = source.GetSize();

  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
  AlignmentInfo::const_iterator iter;


  for (iter = align.begin(); iter != align.end(); ++iter) {
    alignments.push_back(iter->first);
    alignments.push_back(iter->second);
  }

  for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
    if (targetPhrase.GetWord(i).IsOOV())
      myTargetPhrase.push_back("_TRANS_SLF_");
    else
      myTargetPhrase.push_back(targetPhrase.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  for (size_t i = 0; i < source.GetSize(); i++) {
    mySourcePhrase.push_back(source.GetWord(i).GetFactor(0)->GetString().as_string());
  }

  obj.setPhrases(mySourcePhrase , myTargetPhrase);
  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
  obj.computeOSMFeature(startIndex,myBitmap);
  obj.calculateOSMProb(*OSM);
  obj.populateScores(scores);
  estimatedFutureScore.PlusEquals(this, scores);

}
Example #27
Phrase TrellisPath::GetSurfacePhrase() const
{
  std::vector<FactorType> const& oFactor = manager().options()->output.factor_order;
  Phrase targetPhrase = GetTargetPhrase();
  Phrase ret(targetPhrase.GetSize());

  for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
    Word &newWord = ret.AddWord();
    for (size_t i = 0 ; i < oFactor.size() ; i++) {
      FactorType factorType = oFactor[i];
      const Factor *factor = targetPhrase.GetFactor(pos, factorType);
      UTIL_THROW_IF2(factor == NULL,
                     "No factor " << factorType << " at position " << pos);
      newWord[factorType] = factor;
    }
  }

  return ret;
}
Example #28
bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
{
  if (inputPhrase.GetSize() != GetSize()) {
    return false;
  }
  for (size_t currPos = 0 ; currPos < GetSize() ; currPos++) {
    if (GetFactor(currPos, factorType) != inputPhrase.GetFactor(currPos, factorType))
      return false;
  }
  return true;
}
Example #29
void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection& score, const std::vector<FactorType>& featuresToAdd)
{
  assert(phrase.GetSize() == m_targetPhrase.GetSize());
  if (featuresToAdd.size() == 1) {
    m_targetPhrase.MergeFactors(phrase, featuresToAdd[0]);
  } else if (featuresToAdd.empty()) {
    /* features already there, just update score */
  } else {
    m_targetPhrase.MergeFactors(phrase, featuresToAdd);
  }
  m_scoreBreakdown.PlusEquals(score);
}
TargetPhraseVectorPtr
PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
{

  // There is no such source phrase if the source phrase is longer than the
  // longest source phrase observed during compilation
  if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
    return TargetPhraseVectorPtr();

  // Retrieve target phrase collection from phrase table
  return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
}