Esempio n. 1
0
// Scores one hypothesis expansion with the lexical reordering model.
//
// The previous feature state is asked to expand itself with the translation
// option of `hypo`. Expand() receives a zero-initialised score vector with
// GetNumScoreComponents() entries, and that vector is then added to `out`
// on behalf of this producer.
//
// hypo        hypothesis whose translation option drives the expansion
// prev_state  feature state of the preceding hypothesis; it is down-cast to
//             LexicalReorderingState
// out         score collection that receives the scores
// returns     the state produced by LexicalReorderingState::Expand
//
// NOTE(review): the result of the dynamic_cast is dereferenced without a
// null check, so a null or differently-typed prev_state is undefined
// behaviour here -- confirm that callers always pass a
// LexicalReorderingState.
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
                                     const FFState* prev_state,
                                     ScoreComponentCollection* out) const {
    const LexicalReorderingState *previous =
        dynamic_cast<const LexicalReorderingState *>(prev_state);

    Scores componentScores(GetNumScoreComponents(), 0);
    LexicalReorderingState *expanded =
        previous->Expand(hypo.GetTranslationOption(), componentScores);

    out->PlusEquals(this, componentScores);
    return expanded;
}
Esempio n. 2
0
// Collects the lexical reordering scores that apply to `hypothesis`.
//
// For every configured direction the cached reordering scores are fetched
// (from the previous hypothesis for Forward, from the hypothesis itself for
// Backward), the entry matching the hypothesis' orientation is selected and
// written into the result vector.
//
// The result has GetNumScoreComponents() entries, all initially 0. With
// m_OneScorePerDirection set, the value is stored at the direction index;
// otherwise it is stored at the same (orientation, direction) slot it was
// read from.
std::vector<float> LexicalReordering::CalcScore(Hypothesis* hypothesis) const {
  std::vector<float> result(GetNumScoreComponents(), 0);
  std::vector<float> cached;

  for (size_t dir = 0; dir < m_Direction.size(); ++dir) {
    if (Forward == m_Direction[dir]) {
      // The next phrase of the current hypothesis is not known yet, so the
      // forward score is taken from the previous hypothesis. A previous
      // hypothesis with id 0 is treated as "nothing to score" (presumably
      // the initial empty hypothesis -- confirm), and this direction is
      // skipped.
      if (0 == hypothesis->GetPrevHypo()->GetId()) {
        continue;
      }
      // Cached scores are used instead of a direct m_Table->GetScore lookup.
      cached = hypothesis->GetPrevHypo()->GetCachedReorderingScore().GetScoresForProducer(this);
    }
    if (Backward == m_Direction[dir]) {
      // Cached scores are used instead of a direct m_Table->GetScore lookup.
      cached = hypothesis->GetCachedReorderingScore().GetScoresForProducer(this);
    }

    // Sanity check: one probability per orientation type and direction.
    assert(cached.size() == (GetNumOrientationTypes() * m_Direction.size()));

    const OrientationType orientation = GetOrientationType(hypothesis);
    const size_t slot = orientation + dir * GetNumOrientationTypes();
    const float value = cached[slot];

    // Either one score per direction, or one per direction and orientation.
    result[m_OneScorePerDirection ? dir : slot] = value;
  }

  return result;
}
// Builds the sentence-specific phrase table for `inputSentence`.
//
// Steps:
//   1. Create a scratch directory with mkdtemp() and write the input words
//      (all positions except the first and the last) to "<dir>/in".
//   2. Ask m_FuzzyMatchWrapper to extract a phrase-table file for this
//      translation id into that directory.
//   3. Parse the resulting file line by line ("|||"-separated fields:
//      source, target, scores, alignment, optionally a fifth field) and add
//      every rule to the node stored in m_collection[translationId].
//   4. Sort and prune the collected target phrase collections.
//
// Failure handling:
//   - Raises through UTIL_THROW_IF2 when the scratch directory or the input
//     file cannot be created.
//   - Calls abort() after reporting through UserMessage when a line has the
//     wrong number of fields or the wrong number of scores.
//
// Only the Moses phrase-table format is handled; the former Hiero branch
// could never execute and has been dropped.
//
// NOTE(review): the scratch directory is never removed (the cleanup call at
// the end is commented out), so every call leaves a directory in /tmp.
// NOTE(review): ownership of sourceLHS is not visible here -- confirm that
// GetOrCreateTargetPhraseCollection takes it over or that it must be freed.
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
  char dirName[] = "/tmp/moses.XXXXXX";
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL,
		  "Couldn't create temporary directory " << dirName);

  const string dirNameStr(dirName);
  const string inFileName(dirNameStr + "/in");

  ofstream inFile(inFileName.c_str());
  UTIL_THROW_IF2(!inFile.is_open(),
		  "Couldn't open " << inFileName << " for writing");

  // Write every word except the first and last position (presumably the
  // sentence boundary markers -- confirm). "i + 1 < size" instead of
  // "i < size - 1" keeps the unsigned arithmetic from wrapping around when
  // the input is empty.
  const size_t inputSize = inputSentence.GetSize();
  for (size_t i = 1; i + 1 < inputSize; ++i) {
    inFile << inputSentence.GetWord(i);
  }
  inFile << endl;
  inFile.close();

  const long translationId = inputSentence.GetTranslationId();
  const string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

  // populate with rules for this sentence
  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];

  // data from file
  InputFileStream inStream(ptFileName);

  // copied from class LoaderStandard
  PrintUserTime("Start loading fuzzy-match phrase model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  // Loop-invariant, so it is looked up once.
  const size_t numScoreComponents = GetNumScoreComponents();

  string lineOrig;
  size_t count = 0; // 1-based number of the line currently being parsed

  while(getline(inStream, lineOrig)) {
    // Count every line that is read (including ones skipped below) so the
    // diagnostics point at the real line of the phrase-table file.
    ++count;

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, lineOrig, "|||" );

    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ptFileName << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0];
    const string &targetPhraseString = tokens[1];
    const string &scoreString        = tokens[2];
    const string &alignString        = tokens[3];

    // NOTE(review): the test looks at the *source* field while the message
    // says "empty target"; the text is kept unchanged here.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    // parse source & find pt node

    // constituent labels, filled in by CreateFromString below
    Word *sourceLHS = NULL;
    Word *targetLHS = NULL;

    // source
    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);

    // component score, for n-best output
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);
  }

  // sort and prune each target phrase collection
  SortAndPrune(rootNode);

  //removedirectoryrecursively(dirName);
}