C++ (Cpp) TargetPhrase::CreateFromStringNewFormatの例

プログラミング言語: C++ (Cpp)

クラス/型: TargetPhrase

メソッド/関数: CreateFromStringNewFormat

hotexamples.comのコード掲載数: 4

C++ (Cpp) TargetPhrase::CreateFromStringNewFormat - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のTargetPhrase::CreateFromStringNewFormatの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

GetWord(15)

GetSize(14)

SetTargetLHS(11)

SetAlignmentInfo(9)

Evaluate(9)

AddWord(7)

CreateFromString(6)

EvaluateInIsolation(6)

GetScoreBreakdown(5)

SetScore(5)

SetSourcePhrase(5)

CreateFromStringNewFormat(4)

SetScoreChart(4)

GetSourcePhrase(4)

GetTargetLHS(3)

SetProperties(3)

SetSparseScore(3)

GetFutureScore(3)

GetAlignTerm(3)

SortAlign(3)

GetRuleSource(2)

SetProperty(2)

GetNumNonTerminals(2)

GetAlignNonTerm(2)

SetXMLScore(2)

CreateAlignFromString(2)

SetAlignNonTerm(2)

SetAlignTerm(2)

SetRuleSource(1)

SetRuleCount(1)

SetSparseFeatures(1)

GetStringRep(1)

Set(1)

ReadOtherInfoFromFile(1)

ReadFromFile(1)

Append(1)

GetScores(1)

GetProperty(1)

GetNumTerminals(1)

GetContainer(1)

CreateCountInfo(1)

Create1AlignFromString(1)

SwapWords(1)

コード例 #1

ファイルを表示

ファイル: LoaderStandard.cpp プロジェクト: Avmb/mosesdecoder

/**
 * Loads a text SCFG rule table from `inFile` into `ruleTable`.
 *
 * Each line is split on "|||" into: source phrase, target phrase, scores,
 * alignment and an optional rule-count field (defaulting to "1 1"). Any
 * further field is reported as a syntax error and the process aborts.
 * In Hiero format every line is first rewritten by ReformatHieroRule().
 *
 * @param format          MosesFormat or HieroFormat.
 * @param input           factor types used when parsing the source side.
 * @param output          factor types used when parsing the target side.
 * @param inFile          path of the rule table to read.
 * @param weight          weights handed to TargetPhrase::SetScoreChart().
 * @param languageModels  language models handed to SetScoreChart().
 * @param wpProducer      word-penalty producer handed to SetScoreChart().
 * @param ruleTable       trie receiving the parsed rules.
 * @return true once the whole file was read; malformed input calls abort().
 */
bool RuleTableLoaderStandard::Load(FormatType format
                                , const std::vector<FactorType> &input
                                , const std::vector<FactorType> &output
                                , const std::string &inFile
                                , const std::vector<float> &weight
                                , size_t /* tableLimit */
                                , const LMList &languageModels
                                , const WordPenaltyProducer* wpProducer
                                , RuleTableTrie &ruleTable)
{
  string banner("Start loading text SCFG phrase table. ");
  if (format == MosesFormat) {
    banner += "Moses ";
  } else {
    banner += "Hiero ";
  }
  banner += " format";
  PrintUserTime(banner);

  const StaticData &config = StaticData::Instance();
  const std::string &factorDelimiter = config.GetFactorDelimiter();

  // Number of rules stored so far; skipped entries do not advance it.
  size_t lineCount = 0;

  // Progress is reported on stderr only when the verbosity macro enables it.
  std::ostream *progress = NULL;
  IFVERBOSE(1) progress = &std::cerr;
  util::FilePiece reader(inFile.c_str(), progress);

  // Buffers reused across iterations.
  vector<float> scores;
  StringPiece line;
  std::string hieroRaw, hieroConverted;

  for (;;) {
    try {
      line = reader.ReadLine();
    } catch (const util::EndOfFileException &) {
      break;
    }

    if (format == HieroFormat) {
      // Copy the line out, rewrite it, then point `line` at the rewritten text.
      hieroRaw.assign(line.data(), line.size());
      ReformatHieroRule(hieroRaw, hieroConverted);
      line = hieroConverted;
    }

    // Walk the "|||"-separated fields in order.
    util::TokenIter<util::MultiCharacter> fields(line, "|||");
    StringPiece sourcePhraseString(*fields);
    ++fields;
    StringPiece targetPhraseString(*fields);
    ++fields;
    StringPiece scoreString(*fields);
    ++fields;
    StringPiece alignString(*fields);

    // Optional fifth field; fall back to "1 1" when it is missing.
    StringPiece ruleCountString("1 1");
    if (++fields) {
      ruleCountString = *fields;
    }

    // Anything after the rule counts is not understood.
    if (++fields) {
      stringstream msg;
      msg << "Syntax error at " << ruleTable.GetFilePath() << ":" << lineCount;
      UserMessage::Add(msg.str());
      abort();
    }

    const bool sourceIsBlank =
      sourcePhraseString.find_first_not_of(" \t", 0) == string::npos;
    if (sourceIsBlank && !config.IsWordDeletionEnabled()) {
      TRACE_ERR( ruleTable.GetFilePath() << ":" << lineCount << ": pt entry contains empty target, skipping\n");
      continue;
    }

    // Parse the whitespace-separated scores.
    scores.clear();
    util::TokenIter<util::AnyCharacter, true> tok(scoreString, " \t");
    while (tok) {
      char *parseEnd;
      scores.push_back(strtod(tok->data(), &parseEnd));
      UTIL_THROW_IF(parseEnd == tok->data(), util::Exception, "Bad score " << *tok << " on line " << lineCount);
      ++tok;
    }

    const size_t expectedScores = ruleTable.GetFeature()->GetNumScoreComponents();
    if (scores.size() != expectedScores) {
      stringstream msg;
      msg << "Size of scoreVector != number (" << scores.size() << "!="
          << expectedScores << ") of score components on line " << lineCount;
      UserMessage::Add(msg.str());
      abort();
    }

    // Left-hand-side labels, filled in while parsing each side.
    Word sourceLHS, targetLHS;

    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);

    TargetPhrase *rule = new TargetPhrase(Output);
    rule->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
    rule->SetSourcePhrase(sourcePhrase);

    rule->SetAlignmentInfo(alignString, sourcePhrase);
    rule->SetTargetLHS(targetLHS);

    // Must run before the scores are transformed: it receives the raw first score.
    rule->SetRuleCount(ruleCountString, scores[0]);

    // Two in-place passes over the scores: TransformScore, then FloorScore.
    for (size_t i = 0; i < scores.size(); ++i) {
      scores[i] = TransformScore(scores[i]);
    }
    for (size_t i = 0; i < scores.size(); ++i) {
      scores[i] = FloorScore(scores[i]);
    }

    rule->SetScoreChart(ruleTable.GetFeature(), scores, weight, languageModels, wpProducer);

    TargetPhraseCollection &bucket =
      GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *rule, sourceLHS);
    bucket.Add(rule);

    ++lineCount;
  }

  // Sort and prune each target phrase collection.
  SortAndPrune(ruleTable);

  return true;
}

コード例 #2

ファイルを表示

  /**
   * Builds the rule collection for one input sentence.
   *
   * Steps performed here:
   *  1. create a scratch directory from the "/tmp/moses.XXXXXX" template;
   *  2. write the words at positions 1 .. GetSize()-2 of the sentence to
   *     "<dir>/in" (the first and last positions are skipped; presumably
   *     they hold sentence-boundary markers; confirm against the caller);
   *  3. call m_FuzzyMatchWrapper->Extract() to obtain a rule-table file name;
   *  4. parse that file line by line ("|||"-separated source, target, scores,
   *     alignment, optional fifth field) into m_collection[translationId];
   *  5. sort and prune the resulting collections.
   *
   * Malformed lines (wrong field count or wrong number of scores) abort the
   * process. The scratch directory is not removed (cleanup is commented out).
   *
   * @param inputSentence sentence whose rules are to be loaded.
   */
  void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
  {
    char dirName[] = "/tmp/moses.XXXXXX";
    char *temp = mkdtemp(dirName);
    CHECK(temp);
    string dirNameStr(dirName);
    
    string inFileName(dirNameStr + "/in");
    
    ofstream inFile(inFileName.c_str());
    
    // `i + 1 < size` instead of `i < size - 1`: the comparison is unsigned
    // (i is size_t), so the old form wrapped around for an empty sentence
    // and read words far out of range.
    for (size_t i = 1; i + 1 < inputSentence.GetSize(); ++i)
    {
      inFile << inputSentence.GetWord(i);
    }
    inFile << endl;
    inFile.close();
        
    long translationId = inputSentence.GetTranslationId();
    string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

    // populate with rules for this sentence
    PhraseDictionaryNodeSCFG &rootNode = m_collection[translationId];
        
    // data from file
    InputFileStream inStream(ptFileName);
    
    // copied from class LoaderStandard
    PrintUserTime("Start loading fuzzy-match phrase model");
    
    const StaticData &staticData = StaticData::Instance();
    const std::string& factorDelimiter = staticData.GetFactorDelimiter();
    
    
    // The table is always read as-is (Moses format). The former Hiero-format
    // branches could never run and handled an uninitialised / non-heap
    // pointer, so they were removed.
    string lineOrig;
    // Number of rules stored so far; skipped entries do not advance it.
    size_t count = 0;
    
    while(getline(inStream, lineOrig)) {  //mgjang std add
      vector<string> tokens;
      vector<float> scoreVector;
      
      TokenizeMultiCharSeparator(tokens, lineOrig , "|||" );
      
      // Exactly four or five fields are accepted.
      if (tokens.size() != 4 && tokens.size() != 5) {
        stringstream strme;
        strme << "Syntax error at " << ptFileName << ":" << count;
        UserMessage::Add(strme.str());
        LOGE("[mgjang] before abort\n");
        abort();
      }
      
      const string &sourcePhraseString = tokens[0]
      , &targetPhraseString = tokens[1]
      , &scoreString        = tokens[2]
      , &alignString        = tokens[3];
      
      // Skip entries whose source side is blank unless word deletion is enabled.
      bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
      if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
        TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
        continue;
      }
      
      Tokenize<float>(scoreVector, scoreString);
      const size_t numScoreComponents = GetFeature()->GetNumScoreComponents();
      if (scoreVector.size() != numScoreComponents) {
        stringstream strme;
        strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
        << numScoreComponents << ") of score components on line " << count;
        UserMessage::Add(strme.str());
        LOGE("[mgjang] before abort\n");
        abort();
      }
      CHECK(scoreVector.size() == numScoreComponents);
      
      // parse source & find pt node
      
      // constituent labels
      Word sourceLHS, targetLHS;
      
      // source
      Phrase sourcePhrase( 0);
      sourcePhrase.CreateFromStringNewFormat(Input, *m_input, sourcePhraseString, factorDelimiter, sourceLHS);
      
      // create target phrase obj
      TargetPhrase *targetPhrase = new TargetPhrase();
      targetPhrase->CreateFromStringNewFormat(Output, *m_output, targetPhraseString, factorDelimiter, targetLHS);
      
      // rest of target phrase
      targetPhrase->SetAlignmentInfo(alignString);
      targetPhrase->SetTargetLHS(targetLHS);
      //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
      
      // component score, for n-best output: TransformScore pass, then FloorScore pass
      std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
      std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
      
      targetPhrase->SetScoreChart(GetFeature(), scoreVector, *m_weight, *m_languageModels, m_wpProducer);
      
      TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
      phraseColl.Add(targetPhrase);
      
      count++;
    }
    
    // sort and prune each target phrase collection
    SortAndPrune(rootNode);
   
    //removedirectoryrecursively(dirName);
  }

コード例 #3

ファイルを表示

ファイル: PhraseDictionaryNewFormat.cpp プロジェクト: svetakrasikova/ADSKMosesTraining

bool PhraseDictionaryNewFormat::Load(const std::vector<FactorType> &input
																			 , const std::vector<FactorType> &output
																			 , std::istream &inStream
																			 , const std::vector<float> &weight
																			 , size_t tableLimit
																			 , const LMList &languageModels
																			 , float weightWP)
{
	PrintUserTime("Start loading new format pt model");
	
	const StaticData &staticData = StaticData::Instance();
	const std::string& factorDelimiter = staticData.GetFactorDelimiter();
	
	VERBOSE(2,"PhraseDictionaryNewFormat: input=" << m_inputFactors << "  output=" << m_outputFactors << std::endl);
	
	string line;
	size_t count = 0;
	
	while(getline(inStream, line))
	{
		vector<string> tokens;
		vector<float> scoreVector;
		
		TokenizeMultiCharSeparator(tokens, line , "|||" );
					
		if (tokens.size() != 4 && tokens.size() != 5)
		{
			stringstream strme;
			strme << "Syntax error at " << m_filePath << ":" << count;
			UserMessage::Add(strme.str());
			abort();
		}
		
		const string &sourcePhraseString	= tokens[0]
								, &targetPhraseString	= tokens[1]
								, &alignString				= tokens[2]
								, &scoreString				= tokens[3];

		bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
		if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
			TRACE_ERR( m_filePath << ":" << count << ": pt entry contains empty target, skipping\n");
			continue;
		}
		
		Tokenize<float>(scoreVector, scoreString);
		if (scoreVector.size() != m_numScoreComponent)
		{
			stringstream strme;
			strme << "Size of scoreVector != number (" <<scoreVector.size() << "!=" <<m_numScoreComponent<<") of score components on line " << count;
			UserMessage::Add(strme.str());
			abort();
		}
		assert(scoreVector.size() == m_numScoreComponent);
		
		// parse source & find pt node
		
		// head word
		Word sourceLHS, targetLHS;

		// source
		Phrase sourcePhrase(Input);
		sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);
		
		// create target phrase obj
		TargetPhrase *targetPhrase = new TargetPhrase(Output);
		targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);
		
		// alignment
		list<pair<size_t,size_t> > alignmentInfo;
		CreateAlignmentInfo(alignmentInfo, alignString);

		// rest of target phrase
		targetPhrase->SetAlignmentInfo(alignmentInfo);
		targetPhrase->SetTargetLHS(targetLHS);
		//targetPhrase->SetDebugOutput(string("New Format pt ") + line);
		
		// component score, for n-best output
		std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
		std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
		
		targetPhrase->SetScoreChart(GetFeature(), scoreVector, weight, languageModels);
		
		// count info for backoff
		if (tokens.size() >= 6)
			targetPhrase->CreateCountInfo(tokens[5]);

		TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(sourcePhrase, *targetPhrase);
		AddEquivPhrase(phraseColl, targetPhrase);
		
		count++;
	}
	
	// cleanup cache
	
	// sort each target phrase collection
	m_collection.Sort(m_tableLimit);
	
	return true;
}

コード例 #4

ファイルを表示

ファイル: LoaderStandard.cpp プロジェクト: arianna-bis/mosesdecoder

/**
 * Loads a text SCFG rule table from `inStream` into `ruleTable`.
 *
 * Every line is split on "|||" and must yield four or five fields, read in
 * the order: source phrase, target phrase, scores, alignment. In Hiero
 * format each line is first rewritten by ReformatHieroRule(). A wrong field
 * count or a wrong number of scores aborts the process.
 *
 * @param format          MosesFormat or HieroFormat.
 * @param input           factor types used when parsing the source side.
 * @param output          factor types used when parsing the target side.
 * @param inStream        stream providing the table, one rule per line.
 * @param weight          weights handed to TargetPhrase::SetScoreChart().
 * @param languageModels  language models handed to SetScoreChart().
 * @param wpProducer      word-penalty producer handed to SetScoreChart().
 * @param ruleTable       trie receiving the parsed rules.
 * @return true once the stream is exhausted.
 */
bool RuleTableLoaderStandard::Load(FormatType format
                                , const std::vector<FactorType> &input
                                , const std::vector<FactorType> &output
                                , std::istream &inStream
                                , const std::vector<float> &weight
                                , size_t /* tableLimit */
                                , const LMList &languageModels
                                , const WordPenaltyProducer* wpProducer
                                , RuleTableTrie &ruleTable)
{
  PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();


  string lineOrig;
  // Number of rules stored so far; skipped entries do not advance it.
  size_t count = 0;

  while(getline(inStream, lineOrig)) {
    if (format == HieroFormat) {
      // ReformatHieroRule() hands back a heap-allocated string. Copy its
      // contents into lineOrig and free it right away, so that no path out of
      // this iteration can leak it. Previously the `continue` below skipped
      // the delete at the end of the loop body.
      const string *reformatted = ReformatHieroRule(lineOrig);
      lineOrig = *reformatted;
      delete reformatted;
    }

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, lineOrig , "|||" );

    // Exactly four or five fields are accepted.
    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ruleTable.GetFilePath() << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
               , &targetPhraseString = tokens[1]
               , &scoreString        = tokens[2]
               , &alignString        = tokens[3];

    // Skip entries whose source side is blank unless word deletion is enabled.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    const size_t numScoreComponents = ruleTable.GetFeature()->GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }
    CHECK(scoreVector.size() == numScoreComponents);

    // parse source & find pt node

    // constituent labels
    Word sourceLHS, targetLHS;

    // source
    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromStringNewFormat(Input, input, sourcePhraseString, factorDelimiter, sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase(Output);
    targetPhrase->CreateFromStringNewFormat(Output, output, targetPhraseString, factorDelimiter, targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output: TransformScore pass, then FloorScore pass
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->SetScoreChart(ruleTable.GetFeature(), scoreVector, weight, languageModels,wpProducer);

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);

    count++;
  }

  // sort and prune each target phrase collection
  SortAndPrune(ruleTable);

  return true;
}