コード例 #1
0
ファイル: Main.cpp プロジェクト: Deseaus/mosesdecoder
// Parses one text line made of space-separated tokens, where the token "|||"
// separates consecutive fields, and loads the fields into sourcePhrase and
// targetPhrase.
//
// Field index (the "stage") and what is done with each of its tokens:
//   0 = source phrase   -> word-level Tokenize(); non-null words are appended
//                          to the returned phrase
//   1 = target phrase   -> word-level Tokenize()
//   2 = scores          -> parsed as float, stored at consecutive score slots
//   3 = alignment       -> targetPhrase.CreateAlignFromString()
//   4 = count           -> parsed as float and written to misc[0]
//   5 = sparse features -> collected, space-joined, trimmed
//   6 = properties      -> collected, space-joined, trimmed
// A token in any later field is reported on cerr and trips assert(false).
//
// lineStr   : the line to parse; it is copied, never modified.
// numScores : number of score tokens expected (checked with assert only).
// misc      : misc[0] receives the count value, so the caller must pass a
//             vector with at least one element.
// Returns the phrase built from the source-side words.
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
  // strtok() overwrites delimiters with NUL bytes, so it needs a private,
  // writable, NUL-terminated copy. The copy lives on the heap: a
  // variable-length array is not standard C++ and would put a buffer of
  // input-dependent size on the stack.
  std::vector<char> line(lineStr.begin(), lineStr.end());
  line.push_back('\0');

  stringstream sparseFeatures, property;

  size_t scoreInd = 0;

  // MAIN LOOP
  size_t stage = 0; // index of the field currently being read (see header)

  char *tok = strtok(&line[0], " ");
  OnDiskPt::PhrasePtr out(new Phrase());
  while (tok != NULL) {
    if (0 == strcmp(tok, "|||")) {
      // field separator: move on to the next field
      ++stage;
    } else {
      switch (stage) {
      case 0: {
        WordPtr w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper, 1);
        if (w != NULL)
          out->AddWord(w);

        break;
      }
      case 1: {
        Tokenize(targetPhrase, tok, false, true, onDiskWrapper, 0);
        break;
      }
      case 2: {
        float score = Moses::Scan<float>(tok);
        targetPhrase.SetScore(score, scoreInd);
        ++scoreInd;
        break;
      }
      case 3: {
        targetPhrase.CreateAlignFromString(tok);
        break;
      }
      case 4: {
        // Every token of the count field overwrites misc[0], so the value
        // that survives is the last token of the field.
        float val = Moses::Scan<float>(tok);
        misc[0] = val;
        break;
      }
      case 5: {
        // sparse features
        sparseFeatures << tok << " ";
        break;
      }
      case 6: {
        // properties
        property << tok << " ";
        break;
      }
      default:
        // Print the untouched input: the working copy has already been cut
        // into pieces by strtok() and would show only its first token.
        cerr << "ERROR in line " << lineStr << endl;
        assert(false);
        break;
      }
    }

    tok = strtok(NULL, " ");
  } // while (tok != NULL)

  // Same comparison the implicit int -> size_t conversion performed, spelled
  // out so it no longer mixes signed and unsigned operands.
  assert(scoreInd == static_cast<size_t>(numScores));
  targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
  targetPhrase.SetProperty(Moses::Trim(property.str()));
  targetPhrase.SortAlign();
  return out;
} // Tokenize()
コード例 #2
0
ファイル: Main.cpp プロジェクト: A30041839/mosesdecoder
// Reads the space-separated tokens of `line` (which strtok() modifies in
// place) and fills sourcePhrase / targetPhrase from them. The token "|||"
// advances the field counter; every other token is handled according to the
// counter's current value:
//   0 -> source word: word-level Tokenize(); non-null results are appended to
//        the returned phrase
//   1 -> target word: word-level Tokenize()
//   2 -> score: parsed as float and stored at the next score index
//   3 -> alignment: targetPhrase.CreateAlignFromString()
//   4, 5 -> token is ignored and the counter is bumped by one
//   6 -> token is parsed as float into misc[0], counter is bumped by one
//   anything else -> message on cerr followed by assert(false)
// Because values 4, 5 and 6 each bump the counter themselves, the third token
// seen after the alignment field is the one that ends up in misc[0].
// The number of scores read is compared with numScores by assert only.
// Returns the phrase assembled from the source-side words.
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
  size_t scoresRead = 0;
  size_t field = 0;

  char *token = strtok(line, " ");
  OnDiskPt::PhrasePtr phrase(new Phrase());

  for (; token != NULL; token = strtok(NULL, " ")) {
    // separator token: only moves to the next field
    if (strcmp(token, "|||") == 0) {
      ++field;
      continue;
    }

    if (field == 0) {
      WordPtr word = Tokenize(sourcePhrase, token, true, true, onDiskWrapper, 1);
      if (word != NULL) {
        phrase->AddWord(word);
      }
    } else if (field == 1) {
      Tokenize(targetPhrase, token, false, true, onDiskWrapper, 0);
    } else if (field == 2) {
      const float score = Moses::Scan<float>(token);
      targetPhrase.SetScore(score, scoresRead);
      ++scoresRead;
    } else if (field == 3) {
      targetPhrase.CreateAlignFromString(token);
    } else if (field == 4 || field == 5) {
      // skipped token; the counter itself steps forward
      ++field;
    } else if (field == 6) {
      const float count = Moses::Scan<float>(token);
      misc[0] = count;
      ++field;
    } else {
      cerr << "ERROR in line " << line << endl;
      assert(false);
    }
  }

  assert(scoresRead == numScores);
  targetPhrase.SortAlign();
  return phrase;
} // Tokenize()
コード例 #3
0
ファイル: Main.cpp プロジェクト: obo/Moses-Extensions-at-UFAL
// Reads the space-separated tokens of `line` (which strtok() modifies in
// place) and fills sourcePhrase / targetPhrase from them. The token "|||"
// advances the field counter; every other token is handled according to the
// counter's current value:
//   0 -> source word: word-level Tokenize() on sourcePhrase
//   1 -> target word: word-level Tokenize() on targetPhrase
//   2 -> score: parsed as float and stored at the next score index
//   3 -> alignment: targetPhrase.Create1AlignFromString()
//   4 -> token is ignored and the counter is bumped by one
//   5 -> token is parsed as float into misc[0], counter is bumped by one
//   anything else -> assert(false)
// Because values 4 and 5 bump the counter themselves, the second token seen
// after the alignment field is the one that ends up in misc[0].
// The number of scores read is compared with numScores by assert only.
void Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
	size_t scoresRead = 0;
	size_t field = 0;

	for (char *token = strtok(line, " "); token != NULL; token = strtok(NULL, " "))
	{
		// separator token: only moves to the next field
		if (strcmp(token, "|||") == 0)
		{
			++field;
			continue;
		}

		if (field == 0)
		{
			Tokenize(sourcePhrase, token, true, true, onDiskWrapper);
		}
		else if (field == 1)
		{
			Tokenize(targetPhrase, token, false, true, onDiskWrapper);
		}
		else if (field == 2)
		{
			const float score = Moses::Scan<float>(token);
			targetPhrase.SetScore(score, scoresRead);
			++scoresRead;
		}
		else if (field == 3)
		{
			targetPhrase.Create1AlignFromString(token);
		}
		else if (field == 4)
		{
			// skipped token; the counter itself steps forward
			++field;
		}
		else if (field == 5)
		{
			const float count = Moses::Scan<float>(token);
			misc[0] = count;
			++field;
		}
		else
		{
			assert(false);
		}
	}

	assert(scoresRead == numScores);
	targetPhrase.SortAlign();

} // Tokenize()