コード例 #1
0
ファイル: Main.cpp プロジェクト: mibamur/mosesdecoder
// Splits one phrase-table line into its fields and feeds them to
// sourcePhrase / targetPhrase.
//
// The line is cut on single spaces with strtok(), which overwrites the
// separators in `line` with NUL bytes, so the caller's buffer is modified.
// A "|||" token moves on to the next field. Field handling, as implemented:
//   0     each token goes to the word-level Tokenize() for the source phrase;
//         every non-null word it returns is also appended to the result
//   1     each token goes to the word-level Tokenize() for the target phrase
//   2     each token is parsed as a float score, stored at consecutive indexes
//   3     each token is handed to targetPhrase.CreateAlignFromString()
//   4, 5  the token is ignored and the field counter advances by one
//   6     the token is parsed as a float into misc[0], counter advances by one
//   7+    not expected: assert(false)
// Because fields 4-6 advance the counter per token, three consecutive tokens
// after the alignment field are consumed as "skip, skip, store"; the original
// comments call the stored value the rule count.
//
// misc[0] is written without a size check -- presumably the caller passes a
// vector with at least one element (TODO confirm).
// Asserts that the number of scores read equals numScores.
// Returns the phrase built from the source-side words.
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
    OnDiskPt::PhrasePtr sourceWords(new Phrase());

    size_t scoresRead = 0;
    size_t field = 0;

    for (char *token = strtok(line, " "); token != NULL; token = strtok(NULL, " ")) {
        // Field separator: nothing to store, just move to the next field.
        if (strcmp(token, "|||") == 0) {
            ++field;
            continue;
        }

        switch (field) {
        case 0: {
            // Source side: keep the word only when one was produced.
            WordPtr word = Tokenize(sourcePhrase, token, true, true, onDiskWrapper);
            if (word != NULL) {
                sourceWords->AddWord(word);
            }
            break;
        }
        case 1: {
            // Target side: the returned word is not needed here.
            Tokenize(targetPhrase, token, false, true, onDiskWrapper);
            break;
        }
        case 2: {
            float value = Moses::Scan<float>(token);
            targetPhrase.SetScore(value, scoresRead);
            ++scoresRead;
            break;
        }
        case 3: {
            targetPhrase.CreateAlignFromString(token);
            break;
        }
        case 4:
        case 5: {
            // First two tokens of the count info are skipped; each one
            // still advances the field counter.
            ++field;
            break;
        }
        case 6: {
            // Third token of the count info is the one that is kept.
            float value = Moses::Scan<float>(token);
            misc[0] = value;
            ++field;
            break;
        }
        default: {
            assert(false);
            break;
        }
        }
    }

    assert(scoresRead == numScores);
    targetPhrase.SortAlign();
    return sourceWords;
} // Tokenize()
コード例 #2
0
ファイル: Main.cpp プロジェクト: EktaGupta28/mosesdecoder
// Splits one phrase-table line into its fields and feeds them to
// sourcePhrase / targetPhrase.
//
// lineStr is tokenized on single spaces; a "|||" token moves on to the next
// field. Fields, in order:
//   0 = source phrase   (non-null words are also appended to the result)
//   1 = target phrase
//   2 = scores          (stored at consecutive indexes)
//   3 = alignment
//   4 = counts          (every token overwrites misc[0]; the last one is kept)
//   5 = sparse features (tokens re-joined with spaces, then trimmed)
//   6 = properties      (tokens re-joined with spaces, then trimmed)
// A token in any later field is reported on cerr and trips assert(false).
//
// misc[0] is written without a size check -- presumably the caller passes a
// vector with at least one element (TODO confirm).
// Asserts that the number of scores read equals numScores.
// Returns the phrase built from the source-side words.
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
  // strtok() overwrites separators with NUL bytes, so work on a private copy.
  // The copy lives on the heap: a variable-length stack array is not standard
  // C++ and could overflow the stack for very long lines.
  std::vector<char> lineBuf(lineStr.c_str(), lineStr.c_str() + lineStr.size() + 1);
  char *line = &lineBuf[0];

  stringstream sparseFeatures, property;

  size_t scoreInd = 0;

  // MAIN LOOP
  size_t stage = 0;
  char *tok = strtok (line," ");
  OnDiskPt::PhrasePtr out(new Phrase());
  while (tok != NULL) {
    if (0 == strcmp(tok, "|||")) {
      ++stage;
    } else {
      switch (stage) {
      case 0: {
        WordPtr w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper, 1);
        if (w != NULL)
          out->AddWord(w);

        break;
      }
      case 1: {
        Tokenize(targetPhrase, tok, false, true, onDiskWrapper, 0);
        break;
      }
      case 2: {
        float score = Moses::Scan<float>(tok);
        targetPhrase.SetScore(score, scoreInd);
        ++scoreInd;
        break;
      }
      case 3: {
        targetPhrase.CreateAlignFromString(tok);
        break;
      }
      case 4: {
        // Each count token overwrites the previous one, so misc[0] ends up
        // holding the last count in the field.
        float val = Moses::Scan<float>(tok);
        misc[0] = val;
        break;
      }
      case 5: {
        // sparse features
        sparseFeatures << tok << " ";
        break;
      }
      case 6: {
        // properties
        property << tok << " ";
        break;
      }
      default:
        // Print the untouched input: the working buffer has already been
        // chopped up by strtok() and would only show its first token.
        cerr << "ERROR in line " << lineStr << endl;
        assert(false);
        break;
      }
    }

    tok = strtok (NULL, " ");
  } // while (tok != NULL)

  // Explicit cast mirrors the implicit int -> size_t conversion of the
  // comparison and avoids a signed/unsigned warning.
  assert(scoreInd == static_cast<size_t>(numScores));
  targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
  targetPhrase.SetProperty(Moses::Trim(property.str()));
  targetPhrase.SortAlign();
  return out;
} // Tokenize()