void Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc) { size_t scoreInd = 0; // MAIN LOOP size_t stage = 0; /* 0 = source phrase 1 = target phrase 2 = scores 3 = align 4 = count */ char *tok = strtok (line," "); while (tok != NULL) { if (0 == strcmp(tok, "|||")) { ++stage; } else { switch (stage) { case 0: { Tokenize(sourcePhrase, tok, true, true, onDiskWrapper); break; } case 1: { Tokenize(targetPhrase, tok, false, true, onDiskWrapper); break; } case 2: { float score = Moses::Scan<float>(tok); targetPhrase.SetScore(score, scoreInd); ++scoreInd; break; } case 3: { targetPhrase.Create1AlignFromString(tok); break; } case 4: ++stage; break; case 5: { // count info. Only store the 2nd one float val = Moses::Scan<float>(tok); misc[0] = val; ++stage; break; } default: assert(false); break; } } tok = strtok (NULL, " "); } // while (tok != NULL) assert(scoreInd == numScores); targetPhrase.SortAlign(); } // Tokenize()