void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) { AlignmentInfo::CollType alignTerm, alignNonTerm; for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) { util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-')); char *endptr; size_t sourcePos = strtoul(dash->data(), &endptr, 10); UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); ++dash; size_t targetPos = strtoul(dash->data(), &endptr, 10); UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash); UTIL_THROW_IF(++dash, util::Exception, "Extra gunk in alignment " << *token); if (GetWord(targetPos).IsNonTerminal()) { alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos)); } else { alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos)); } } SetAlignTerm(alignTerm); SetAlignNonTerm(alignNonTerm); }
void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString) { AlignmentInfo::CollType alignTerm, alignNonTerm; vector<string> toks = Tokenize(alignString); for (size_t i = 0; i < toks.size(); ++i) { vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-"); UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format"); size_t sourcePos = alignPair[0]; size_t targetPos = alignPair[1]; if ((*this)[targetPos].isNonTerminal) { alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos)); } else { alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos)); } } SetAlignTerm(alignTerm); SetAlignNonTerm(alignNonTerm); // cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n"; //cerr << "alignTerm=" << alignTerm.size() << endl; //cerr << "alignNonTerm=" << alignNonTerm.size() << endl; }