void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
	AlignmentInfo::CollType alignTerm, alignNonTerm;
  for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
    util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-'));

    char *endptr;
    size_t sourcePos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
    ++dash;
    size_t targetPos = strtoul(dash->data(), &endptr, 10);
    UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
    UTIL_THROW_IF(++dash, util::Exception, "Extra gunk in alignment " << *token);


    if (GetWord(targetPos).IsNonTerminal()) {
    	alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    }
  	else {
  		alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
  	}
  }
  SetAlignTerm(alignTerm);
  SetAlignNonTerm(alignNonTerm);

}
Ejemplo n.º 2
0
void TargetPhraseImpl::SetAlignmentInfo(const std::string &alignString)
{
  AlignmentInfo::CollType alignTerm, alignNonTerm;

  vector<string> toks = Tokenize(alignString);
  for (size_t i = 0; i < toks.size(); ++i) {
    vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
    UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");

    size_t sourcePos = alignPair[0];
    size_t targetPos = alignPair[1];

    if ((*this)[targetPos].isNonTerminal) {
      alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    } else {
      alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
    }
  }

  SetAlignTerm(alignTerm);
  SetAlignNonTerm(alignNonTerm);
  //    cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";

  //cerr << "alignTerm=" << alignTerm.size() << endl;
  //cerr << "alignNonTerm=" << alignNonTerm.size() << endl;

}