Exemplo n.º 1
0
std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
{
  //Tokenize
  std::vector<uint64_t> output;

  util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));

  while (itWord) {
	StringPiece word = *itWord;
	uint64_t id = 0;

	util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
    while (itFactor) {
    	StringPiece factor = *itFactor;
    	//cerr << "factor=" << factor << endl;

    	id += getHash(factor);
        itFactor++;
    }

    output.push_back(id);
    itWord++;
  }

  return output;
}
Exemplo n.º 2
0
/**
 * Registers every factor found in a piece of text with a vocabulary store.
 *
 * The text is split on ' ' into words and each word on '|' into factors.
 * For each factor, Insert() is called on the store with the factor's
 * getHash() value and the factor's text as a string.
 *
 * @param sourceVocab Store that receives the (hash, string) entries.
 * @param textin      Text to tokenize.
 */
void add_to_map(StoreVocab<uint64_t> &sourceVocab,
                const StringPiece &textin)
{
  typedef util::TokenIter<util::SingleCharacter> Tokenizer;

  // Outer pass: words are delimited by a single space.
  for (Tokenizer wordIt(textin, util::SingleCharacter(' ')); wordIt; ++wordIt) {
    StringPiece token = *wordIt;

    // Inner pass: one Insert() per '|'-delimited factor of the word.
    for (Tokenizer factorIt(token, util::SingleCharacter('|')); factorIt; ++factorIt) {
      StringPiece piece = *factorIt;
      sourceVocab.Insert(getHash(piece), piece.as_string());
    }
  }
}