/** Read tokens from the stream into a list until the chunk ends or a limit is hit.
 *
 *  The list is cleared first. Tokens are then read with getToken() for as long
 *  as isChunkEnd() reports false and fewer than max tokens have been stored.
 *  An empty token is dropped when the consecutive-separator rule is
 *  kRuleIgnoreAll; dropped tokens count neither towards max nor towards the
 *  return value. Afterwards, the list is padded with copies of def until it
 *  holds at least min entries.
 *
 *  @param  stream The stream to read the tokens from.
 *  @param  list   Receives the tokens; any previous contents are discarded.
 *  @param  min    Minimum number of entries in list after the call (>= 0).
 *  @param  max    Maximum number of tokens to store, or -1 for no limit.
 *  @param  def    Value used to pad list up to min entries.
 *  @return The number of tokens read from the stream and stored in list,
 *          not counting padding entries.
 */
int StreamTokenizer::getTokens(SeekableReadStream &stream, std::vector<UString> &list, int min, int max, const UString &def) {
	assert((min >= 0) && ((max == -1) || (max >= min)));

	// With asserts compiled out, a negative min must not turn into a huge unsigned count
	const size_t minCount = (min > 0) ? static_cast<size_t>(min) : 0;

	list.clear();
	list.reserve(minCount);

	int realTokenCount = 0;
	while (!isChunkEnd(stream) && ((max < 0) || (realTokenCount < max))) {
		UString token = getToken(stream);

		// Only tokens that are actually stored count; a discarded empty token
		// must not use up the max budget or inflate the returned count.
		if (!token.empty() || (_conSepRule != kRuleIgnoreAll)) {
			list.push_back(token);
			realTokenCount++;
		}
	}

	// Pad with the default value up to the requested minimum
	while (list.size() < minCount)
		list.push_back(def);

	return realTokenCount;
}
/** Collect tokens from the stream into a list, bounded by a minimum and maximum count.
 *
 *  The list is emptied, then filled with tokens from getToken() until
 *  isChunkEnd() reports true or max tokens have been stored. Empty tokens are
 *  skipped (and not counted) when the consecutive-separator rule is
 *  kRuleIgnoreAll. If fewer than min entries were stored, the list is padded
 *  with copies of def.
 *
 *  @param  stream The stream to read the tokens from.
 *  @param  list   Receives the tokens; any previous contents are discarded.
 *  @param  min    Minimum number of entries in list after the call.
 *  @param  max    Maximum number of tokens to store (must be >= min).
 *  @param  def    Value used to pad list up to min entries.
 *  @return The number of tokens stored in list, excluding padding entries.
 */
size_t StreamTokenizer::getTokens(SeekableReadStream &stream, std::vector<UString> &list, size_t min, size_t max, const UString &def) {
	assert(max >= min);

	list.clear();
	list.reserve(min);

	size_t stored = 0;
	for (;;) {
		// The chunk-end check comes first, exactly as in the loop condition it replaces
		if (isChunkEnd(stream) || (stored >= max))
			break;

		UString token = getToken(stream);

		// Under kRuleIgnoreAll, empty tokens are neither stored nor counted
		const bool discard = token.empty() && (_conSepRule == kRuleIgnoreAll);
		if (discard)
			continue;

		list.push_back(token);
		++stored;
	}

	// Fill up to the requested minimum with the default value
	if (list.size() < min)
		list.resize(min, def);

	return stored;
}
void getSegments(int* label, int length, std::vector<Segment>& segments) { segments.clear(); segments.reserve(length); int chunkStart = 0; bool inChunk = false; int tag = -1; int type = otherChunkType_; for (int i = 0; i < length; ++i) { int prevTag = tag; int prevType = type; CHECK_LE(label[i], numChunkTypes_ * numTagTypes_); tag = label[i] % numTagTypes_; type = label[i] / numTagTypes_; if (inChunk && isChunkEnd(prevTag, prevType, tag, type)) { Segment segment{ chunkStart, // begin i - 1, // end prevType, }; segments.push_back(segment); inChunk = false; } if (isChunkBegin(prevTag, prevType, tag, type)) { chunkStart = i; inChunk = true; } } if (inChunk) { Segment segment{ chunkStart, // begin length - 1, // end type, }; segments.push_back(segment); } }