/** Skip past the end of the current chunk.
 *
 *  The remainder of the current chunk is skipped first. The character that
 *  follows is then consumed if it is one of the chunk end characters; any
 *  other character is put back by seeking one position backwards. If the
 *  stream reports EOF instead, nothing further is done.
 *
 *  @param stream The stream to advance.
 */
void StreamTokenizer::nextChunk(SeekableReadStream &stream) {
	// Get rid of whatever is left of the current chunk
	skipChunk(stream);

	uint32 next = stream.readChar();

	// Nothing left to read, so there is nothing to consume or to put back
	if (next == ReadStream::kEOF)
		return;

	// A chunk end character stays consumed
	if (isIn(next, _chunkEnds))
		return;

	// Anything else does not belong to us; rewind by that one character
	stream.seek(-1, SeekableReadStream::kOriginCurrent);
}
/** Check whether the stream is positioned at the end of a chunk.
 *
 *  One character is read and, unless it is EOF, the stream is rewound by one
 *  position again afterwards.
 *
 *  @param  stream The stream to inspect.
 *  @return true if the stream reports EOF or the next character is one of
 *          the chunk end characters, false otherwise.
 */
bool StreamTokenizer::isChunkEnd(SeekableReadStream &stream) {
	uint32 peeked = stream.readChar();

	// Running out of data counts as a chunk end; no rewind happens here
	if (peeked == ReadStream::kEOF)
		return true;

	// Undo the read so that the caller still sees this character
	stream.seek(-1, SeekableReadStream::kOriginCurrent);

	return isIn(peeked, _chunkEnds);
}
/** Skip the rest of the current chunk.
 *
 *  Characters are read until a chunk end character or EOF is found. A chunk
 *  end character is not consumed: the stream is rewound by one position so
 *  that it is the next character to be read.
 *
 *  Asserts that at least one chunk end character has been registered.
 *
 *  @param stream The stream to advance.
 */
void StreamTokenizer::skipChunk(SeekableReadStream &stream) {
	assert(!_chunkEnds.empty());

	for (;;) {
		uint32 current = stream.readChar();

		// Stream exhausted before any chunk end showed up
		if (current == ReadStream::kEOF)
			return;

		// Still inside the chunk, keep reading
		if (!isIn(current, _chunkEnds))
			continue;

		// Found the chunk end; put it back and stop
		stream.seek(-1, SeekableReadStream::kOriginCurrent);
		return;
	}
}
/** Read the next token out of the stream.
 *
 *  Characters are collected until a separator (outside of quotes), a chunk
 *  end character or EOF is reached:
 *  - A chunk end character always terminates the token, even inside quotes.
 *    It is put back into the stream by seeking one position backwards.
 *  - Quote characters toggle the quoted state and are never part of the token.
 *  - Separators that appear before any token character has been collected
 *    are handled according to the consecutive separator rule.
 *  - Characters found in the ignore list are dropped.
 *
 *  Unless the token ended at a chunk end or the rule is kRuleHeed, separators
 *  following the token are skipped as well, again according to the rule.
 *
 *  @param  stream The stream to read from.
 *  @return The token read; may be empty.
 */
UString StreamTokenizer::getToken(SeekableReadStream &stream) {
	// Marker for "no separator has been seen so far"
	const uint32 noSeparator = 0xFFFFFFFF;

	UString result;

	bool   hitChunkEnd   = false;
	bool   quoted        = false;
	uint32 lastSeparator = noSeparator;

	// Collect the token, one character at a time
	uint32 ch;
	for (ch = stream.readChar(); ch != ReadStream::kEOF; ch = stream.readChar()) {
		if (isIn(ch, _chunkEnds)) {
			// The chunk ends here. Leave the end character in the stream
			stream.seek(-1, SeekableReadStream::kOriginCurrent);
			hitChunkEnd = true;
			break;
		}

		if (isIn(ch, _quotes)) {
			// Entering or leaving a quoted section; the quote itself is dropped
			quoted = !quoted;
			continue;
		}

		if (!quoted && isIn(ch, _separators)) {
			/* An unquoted separator ends the token if
			 * - we already collected something, or
			 * - every separator is to be heeded, or
			 * - only identical consecutive separators are ignored and this
			 *   one differs from the separator seen before it.
			 * Otherwise, it is a consecutive separator we skip over. */
			const bool endsToken =
				!result.empty() ||
				(_conSepRule == kRuleHeed) ||
				((_conSepRule == kRuleIgnoreSame) && (lastSeparator != noSeparator) && (lastSeparator != ch));

			// Remember this separator in either case
			lastSeparator = ch;

			if (endsToken)
				break;

			continue;
		}

		// Whatever is not on the ignore list goes into the token
		if (!isIn(ch, _ignores))
			result += ch;
	}

	// A token that starts with a NUL character counts as empty
	if (!result.empty() && (*result.begin() == '\0'))
		result.clear();

	// No trailing separators to skip at a chunk end or when heeding all of them
	if (hitChunkEnd || (_conSepRule == kRuleHeed))
		return result;

	// Swallow the consecutive separators following the token
	for (;;) {
		ch = stream.readChar();
		if (ch == ReadStream::kEOF)
			break;

		// Let the rule decide whether this character ends the run of separators
		bool runEnded = false;
		if (_conSepRule == kRuleIgnoreSame)
			runEnded = (ch != lastSeparator);
		else if (_conSepRule == kRuleIgnoreAll)
			runEnded = !isIn(ch, _separators);

		if (runEnded) {
			// This character is not ours to consume, put it back
			stream.seek(-1, SeekableReadStream::kOriginCurrent);
			break;
		}
	}

	return result;
}