void JsonStreamingParser::parse(char c) { //System.out.print(c); // valid whitespace characters in JSON (from RFC4627 for JSON) include: // space, horizontal tab, line feed or new line, and carriage return. // thanks: // http://stackoverflow.com/questions/16042274/definition-of-whitespace-in-json if ((c == ' ' || c == '\t' || c == '\n' || c == '\r') && !(state == STATE_IN_STRING || state == STATE_UNICODE || state == STATE_START_ESCAPE || state == STATE_IN_NUMBER || state == STATE_DONE)) { return; } switch (state) { case STATE_IN_STRING: if (c == '"') { endString(); } else if (c == '\\') { state = STATE_START_ESCAPE; } else if ((c < 0x1f) || (c == 0x7f)) { //throw new RuntimeException("Unescaped control character encountered: " + c + " at position" + characterCounter); } else { buffer[bufferPos] = c; bufferPos++; } break; case STATE_IN_ARRAY: if (c == ']') { endArray(); } else { startValue(c); } break; case STATE_IN_OBJECT: if (c == '}') { endObject(); } else if (c == '"') { startKey(); } else { //throw new RuntimeException("Start of string expected for object key. Instead got: " + c + " at position" + characterCounter); } break; case STATE_END_KEY: if (c != ':') { //throw new RuntimeException("Expected ':' after key. Instead got " + c + " at position" + characterCounter); } state = STATE_AFTER_KEY; break; case STATE_AFTER_KEY: startValue(c); break; case STATE_START_ESCAPE: processEscapeCharacters(c); break; case STATE_UNICODE: processUnicodeCharacter(c); break; case STATE_UNICODE_SURROGATE: unicodeEscapeBuffer[unicodeEscapeBufferPos] = c; unicodeEscapeBufferPos++; if (unicodeEscapeBufferPos == 2) { endUnicodeSurrogateInterstitial(); } break; case STATE_AFTER_VALUE: { // not safe for size == 0!!! int within = stack[stackPos - 1]; if (within == STACK_OBJECT) { if (c == '}') { endObject(); } else if (c == ',') { state = STATE_IN_OBJECT; } else { //throw new RuntimeException("Expected ',' or '}' while parsing object. Got: " + c + ". " + characterCounter); } } else if (within == STACK_ARRAY) { if (c == ']') { endArray(); } else if (c == ',') { state = STATE_IN_ARRAY; } else { //throw new RuntimeException("Expected ',' or ']' while parsing array. Got: " + c + ". " + characterCounter); } } else { //throw new RuntimeException("Finished a literal, but unclear what state to move to. Last state: " + characterCounter); } }break; case STATE_IN_NUMBER: if (c >= '0' && c <= '9') { buffer[bufferPos] = c; bufferPos++; } else if (c == '.') { if (doesCharArrayContain(buffer, bufferPos, '.')) { //throw new RuntimeException("Cannot have multiple decimal points in a number. " + characterCounter); } else if (doesCharArrayContain(buffer, bufferPos, 'e')) { //throw new RuntimeException("Cannot have a decimal point in an exponent." + characterCounter); } buffer[bufferPos] = c; bufferPos++; } else if (c == 'e' || c == 'E') { if (doesCharArrayContain(buffer, bufferPos, 'e')) { //throw new RuntimeException("Cannot have multiple exponents in a number. " + characterCounter); } buffer[bufferPos] = c; bufferPos++; } else if (c == '+' || c == '-') { char last = buffer[bufferPos - 1]; if (!(last == 'e' || last == 'E')) { //throw new RuntimeException("Can only have '+' or '-' after the 'e' or 'E' in a number." + characterCounter); } buffer[bufferPos] = c; bufferPos++; } else { endNumber(); // we have consumed one beyond the end of the number parse(c); } break; case STATE_IN_TRUE: buffer[bufferPos] = c; bufferPos++; if (bufferPos == 4) { endTrue(); } break; case STATE_IN_FALSE: buffer[bufferPos] = c; bufferPos++; if (bufferPos == 5) { endFalse(); } break; case STATE_IN_NULL: buffer[bufferPos] = c; bufferPos++; if (bufferPos == 4) { endNull(); } break; case STATE_DONE: myListener->startDocument(); if (c == '[') { startArray(); } else if (c == '{') { startObject(); } else { // throw new ParsingError($this->_line_number, // $this->_char_number, // "Document must start with object or array."); } break; //case STATE_DONE: // throw new ParsingError($this->_line_number, $this->_char_number, // "Expected end of document."); //default: // throw new ParsingError($this->_line_number, $this->_char_number, // "Internal error. Reached an unknown state: ".$this->_state); } characterCounter++; }
bool RtfReader::parseDocument() { enum { READ_NORMAL_DATA, READ_BINARY_DATA, READ_HEX_SYMBOL, READ_KEYWORD, READ_KEYWORD_PARAMETER, READ_END_OF_FILE } parserState = READ_NORMAL_DATA; std::string keyword; std::string parameterString; std::string hexString; int imageStartOffset = -1; while (!myIsInterrupted) { const char *ptr = myStreamBuffer; const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize); if (ptr == end) { break; } const char *dataStart = ptr; bool readNextChar = true; while (ptr != end) { switch (parserState) { case READ_END_OF_FILE: if (*ptr != '}' && !std::isspace(*ptr)) { return false; } break; case READ_BINARY_DATA: // TODO: optimize processCharData(ptr, 1); --myBinaryDataSize; if (myBinaryDataSize == 0) { parserState = READ_NORMAL_DATA; } break; case READ_NORMAL_DATA: switch (*ptr) { case '{': if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; myStateStack.push(myState); myState.ReadDataAsHex = false; break; case '}': { if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; if (imageStartOffset >= 0) { if (!myNextImageMimeType.empty()) { const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset; insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize); } imageStartOffset = -1; } if (myStateStack.empty()) { parserState = READ_END_OF_FILE; break; } if (myState.Destination != myStateStack.top().Destination) { switchDestination(myState.Destination, false); switchDestination(myStateStack.top().Destination, true); } bool oldItalic = myState.Italic; bool oldBold = myState.Bold; bool oldUnderlined = myState.Underlined; ZLTextAlignmentType oldAlignment = myState.Alignment; myState = myStateStack.top(); myStateStack.pop(); if (myState.Italic != oldItalic) { setFontProperty(RtfReader::FONT_ITALIC); } if (myState.Bold != oldBold) { setFontProperty(RtfReader::FONT_BOLD); } if (myState.Underlined != oldUnderlined) { setFontProperty(RtfReader::FONT_UNDERLINED); } if (myState.Alignment != oldAlignment) { setAlignment(); } break; } case '\\': if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; keyword.erase(); parserState = READ_KEYWORD; break; case 0x0d: case 0x0a: // cr and lf are noise characters... if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; break; default: if (myState.ReadDataAsHex) { if (imageStartOffset == -1) { imageStartOffset = myStream->offset() + (ptr - end); } } break; } break; case READ_HEX_SYMBOL: hexString += *ptr; if (hexString.size() == 2) { char ch = std::strtol(hexString.c_str(), 0, 16); hexString.erase(); processCharData(&ch, 1); parserState = READ_NORMAL_DATA; dataStart = ptr + 1; } break; case READ_KEYWORD: if (!std::isalpha(*ptr)) { if (ptr == dataStart && keyword.empty()) { if (*ptr == '\'') { parserState = READ_HEX_SYMBOL; } else { keyword = *ptr; processKeyword(keyword); parserState = READ_NORMAL_DATA; } dataStart = ptr + 1; } else { keyword.append(dataStart, ptr - dataStart); if (*ptr == '-' || std::isdigit(*ptr)) { dataStart = ptr; parserState = READ_KEYWORD_PARAMETER; } else { readNextChar = *ptr == ' '; processKeyword(keyword); parserState = READ_NORMAL_DATA; dataStart = readNextChar ? ptr + 1 : ptr; } } } break; case READ_KEYWORD_PARAMETER: if (!std::isdigit(*ptr)) { parameterString.append(dataStart, ptr - dataStart); int parameter = std::atoi(parameterString.c_str()); parameterString.erase(); readNextChar = *ptr == ' '; if (keyword == "bin" && parameter > 0) { myBinaryDataSize = parameter; parserState = READ_BINARY_DATA; } else if (keyword == "u") { // TODO: implement commands of form "\ucL\uN" (insert symbol N + skip L bytes) processUnicodeCharacter(parameter); readNextChar &= *ptr != '\\'; parserState = READ_NORMAL_DATA; } else { processKeyword(keyword, ¶meter); parserState = READ_NORMAL_DATA; } dataStart = readNextChar ? ptr + 1 : ptr; } break; } if (readNextChar) { ++ptr; } else { readNextChar = true; } } if (dataStart < end) { switch (parserState) { case READ_NORMAL_DATA: processCharData(dataStart, end - dataStart); case READ_KEYWORD: keyword.append(dataStart, end - dataStart); break; case READ_KEYWORD_PARAMETER: parameterString.append(dataStart, end - dataStart); break; default: break; } } } return myIsInterrupted || myStateStack.empty(); }