void JsonStreamingParser::parse(char c) {
    //System.out.print(c);
    // valid whitespace characters in JSON (from RFC4627 for JSON) include:
    // space, horizontal tab, line feed or new line, and carriage return.
    // thanks:
    // http://stackoverflow.com/questions/16042274/definition-of-whitespace-in-json
    if ((c == ' ' || c == '\t' || c == '\n' || c == '\r')
        && !(state == STATE_IN_STRING || state == STATE_UNICODE || state == STATE_START_ESCAPE
            || state == STATE_IN_NUMBER || state == STATE_DONE)) {
      return;
    }
    switch (state) {
    case STATE_IN_STRING:
      if (c == '"') {
        endString();
      } else if (c == '\\') {
        state = STATE_START_ESCAPE;
      } else if ((c < 0x1f) || (c == 0x7f)) {
        //throw new RuntimeException("Unescaped control character encountered: " + c + " at position" + characterCounter);
      } else {
        buffer[bufferPos] = c;
        bufferPos++;
      }
      break;
    case STATE_IN_ARRAY:
      if (c == ']') {
        endArray();
      } else {
        startValue(c);
      }
      break;
    case STATE_IN_OBJECT:
      if (c == '}') {
        endObject();
      } else if (c == '"') {
        startKey();
      } else {
        //throw new RuntimeException("Start of string expected for object key. Instead got: " + c + " at position" + characterCounter);
      }
      break;
    case STATE_END_KEY:
      if (c != ':') {
        //throw new RuntimeException("Expected ':' after key. Instead got " + c + " at position" + characterCounter);
      }
      state = STATE_AFTER_KEY;
      break;
    case STATE_AFTER_KEY:
      startValue(c);
      break;
    case STATE_START_ESCAPE:
      processEscapeCharacters(c);
      break;
    case STATE_UNICODE:
      processUnicodeCharacter(c);
      break;
    case STATE_UNICODE_SURROGATE:
      unicodeEscapeBuffer[unicodeEscapeBufferPos] = c;
      unicodeEscapeBufferPos++;
      if (unicodeEscapeBufferPos == 2) {
        endUnicodeSurrogateInterstitial();
      }
      break;
    case STATE_AFTER_VALUE: {
      // not safe for size == 0!!!
      int within = stack[stackPos - 1];
      if (within == STACK_OBJECT) {
        if (c == '}') {
          endObject();
        } else if (c == ',') {
          state = STATE_IN_OBJECT;
        } else {
          //throw new RuntimeException("Expected ',' or '}' while parsing object. Got: " + c + ". " + characterCounter);
        }
      } else if (within == STACK_ARRAY) {
        if (c == ']') {
          endArray();
        } else if (c == ',') {
          state = STATE_IN_ARRAY;
        } else {
          //throw new RuntimeException("Expected ',' or ']' while parsing array. Got: " + c + ". " + characterCounter);

        }
      } else {
        //throw new RuntimeException("Finished a literal, but unclear what state to move to. Last state: " + characterCounter);
      }
    }break;
    case STATE_IN_NUMBER:
      if (c >= '0' && c <= '9') {
        buffer[bufferPos] = c;
        bufferPos++;
      } else if (c == '.') {
        if (doesCharArrayContain(buffer, bufferPos, '.')) {
          //throw new RuntimeException("Cannot have multiple decimal points in a number. " + characterCounter);
        } else if (doesCharArrayContain(buffer, bufferPos, 'e')) {
          //throw new RuntimeException("Cannot have a decimal point in an exponent." + characterCounter);
        }
        buffer[bufferPos] = c;
        bufferPos++;
      } else if (c == 'e' || c == 'E') {
        if (doesCharArrayContain(buffer, bufferPos, 'e')) {
          //throw new RuntimeException("Cannot have multiple exponents in a number. " + characterCounter);
        }
        buffer[bufferPos] = c;
        bufferPos++;
      } else if (c == '+' || c == '-') {
        char last = buffer[bufferPos - 1];
        if (!(last == 'e' || last == 'E')) {
          //throw new RuntimeException("Can only have '+' or '-' after the 'e' or 'E' in a number." + characterCounter);
        }
        buffer[bufferPos] = c;
        bufferPos++;
      } else {
        endNumber();
        // we have consumed one beyond the end of the number
        parse(c);
      }
      break;
    case STATE_IN_TRUE:
      buffer[bufferPos] = c;
      bufferPos++;
      if (bufferPos == 4) {
        endTrue();
      }
      break;
    case STATE_IN_FALSE:
      buffer[bufferPos] = c;
      bufferPos++;
      if (bufferPos == 5) {
        endFalse();
      }
      break;
    case STATE_IN_NULL:
      buffer[bufferPos] = c;
      bufferPos++;
      if (bufferPos == 4) {
        endNull();
      }
      break;
    case STATE_DONE:
      myListener->startDocument();
      if (c == '[') {
        startArray();
      } else if (c == '{') {
        startObject();
      } else {
        // throw new ParsingError($this->_line_number,
        // $this->_char_number,
        // "Document must start with object or array.");
      }
      break;
    //case STATE_DONE:
      // throw new ParsingError($this->_line_number, $this->_char_number,
      // "Expected end of document.");
    //default:
      // throw new ParsingError($this->_line_number, $this->_char_number,
      // "Internal error. Reached an unknown state: ".$this->_state);
    }
    characterCounter++;
  }
Пример #2
0
// Reads the RTF stream buffer by buffer and drives a small state machine over
// its bytes.
//
// Plain text runs are passed to processCharData(), control words to
// processKeyword(), "\uN" to processUnicodeCharacter(), and group braces
// push/pop the formatting state on myStateStack. Partially read keywords,
// parameters and hex escapes are carried over between buffers in the local
// strings below.
//
// Returns false if anything other than '}' or whitespace follows the closing
// brace of the outermost group. Otherwise returns true when parsing was
// interrupted or when the group stack is empty at the end of the stream.
bool RtfReader::parseDocument() {
	enum {
		READ_NORMAL_DATA,
		READ_BINARY_DATA,
		READ_HEX_SYMBOL,
		READ_KEYWORD,
		READ_KEYWORD_PARAMETER,
		READ_END_OF_FILE
	} parserState = READ_NORMAL_DATA;

	std::string keyword;
	std::string parameterString;
	std::string hexString;
	int imageStartOffset = -1;

	while (!myIsInterrupted) {
		const char *ptr = myStreamBuffer;
		const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize);
		if (ptr == end) {
			break;
		}
		// dataStart marks the beginning of the not yet consumed run
		// (text, keyword or parameter) inside the current buffer.
		const char *dataStart = ptr;
		// When false, the current character is examined again in the new state.
		bool readNextChar = true;
		while (ptr != end) {
			// The <cctype> functions require a value representable as
			// unsigned char; passing a negative char is undefined behaviour.
			const unsigned char current = static_cast<unsigned char>(*ptr);
			switch (parserState) {
				case READ_END_OF_FILE:
					if (*ptr != '}' && !std::isspace(current)) {
						return false;
					}
					break;
				case READ_BINARY_DATA:
					// TODO: optimize
					processCharData(ptr, 1);
					// This byte is consumed; without advancing dataStart it
					// would be passed to processCharData() a second time by
					// the next flush in READ_NORMAL_DATA.
					dataStart = ptr + 1;
					--myBinaryDataSize;
					if (myBinaryDataSize == 0) {
						parserState = READ_NORMAL_DATA;
					}
					break;
				case READ_NORMAL_DATA:
					switch (*ptr) {
						case '{':
							// Flush pending text, then open a new group.
							if (ptr > dataStart) {
								processCharData(dataStart, ptr - dataStart);
							}
							dataStart = ptr + 1;
							myStateStack.push(myState);
							myState.ReadDataAsHex = false;
							break;
						case '}':
						{
							// Flush pending text, then close the current group.
							if (ptr > dataStart) {
								processCharData(dataStart, ptr - dataStart);
							}
							dataStart = ptr + 1;

							if (imageStartOffset >= 0) {
								if (!myNextImageMimeType.empty()) {
									// (ptr - end) is negative: it converts the
									// stream offset after the read into the
									// offset of the current character.
									const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset;
									insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize);
								}
								imageStartOffset = -1;
							}

							if (myStateStack.empty()) {
								parserState = READ_END_OF_FILE;
								break;
							}

							if (myState.Destination != myStateStack.top().Destination) {
								switchDestination(myState.Destination, false);
								switchDestination(myStateStack.top().Destination, true);
							}

							// Restore the enclosing group's state and report
							// every property that differs from the group
							// being closed.
							bool oldItalic = myState.Italic;
							bool oldBold = myState.Bold;
							bool oldUnderlined = myState.Underlined;
							ZLTextAlignmentType oldAlignment = myState.Alignment;
							myState = myStateStack.top();
							myStateStack.pop();

							if (myState.Italic != oldItalic) {
								setFontProperty(RtfReader::FONT_ITALIC);
							}
							if (myState.Bold != oldBold) {
								setFontProperty(RtfReader::FONT_BOLD);
							}
							if (myState.Underlined != oldUnderlined) {
								setFontProperty(RtfReader::FONT_UNDERLINED);
							}
							if (myState.Alignment != oldAlignment) {
								setAlignment();
							}

							break;
						}
						case '\\':
							if (ptr > dataStart) {
								processCharData(dataStart, ptr - dataStart);
							}
							dataStart = ptr + 1;
							keyword.erase();
							parserState = READ_KEYWORD;
							break;
						case 0x0d:
						case 0x0a:			// cr and lf are noise characters...
							if (ptr > dataStart) {
								processCharData(dataStart, ptr - dataStart);
							}
							dataStart = ptr + 1;
							break;
						default:
							// Remember where hex-encoded data starts in the stream.
							if (myState.ReadDataAsHex) {
								if (imageStartOffset == -1) {
									imageStartOffset = myStream->offset() + (ptr - end);
								}
							}
							break;
					}
					break;
				case READ_HEX_SYMBOL:
					// "\'hh": collect two hex digits and emit the byte they encode.
					hexString += *ptr;
					if (hexString.size() == 2) {
						char ch = static_cast<char>(std::strtol(hexString.c_str(), 0, 16));
						hexString.erase();
						processCharData(&ch, 1);
						parserState = READ_NORMAL_DATA;
						dataStart = ptr + 1;
					}
					break;
				case READ_KEYWORD:
					if (!std::isalpha(current)) {
						if (ptr == dataStart && keyword.empty()) {
							// Non-letter directly after the backslash: either
							// a hex escape or a one-character control symbol.
							if (*ptr == '\'') {
								parserState = READ_HEX_SYMBOL;
							} else {
								keyword = *ptr;
								processKeyword(keyword);
								parserState = READ_NORMAL_DATA;
							}
							dataStart = ptr + 1;
						} else {
							keyword.append(dataStart, ptr - dataStart);
							if (*ptr == '-' || std::isdigit(current)) {
								dataStart = ptr;
								parserState = READ_KEYWORD_PARAMETER;
							} else {
								// A single space after a keyword is a delimiter
								// and is swallowed; any other character is
								// examined again as normal data.
								readNextChar = *ptr == ' ';
								processKeyword(keyword);
								parserState = READ_NORMAL_DATA;
								dataStart = readNextChar ? ptr + 1 : ptr;
							}
						}
					}
					break;
				case READ_KEYWORD_PARAMETER:
					if (!std::isdigit(current)) {
						parameterString.append(dataStart, ptr - dataStart);
						int parameter = std::atoi(parameterString.c_str());
						parameterString.erase();
						readNextChar = *ptr == ' ';
						if (keyword == "bin" && parameter > 0) {
							myBinaryDataSize = parameter;
							parserState = READ_BINARY_DATA;
						} else if (keyword == "u") {
							// TODO: implement commands of form "\ucL\uN" (insert symbol N + skip L bytes)
							processUnicodeCharacter(parameter);
							readNextChar &= *ptr != '\\';
							parserState = READ_NORMAL_DATA;
						} else {
							processKeyword(keyword, &parameter);
							parserState = READ_NORMAL_DATA;
						}
						dataStart = readNextChar ? ptr + 1 : ptr;
					}
					break;
			}
			if (readNextChar) {
				++ptr;
			} else {
				readNextChar = true;
			}
		}
		// The buffer is exhausted: hand over or stash whatever is still
		// pending so that it survives the next read.
		if (dataStart < end) {
			switch (parserState) {
				case READ_NORMAL_DATA:
					processCharData(dataStart, end - dataStart);
					// Plain text must not leak into `keyword`.
					break;
				case READ_KEYWORD:
					keyword.append(dataStart, end - dataStart);
					break;
				case READ_KEYWORD_PARAMETER:
					parameterString.append(dataStart, end - dataStart);
					break;
				default:
					break;
			}
		}
	}

	return myIsInterrupted || myStateStack.empty();
}