/**
 * Parses a PML stream and dispatches its content to the reader callbacks.
 *
 * The stream is consumed in chunks of at most pmlStreamBufferSize bytes
 * through myStreamBuffer. A three-state machine walks every chunk:
 *   - READ_NORMAL_DATA:   plain text; '\n' calls newLine(), '\\' starts a tag.
 *   - READ_TAG:           collecting the tag name that follows a backslash.
 *   - READ_TAG_PARAMETER: collecting the double-quoted value after "tag=".
 * The state and the partially collected tag name / parameter survive chunk
 * boundaries, so a tag may be split across two reads.
 *
 * @param stream  input stream; read until read() yields no bytes or
 *                myIsInterrupted becomes true.
 * @return the value of myIsInterrupted at the moment parsing stops.
 */
bool PmlReader::parseDocument(ZLInputStream &stream) {
	enum {
		READ_NORMAL_DATA,
		READ_TAG,
		READ_TAG_PARAMETER
	} parserState = READ_NORMAL_DATA;

	// Number of tag-name characters still to be consumed after the first one.
	size_t tagNameLength = 0;
	std::string tagName;
	std::string parameterString;
	// True once the opening '"' of a tag parameter has been seen.
	bool startParameterReading = false;

	while (!myIsInterrupted) {
		const char *ptr = myStreamBuffer;
		const char *end = myStreamBuffer + stream.read(myStreamBuffer, pmlStreamBufferSize);
		if (ptr == end) {
			break;
		}

		// Start of the not-yet-consumed run (text, tag name or parameter).
		const char *dataStart = ptr;
		// Cleared for one iteration when the current character must be
		// re-examined in the new state instead of being skipped.
		bool readNextChar = true;

		while (ptr != end) {
			switch (parserState) {
				case READ_NORMAL_DATA:
					if (*ptr == '\n') {
						if (ptr > dataStart) {
							processCharData(dataStart, ptr - dataStart);
						}
						newLine();
						dataStart = ptr + 1;
					} else if (*ptr == '\\') {
						// Flush pending text, then start collecting a tag.
						if (ptr > dataStart) {
							processCharData(dataStart, ptr - dataStart);
						}
						dataStart = ptr + 1;
						tagName.erase();
						parserState = READ_TAG;
					}
					break;

				case READ_TAG:
					if ((ptr == dataStart) && tagName.empty()) {
						// First character after the backslash.
						if (*ptr == '\\') {
							// "\\\\" is an escaped backslash: emit one '\\'.
							processCharData(ptr, 1);
							dataStart = ptr + 1;
							parserState = READ_NORMAL_DATA;
						} else {
							// NOTE(review): findTagLength() is not visible here; confirm
							// that it never reads past the end of the current chunk.
							tagNameLength = findTagLength(ptr);
							if (tagNameLength == 0) {
								// No tag length reported: drop this character and
								// return to plain text.
								dataStart = ptr + 1;
								parserState = READ_NORMAL_DATA;
							} else {
								--tagNameLength;
							}
						}
					} else if (tagNameLength == 0) {
						// The whole name has been passed; ptr is the first
						// character after it.
						tagName.append(dataStart, ptr - dataStart);
						if (*ptr == '=') {
							dataStart = ptr + 1;
							parameterString.erase();
							parserState = READ_TAG_PARAMETER;
						} else {
							// Tag without parameter: handle it and re-read the
							// current character as normal data.
							readNextChar = false;
							processTag(tagName);
							dataStart = ptr;
							parserState = READ_NORMAL_DATA;
						}
					} else {
						--tagNameLength;
					}
					break;

				case READ_TAG_PARAMETER:
					if (*ptr == '"') {
						if (!startParameterReading) {
							startParameterReading = true;
							dataStart = ptr + 1;
						} else {
							parameterString.append(dataStart, ptr - dataStart);
							processTag(tagName, parameterString);
							parserState = READ_NORMAL_DATA;
							dataStart = ptr + 1;
							startParameterReading = false;
						}
					}
					break;
			}

			if (readNextChar) {
				++ptr;
			} else {
				readNextChar = true;
			}
		}

		// Carry whatever is left in this chunk over to the right accumulator
		// before the buffer is overwritten by the next read.
		if (dataStart < end) {
			switch (parserState) {
				case READ_NORMAL_DATA:
					processCharData(dataStart, end - dataStart);
					break;
				case READ_TAG:
					tagName.append(dataStart, end - dataStart);
					break;
				case READ_TAG_PARAMETER:
					parameterString.append(dataStart, end - dataStart);
					break;
				default:
					break;
			}
		}
	}

	return myIsInterrupted;
}
bool RtfReader::parseDocument() { enum { READ_NORMAL_DATA, READ_BINARY_DATA, READ_HEX_SYMBOL, READ_KEYWORD, READ_KEYWORD_PARAMETER } parserState = READ_NORMAL_DATA; std::string keyword; std::string parameterString; std::string hexString; int imageStartOffset = -1; while (!myIsInterrupted) { const char *ptr = myStreamBuffer; const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize); if (ptr == end) { break; } const char *dataStart = ptr; bool readNextChar = true; while (ptr != end) { switch (parserState) { case READ_BINARY_DATA: // TODO: optimize processCharData(ptr, 1); --myBinaryDataSize; if (myBinaryDataSize == 0) { parserState = READ_NORMAL_DATA; } break; case READ_NORMAL_DATA: switch (*ptr) { case '{': if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; myStateStack.push(myState); myState.ReadDataAsHex = false; break; case '}': { if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; if (imageStartOffset >= 0) { int imageSize = myStream->offset() + (ptr - end) - imageStartOffset; insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize); imageStartOffset = -1; } if (myStateStack.empty()) { return false; } if (myState.Destination != myStateStack.top().Destination) { switchDestination(myState.Destination, false); switchDestination(myStateStack.top().Destination, true); } bool oldItalic = myState.Italic; bool oldBold = myState.Bold; bool oldUnderlined = myState.Underlined; ZLTextAlignmentType oldAlignment = myState.Alignment; myState = myStateStack.top(); myStateStack.pop(); if (myState.Italic != oldItalic) { setFontProperty(RtfReader::FONT_ITALIC); } if (myState.Bold != oldBold) { setFontProperty(RtfReader::FONT_BOLD); } if (myState.Underlined != oldUnderlined) { setFontProperty(RtfReader::FONT_UNDERLINED); } if (myState.Alignment != oldAlignment) { setAlignment(); } break; } case '\\': if (ptr > dataStart) { processCharData(dataStart, ptr - 
dataStart); } dataStart = ptr + 1; keyword.erase(); parserState = READ_KEYWORD; break; case 0x0d: case 0x0a: // cr and lf are noise characters... if (ptr > dataStart) { processCharData(dataStart, ptr - dataStart); } dataStart = ptr + 1; break; default: if (myState.ReadDataAsHex) { if (imageStartOffset == -1) { imageStartOffset = myStream->offset() + (ptr - end); } } break; } break; case READ_HEX_SYMBOL: hexString += *ptr; if (hexString.size() == 2) { char ch = strtol(hexString.c_str(), 0, 16); hexString.erase(); processCharData(&ch, 1); parserState = READ_NORMAL_DATA; dataStart = ptr + 1; } break; case READ_KEYWORD: if (!isalpha(*ptr)) { if ((ptr == dataStart) && (keyword.empty())) { if (*ptr == '\'') { parserState = READ_HEX_SYMBOL; } else { keyword = *ptr; processKeyword(keyword); parserState = READ_NORMAL_DATA; } dataStart = ptr + 1; } else { keyword.append(dataStart, ptr - dataStart); if ((*ptr == '-') || isdigit(*ptr)) { dataStart = ptr; parserState = READ_KEYWORD_PARAMETER; } else { readNextChar = *ptr == ' '; processKeyword(keyword); parserState = READ_NORMAL_DATA; dataStart = readNextChar ? ptr + 1 : ptr; } } } break; case READ_KEYWORD_PARAMETER: if (!isdigit(*ptr)) { parameterString.append(dataStart, ptr - dataStart); int parameter = atoi(parameterString.c_str()); parameterString.erase(); readNextChar = *ptr == ' '; if ((keyword == "bin") && (parameter > 0)) { myBinaryDataSize = parameter; parserState = READ_BINARY_DATA; } else { processKeyword(keyword, ¶meter); parserState = READ_NORMAL_DATA; } dataStart = readNextChar ? ptr + 1 : ptr; } break; } if (readNextChar) { ++ptr; } else { readNextChar = true; } } if (dataStart < end) { switch (parserState) { case READ_NORMAL_DATA: processCharData(dataStart, end - dataStart); case READ_KEYWORD: keyword.append(dataStart, end - dataStart); break; case READ_KEYWORD_PARAMETER: parameterString.append(dataStart, end - dataStart); break; default: break; } } } return myIsInterrupted || myStateStack.empty(); }
/**
 * Encodes one code point with ZLUnicodeUtil::ucs4ToUtf8 and forwards the
 * resulting bytes to processCharData (third argument false).
 *
 * @param character  code point to encode.
 */
void RtfReader::processUnicodeCharacter(int character) {
	// Automatic storage keeps the function re-entrant; the bytes are only
	// needed for the duration of the processCharData call.
	// NOTE(review): assumes ucs4ToUtf8 never writes more than 8 bytes - confirm.
	char buffer[8];
	const int len = ZLUnicodeUtil::ucs4ToUtf8(buffer, character);
	processCharData(buffer, len, false);
}