예제 #1
0
/**
 * Reads the stream in chunks of up to pmlStreamBufferSize bytes and drives the
 * processCharData() / newLine() / processTag() callbacks from its contents.
 *
 * Grammar handled here:
 *  - '\n' flushes the pending text run and calls newLine();
 *  - a backslash starts a tag; a doubled backslash emits one literal backslash;
 *  - findTagLength() gives the length of the tag name; a result of 0 drops
 *    that character and returns to normal data;
 *  - a tag name followed by '=' takes a parameter, which is the text between
 *    the next pair of double quotes.
 *
 * The tag name and the partially read parameter are kept across buffer
 * refills, so a token may straddle a chunk boundary.
 *
 * @param stream input stream the document is read from
 * @return the value of myIsInterrupted once the loop ends (false when the
 *         stream simply ran out of data)
 */
bool PmlReader::parseDocument(ZLInputStream &stream) {
	enum {
		READ_NORMAL_DATA,
		READ_TAG,
		READ_TAG_PARAMETER,
	} parserState = READ_NORMAL_DATA;

	// Characters of the tag name still to be consumed before the tag is complete.
	size_t tagNameLength = 0;
	std::string tagName;
	std::string parameterString;

	// True once the opening quote of a tag parameter has been seen.
	bool startParameterReading = false;
	// Per-call state (not a function-level static): true until the first
	// non-whitespace character after a line break.
	bool atLineStart = true;

	while (!myIsInterrupted) {
		const char *ptr = myStreamBuffer;
		const char *end = myStreamBuffer + stream.read(myStreamBuffer, pmlStreamBufferSize);
		if (ptr == end) {
			break;
		}
		// Start of the run of bytes that has been scanned but not yet consumed.
		const char *dataStart = ptr;
		// Cleared when the current character must be re-examined in the next state.
		bool readNextChar = true;
		while (ptr != end) {
			switch (parserState) {
				case READ_NORMAL_DATA:
					if (*ptr == '\n') {
						if (ptr > dataStart) {
							processCharData(dataStart, ptr - dataStart);
						}
						newLine();
						atLineStart = true;
						dataStart = ptr + 1;
					} else if (atLineStart && isspace(static_cast<unsigned char>(*ptr))) {
						// Whitespace at the start of a line: nothing to do here, the
						// bytes stay in the pending run and are flushed with it.
						// The cast keeps isspace() defined for bytes >= 0x80.
					} else {
						atLineStart = false;
						if (*ptr == '\\') {
							if (ptr > dataStart) {
								processCharData(dataStart, ptr - dataStart);
							}
							dataStart = ptr + 1;
							tagName.erase();
							parserState = READ_TAG;
						}
					}
					break;
				case READ_TAG:
					if ((ptr == dataStart) && (tagName.empty())) {
						// First character after the backslash.
						if (*ptr == '\\') {
							// Escaped backslash: emit it as text.
							processCharData(ptr, 1);
							dataStart = ptr + 1;
							parserState = READ_NORMAL_DATA;
						} else {
							tagNameLength = findTagLength(ptr);
							if (tagNameLength == 0) {
								// Not a tag: skip this character.
								dataStart = ptr + 1;
								parserState = READ_NORMAL_DATA;
							} else {
								--tagNameLength;
							}
						}
					} else {
						if (tagNameLength == 0) {
							// Whole name consumed; *ptr is the first character after it.
							tagName.append(dataStart, ptr - dataStart);
							if (*ptr == '=') {
								dataStart = ptr + 1;
								parameterString.erase();
								parserState = READ_TAG_PARAMETER;
							} else {
								// Re-scan *ptr as normal data on the next iteration.
								readNextChar = false;
								processTag(tagName);
								dataStart = ptr;
								parserState = READ_NORMAL_DATA;
							}
						} else {
							--tagNameLength;
						}
					}
					break;
				case READ_TAG_PARAMETER:
					if (*ptr == '"') {
						if (!startParameterReading) {
							// Opening quote: the value starts right after it.
							startParameterReading = true;
							dataStart = ptr + 1;
						} else {
							// Closing quote: the parameter is complete.
							parameterString.append(dataStart, ptr - dataStart);
							processTag(tagName, parameterString);
							parserState = READ_NORMAL_DATA;
							dataStart = ptr + 1;
							startParameterReading = false;
						}
					}
					break;
			}
			if (readNextChar) {
				++ptr;
			} else {
				readNextChar = true;
			}
		}
		// Carry whatever is still pending over to the next buffer refill.
		if (dataStart < end) {
			switch (parserState) {
				case READ_NORMAL_DATA:
					processCharData(dataStart, end - dataStart);
					break;
				case READ_TAG:
					tagName.append(dataStart, end - dataStart);
					break;
				case READ_TAG_PARAMETER:
					// Bytes in front of the opening quote are not part of the value;
					// within a single buffer they are discarded as well.
					if (startParameterReading) {
						parameterString.append(dataStart, end - dataStart);
					}
					break;
				default:
					break;
			}
		}
	}
	return myIsInterrupted;
}
예제 #2
0
/**
 * Reads myStream in chunks of up to rtfStreamBufferSize bytes and tokenizes the
 * contents, driving processCharData(), processKeyword(), insertImage(),
 * switchDestination(), setFontProperty() and setAlignment().
 *
 * Grammar handled here:
 *  - '{' pushes the current state onto myStateStack; '}' restores the previous
 *    one and reports every font property / alignment / destination that differs;
 *  - a backslash starts a keyword, optionally followed by a signed decimal
 *    parameter; a single space after the keyword or parameter is consumed;
 *  - backslash + apostrophe introduces two hex digits that form one byte;
 *  - "bin" with a positive parameter switches to raw binary data of that size;
 *  - CR and LF in normal data are skipped.
 *
 * While myState.ReadDataAsHex is set, the stream offset of the first data
 * character is remembered and insertImage() is called at the next '}'.
 *
 * Keyword, parameter and hex digits are kept across buffer refills, so a token
 * may straddle a chunk boundary.
 *
 * @return false immediately when a '}' is met while the state stack is empty;
 *         otherwise myIsInterrupted || myStateStack.empty() once the loop ends
 */
bool RtfReader::parseDocument() {
    enum {
        READ_NORMAL_DATA,
        READ_BINARY_DATA,
        READ_HEX_SYMBOL,
        READ_KEYWORD,
        READ_KEYWORD_PARAMETER
    } parserState = READ_NORMAL_DATA;

    std::string keyword;
    std::string parameterString;
    std::string hexString;
    // Stream offset of the first hex data character of the current image, or -1.
    int imageStartOffset = -1;

    while (!myIsInterrupted) {
        const char *ptr = myStreamBuffer;
        const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize);
        if (ptr == end) {
            break;
        }
        // Start of the run of bytes that has been scanned but not yet consumed.
        const char *dataStart = ptr;
        // Cleared when the current character must be re-examined in the next state.
        bool readNextChar = true;
        while (ptr != end) {
            switch (parserState) {
            case READ_BINARY_DATA:
                // TODO: optimize
                processCharData(ptr, 1);
                --myBinaryDataSize;
                if (myBinaryDataSize == 0) {
                    parserState = READ_NORMAL_DATA;
                    // The binary bytes were emitted one by one above; restart the
                    // pending run after them so they are not flushed a second time.
                    dataStart = ptr + 1;
                }
                break;
            case READ_NORMAL_DATA:
                switch (*ptr) {
                case '{':
                    if (ptr > dataStart) {
                        processCharData(dataStart, ptr - dataStart);
                    }
                    dataStart = ptr + 1;
                    myStateStack.push(myState);
                    myState.ReadDataAsHex = false;
                    break;
                case '}':
                {
                    if (ptr > dataStart) {
                        processCharData(dataStart, ptr - dataStart);
                    }
                    dataStart = ptr + 1;

                    if (imageStartOffset >= 0) {
                        // myStream->offset() is taken as the position after the buffer
                        // just read; (ptr - end) moves it back to the current character.
                        int imageSize = myStream->offset() + (ptr - end) - imageStartOffset;
                        insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize);
                        imageStartOffset = -1;
                    }

                    if (myStateStack.empty()) {
                        // Closing brace without a matching opening one.
                        return false;
                    }

                    if (myState.Destination != myStateStack.top().Destination) {
                        switchDestination(myState.Destination, false);
                        switchDestination(myStateStack.top().Destination, true);
                    }

                    // Remember the formatting of the group being closed so that
                    // only the properties that actually change are reported.
                    bool oldItalic = myState.Italic;
                    bool oldBold = myState.Bold;
                    bool oldUnderlined = myState.Underlined;
                    ZLTextAlignmentType oldAlignment = myState.Alignment;
                    myState = myStateStack.top();
                    myStateStack.pop();

                    if (myState.Italic != oldItalic) {
                        setFontProperty(RtfReader::FONT_ITALIC);
                    }
                    if (myState.Bold != oldBold) {
                        setFontProperty(RtfReader::FONT_BOLD);
                    }
                    if (myState.Underlined != oldUnderlined) {
                        setFontProperty(RtfReader::FONT_UNDERLINED);
                    }
                    if (myState.Alignment != oldAlignment) {
                        setAlignment();
                    }

                    break;
                }
                case '\\':
                    if (ptr > dataStart) {
                        processCharData(dataStart, ptr - dataStart);
                    }
                    dataStart = ptr + 1;
                    keyword.erase();
                    parserState = READ_KEYWORD;
                    break;
                case 0x0d:
                case 0x0a:			// cr and lf are noise characters...
                    if (ptr > dataStart) {
                        processCharData(dataStart, ptr - dataStart);
                    }
                    dataStart = ptr + 1;
                    break;
                default:
                    if (myState.ReadDataAsHex) {
                        if (imageStartOffset == -1) {
                            imageStartOffset = myStream->offset() + (ptr - end);
                        }
                    }
                    break;
                }
                break;
            case READ_HEX_SYMBOL:
                hexString += *ptr;
                if (hexString.size() == 2) {
                    char ch = strtol(hexString.c_str(), 0, 16);
                    hexString.erase();
                    processCharData(&ch, 1);
                    parserState = READ_NORMAL_DATA;
                    dataStart = ptr + 1;
                }
                break;
            case READ_KEYWORD:
                // The casts keep the <cctype> calls defined for bytes >= 0x80.
                if (!isalpha(static_cast<unsigned char>(*ptr))) {
                    if ((ptr == dataStart) && (keyword.empty())) {
                        // Non-letter directly after the backslash: control symbol.
                        if (*ptr == '\'') {
                            parserState = READ_HEX_SYMBOL;
                        } else {
                            keyword = *ptr;
                            processKeyword(keyword);
                            parserState = READ_NORMAL_DATA;
                        }
                        dataStart = ptr + 1;
                    } else {
                        keyword.append(dataStart, ptr - dataStart);
                        if ((*ptr == '-') || isdigit(static_cast<unsigned char>(*ptr))) {
                            // Keep the sign/first digit in the pending run.
                            dataStart = ptr;
                            parserState = READ_KEYWORD_PARAMETER;
                        } else {
                            // A space delimiter is consumed; anything else is re-scanned.
                            readNextChar = *ptr == ' ';
                            processKeyword(keyword);
                            parserState = READ_NORMAL_DATA;
                            dataStart = readNextChar ? ptr + 1 : ptr;
                        }
                    }
                }
                break;
            case READ_KEYWORD_PARAMETER:
                if (!isdigit(static_cast<unsigned char>(*ptr))) {
                    parameterString.append(dataStart, ptr - dataStart);
                    int parameter = atoi(parameterString.c_str());
                    parameterString.erase();
                    // A space delimiter is consumed; anything else is re-scanned.
                    readNextChar = *ptr == ' ';
                    if ((keyword == "bin") && (parameter > 0)) {
                        myBinaryDataSize = parameter;
                        parserState = READ_BINARY_DATA;
                    } else {
                        processKeyword(keyword, &parameter);
                        parserState = READ_NORMAL_DATA;
                    }
                    dataStart = readNextChar ? ptr + 1 : ptr;
                }
                break;
            }
            if (readNextChar) {
                ++ptr;
            } else {
                readNextChar = true;
            }
        }
        // Carry whatever is still pending over to the next buffer refill.
        if (dataStart < end) {
            switch (parserState) {
            case READ_NORMAL_DATA:
                processCharData(dataStart, end - dataStart);
                break;
            case READ_KEYWORD:
                keyword.append(dataStart, end - dataStart);
                break;
            case READ_KEYWORD_PARAMETER:
                parameterString.append(dataStart, end - dataStart);
                break;
            default:
                break;
            }
        }
    }

    return myIsInterrupted || myStateStack.empty();
}
예제 #3
0
/**
 * Encodes `character` with ZLUnicodeUtil::ucs4ToUtf8() into a scratch buffer and
 * passes the resulting bytes to processCharData() with the third argument false.
 *
 * The scratch buffer has static storage, so it is shared by all calls.
 */
void RtfReader::processUnicodeCharacter(int character) {
	static char utf8Bytes[8];
	const int byteCount = ZLUnicodeUtil::ucs4ToUtf8(utf8Bytes, character);
	processCharData(utf8Bytes, byteCount, false);
}