ByteString CPDF_StreamParser::ReadHexString() { if (!PositionIsInBounds()) return ByteString(); std::ostringstream buf; bool bFirst = true; int code = 0; while (PositionIsInBounds()) { int ch = m_pBuf[m_Pos++]; if (ch == '>') break; if (!std::isxdigit(ch)) continue; int val = FXSYS_HexCharToInt(ch); if (bFirst) { code = val * 16; } else { code += val; buf << static_cast<uint8_t>(code); } bFirst = !bFirst; } if (!bFirst) buf << static_cast<char>(code); if (buf.tellp() <= 0) return ByteString(); return ByteString( buf.str().c_str(), std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); }
CFX_ByteString CPDF_StreamParser::ReadHexString() { if (!PositionIsInBounds()) return CFX_ByteString(); CFX_ByteTextBuf buf; bool bFirst = true; int code = 0; while (PositionIsInBounds()) { int ch = m_pBuf[m_Pos++]; if (ch == '>') break; if (!std::isxdigit(ch)) continue; int val = FXSYS_toHexDigit(ch); if (bFirst) { code = val * 16; } else { code += val; buf.AppendByte((uint8_t)code); } bFirst = !bFirst; } if (!bFirst) buf.AppendChar((char)code); if (buf.GetLength() > MAX_STRING_LENGTH) return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); return buf.GetByteString(); }
CFX_ByteString CPDF_StreamParser::ReadHexString() { if (!PositionIsInBounds()) return CFX_ByteString(); int ch = m_pBuf[m_Pos++]; CFX_ByteTextBuf buf; FX_BOOL bFirst = TRUE; int code = 0; while (1) { if (ch == '>') { break; } if (ch >= '0' && ch <= '9') { if (bFirst) { code = (ch - '0') * 16; } else { code += ch - '0'; buf.AppendChar((char)code); } bFirst = !bFirst; } else if (ch >= 'A' && ch <= 'F') { if (bFirst) { code = (ch - 'A' + 10) * 16; } else { code += ch - 'A' + 10; buf.AppendChar((char)code); } bFirst = !bFirst; } else if (ch >= 'a' && ch <= 'f') { if (bFirst) { code = (ch - 'a' + 10) * 16; } else { code += ch - 'a' + 10; buf.AppendChar((char)code); } bFirst = !bFirst; } if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; } if (!bFirst) { buf.AppendChar((char)code); } if (buf.GetLength() > MAX_STRING_LENGTH) { return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); } return buf.GetByteString(); }
CFX_ByteString CPDF_StreamParser::ReadString() { if (!PositionIsInBounds()) return CFX_ByteString(); int ch = m_pBuf[m_Pos++]; CFX_ByteTextBuf buf; int parlevel = 0; int status = 0, iEscCode = 0; while (1) { switch (status) { case 0: if (ch == ')') { if (parlevel == 0) { if (buf.GetLength() > MAX_STRING_LENGTH) { return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); } return buf.GetByteString(); } parlevel--; buf.AppendChar(')'); } else if (ch == '(') { parlevel++; buf.AppendChar('('); } else if (ch == '\\') { status = 1; } else { buf.AppendChar((char)ch); } break; case 1: if (ch >= '0' && ch <= '7') { iEscCode = ch - '0'; status = 2; break; } if (ch == 'n') { buf.AppendChar('\n'); } else if (ch == 'r') { buf.AppendChar('\r'); } else if (ch == 't') { buf.AppendChar('\t'); } else if (ch == 'b') { buf.AppendChar('\b'); } else if (ch == 'f') { buf.AppendChar('\f'); } else if (ch == '\r') { status = 4; break; } else if (ch == '\n') { } else { buf.AppendChar(ch); } status = 0; break; case 2: if (ch >= '0' && ch <= '7') { iEscCode = iEscCode * 8 + ch - '0'; status = 3; } else { buf.AppendChar(iEscCode); status = 0; continue; } break; case 3: if (ch >= '0' && ch <= '7') { iEscCode = iEscCode * 8 + ch - '0'; buf.AppendChar(iEscCode); status = 0; } else { buf.AppendChar(iEscCode); status = 0; continue; } break; case 4: status = 0; if (ch != '\n') { continue; } break; } if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; } if (PositionIsInBounds()) ch = m_pBuf[m_Pos++]; if (buf.GetLength() > MAX_STRING_LENGTH) { return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); } return buf.GetByteString(); }
void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) { m_WordSize = 0; bIsNumber = TRUE; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) { return; } ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch)) { bIsNumber = FALSE; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; } } else if (ch == '<') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '<') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } else if (ch == '>') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '>') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } return; } while (1) { if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } }
void CPDF_StreamParser::SkipPathObject() { FX_DWORD command_startpos = m_Pos; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (!PDFCharIsNumeric(ch)) { m_Pos = command_startpos; return; } while (1) { while (!PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (PDFCharIsNumeric(ch)) continue; FX_DWORD op_startpos = m_Pos - 1; while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (m_Pos - op_startpos == 2) { int op = m_pBuf[op_startpos]; if (op == kPathOperatorSubpath || op == kPathOperatorLine || op == kPathOperatorCubicBezier1 || op == kPathOperatorCubicBezier2 || op == kPathOperatorCubicBezier3) { command_startpos = m_Pos; break; } } else if (m_Pos - op_startpos == 3) { if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] && m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) { command_startpos = m_Pos; break; } } m_Pos = command_startpos; return; } } }
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { if (m_pLastObj) { m_pLastObj->Release(); m_pLastObj = nullptr; } m_WordSize = 0; FX_BOOL bIsNumber = TRUE; if (!PositionIsInBounds()) return EndOfData; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch) && ch != '/') { m_Pos--; m_pLastObj = ReadNextObject(); return Others; } while (1) { if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } m_WordBuffer[m_WordSize] = 0; if (bIsNumber) return Number; if (m_WordBuffer[0] == '/') return Name; if (m_WordSize == 4) { if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { m_pLastObj = CPDF_Boolean::Create(TRUE); return Others; } if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { m_pLastObj = CPDF_Null::Create(); return Others; } } else if (m_WordSize == 5) { if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { m_pLastObj = CPDF_Boolean::Create(FALSE); return Others; } } return Keyword; }
ByteString CPDF_StreamParser::ReadString() { if (!PositionIsInBounds()) return ByteString(); uint8_t ch = m_pBuf[m_Pos++]; std::ostringstream buf; int parlevel = 0; int status = 0; int iEscCode = 0; while (1) { switch (status) { case 0: if (ch == ')') { if (parlevel == 0) { if (buf.tellp() <= 0) return ByteString(); return ByteString( buf.str().c_str(), std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); } parlevel--; buf << ')'; } else if (ch == '(') { parlevel++; buf << '('; } else if (ch == '\\') { status = 1; } else { buf << static_cast<char>(ch); } break; case 1: if (FXSYS_IsOctalDigit(ch)) { iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch)); status = 2; break; } if (ch == '\r') { status = 4; break; } if (ch == '\n') { // Do nothing. } else if (ch == 'n') { buf << '\n'; } else if (ch == 'r') { buf << '\r'; } else if (ch == 't') { buf << '\t'; } else if (ch == 'b') { buf << '\b'; } else if (ch == 'f') { buf << '\f'; } else { buf << static_cast<char>(ch); } status = 0; break; case 2: if (FXSYS_IsOctalDigit(ch)) { iEscCode = iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); status = 3; } else { buf << static_cast<char>(iEscCode); status = 0; continue; } break; case 3: if (FXSYS_IsOctalDigit(ch)) { iEscCode = iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); buf << static_cast<char>(iEscCode); status = 0; } else { buf << static_cast<char>(iEscCode); status = 0; continue; } break; case 4: status = 0; if (ch != '\n') continue; break; } if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; } if (PositionIsInBounds()) ++m_Pos; if (buf.tellp() <= 0) return ByteString(); return ByteString( buf.str().c_str(), std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); }
// TODO(npm): the following methods are almost identical in cpdf_syntaxparser void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { m_WordSize = 0; bIsNumber = true; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) { return; } ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch)) { bIsNumber = false; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; } } else if (ch == '<') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '<') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } else if (ch == '>') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '>') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } return; } while (1) { if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = false; if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } }
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { m_pLastObj.reset(); m_WordSize = 0; if (!PositionIsInBounds()) return EndOfData; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch) && ch != '/') { m_Pos--; m_pLastObj = ReadNextObject(false, false, 0); return Others; } bool bIsNumber = true; while (1) { if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = false; if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } m_WordBuffer[m_WordSize] = 0; if (bIsNumber) return Number; if (m_WordBuffer[0] == '/') return Name; if (m_WordSize == 4) { if (memcmp(m_WordBuffer, "true", 4) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true); return Others; } if (memcmp(m_WordBuffer, "null", 4) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Null>(); return Others; } } else if (m_WordSize == 5) { if (memcmp(m_WordBuffer, "false", 5) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false); return Others; } } return Keyword; }