CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { uint8_t* src_buf = (uint8_t*)orig.c_str(); int src_len = orig.GetLength(); int dest_len = 0; int i; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || PDFCharIsDelimiter(ch)) { dest_len += 3; } else { dest_len++; } } if (dest_len == src_len) return orig; CFX_ByteString res; FX_CHAR* dest_buf = res.GetBuffer(dest_len); dest_len = 0; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || PDFCharIsDelimiter(ch)) { dest_buf[dest_len++] = '#'; dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; } else { dest_buf[dest_len++] = ch; } } dest_buf[dest_len] = 0; res.ReleaseBuffer(); return res; }
bool CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { uint8_t ch; if (!GetNextChar(ch)) return false; while (1) { while (PDFCharIsWhitespace(ch)) { if (!GetNextChar(ch)) return false; } if (ch != '%') break; while (1) { if (!GetNextChar(ch)) return false; if (PDFCharIsLineEnding(ch)) break; } } uint8_t buffer[256]; uint32_t index = 0; if (PDFCharIsDelimiter(ch)) { buffer[index++] = ch; if (ch == '/') { while (1) { if (!GetNextChar(ch)) return false; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; CFX_ByteString ret(buffer, index); token = ret; return true; } if (index < sizeof(buffer)) buffer[index++] = ch; } } else if (ch == '<') { if (!GetNextChar(ch)) return false; if (ch == '<') buffer[index++] = ch; else m_Pos--; } else if (ch == '>') { if (!GetNextChar(ch)) return false; if (ch == '>') buffer[index++] = ch; else m_Pos--; } CFX_ByteString ret(buffer, index); token = ret; return true; } while (1) { if (index < sizeof(buffer)) buffer[index++] = ch; if (!GetNextChar(ch)) return false; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } token = CFX_ByteString(buffer, index); return true; }
void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) { m_WordSize = 0; bIsNumber = TRUE; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) { return; } ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch)) { bIsNumber = FALSE; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; } } else if (ch == '<') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '<') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } else if (ch == '>') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '>') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } return; } while (1) { if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } }
void CPDF_StreamParser::SkipPathObject() { FX_DWORD command_startpos = m_Pos; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (!PDFCharIsNumeric(ch)) { m_Pos = command_startpos; return; } while (1) { while (!PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (PDFCharIsNumeric(ch)) continue; FX_DWORD op_startpos = m_Pos - 1; while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; } if (m_Pos - op_startpos == 2) { int op = m_pBuf[op_startpos]; if (op == kPathOperatorSubpath || op == kPathOperatorLine || op == kPathOperatorCubicBezier1 || op == kPathOperatorCubicBezier2 || op == kPathOperatorCubicBezier3) { command_startpos = m_Pos; break; } } else if (m_Pos - op_startpos == 3) { if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] && m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) { command_startpos = m_Pos; break; } } m_Pos = command_startpos; return; } } }
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { if (m_pLastObj) { m_pLastObj->Release(); m_pLastObj = nullptr; } m_WordSize = 0; FX_BOOL bIsNumber = TRUE; if (!PositionIsInBounds()) return EndOfData; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch) && ch != '/') { m_Pos--; m_pLastObj = ReadNextObject(); return Others; } while (1) { if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } m_WordBuffer[m_WordSize] = 0; if (bIsNumber) return Number; if (m_WordBuffer[0] == '/') return Name; if (m_WordSize == 4) { if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { m_pLastObj = CPDF_Boolean::Create(TRUE); return Others; } if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { m_pLastObj = CPDF_Null::Create(); return Others; } } else if (m_WordSize == 5) { if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { m_pLastObj = CPDF_Boolean::Create(FALSE); return Others; } } return Keyword; }
CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode) { if (m_Pos == m_Size) return nullptr; if (PDFCharIsWhitespace(m_pBuf[m_Pos])) m_Pos++; CFX_ByteString Decoder; CPDF_Dictionary* pParam = nullptr; CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter")); if (pFilter) { if (CPDF_Array* pArray = pFilter->AsArray()) { Decoder = pArray->GetString(0); CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); if (pParams) pParam = pParams->GetDict(0); } else { Decoder = pFilter->GetString(); pParam = pDict->GetDict(FX_BSTRC("DecodeParms")); } } FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width")); FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height")); FX_DWORD OrigSize = 0; if (pCSObj != NULL) { FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent")); FX_DWORD nComponents = 1; CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); if (pCS == NULL) { nComponents = 3; } else { nComponents = pCS->CountComponents(); pDoc->GetPageData()->ReleaseColorSpace(pCSObj); } FX_DWORD pitch = width; if (bpc && pitch > INT_MAX / bpc) { return NULL; } pitch *= bpc; if (nComponents && pitch > INT_MAX / nComponents) { return NULL; } pitch *= nComponents; if (pitch > INT_MAX - 7) { return NULL; } pitch += 7; pitch /= 8; OrigSize = pitch; } else { if (width > INT_MAX - 7) { return NULL; } OrigSize = ((width + 7) / 8); } if (height && OrigSize > INT_MAX / height) { return NULL; } OrigSize *= height; uint8_t* pData = NULL; FX_DWORD dwStreamSize; if (Decoder.IsEmpty()) { if (OrigSize > m_Size - m_Pos) { OrigSize = m_Size - m_Pos; } pData = FX_Alloc(uint8_t, OrigSize); FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize); dwStreamSize = OrigSize; m_Pos += OrigSize; } else { FX_DWORD dwDestSize = OrigSize; dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam, pData, dwDestSize); if ((int)dwStreamSize < 0) { FX_Free(pData); return NULL; } if (bDecode) { m_Pos += dwStreamSize; dwStreamSize = dwDestSize; if (CPDF_Array* pArray = pFilter->AsArray()) { pArray->RemoveAt(0); CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); if (pParams) pParams->RemoveAt(0); } else { pDict->RemoveAt(FX_BSTRC("Filter")); pDict->RemoveAt(FX_BSTRC("DecodeParms")); } } else { FX_Free(pData); FX_DWORD dwSavePos = m_Pos; m_Pos += dwStreamSize; while (1) { FX_DWORD dwPrevPos = m_Pos; CPDF_StreamParser::SyntaxType type = ParseNextElement(); if (type == CPDF_StreamParser::EndOfData) { break; } if (type != CPDF_StreamParser::Keyword) { dwStreamSize += m_Pos - dwPrevPos; continue; } if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && GetWordBuf()[1] == 'I') { m_Pos = dwPrevPos; break; } dwStreamSize += m_Pos - dwPrevPos; } m_Pos = dwSavePos; pData = FX_Alloc(uint8_t, dwStreamSize); FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize); m_Pos += dwStreamSize; } } pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize); return new CPDF_Stream(pData, dwStreamSize, pDict); }
void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize, int& type) { pStart = NULL; dwSize = 0; type = PDFWORD_EOF; uint8_t ch; while (1) { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; while (PDFCharIsWhitespace(ch)) { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; } if (ch != '%') break; while (1) { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; if (ch == '\r' || ch == '\n') break; } } FX_DWORD start_pos = m_dwCurPos - 1; pStart = m_pData + start_pos; if (PDFCharIsDelimiter(ch)) { if (ch == '/') { while (1) { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_dwCurPos--; dwSize = m_dwCurPos - start_pos; type = PDFWORD_NAME; return; } } } else { type = PDFWORD_DELIMITER; dwSize = 1; if (ch == '<') { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; if (ch == '<') dwSize = 2; else m_dwCurPos--; } else if (ch == '>') { if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; if (ch == '>') dwSize = 2; else m_dwCurPos--; } } return; } type = PDFWORD_NUMBER; dwSize = 1; while (1) { if (!PDFCharIsNumeric(ch)) type = PDFWORD_TEXT; if (m_dwSize <= m_dwCurPos) return; ch = m_pData[m_dwCurPos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_dwCurPos--; break; } dwSize++; } }
// TODO(npm): the following methods are almost identical in cpdf_syntaxparser void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { m_WordSize = 0; bIsNumber = true; if (!PositionIsInBounds()) return; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) { return; } ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch)) { bIsNumber = false; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; } } else if (ch == '<') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '<') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } else if (ch == '>') { if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (ch == '>') m_WordBuffer[m_WordSize++] = ch; else m_Pos--; } return; } while (1) { if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = false; if (!PositionIsInBounds()) return; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } }
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { m_pLastObj.reset(); m_WordSize = 0; if (!PositionIsInBounds()) return EndOfData; int ch = m_pBuf[m_Pos++]; while (1) { while (PDFCharIsWhitespace(ch)) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; } if (ch != '%') break; while (1) { if (!PositionIsInBounds()) return EndOfData; ch = m_pBuf[m_Pos++]; if (PDFCharIsLineEnding(ch)) break; } } if (PDFCharIsDelimiter(ch) && ch != '/') { m_Pos--; m_pLastObj = ReadNextObject(false, false, 0); return Others; } bool bIsNumber = true; while (1) { if (m_WordSize < kMaxWordLength) m_WordBuffer[m_WordSize++] = ch; if (!PDFCharIsNumeric(ch)) bIsNumber = false; if (!PositionIsInBounds()) break; ch = m_pBuf[m_Pos++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } m_WordBuffer[m_WordSize] = 0; if (bIsNumber) return Number; if (m_WordBuffer[0] == '/') return Name; if (m_WordSize == 4) { if (memcmp(m_WordBuffer, "true", 4) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true); return Others; } if (memcmp(m_WordBuffer, "null", 4) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Null>(); return Others; } } else if (m_WordSize == 5) { if (memcmp(m_WordBuffer, "false", 5) == 0) { m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false); return Others; } } return Keyword; }
std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream( CPDF_Document* pDoc, std::unique_ptr<CPDF_Dictionary> pDict, const CPDF_Object* pCSObj) { if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos])) m_Pos++; if (m_Pos == m_pBuf.size()) return nullptr; ByteString Decoder; CPDF_Dictionary* pParam = nullptr; CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); if (pFilter) { if (CPDF_Array* pArray = pFilter->AsArray()) { Decoder = pArray->GetStringAt(0); CPDF_Array* pParams = pDict->GetArrayFor(pdfium::stream::kDecodeParms); if (pParams) pParam = pParams->GetDictAt(0); } else { Decoder = pFilter->GetString(); pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms); } } uint32_t width = pDict->GetIntegerFor("Width"); uint32_t height = pDict->GetIntegerFor("Height"); uint32_t OrigSize = 0; if (pCSObj) { uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent"); uint32_t nComponents = 1; CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj, nullptr); if (pCS) { nComponents = pCS->CountComponents(); pDoc->GetPageData()->ReleaseColorSpace(pCSObj); } else { nComponents = 3; } uint32_t pitch = width; if (bpc && pitch > INT_MAX / bpc) return nullptr; pitch *= bpc; if (nComponents && pitch > INT_MAX / nComponents) return nullptr; pitch *= nComponents; if (pitch > INT_MAX - 7) return nullptr; pitch += 7; pitch /= 8; OrigSize = pitch; } else { if (width > INT_MAX - 7) return nullptr; OrigSize = ((width + 7) / 8); } if (height && OrigSize > INT_MAX / height) return nullptr; OrigSize *= height; std::unique_ptr<uint8_t, FxFreeDeleter> pData; uint32_t dwStreamSize; if (Decoder.IsEmpty()) { if (OrigSize > m_pBuf.size() - m_Pos) OrigSize = m_pBuf.size() - m_Pos; pData.reset(FX_Alloc(uint8_t, OrigSize)); auto copy_span = m_pBuf.subspan(m_Pos, OrigSize); memcpy(pData.get(), copy_span.data(), copy_span.size()); dwStreamSize = OrigSize; m_Pos += OrigSize; } else { dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height, Decoder, pParam, OrigSize); if (static_cast<int>(dwStreamSize) < 0) return nullptr; uint32_t dwSavePos = m_Pos; m_Pos += dwStreamSize; while (1) { uint32_t dwPrevPos = m_Pos; CPDF_StreamParser::SyntaxType type = ParseNextElement(); if (type == CPDF_StreamParser::EndOfData) break; if (type != CPDF_StreamParser::Keyword) { dwStreamSize += m_Pos - dwPrevPos; continue; } if (GetWord() == "EI") { m_Pos = dwPrevPos; break; } dwStreamSize += m_Pos - dwPrevPos; } m_Pos = dwSavePos; pData.reset(FX_Alloc(uint8_t, dwStreamSize)); auto copy_span = m_pBuf.subspan(m_Pos, dwStreamSize); memcpy(pData.get(), copy_span.data(), copy_span.size()); m_Pos += dwStreamSize; } pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize)); return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize, std::move(pDict)); }