/**
 * Construct a tokenizer that reads the contents of a canvas.
 *
 * The contents object returned by the canvas is accepted in two forms:
 *  - an array, in which case every element has to be a reference; each
 *    reference is resolved through the owner of the contents object and
 *    the resulting object is queued in m_lstContents;
 *  - an object that has a stream, which is queued directly.
 *
 * The first queued object (if any) is made the current contents stream
 * and removed from the queue.
 *
 * \param pCanvas the canvas to read from; must not be NULL
 *
 * Raises ePdfError_InvalidHandle if pCanvas is NULL and
 * ePdfError_InvalidDataType if the contents are neither of the two
 * accepted forms, if the array holds a non-reference, or if a reference
 * in the array cannot be resolved to an object.
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer()
{
    // Guard against a NULL canvas instead of dereferencing it below.
    if( !pCanvas )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();

    if( pContents && pContents->IsArray() )
    {
        PdfArray& a = pContents->GetArray();
        for( PdfArray::iterator it = a.begin(); it != a.end(); ++it )
        {
            if( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );
            }

            // Resolve the reference once and refuse dangling ones, so that
            // a NULL pointer never ends up in the queue of content streams.
            PdfObject* pStreamObj = pContents->GetOwner()->GetObject( (*it).GetReference() );
            if( !pStreamObj )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStreamObj );
        }
    }
    else if( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // Start reading from the first queued object.
    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
/**
 * Fetch the next token, moving on to the next queued contents object
 * whenever the base tokenizer reports that it has no token to deliver.
 *
 * \param pszToken receives the token (forwarded to PdfTokenizer::GetNextToken)
 * \param peType   receives the token type (forwarded to PdfTokenizer::GetNextToken)
 * \returns true if a token was read, false once the base tokenizer yields
 *          nothing and no further contents objects are queued
 */
bool PdfContentsTokenizer::GetNextToken( const char*& pszToken, EPdfTokenType* peType )
{
    for( ;; )
    {
        if( PdfTokenizer::GetNextToken( pszToken, peType ) )
        {
            return true;
        }

        // Nothing left to switch to: report end of input.
        if( m_lstContents.size() == 0 )
        {
            return false;
        }

        // Advance to the next queued contents object and retry.
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
bool PdfContentsTokenizer::ReadNext( EPdfContentsType& reType, const char*& rpszKeyword, PdfVariant & rVariant ) { EPdfTokenType eTokenType; EPdfDataType eDataType; const char* pszToken; // While officially the keyword pointer is undefined if not needed, it // costs us practically nothing to zero it (in case someone fails to check // the return value and/or reType). Do so. We won't nullify the variant // since that has a real cost. //rpszKeyword = 0; // If we've run out of data in this stream and there's another one to read, // switch to reading the next stream. //if( m_device.Device() && m_device.Device()->Eof() && m_lstContents.size() ) //{ // SetCurrentContentsStream( m_lstContents.front() ); // m_lstContents.pop_front(); //} bool gotToken = this->GetNextToken( pszToken, &eTokenType ); if ( !gotToken ) { if ( m_lstContents.size() ) { // We ran out of tokens in this stream. Switch to the next stream // and try again. SetCurrentContentsStream( m_lstContents.front() ); m_lstContents.pop_front(); return ReadNext( reType, rpszKeyword, rVariant ); } else { // No more content stream tokens to read. return false; } } eDataType = this->DetermineDataType( pszToken, eTokenType, rVariant ); // asume we read a variant unless we discover otherwise later. 
reType = ePdfContentsType_Variant; switch( eDataType ) { case ePdfDataType_Null: case ePdfDataType_Bool: case ePdfDataType_Number: case ePdfDataType_Real: // the data was already read into rVariant by the DetermineDataType function break; case ePdfDataType_Reference: { // references are invalid in content streams PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "references are invalid in content streams" ); break; } case ePdfDataType_Dictionary: this->ReadDictionary( rVariant, NULL ); break; case ePdfDataType_Array: this->ReadArray( rVariant, NULL ); break; case ePdfDataType_String: this->ReadString( rVariant, NULL ); break; case ePdfDataType_HexString: this->ReadHexString( rVariant, NULL ); break; case ePdfDataType_Name: this->ReadName( rVariant ); break; case ePdfDataType_Unknown: case ePdfDataType_RawData: default: // Assume we have a keyword reType = ePdfContentsType_Keyword; rpszKeyword = pszToken; break; } return true; }
/**
 * Construct a tokenizer that reads the contents of a canvas.
 *
 * The contents object returned by the canvas is accepted in three forms:
 *  - an array, in which case every element has to be a reference that can
 *    be resolved through the owner of the contents object; the resolved
 *    objects are queued in m_lstContents;
 *  - an object that has a stream, which is queued directly;
 *  - a dictionary without a stream, which is queued as well and logged as
 *    an empty page.
 *
 * The first queued object (if any) is made the current contents stream
 * and removed from the queue.
 *
 * \param pCanvas the canvas to read from; must not be NULL
 *
 * Raises ePdfError_InvalidHandle if pCanvas is NULL and
 * ePdfError_InvalidDataType if the contents match none of the accepted
 * forms, if the array holds a non-reference, or if a reference in the
 * array cannot be resolved to an object.
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer(), m_readingInlineImgData(false)
{
    if( !pCanvas )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();

    if( pContents && pContents->IsArray() )
    {
        PdfArray& a = pContents->GetArray();
        for( PdfArray::iterator it = a.begin(); it != a.end(); ++it )
        {
            if( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );
            }

            // Resolve the reference a single time and reuse the result for
            // both the validity check and the queue insertion.
            PdfObject* pStreamObj = pContents->GetOwner()->GetObject( (*it).GetReference() );
            if( !pStreamObj )
            {
                // some damaged PDFs may have dangling references
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStreamObj );
        }
    }
    else if( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else if( pContents && pContents->IsDictionary() )
    {
        m_lstContents.push_back( pContents );
        PdfError::LogMessage(eLogSeverity_Information,
                  "PdfContentsTokenizer: found canvas-dictionary without stream => empty page");
        // OC 18.09.2010 BugFix: Found an empty page in a PDF document:
        //    103 0 obj
        //    <<
        //    /Type /Page
        //    /MediaBox [ 0 0 595 842 ]
        //    /Parent 3 0 R
        //    /Resources <<
        //    /ProcSet [ /PDF ]
        //    >>
        //    /Rotate 0
        //    >>
        //    endobj
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // Start reading from the first queued object.
    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}