Пример #1
0
/** Construct a tokenizer over the content stream(s) of a canvas.
 *
 *  The object returned by pCanvas->GetContents() is accepted in two forms:
 *  an array whose entries are all references (each one is resolved through
 *  the owner of the contents object), or a single object that has a stream.
 *  Every resulting object is appended to m_lstContents; afterwards the first
 *  queued object is handed to SetCurrentContentsStream() and removed from
 *  the queue.
 *
 *  \param pCanvas canvas whose contents are to be tokenized, must not be NULL
 *
 *  Errors are raised through PODOFO_RAISE_ERROR_INFO:
 *   - ePdfError_InvalidHandle   if pCanvas is NULL
 *   - ePdfError_InvalidDataType if the contents array holds a non-reference,
 *     if a reference cannot be resolved to an object, or if the contents are
 *     neither an array nor an object with a stream (this includes a NULL
 *     contents object).
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer()
{
    // Guard against a NULL canvas before it is dereferenced below.
    if( !pCanvas )
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "PdfContentsTokenizer requires a non-NULL canvas" );
    }

    PdfObject* pContents = pCanvas->GetContents();
    if( pContents && pContents->IsArray()  ) 
    {
        PdfArray& a = pContents->GetArray();
        for ( PdfArray::iterator it = a.begin(); it != a.end() ; ++it )
        {
            if ( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );

            }

            // Resolve the reference once and refuse to queue a NULL result:
            // a dangling reference would otherwise reach
            // SetCurrentContentsStream() as a NULL pointer later on.
            PdfObject* pStreamObject = pContents->GetOwner()->GetObject( (*it).GetReference() );
            if ( !pStreamObject )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStreamObject );
        }
    }
    else if ( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // An empty contents array leaves the queue empty; in that case no
    // current stream is set.
    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
/** Fetch the next token, moving on to queued content objects when needed.
 *
 *  Delegates to PdfTokenizer::GetNextToken(). Whenever that call reports
 *  failure and m_lstContents still holds entries, the front entry is passed
 *  to SetCurrentContentsStream(), removed from the queue, and the read is
 *  retried.
 *
 *  \param pszToken forwarded unchanged to PdfTokenizer::GetNextToken()
 *  \param peType   forwarded unchanged to PdfTokenizer::GetNextToken()
 *  \returns true as soon as the base tokenizer delivers a token, false once
 *           it fails while the queue is empty
 */
bool PdfContentsTokenizer::GetNextToken( const char*& pszToken , EPdfTokenType* peType )
{
    for ( ;; )
    {
        if ( PdfTokenizer::GetNextToken( pszToken, peType ) )
        {
            return true;
        }

        // Nothing left to fall back on: report the failure to the caller.
        if ( m_lstContents.size() == 0 )
        {
            return false;
        }

        // Advance to the next queued object and try again.
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
Пример #3
0
/** Read the next item from the contents: either a keyword or a variant.
 *
 *  \param reType      set to ePdfContentsType_Variant or
 *                     ePdfContentsType_Keyword when an item was read
 *  \param rpszKeyword set to the token when the item is treated as a keyword;
 *                     not written otherwise
 *  \param rVariant    passed to DetermineDataType() and to the Read*() helper
 *                     selected for the detected data type
 *  \returns true if an item was read, false if GetNextToken() failed and no
 *           further content objects are queued
 *
 *  A token classified as ePdfDataType_Reference raises
 *  ePdfError_InvalidDataType through PODOFO_RAISE_ERROR_INFO.
 */
bool PdfContentsTokenizer::ReadNext( EPdfContentsType& reType, const char*& rpszKeyword, PdfVariant & rVariant )
{
    // Note: rpszKeyword is not zeroed up front (that assignment is disabled),
    // and rVariant is not reset either. Callers must check the return value
    // and reType before using either of them.
    const char*   pszCurrent;
    EPdfTokenType eCurrentType;

    if ( !this->GetNextToken( pszCurrent, &eCurrentType ) )
    {
        if ( !m_lstContents.size() )
        {
            // No more content stream tokens to read.
            return false;
        }

        // This stream is exhausted: switch to the next queued one and
        // start over.
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
        return ReadNext( reType, rpszKeyword, rVariant );
    }

    const EPdfDataType eDetected = this->DetermineDataType( pszCurrent, eCurrentType, rVariant );

    // Default to "variant"; only the keyword branch below overrides this.
    reType = ePdfContentsType_Variant;

    switch( eDetected )
    {
        // Simple values: DetermineDataType() has already filled rVariant.
        case ePdfDataType_Null:
        case ePdfDataType_Bool:
        case ePdfDataType_Number:
        case ePdfDataType_Real:
            break;

        // Compound and string-like values need a dedicated reader.
        case ePdfDataType_Name:
            this->ReadName( rVariant );
            break;
        case ePdfDataType_String:
            this->ReadString( rVariant, NULL );
            break;
        case ePdfDataType_HexString:
            this->ReadHexString( rVariant, NULL );
            break;
        case ePdfDataType_Array:
            this->ReadArray( rVariant, NULL );
            break;
        case ePdfDataType_Dictionary:
            this->ReadDictionary( rVariant, NULL );
            break;

        // Indirect references must not appear inside a content stream.
        case ePdfDataType_Reference:
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "references are invalid in content streams" );
            break;
        }

        // Everything else is treated as a keyword.
        case ePdfDataType_Unknown:
        case ePdfDataType_RawData:
        default:
            reType      = ePdfContentsType_Keyword;
            rpszKeyword = pszCurrent;
            break;
    }

    return true;
}
/** Construct a tokenizer over the content stream(s) of a canvas.
 *
 *  The object returned by pCanvas->GetContents() is accepted in three forms:
 *   - an array whose entries are all references; each one is resolved
 *     through the owner of the contents object and queued,
 *   - an object that has a stream; it is queued directly,
 *   - a dictionary without a stream; it is queued as well and an
 *     informational message is logged (treated as an empty page).
 *  After queuing, the first entry of m_lstContents is handed to
 *  SetCurrentContentsStream() and removed from the queue.
 *
 *  \param pCanvas canvas whose contents are to be tokenized, must not be NULL
 *
 *  Errors raised:
 *   - ePdfError_InvalidHandle   if pCanvas is NULL
 *   - ePdfError_InvalidDataType if the contents array holds a non-reference,
 *     if a reference cannot be resolved to an object, or if the contents
 *     match none of the three accepted forms (this includes a NULL contents
 *     object).
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer(), m_readingInlineImgData(false)
{
    if( !pCanvas ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();
    if( pContents && pContents->IsArray()  )
    {
        PdfArray& a = pContents->GetArray();
        for ( PdfArray::iterator it = a.begin(); it != a.end() ; ++it )
        {
            if ( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );

            }

            // Resolve the reference a single time and reuse the result for
            // both the validity check and the queue insertion.
            PdfObject* pStreamObject = pContents->GetOwner()->GetObject( (*it).GetReference() );
            if ( !pStreamObject )
            {
                // some damaged PDFs may have dangling references
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStreamObject );
        }
    }
    else if ( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else if ( pContents && pContents->IsDictionary() )
    {
        // The stream-less dictionary is still queued, so it is the object
        // passed to SetCurrentContentsStream() below.
        m_lstContents.push_back( pContents );
        PdfError::LogMessage(eLogSeverity_Information,
                  "PdfContentsTokenizer: found canvas-dictionary without stream => empty page");
        // OC 18.09.2010 BugFix: Found an empty page in a PDF document:
        //    103 0 obj
        //    <<
        //    /Type /Page
        //    /MediaBox [ 0 0 595 842 ]
        //    /Parent 3 0 R
        //    /Resources <<
        //    /ProcSet [ /PDF ]
        //    >>
        //    /Rotate 0
        //    >>
        //    endobj
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // An empty contents array leaves the queue empty; in that case no
    // current stream is set.
    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}