コード例 #1
0
PdfObject* PdfContents::GetContentsForAppending() const
{
//    if ( mContObj->GetDataType() == ePdfDataType_Stream || 
//         mContObj->GetDataType() == ePdfDataType_Dictionary ) {

    // Use PdfObject::HasStream() instead of the datatype ePdfDataType_Stream
    // as large parts of the code rely on all PdfObjects having the datatype
    // ePdfDataType_Dictionary wether they have a stream or not
    if( mContObj->GetDataType() == ePdfDataType_Dictionary ) {
        return mContObj;	// just return the stream itself
    } else if ( mContObj->GetDataType() == ePdfDataType_Array ) {
        /*
          Create a new stream, add it to the array, return it
        */
        PdfObject*	newStm = mContObj->GetOwner()->CreateObject();
        newStm->GetStream();
        PdfReference	pdfr( newStm->Reference().ObjectNumber(), newStm->Reference().GenerationNumber() );
        
        PdfArray&	cArr = mContObj->GetArray();
        cArr.push_back( pdfr );
        return newStm;
    } else {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }
}
コード例 #2
0
ファイル: PdfFontSimple.cpp プロジェクト: yjwong/podofo
void PdfFontSimple::Init( bool bEmbed, const PdfName & rsSubType )
{
    PdfObject*    pWidth;
    PdfObject*    pDescriptor;
    PdfVariant    var;
    PdfArray      array;

    pWidth = this->GetObject()->GetOwner()->CreateObject();
    if( !pWidth )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    m_pMetrics->GetWidthArray( *pWidth, m_pEncoding->GetFirstChar(), m_pEncoding->GetLastChar(), m_pEncoding );

    pDescriptor = this->GetObject()->GetOwner()->CreateObject( "FontDescriptor" );
    if( !pDescriptor )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

	std::string name;
	if ( m_bIsSubsetting )
		name = this->GetObject()->GetOwner()->GetNextSubsetPrefix();
	name += this->GetBaseFont().GetName();

    this->GetObject()->GetDictionary().AddKey( PdfName::KeySubtype, rsSubType );
    this->GetObject()->GetDictionary().AddKey("BaseFont", PdfName( name ) );
    this->GetObject()->GetDictionary().AddKey("FirstChar", PdfVariant( static_cast<pdf_int64>(m_pEncoding->GetFirstChar()) ) );
    this->GetObject()->GetDictionary().AddKey("LastChar", PdfVariant( static_cast<pdf_int64>(m_pEncoding->GetLastChar()) ) );
    m_pEncoding->AddToDictionary( this->GetObject()->GetDictionary() ); // Add encoding key

    this->GetObject()->GetDictionary().AddKey("Widths", pWidth->Reference() );
    this->GetObject()->GetDictionary().AddKey( "FontDescriptor", pDescriptor->Reference() );

    m_pMetrics->GetBoundingBox( array );

    pDescriptor->GetDictionary().AddKey( "FontName", PdfName( name ) );
    //pDescriptor->GetDictionary().AddKey( "FontWeight", (long)m_pMetrics->Weight() );
    pDescriptor->GetDictionary().AddKey( PdfName::KeyFlags, PdfVariant( static_cast<pdf_int64>(PODOFO_LL_LITERAL(32)) ) ); // TODO: 0 ????
    pDescriptor->GetDictionary().AddKey( "FontBBox", array );
    pDescriptor->GetDictionary().AddKey( "ItalicAngle", PdfVariant( static_cast<pdf_int64>(m_pMetrics->GetItalicAngle()) ) );
    pDescriptor->GetDictionary().AddKey( "Ascent", m_pMetrics->GetPdfAscent() );
    pDescriptor->GetDictionary().AddKey( "Descent", m_pMetrics->GetPdfDescent() );
    pDescriptor->GetDictionary().AddKey( "CapHeight", m_pMetrics->GetPdfAscent() ); // m_pMetrics->CapHeight() );
    pDescriptor->GetDictionary().AddKey( "StemV", PdfVariant( static_cast<pdf_int64>(PODOFO_LL_LITERAL(1)) ) );               // m_pMetrics->StemV() );

    // Peter Petrov 24 September 2008
    m_pDescriptor = pDescriptor;

    if( bEmbed )
    {
        this->EmbedFontFile( pDescriptor );
        m_bWasEmbedded = true;
    }
}
コード例 #3
0
ファイル: PdfPage.cpp プロジェクト: arunjalota/paperman
unsigned int PdfPage::GetPageNumber() const
{
    unsigned int        nPageNumber = 0;
    PdfObject*          pParent     = m_pObject->GetIndirectKey( "Parent" );
    PdfReference ref                = m_pObject->Reference();

    while( pParent ) 
    {
        const PdfArray& kids        = pParent->GetIndirectKey( "Kids" )->GetArray();
        PdfArray::const_iterator it = kids.begin();

        while( it != kids.end() && (*it).GetReference() != ref )
        {
            PdfObject* pNode = m_pObject->GetOwner()->GetObject( (*it).GetReference() );

            if( pNode->GetDictionary().GetKey( PdfName::KeyType )->GetName() == PdfName( "Pages" ) )
                nPageNumber += static_cast<int>(pNode->GetDictionary().GetKey( "Count" )->GetNumber());
            else 
                // if we do not have a page tree node, 
                // we most likely have a page object:
                // so the page count is 1
                ++nPageNumber;

            ++it;
        }

        ref     = pParent->Reference();
        pParent = pParent->GetIndirectKey( "Parent" );
    }

    return ++nPageNumber;
}
コード例 #4
0
ファイル: PagesTreeTest.cpp プロジェクト: arunjalota/paperman
void PagesTreeTest::CreateTestTreeCustom( PoDoFo::PdfMemDocument & rDoc )
{
    const int COUNT = PODOFO_TEST_NUM_PAGES / 10;
    PdfObject* pRoot = rDoc.GetPagesTree()->GetObject();
    PdfArray rootKids;
    

    for(int z=0; z<COUNT; z++) 
    {
        PdfObject* pNode = rDoc.GetObjects().CreateObject("Pages");
        PdfArray nodeKids;

        for(int i=0; i<COUNT; i++) 
        {
            PdfPage* pPage = new PdfPage( PdfPage::CreateStandardPageSize( ePdfPageSize_A4 ),
                                          &(rDoc.GetObjects()) );
            pPage->GetObject()->GetDictionary().AddKey( PODOFO_TEST_PAGE_KEY, 
                                                        static_cast<long long>(z * COUNT + i) );

            //printf("Creating page %i z=%i i=%i\n", z * COUNT + i, z, i );
            nodeKids.push_back( pPage->GetObject()->Reference() );
        }

        pNode->GetDictionary().AddKey( PdfName("Kids"), nodeKids );
        pNode->GetDictionary().AddKey( PdfName("Count"), static_cast<long long>(COUNT) );
        rootKids.push_back( pNode->Reference() );
    }

    pRoot->GetDictionary().AddKey( PdfName("Kids"), rootKids );
    pRoot->GetDictionary().AddKey( PdfName("Count"), static_cast<long long>(PODOFO_TEST_NUM_PAGES) );
}
コード例 #5
0
void PdfFileSpec::Init( const char* pszFilename, const unsigned char* data, ptrdiff_t size, bool bStripPath ) 
{
    PdfObject* pEmbeddedStream;
    PdfString filename( MaybeStripPath( pszFilename, true) );

    this->GetObject()->GetDictionary().AddKey( "F", this->CreateFileSpecification( MaybeStripPath( pszFilename, bStripPath) ) );
    this->GetObject()->GetDictionary().AddKey( "UF", filename.ToUnicode () );

    PdfDictionary ef;

    pEmbeddedStream = this->CreateObject( "EmbeddedFile" );
    this->EmbeddFileFromMem( pEmbeddedStream, data, size );

    ef.AddKey( "F",  pEmbeddedStream->Reference() );

    this->GetObject()->GetDictionary().AddKey( "EF", ef );
}
コード例 #6
0
void PdfFontTrueType::EmbedFontFile( PdfObject* pDescriptor )
{
    PdfObject* pContents;
    pdf_long   lSize = 0;
    
    m_bWasEmbedded = true;    
        
    pContents = this->GetObject()->GetOwner()->CreateObject();
    if( !pContents )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }
        
    pDescriptor->GetDictionary().AddKey( "FontFile2", pContents->Reference() );

    // if the data was loaded from memory - use it from there
    // otherwise, load from disk
    if ( m_pMetrics->GetFontDataLen() && m_pMetrics->GetFontData() ) 
    {
        // FIXME const_cast<char*> is dangerous if string literals may ever be passed
        char* pBuffer = const_cast<char*>( m_pMetrics->GetFontData() );
        lSize = m_pMetrics->GetFontDataLen();
        
        // Set Length1 before creating the stream
        // as PdfStreamedDocument does not allow 
        // adding keys to an object after a stream was written
        pContents->GetDictionary().AddKey( "Length1", PdfVariant( static_cast<pdf_int64>(lSize) ) );
        pContents->GetStream()->Set( pBuffer, lSize );
    } 
    else 
    {
        PdfFileInputStream stream( m_pMetrics->GetFilename() );
        lSize = stream.GetFileLength();

        // Set Length1 before creating the stream
        // as PdfStreamedDocument does not allow 
        // adding keys to an object after a stream was written
        pContents->GetDictionary().AddKey( "Length1", PdfVariant( static_cast<pdf_int64>(lSize) ) );
        pContents->GetStream()->Set( &stream );
            
    }
}
コード例 #7
0
void PdfFileSpec::Init( const char* pszFilename, bool bEmbedd, bool bStripPath ) 
{
    PdfObject* pEmbeddedStream;
    PdfString filename( MaybeStripPath( pszFilename, true) );

    this->GetObject()->GetDictionary().AddKey( "F", this->CreateFileSpecification( MaybeStripPath( pszFilename, bStripPath ) ) );
    this->GetObject()->GetDictionary().AddKey( "UF", filename.ToUnicode () );

    if( bEmbedd ) 
    {
        PdfDictionary ef;

        pEmbeddedStream = this->CreateObject( "EmbeddedFile" );
        this->EmbeddFile( pEmbeddedStream, pszFilename );

        ef.AddKey( "F",  pEmbeddedStream->Reference() );

        this->GetObject()->GetDictionary().AddKey( "EF", ef );
    }
}
コード例 #8
0
PdfObject* PdfVecObjects::RemoveObject( const PdfReference & ref, bool bMarkAsFree )
{
    if( !m_bSorted )
        this->Sort();


    PdfObject*         pObj;
    PdfObject refObj( ref, NULL );
    std::pair<TIVecObjects,TIVecObjects> it = 
        std::equal_range( m_vector.begin(), m_vector.end(), &refObj, ObjectComparatorPredicate() );

    if( it.first != it.second )
    {
        pObj = *(it.first);
        if( bMarkAsFree )
            this->AddFreeObject( pObj->Reference() );
        m_vector.erase( it.first );
        return pObj;
    }
    
    return NULL;
}
コード例 #9
0
ファイル: PdfFontCID.cpp プロジェクト: diorahman/podofo
void PdfFontCID::Init( bool bEmbed )
{
    PdfObject* pDescriptor;
    PdfObject* pDescendantFonts;
    PdfObject* pCIDSystemInfo;
    PdfObject* pUnicode;

    PdfVariant var;
    PdfArray   array;

    // The descendant font is a CIDFont:
    pDescendantFonts = this->GetObject()->GetOwner()->CreateObject("Font");
    pCIDSystemInfo   = this->GetObject()->GetOwner()->CreateObject();
    pDescriptor      = this->GetObject()->GetOwner()->CreateObject("FontDescriptor");
    pUnicode         = this->GetObject()->GetOwner()->CreateObject(); // The ToUnicode CMap

    // Now setting each of the entries of the font
    this->GetObject()->GetDictionary().AddKey( PdfName::KeySubtype, PdfName("Type0") );
    this->GetObject()->GetDictionary().AddKey( "BaseFont", this->GetBaseFont() );
    this->GetObject()->GetDictionary().AddKey( "ToUnicode", pUnicode->Reference() );

    // The encoding is here usually a (Predefined) CMap from PdfIdentityEncoding:
    m_pEncoding->AddToDictionary( this->GetObject()->GetDictionary() );

    // The DecendantFonts, should be an indirect object:
    array.push_back( pDescendantFonts->Reference() );
    this->GetObject()->GetDictionary().AddKey( "DescendantFonts", array );

    // Setting the DescendantFonts paras
    // This is a type2 CIDFont, which is also known as TrueType:
    pDescendantFonts->GetDictionary().AddKey( PdfName::KeySubtype, PdfName("CIDFontType2") );

    // Same base font as the owner font:
    pDescendantFonts->GetDictionary().AddKey( "BaseFont", this->GetBaseFont() );

    // The CIDSystemInfo, should be an indirect object:
    pDescendantFonts->GetDictionary().AddKey( "CIDSystemInfo", pCIDSystemInfo->Reference() );

    // The FontDescriptor, should be an indirect object:
    pDescendantFonts->GetDictionary().AddKey( "FontDescriptor", pDescriptor->Reference() );
    pDescendantFonts->GetDictionary().AddKey( "CIDToGIDMap", PdfName("Identity") );

    // Add the width keys
    this->CreateWidth( pDescendantFonts );

    // Create the ToUnicode CMap
    this->CreateCMap( pUnicode );

    // Setting the CIDSystemInfo paras:
    pCIDSystemInfo->GetDictionary().AddKey( "Registry", PdfString("Adobe") );
    pCIDSystemInfo->GetDictionary().AddKey( "Ordering", PdfString("Identity") );
    pCIDSystemInfo->GetDictionary().AddKey( "Supplement", PdfVariant(static_cast<pdf_int64>(0LL)) );


    // Setting the FontDescriptor paras:
    array.Clear();
    m_pMetrics->GetBoundingBox( array );

    pDescriptor->GetDictionary().AddKey( "FontName", this->GetBaseFont() );
    pDescriptor->GetDictionary().AddKey( PdfName::KeyFlags, PdfVariant( static_cast<pdf_int64>(32LL) ) ); // TODO: 0 ????
    pDescriptor->GetDictionary().AddKey( "FontBBox", array );
    pDescriptor->GetDictionary().AddKey( "ItalicAngle", PdfVariant( static_cast<pdf_int64>(m_pMetrics->GetItalicAngle()) ) );
    pDescriptor->GetDictionary().AddKey( "Ascent", m_pMetrics->GetPdfAscent() );
    pDescriptor->GetDictionary().AddKey( "Descent", m_pMetrics->GetPdfDescent() );
    pDescriptor->GetDictionary().AddKey( "CapHeight", m_pMetrics->GetPdfAscent() ); // m_pMetrics->CapHeight() );
    pDescriptor->GetDictionary().AddKey( "StemV", PdfVariant( static_cast<pdf_int64>(1LL) ) );               // m_pMetrics->StemV() );

    // Peter Petrov 24 September 2008
    m_pDescriptor = pDescriptor;
    
    if( bEmbed )
    {
        this->EmbedFont( pDescriptor );
        m_bWasEmbedded = true;
    }
}
コード例 #10
0
ファイル: PdfFontType1.cpp プロジェクト: arunjalota/paperman
void PdfFontType1::EmbedFontFile( PdfObject* pDescriptor )
{
    pdf_long        lSize    = 0;
    pdf_long        lLength1 = 0L;
    pdf_long        lLength2 = 0L;
    pdf_long        lLength3 = 0L;
    PdfObject*  pContents;
    const char* pBuffer;
    char*       pAllocated = NULL;

    m_bWasEmbedded = true;

    pContents = m_pObject->GetOwner()->CreateObject();
    if( !pContents )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }
        
    pDescriptor->GetDictionary().AddKey( "FontFile", pContents->Reference() );

    // if the data was loaded from memory - use it from there
    // otherwise, load from disk
    if ( m_pMetrics->GetFontDataLen() && m_pMetrics->GetFontData() ) 
    {
        pBuffer = m_pMetrics->GetFontData();
        lSize   = m_pMetrics->GetFontDataLen();
    }
    else
    {
        FILE* hFile = fopen( m_pMetrics->GetFilename(), "rb" );
        if( !hFile )
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_FileNotFound, m_pMetrics->GetFilename() );
        }

        fseek( hFile, 0L, SEEK_END );
        lSize = ftell( hFile );
        fseek( hFile, 0L, SEEK_SET );

        pAllocated = static_cast<char*>(malloc( sizeof(char) * lSize ));
        if( !pAllocated )
        {
            fclose( hFile );
            PODOFO_RAISE_ERROR( ePdfError_OutOfMemory );
        }

        fread( pAllocated, sizeof(char), lSize, hFile );
        fclose( hFile );

        pBuffer = pAllocated;
    }

	// Remove binary segment headers from pfb
	unsigned char *pBinary = reinterpret_cast<unsigned char*>(const_cast<char*>(pBuffer));
	while( *pBinary == 0x80 )	// binary segment header
	{
		const int	cHeaderLength  = 6;
		int			iSegmentType   = pBinary[1];	// binary segment type
		long		lSegmentLength = 0L;
		long		lSegmentDelta  = static_cast<long>(&pBuffer[lSize] - reinterpret_cast<const char*>(pBinary) );

		switch( iSegmentType )
		{
			case 1:									// ASCII text
				lSegmentLength = pBinary[2] + 		// little endian
								 pBinary[3] * 256L + 
								 pBinary[4] * 65536L +
								 pBinary[5] * 16777216L;
				if( lLength1 == 0L )
					lLength1 = lSegmentLength;
				else
					lLength3 = lSegmentLength;
				lSize -= cHeaderLength;
				memmove( pBinary, &pBinary[cHeaderLength], lSegmentDelta );
				pBinary = &pBinary[lSegmentLength];
				break;
			case 2:									// binary data
				lSegmentLength = pBinary[2] + 		// little endian
								 pBinary[3] * 256L + 
								 pBinary[4] * 65536L +
								 pBinary[5] * 16777216L;
				lLength2 = lSegmentLength;
				lSize -= cHeaderLength;
				memmove( pBinary, &pBinary[cHeaderLength], lSegmentDelta );
				pBinary = &pBinary[lSegmentLength];
				break;
			case 3:									// end-of-file
				pContents->GetStream()->Set( pBuffer, lSize - 2L );
				if( pAllocated )
					free( pAllocated );

				pContents->GetDictionary().AddKey( "Length1", PdfVariant( static_cast<long long>(lLength1) ) );
                pContents->GetDictionary().AddKey( "Length2", PdfVariant( static_cast<long long>(lLength2) ) );
                pContents->GetDictionary().AddKey( "Length3", PdfVariant( static_cast<long long>(lLength3) ) );

				return;
			default:
				break;
		}
	}

	// Parse the font data buffer to get the values for length1, length2 and length3
	lLength1 = FindInBuffer( "eexec", pBuffer, lSize );
	if( lLength1 > 0 )
		lLength1 += 6; // 6 == eexec + lf
	else
		lLength1 = 0;

	if( lLength1 )
	{
		lLength2 = FindInBuffer( "cleartomark", pBuffer, lSize );
		if( lLength2 > 0 )
			lLength2 = lSize - lLength1 - 520; // 520 == 512 + strlen(cleartomark)
		else
			lLength1 = 0;
	}

	lLength3 = lSize - lLength2 - lLength1;
    
	// TODO: Pdf Supports only Type1 fonts with binary encrypted sections and not the hex format
	pContents->GetStream()->Set( pBuffer, lSize );
    if( pAllocated )
        free( pAllocated );

    pContents->GetDictionary().AddKey( "Length1", PdfVariant( static_cast<long long>(lLength1) ) );
    pContents->GetDictionary().AddKey( "Length2", PdfVariant( static_cast<long long>(lLength2) ) );
    pContents->GetDictionary().AddKey( "Length3", PdfVariant( static_cast<long long>(lLength3) ) );
}
コード例 #11
0
ファイル: PdfPagesTree.cpp プロジェクト: yjwong/podofo
PdfObject* PdfPagesTree::GetPageNode( int nPageNum, PdfObject* pParent, 
                                      PdfObjectList & rLstParents ) 
{
    if( !pParent ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    if( !pParent->GetDictionary().HasKey( PdfName("Kids") ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidKey );
    }

    
    const PdfObject* pObj = pParent->GetIndirectKey( "Kids" );
    if( pObj == NULL || !pObj->IsArray() )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    const PdfArray & rKidsArray = pObj->GetArray(); 
    PdfArray::const_iterator it = rKidsArray.begin();

    const size_t numDirectKids = rKidsArray.size();
    const size_t numKids = GetChildCount(pParent);

    // use <= since nPageNum is 0-based
    if( static_cast<int>(numKids) <= nPageNum ) 
    {
        PdfError::LogMessage( eLogSeverity_Critical,
	    "Cannot retrieve page %i from a document with only %i pages.",
                              nPageNum, static_cast<int>(numKids) );
        return NULL;
    }

    //printf("Fetching: %i %i %i\n", numDirectKids, numKids, nPageNum );
    if( numDirectKids == numKids && static_cast<size_t>(nPageNum) < numDirectKids )
    {
        // This node has only page nodes as kids,
        // so we can access the array directly
        rLstParents.push_back( pParent );
        return GetPageNodeFromArray( nPageNum, rKidsArray, rLstParents );
    } 
    else
    {
        // We have to traverse the tree
        while( it != rKidsArray.end() ) 
        {
            if( (*it).IsArray() ) 
            { // Fixes PDFs broken by having trees with arrays nested once
                
                rLstParents.push_back( pParent );

                // the following code is to find the reference to log this with
                const PdfReference & rIterArrayRef = (*it).Reference();
                PdfReference refToLog;
                bool isDirectObject // don't worry about 0-num. indirect ones
                    = ( !(rIterArrayRef.ObjectNumber() ) );
                if ( isDirectObject ) 
		{
                    if ( !(pObj->Reference().ObjectNumber() ) ) // rKidsArray's
		    {
                        refToLog = pParent->Reference();
                    }
		    else
                    {
                        refToLog = pObj->Reference();
                    }
                }
                else
                {
                    refToLog = rIterArrayRef;
                }
                PdfError::LogMessage( eLogSeverity_Error,
                                    "Entry in Kids array is itself an array"
                    "%s reference: %s\n", isDirectObject ? " (direct object)"
                    ", in object with" : ",", refToLog.ToString().c_str() );

                    const PdfArray & rIterArray = (*it).GetArray();

                    // is the array large enough to potentially have the page?
                    if( static_cast<size_t>(nPageNum) < rIterArray.GetSize() )
                    {
                        PdfObject* pPageNode = GetPageNodeFromArray( nPageNum,
                                                    rIterArray, rLstParents );
                        if ( pPageNode ) // and if not, search further
                            return pPageNode;
                    }
            }
            else if( (*it).IsReference() ) 
            {
                PdfObject* pChild = GetRoot()->GetOwner()->GetObject( (*it).GetReference() );
                if (!pChild) 
                {
                    PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Child not found: %s\n", 
                                          nPageNum, (*it).GetReference().ToString().c_str()); 
                    return NULL;
                }

                if( this->IsTypePages(pChild) ) 
                {
                    int childCount = GetChildCount( pChild );
                    if( childCount < nPageNum + 1 ) // Pages are 0 based, but count is not
                    {
                        // skip this page node
                        // and go to the next one
                        nPageNum -= childCount;
                    }
                    else
                    {
                        rLstParents.push_back( pParent );
                        return this->GetPageNode( nPageNum, pChild, rLstParents );
                    }
                }
                else if( this->IsTypePage(pChild) ) 
                {
                    if( 0 == nPageNum )
                    {
                        rLstParents.push_back( pParent );
                        return pChild;
                    } 

                    // Skip a normal page
                    if(nPageNum > 0 )
                        nPageNum--;
                }
		else
		{
                    const PdfReference & rLogRef = pChild->Reference();
                    pdf_objnum nLogObjNum = rLogRef.ObjectNumber();
                    pdf_gennum nLogGenNum = rLogRef.GenerationNumber();
		    PdfError::LogMessage( eLogSeverity_Critical,
                                          "Requesting page index %i. "
                        "Invalid datatype referenced in kids array: %s\n"
                        "Reference to invalid object: %i %i R\n", nPageNum,
                        pChild->GetDataTypeString(), nLogObjNum, nLogGenNum);
                }
            }
            else
            {
                PdfError::LogMessage( eLogSeverity_Critical, "Requesting page index %i. Invalid datatype in kids array: %s\n", 
                                      nPageNum, (*it).GetDataTypeString()); 
                return NULL;
            }
            
            ++it;
        }
    }

    return NULL;
}
コード例 #12
0
void TextExtractor::ExtractText( PdfMemDocument* pDocument, PdfPage* pPage ) 
{
    const char*      pszToken = NULL;
    PdfVariant       var;
    EPdfContentsType eType;

    PdfContentsTokenizer tokenizer( pPage );

    double dCurPosX     = 0.0;
    double dCurPosY     = 0.0;
    double dCurFontSize = 0.0;
    bool   bTextBlock   = false;
    PdfFont* pCurFont   = NULL;

    std::stack<PdfVariant> stack;

    while( tokenizer.ReadNext( eType, pszToken, var ) )
    {
        if( eType == ePdfContentsType_Keyword )
        {
            // support 'l' and 'm' tokens
            if( strcmp( pszToken, "l" ) == 0 || 
                strcmp( pszToken, "m" ) == 0 )
            {
                dCurPosX = stack.top().GetReal();
                stack.pop();
                dCurPosY = stack.top().GetReal();
                stack.pop();
            }
            else if( strcmp( pszToken, "BT" ) == 0 ) 
            {
                bTextBlock   = true;     
                // BT does not reset font
                // dCurFontSize = 0.0;
                // pCurFont     = NULL;
            }
            else if( strcmp( pszToken, "ET" ) == 0 ) 
            {
                if( !bTextBlock ) 
                    fprintf( stderr, "WARNING: Found ET without BT!\n" );
            }

            if( bTextBlock ) 
            {
                if( strcmp( pszToken, "Tf" ) == 0 ) 
                {
                    dCurFontSize = stack.top().GetReal();
                    stack.pop();
                    PdfName fontName = stack.top().GetName();
                    PdfObject* pFont = pPage->GetFromResources( PdfName("Font"), fontName );
                    if( !pFont ) 
                    {
                        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "Cannot create font!" );
                    }

                    pCurFont = pDocument->GetFont( pFont );
                    if( !pCurFont ) 
                    {
                        fprintf( stderr, "WARNING: Unable to create font for object %i %i R\n",
                                 pFont->Reference().ObjectNumber(),
                                 pFont->Reference().GenerationNumber() );
                    }
                }
                else if( strcmp( pszToken, "Tj" ) == 0 ||
                         strcmp( pszToken, "'" ) == 0 ) 
                {
                    AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() );
                    stack.pop();
                }
                else if( strcmp( pszToken, "\"" ) == 0 )
                {
                    AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() );
                    stack.pop();
                    stack.pop(); // remove char spacing from stack
                    stack.pop(); // remove word spacing from stack
                }
                else if( strcmp( pszToken, "TJ" ) == 0 ) 
                {
                    PdfArray array = stack.top().GetArray();
                    stack.pop();
                    
                    for( int i=0; i<static_cast<int>(array.GetSize()); i++ ) 
                    {
                        if( array[i].IsString() )
                            AddTextElement( dCurPosX, dCurPosY, pCurFont, array[i].GetString() );
                    }
                }
            }
        }
        else if ( eType == ePdfContentsType_Variant )
        {
            stack.push( var );
        }
        else
        {
            // Impossible; type must be keyword or variant
            PODOFO_RAISE_ERROR( ePdfError_InternalLogic );
        }
    }
}