Пример #1
0
void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, 
                                       int nIndex, const std::vector<PdfObject*>& vecPages )
{
    if( !pParent || !vecPages.size() ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    // 1. Add the reference of the new page to the kids array of pParent
    // 2. Increase count of every node in lstParents (which also includes pParent)
    // 3. Add Parent key to the page

    // 1. Add reference
    const PdfArray oldKids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray();
    PdfArray newKids;
    newKids.reserve( oldKids.GetSize() + vecPages.size() );

    bool bIsPushedIn = false;
    int i=0;
    for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i ) 
    {
        if ( !bIsPushedIn && (nIndex < i) )    // Pushing before
        {
            for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages)
            {
                newKids.push_back( (*itPages)->Reference() );    // Push all new kids at once
            }
            bIsPushedIn = true;
        }
        newKids.push_back( *it );    // Push in the old kids
    }

    // If new kids are still not pushed in then they may be appending to the end
    if ( !bIsPushedIn && ( (nIndex + 1) == static_cast<int>(oldKids.size())) ) 
    {
        for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages)
        {
            newKids.push_back( (*itPages)->Reference() );    // Push all new kids at once
        }
        bIsPushedIn = true;
    }

    pParent->GetDictionary().AddKey( PdfName("Kids"), newKids );
 

    // 2. increase count
    for ( PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); itParents != rlstParents.rend(); ++itParents )
    {
        this->ChangePagesCount( *itParents, vecPages.size() );
    } 

    // 3. add parent key to each of the pages
    for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages)
    {
        (*itPages)->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() );
    }
}
Пример #2
0
void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, 
                                       int nIndex, PdfObject* pPage )
{
    if( !pParent || !pPage ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    // 1. Add the reference of the new page to the kids array of pParent
    // 2. Increase count of every node in lstParents (which also includes pParent)
    // 3. Add Parent key to the page

    // 1. Add reference
    const PdfArray oldKids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray();
    PdfArray::const_iterator it = oldKids.begin();
    PdfArray newKids;

    newKids.reserve( oldKids.GetSize() + 1 );

    if( nIndex < 0 ) 
    {
        newKids.push_back( pPage->Reference() );
    }

    int i = 0;
    while( it != oldKids.end() ) 
    {
        newKids.push_back( *it );

        if( i == nIndex ) 
            newKids.push_back( pPage->Reference() );

        ++i;
        ++it;
    }

    /*
    PdfVariant var2( newKids );
    std::string str2;
    var2.ToString(str2);
    printf("newKids= %s\n", str2.c_str() );
    */

    pParent->GetDictionary().AddKey( PdfName("Kids"), newKids );
 
    // 2. increase count
    PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin();
    while( itParents != rlstParents.rend() )
    {
        this->ChangePagesCount( *itParents, 1 );

        ++itParents;
    } 

    // 3. add parent key to the page
    pPage->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() );
}
Пример #3
0
void PdfSampledFunction::Init( const PdfArray & rDomain,  const PdfArray & rRange, const PdfFunction::Sample & rlstSamples )
{
	PdfArray Size;
	for( unsigned i = 0; i < rDomain.GetSize() / 2; i++ )
		Size.push_back( PdfObject( (long long) (rDomain.GetSize()) / 2L ) );

    this->GetObject()->GetDictionary().AddKey( PdfName("Domain"), rDomain );
    this->GetObject()->GetDictionary().AddKey( PdfName("Range"), rRange );
    this->GetObject()->GetDictionary().AddKey( PdfName("Size"), Size );
    this->GetObject()->GetDictionary().AddKey( PdfName("Order"), PdfObject( 1LL ) );
    this->GetObject()->GetDictionary().AddKey( PdfName("BitsPerSample"), PdfObject( 8LL ) );

    this->GetObject()->GetStream()->BeginAppend();
    PdfFunction::Sample::const_iterator it = rlstSamples.begin();
    while( it != rlstSamples.end() )
    {
        this->GetObject()->GetStream()->Append( & ( *it ), 1 );
        ++it;
    }
    this->GetObject()->GetStream()->EndAppend();
}
PdfFontMetricsObject::PdfFontMetricsObject( PdfObject* pFont, PdfObject* pDescriptor, const PdfEncoding* const pEncoding )
    : PdfFontMetrics( ePdfFontType_Unknown, "", NULL ),
      m_pEncoding( pEncoding ), m_dDefWidth(0.0)
{
    if( !pDescriptor )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

	const PdfName & rSubType = pFont->GetDictionary().GetKey( PdfName::KeySubtype )->GetName();

    // OC 15.08.2010 BugFix: /FirstChar /LastChar /Widths are in the Font dictionary and not in the FontDescriptor
	if ( rSubType == PdfName("Type1") || rSubType == PdfName("TrueType") ) {
		m_sName        = pDescriptor->GetIndirectKey( "FontName" )->GetName();
		m_bbox         = pDescriptor->GetIndirectKey( "FontBBox" )->GetArray();
    m_nFirst       = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "FirstChar", 0L ));
    m_nLast        = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "LastChar", 0L ));
	 // OC 15.08.2010 BugFix: GetIndirectKey() instead of GetDictionary().GetKey() and "Widths" instead of "Width"
    PdfObject* widths = pFont->GetIndirectKey( "Widths" );
    if( widths != NULL )
    {
        m_width        = widths->GetArray();
        m_missingWidth = NULL;
    }
    else
    {
        widths = pDescriptor->GetDictionary().GetKey( "MissingWidth" );
        if( widths == NULL ) 
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines neither Widths, nor MissingWidth values!" );
            m_missingWidth = widths;
        }
    }
	} else if ( rSubType == PdfName("CIDFontType0") || rSubType == PdfName("CIDFontType2") ) {
		PdfObject *pObj = pDescriptor->GetIndirectKey( "FontName" );
		if (pObj) {
			m_sName = pObj->GetName();
		}
		pObj = pDescriptor->GetIndirectKey( "FontBBox" );
		if (pObj) {
			m_bbox = pObj->GetArray();
		}
		m_nFirst = 0;
		m_nLast = 0;

		m_dDefWidth = static_cast<double>(pFont->GetDictionary().GetKeyAsLong( "DW", 1000L ));
		PdfVariant default_width(m_dDefWidth);
		PdfObject * pw = pFont->GetIndirectKey( "W" );

		for (int i = m_nFirst; i <= m_nLast; ++i) {
			m_width.push_back(default_width);
		}
		if (pw) {
			PdfArray w = pw->GetArray();
			int pos = 0;
			while (pos < static_cast<int>(w.GetSize())) {
				int start = static_cast<int>(w[pos++].GetNumber());
				PODOFO_ASSERT (start >= 0);
				if (w[pos].IsArray()) {
					PdfArray widths = w[pos++].GetArray();
					int length = start + static_cast<int>(widths.GetSize());
					PODOFO_ASSERT (length >= start);
					if (length > static_cast<int>(m_width.GetSize())) {
						m_width.resize(length, default_width);
					}
					for (int i = 0; i < static_cast<int>(widths.GetSize()); ++i) {
						m_width[start + i] = widths[i];
					}
				} else {
					int end = static_cast<int>(w[pos++].GetNumber());
					int length = start + end;
					PODOFO_ASSERT (length >= start);
					if (length > static_cast<int>(m_width.GetSize())) {
						m_width.resize(length, default_width);
					}
					pdf_int64 width = w[pos++].GetNumber();
					for (int i = start; i <= end; ++i)
						m_width[i] = PdfVariant(width);
				}
			}
		}
		m_nLast = m_width.GetSize() - 1;
	} else {
        PODOFO_RAISE_ERROR_INFO( ePdfError_UnsupportedFontFormat, rSubType.GetEscapedName().c_str() );
	}


    m_nWeight      = static_cast<unsigned int>(pDescriptor->GetDictionary().GetKeyAsLong( "FontWeight", 400L ));
    m_nItalicAngle = static_cast<int>(pDescriptor->GetDictionary().GetKeyAsLong( "ItalicAngle", 0L ));

    m_dPdfAscent   = pDescriptor->GetDictionary().GetKeyAsReal( "Ascent", 0.0 );
    m_dAscent      = m_dPdfAscent / 1000.0;
    m_dPdfDescent  = pDescriptor->GetDictionary().GetKeyAsReal( "Descent", 0.0 );
    m_dDescent     = m_dPdfDescent / 1000.0;
    m_dLineSpacing = m_dAscent + m_dDescent;
    
    // Try to fine some sensible values
    m_dUnderlineThickness = 1.0;
    m_dUnderlinePosition  = 0.0;
    m_dStrikeOutThickness = m_dUnderlinePosition;
    m_dStrikeOutPosition  = m_dAscent / 2.0;

    m_bSymbol = false; // TODO
}
Пример #5
0
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS)
{
	PdfObject* bmObj = extGStateObj->GetIndirectKey("BM");
	if (bmObj && bmObj->IsName())
	{
		currGS.blendModes.clear();
		currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str());
		if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible"))
			hasTransparency = true;
	}
	else if (bmObj && bmObj->IsArray())
	{
		PdfArray arr = bmObj->GetArray();
		currGS.blendModes.clear();
		for(int i=0; i<arr.GetSize(); ++i)
			currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str());
		if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible"))
			hasTransparency = true;
	}
	PdfObject* caObj = extGStateObj->GetIndirectKey("ca");
	if (caObj && (caObj->IsReal() || caObj->IsNumber()))
	{
		currGS.fillAlphaConstant = caObj->GetReal();
		if (caObj->GetReal() < 1)
			hasTransparency = true;
	}
	PdfObject* cAObj = extGStateObj->GetIndirectKey("CA");
	if (cAObj && (cAObj->IsReal() || cAObj->IsNumber()))
	{
		if (cAObj->GetReal() < 1)
		hasTransparency = true;
	}
	PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask");
	if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None"))
		hasTransparency = true;
	PdfObject* fontObj = extGStateObj->GetIndirectKey("Font");
	if (fontObj && fontObj->IsArray())
	{
		PdfArray arr = fontObj->GetArray();
		if (arr[0].IsReference())
		{
			PdfReference ref = arr[0].GetReference();
			PdfObject* fontObject = m_doc->GetObjects().GetObject(ref);
			if (fontObject)
			{
				PDFFont font = getFontInfo(fontObject);
				usedFonts.append(font);
				currGS.font.first = font;
				currGS.font.second = arr[1].GetReal();
			}

		}
	}
	PdfObject* lwObj = extGStateObj->GetIndirectKey("LW");
	if (lwObj)
		currGS.lineWidth = lwObj->GetReal();
	PdfObject* lcObj = extGStateObj->GetIndirectKey("LC");
	if (lcObj)
		currGS.lineCap = lcObj->GetNumber();
	PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ");
	if (ljObj)
		currGS.lineJoin = ljObj->GetNumber();
	PdfObject* mlObj = extGStateObj->GetIndirectKey("ML");
	if (mlObj)
		currGS.miterLimit = mlObj->GetReal();
	PdfObject* dObj = extGStateObj->GetIndirectKey("D");
	if (dObj)
	{
		PdfArray dashArr = dObj->GetArray()[0];
		currGS.dashPattern.first.clear();
		for (int i=0; i<dashArr.GetSize(); ++i)
			currGS.dashPattern.first.append(dashArr[i].GetNumber());
		currGS.dashPattern.second = dObj->GetArray()[1].GetNumber();
	}
}
Пример #6
0
void TextExtractor::ExtractText( PdfMemDocument* pDocument, PdfPage* pPage ) 
{
    const char*      pszToken = NULL;
    PdfVariant       var;
    EPdfContentsType eType;

    PdfContentsTokenizer tokenizer( pPage );

    double dCurPosX     = 0.0;
    double dCurPosY     = 0.0;
    double dCurFontSize = 0.0;
    bool   bTextBlock   = false;
    PdfFont* pCurFont   = NULL;

    std::stack<PdfVariant> stack;

    while( tokenizer.ReadNext( eType, pszToken, var ) )
    {
        if( eType == ePdfContentsType_Keyword )
        {
            // support 'l' and 'm' tokens
            if( strcmp( pszToken, "l" ) == 0 || 
                strcmp( pszToken, "m" ) == 0 )
            {
                dCurPosX = stack.top().GetReal();
                stack.pop();
                dCurPosY = stack.top().GetReal();
                stack.pop();
            }
            else if( strcmp( pszToken, "BT" ) == 0 ) 
            {
                bTextBlock   = true;     
                // BT does not reset font
                // dCurFontSize = 0.0;
                // pCurFont     = NULL;
            }
            else if( strcmp( pszToken, "ET" ) == 0 ) 
            {
                if( !bTextBlock ) 
                    fprintf( stderr, "WARNING: Found ET without BT!\n" );
            }

            if( bTextBlock ) 
            {
                if( strcmp( pszToken, "Tf" ) == 0 ) 
                {
                    dCurFontSize = stack.top().GetReal();
                    stack.pop();
                    PdfName fontName = stack.top().GetName();
                    PdfObject* pFont = pPage->GetFromResources( PdfName("Font"), fontName );
                    if( !pFont ) 
                    {
                        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "Cannot create font!" );
                    }

                    pCurFont = pDocument->GetFont( pFont );
                    if( !pCurFont ) 
                    {
                        fprintf( stderr, "WARNING: Unable to create font for object %i %i R\n",
                                 pFont->Reference().ObjectNumber(),
                                 pFont->Reference().GenerationNumber() );
                    }
                }
                else if( strcmp( pszToken, "Tj" ) == 0 ||
                         strcmp( pszToken, "'" ) == 0 ) 
                {
                    AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() );
                    stack.pop();
                }
                else if( strcmp( pszToken, "\"" ) == 0 )
                {
                    AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() );
                    stack.pop();
                    stack.pop(); // remove char spacing from stack
                    stack.pop(); // remove word spacing from stack
                }
                else if( strcmp( pszToken, "TJ" ) == 0 ) 
                {
                    PdfArray array = stack.top().GetArray();
                    stack.pop();
                    
                    for( int i=0; i<static_cast<int>(array.GetSize()); i++ ) 
                    {
                        if( array[i].IsString() )
                            AddTextElement( dCurPosX, dCurPosY, pCurFont, array[i].GetString() );
                    }
                }
            }
        }
        else if ( eType == ePdfContentsType_Variant )
        {
            stack.push( var );
        }
        else
        {
            // Impossible; type must be keyword or variant
            PODOFO_RAISE_ERROR( ePdfError_InternalLogic );
        }
    }
}