void PdfPagesTree::InsertPagesIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, int nIndex, const std::vector<PdfObject*>& vecPages ) { if( !pParent || !vecPages.size() ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } // 1. Add the reference of the new page to the kids array of pParent // 2. Increase count of every node in lstParents (which also includes pParent) // 3. Add Parent key to the page // 1. Add reference const PdfArray oldKids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray(); PdfArray newKids; newKids.reserve( oldKids.GetSize() + vecPages.size() ); bool bIsPushedIn = false; int i=0; for (PdfArray::const_iterator it=oldKids.begin(); it!=oldKids.end(); ++it, ++i ) { if ( !bIsPushedIn && (nIndex < i) ) // Pushing before { for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages) { newKids.push_back( (*itPages)->Reference() ); // Push all new kids at once } bIsPushedIn = true; } newKids.push_back( *it ); // Push in the old kids } // If new kids are still not pushed in then they may be appending to the end if ( !bIsPushedIn && ( (nIndex + 1) == static_cast<int>(oldKids.size())) ) { for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages) { newKids.push_back( (*itPages)->Reference() ); // Push all new kids at once } bIsPushedIn = true; } pParent->GetDictionary().AddKey( PdfName("Kids"), newKids ); // 2. increase count for ( PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); itParents != rlstParents.rend(); ++itParents ) { this->ChangePagesCount( *itParents, vecPages.size() ); } // 3. add parent key to each of the pages for (std::vector<PdfObject*>::const_iterator itPages=vecPages.begin(); itPages!=vecPages.end(); ++itPages) { (*itPages)->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() ); } }
void PdfPagesTree::InsertPageIntoNode( PdfObject* pParent, const PdfObjectList & rlstParents, int nIndex, PdfObject* pPage ) { if( !pParent || !pPage ) { PODOFO_RAISE_ERROR( ePdfError_InvalidHandle ); } // 1. Add the reference of the new page to the kids array of pParent // 2. Increase count of every node in lstParents (which also includes pParent) // 3. Add Parent key to the page // 1. Add reference const PdfArray oldKids = pParent->GetDictionary().GetKey( PdfName("Kids") )->GetArray(); PdfArray::const_iterator it = oldKids.begin(); PdfArray newKids; newKids.reserve( oldKids.GetSize() + 1 ); if( nIndex < 0 ) { newKids.push_back( pPage->Reference() ); } int i = 0; while( it != oldKids.end() ) { newKids.push_back( *it ); if( i == nIndex ) newKids.push_back( pPage->Reference() ); ++i; ++it; } /* PdfVariant var2( newKids ); std::string str2; var2.ToString(str2); printf("newKids= %s\n", str2.c_str() ); */ pParent->GetDictionary().AddKey( PdfName("Kids"), newKids ); // 2. increase count PdfObjectList::const_reverse_iterator itParents = rlstParents.rbegin(); while( itParents != rlstParents.rend() ) { this->ChangePagesCount( *itParents, 1 ); ++itParents; } // 3. add parent key to the page pPage->GetDictionary().AddKey( PdfName("Parent"), pParent->Reference() ); }
/** Fill the function object's dictionary and stream.
 *
 *  Writes the /Domain, /Range, /Size, /Order (always 1) and /BitsPerSample
 *  (always 8) keys and appends every sample to the object's stream.
 *
 *  \param rDomain     array stored under /Domain; its length also determines /Size
 *  \param rRange      array stored under /Range
 *  \param rlstSamples the samples, appended to the stream one element at a time
 */
void PdfSampledFunction::Init( const PdfArray & rDomain, const PdfArray & rRange, const PdfFunction::Sample & rlstSamples )
{
    PdfObject* pObject = this->GetObject();

    // /Size gets rDomain.GetSize() / 2 entries, each of them holding
    // the value rDomain.GetSize() / 2.
    const long long nHalfDomain = static_cast<long long>( rDomain.GetSize() ) / 2L;

    PdfArray Size;
    unsigned nEntry = 0;
    while( nEntry < rDomain.GetSize() / 2 )
    {
        Size.push_back( PdfObject( nHalfDomain ) );
        nEntry++;
    }

    pObject->GetDictionary().AddKey( PdfName("Domain"), rDomain );
    pObject->GetDictionary().AddKey( PdfName("Range"), rRange );
    pObject->GetDictionary().AddKey( PdfName("Size"), Size );
    pObject->GetDictionary().AddKey( PdfName("Order"), PdfObject( 1LL ) );
    pObject->GetDictionary().AddKey( PdfName("BitsPerSample"), PdfObject( 8LL ) );

    // Copy the samples into the stream, one element per Append() call
    pObject->GetStream()->BeginAppend();
    for( PdfFunction::Sample::const_iterator itSample = rlstSamples.begin();
         itSample != rlstSamples.end(); ++itSample )
    {
        pObject->GetStream()->Append( & ( *itSample ), 1 );
    }
    pObject->GetStream()->EndAppend();
}
/** Build font metrics from an existing font object and its font descriptor.
 *
 *  For /Type1 and /TrueType fonts the name and bounding box are read from the
 *  descriptor and /FirstChar, /LastChar and /Widths from the font dictionary;
 *  if there is no /Widths array the descriptor's /MissingWidth is used.
 *  For /CIDFontType0 and /CIDFontType2 fonts the width table is built from
 *  the /DW (default 1000) and /W entries of the font dictionary.
 *  Weight, italic angle, ascent and descent are read from the descriptor.
 *
 *  \param pFont       the font object; must not be NULL
 *  \param pDescriptor the font descriptor object; must not be NULL
 *  \param pEncoding   stored in m_pEncoding
 *
 *  Raises ePdfError_InvalidHandle if pFont or pDescriptor is NULL,
 *  ePdfError_NoObject if the font has no /Subtype or defines neither
 *  /Widths nor /MissingWidth, and ePdfError_UnsupportedFontFormat for any
 *  other subtype.
 */
PdfFontMetricsObject::PdfFontMetricsObject( PdfObject* pFont, PdfObject* pDescriptor, const PdfEncoding* const pEncoding )
    : PdfFontMetrics( ePdfFontType_Unknown, "", NULL ),
      m_pEncoding( pEncoding ), m_dDefWidth(0.0)
{
    if( !pFont || !pDescriptor )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    // Not every branch below assigns this member; give it a defined value first
    m_missingWidth = NULL;

    const PdfObject* pSubType = pFont->GetDictionary().GetKey( PdfName::KeySubtype );
    if( !pSubType )
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines no Subtype!" );
    }
    const PdfName & rSubType = pSubType->GetName();

    // OC 15.08.2010 BugFix: /FirstChar /LastChar /Widths are in the Font dictionary and not in the FontDescriptor
    if ( rSubType == PdfName("Type1") || rSubType == PdfName("TrueType") )
    {
        // Missing descriptor entries are skipped, like in the CID branch below
        PdfObject* pObj = pDescriptor->GetIndirectKey( "FontName" );
        if( pObj )
        {
            m_sName = pObj->GetName();
        }
        pObj = pDescriptor->GetIndirectKey( "FontBBox" );
        if( pObj )
        {
            m_bbox = pObj->GetArray();
        }

        m_nFirst = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "FirstChar", 0L ));
        m_nLast  = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "LastChar", 0L ));

        // OC 15.08.2010 BugFix: GetIndirectKey() instead of GetDictionary().GetKey() and "Widths" instead of "Width"
        PdfObject* widths = pFont->GetIndirectKey( "Widths" );
        if( widths != NULL )
        {
            m_width        = widths->GetArray();
            m_missingWidth = NULL;
        }
        else
        {
            widths = pDescriptor->GetDictionary().GetKey( "MissingWidth" );
            if( widths == NULL )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines neither Widths, nor MissingWidth values!" );
            }
            // Must be stored on the success path (it used to sit behind the raise above)
            m_missingWidth = widths;
        }
    }
    else if ( rSubType == PdfName("CIDFontType0") || rSubType == PdfName("CIDFontType2") )
    {
        PdfObject *pObj = pDescriptor->GetIndirectKey( "FontName" );
        if (pObj)
        {
            m_sName = pObj->GetName();
        }
        pObj = pDescriptor->GetIndirectKey( "FontBBox" );
        if (pObj)
        {
            m_bbox = pObj->GetArray();
        }

        m_nFirst = 0;
        m_nLast  = 0;

        m_dDefWidth = static_cast<double>(pFont->GetDictionary().GetKeyAsLong( "DW", 1000L ));
        PdfVariant default_width(m_dDefWidth);
        PdfObject * pw = pFont->GetIndirectKey( "W" );

        // Start with one default entry (index 0); the table grows on demand below
        for (int i = m_nFirst; i <= m_nLast; ++i)
        {
            m_width.push_back(default_width);
        }

        if (pw)
        {
            // /W is a sequence of either
            //   c_first [ w1 w2 ... wn ]   or
            //   c_first c_last w
            const PdfArray & w = pw->GetArray();
            const int nEntries = static_cast<int>(w.GetSize());
            int pos = 0;
            while (pos < nEntries)
            {
                int start = static_cast<int>(w[pos++].GetNumber());
                PODOFO_ASSERT (start >= 0);

                if (pos >= nEntries)
                {
                    break;  // truncated entry: nothing follows c_first
                }

                if (w[pos].IsArray())
                {
                    const PdfArray & widths = w[pos++].GetArray();
                    int length = start + static_cast<int>(widths.GetSize());
                    PODOFO_ASSERT (length >= start);
                    if (length > static_cast<int>(m_width.GetSize()))
                    {
                        m_width.resize(length, default_width);
                    }
                    for (int i = 0; i < static_cast<int>(widths.GetSize()); ++i)
                    {
                        m_width[start + i] = widths[i];
                    }
                }
                else
                {
                    if (pos + 1 >= nEntries)
                    {
                        break;  // truncated entry: c_last and w are both required
                    }

                    int end = static_cast<int>(w[pos++].GetNumber());
                    PODOFO_ASSERT (end >= 0);

                    // The table has to hold the indices start..end inclusive,
                    // i.e. end + 1 entries (not start + end).
                    int length = end + 1;
                    if (length > static_cast<int>(m_width.GetSize()))
                    {
                        m_width.resize(length, default_width);
                    }
                    pdf_int64 width = w[pos++].GetNumber();
                    for (int i = start; i <= end; ++i)
                    {
                        m_width[i] = PdfVariant(width);
                    }
                }
            }
        }
        m_nLast = static_cast<int>(m_width.GetSize()) - 1;
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_UnsupportedFontFormat, rSubType.GetEscapedName().c_str() );
    }

    m_nWeight      = static_cast<unsigned int>(pDescriptor->GetDictionary().GetKeyAsLong( "FontWeight", 400L ));
    m_nItalicAngle = static_cast<int>(pDescriptor->GetDictionary().GetKeyAsLong( "ItalicAngle", 0L ));

    // The descriptor values are kept as read and additionally divided by 1000
    m_dPdfAscent   = pDescriptor->GetDictionary().GetKeyAsReal( "Ascent", 0.0 );
    m_dAscent      = m_dPdfAscent / 1000.0;
    m_dPdfDescent  = pDescriptor->GetDictionary().GetKeyAsReal( "Descent", 0.0 );
    m_dDescent     = m_dPdfDescent / 1000.0;
    m_dLineSpacing = m_dAscent + m_dDescent;

    // Try to find some sensible values
    m_dUnderlineThickness = 1.0;
    m_dUnderlinePosition  = 0.0;
    // NOTE(review): the strike-out thickness is taken from the underline
    // *position* (0.0), not the underline thickness - confirm this is intended.
    m_dStrikeOutThickness = m_dUnderlinePosition;
    m_dStrikeOutPosition  = m_dAscent / 2.0;

    m_bSymbol = false; // TODO
}
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS) { PdfObject* bmObj = extGStateObj->GetIndirectKey("BM"); if (bmObj && bmObj->IsName()) { currGS.blendModes.clear(); currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str()); if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible")) hasTransparency = true; } else if (bmObj && bmObj->IsArray()) { PdfArray arr = bmObj->GetArray(); currGS.blendModes.clear(); for(int i=0; i<arr.GetSize(); ++i) currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str()); if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible")) hasTransparency = true; } PdfObject* caObj = extGStateObj->GetIndirectKey("ca"); if (caObj && (caObj->IsReal() || caObj->IsNumber())) { currGS.fillAlphaConstant = caObj->GetReal(); if (caObj->GetReal() < 1) hasTransparency = true; } PdfObject* cAObj = extGStateObj->GetIndirectKey("CA"); if (cAObj && (cAObj->IsReal() || cAObj->IsNumber())) { if (cAObj->GetReal() < 1) hasTransparency = true; } PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask"); if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None")) hasTransparency = true; PdfObject* fontObj = extGStateObj->GetIndirectKey("Font"); if (fontObj && fontObj->IsArray()) { PdfArray arr = fontObj->GetArray(); if (arr[0].IsReference()) { PdfReference ref = arr[0].GetReference(); PdfObject* fontObject = m_doc->GetObjects().GetObject(ref); if (fontObject) { PDFFont font = getFontInfo(fontObject); usedFonts.append(font); currGS.font.first = font; currGS.font.second = arr[1].GetReal(); } } } PdfObject* lwObj = extGStateObj->GetIndirectKey("LW"); if (lwObj) currGS.lineWidth = lwObj->GetReal(); PdfObject* lcObj = extGStateObj->GetIndirectKey("LC"); if (lcObj) currGS.lineCap = lcObj->GetNumber(); PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ"); if (ljObj) 
currGS.lineJoin = ljObj->GetNumber(); PdfObject* mlObj = extGStateObj->GetIndirectKey("ML"); if (mlObj) currGS.miterLimit = mlObj->GetReal(); PdfObject* dObj = extGStateObj->GetIndirectKey("D"); if (dObj) { PdfArray dashArr = dObj->GetArray()[0]; currGS.dashPattern.first.clear(); for (int i=0; i<dashArr.GetSize(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = dObj->GetArray()[1].GetNumber(); } }
void TextExtractor::ExtractText( PdfMemDocument* pDocument, PdfPage* pPage ) { const char* pszToken = NULL; PdfVariant var; EPdfContentsType eType; PdfContentsTokenizer tokenizer( pPage ); double dCurPosX = 0.0; double dCurPosY = 0.0; double dCurFontSize = 0.0; bool bTextBlock = false; PdfFont* pCurFont = NULL; std::stack<PdfVariant> stack; while( tokenizer.ReadNext( eType, pszToken, var ) ) { if( eType == ePdfContentsType_Keyword ) { // support 'l' and 'm' tokens if( strcmp( pszToken, "l" ) == 0 || strcmp( pszToken, "m" ) == 0 ) { dCurPosX = stack.top().GetReal(); stack.pop(); dCurPosY = stack.top().GetReal(); stack.pop(); } else if( strcmp( pszToken, "BT" ) == 0 ) { bTextBlock = true; // BT does not reset font // dCurFontSize = 0.0; // pCurFont = NULL; } else if( strcmp( pszToken, "ET" ) == 0 ) { if( !bTextBlock ) fprintf( stderr, "WARNING: Found ET without BT!\n" ); } if( bTextBlock ) { if( strcmp( pszToken, "Tf" ) == 0 ) { dCurFontSize = stack.top().GetReal(); stack.pop(); PdfName fontName = stack.top().GetName(); PdfObject* pFont = pPage->GetFromResources( PdfName("Font"), fontName ); if( !pFont ) { PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "Cannot create font!" 
); } pCurFont = pDocument->GetFont( pFont ); if( !pCurFont ) { fprintf( stderr, "WARNING: Unable to create font for object %i %i R\n", pFont->Reference().ObjectNumber(), pFont->Reference().GenerationNumber() ); } } else if( strcmp( pszToken, "Tj" ) == 0 || strcmp( pszToken, "'" ) == 0 ) { AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() ); stack.pop(); } else if( strcmp( pszToken, "\"" ) == 0 ) { AddTextElement( dCurPosX, dCurPosY, pCurFont, stack.top().GetString() ); stack.pop(); stack.pop(); // remove char spacing from stack stack.pop(); // remove word spacing from stack } else if( strcmp( pszToken, "TJ" ) == 0 ) { PdfArray array = stack.top().GetArray(); stack.pop(); for( int i=0; i<static_cast<int>(array.GetSize()); i++ ) { if( array[i].IsString() ) AddTextElement( dCurPosX, dCurPosY, pCurFont, array[i].GetString() ); } } } } else if ( eType == ePdfContentsType_Variant ) { stack.push( var ); } else { // Impossible; type must be keyword or variant PODOFO_RAISE_ERROR( ePdfError_InternalLogic ); } } }