/**
 * Return the annotation stored at position \a index of this page's
 * annotations array.
 *
 * PdfAnnotation wrappers are created on first access and remembered in
 * m_mapAnnotations (keyed by the annotation's reference), so repeated calls
 * for the same entry return the same pointer.
 *
 * \param index zero-based position in the annotations array
 * \returns the PdfAnnotation wrapper stored in m_mapAnnotations
 *
 * Raises ePdfError_InvalidDataType if the page has no annotations array,
 * ePdfError_ValueOutOfRange if \a index is outside the array and
 * ePdfError_NoObject if the referenced object cannot be looked up.
 */
PdfAnnotation* PdfPage::GetAnnotation( int index )
{
    PdfObject* pObj = this->GetAnnotationsArray( false );
    if( !(pObj && pObj->IsArray()) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    // Either condition alone makes the index invalid, so they have to be
    // combined with ||. With && the check could never fire: a negative index
    // was only rejected if it was *also* too large after the cast, and a
    // too-large positive index was never rejected at all.
    if( index < 0 || static_cast<unsigned int>(index) >= pObj->GetArray().size() )
    {
        PODOFO_RAISE_ERROR( ePdfError_ValueOutOfRange );
    }

    PdfReference ref = pObj->GetArray()[index].GetReference();

    PdfAnnotation* pAnnot = m_mapAnnotations[ref];
    if( !pAnnot )
    {
        // Not wrapped yet: look the object up via the owner of the page object.
        pObj = m_pObject->GetOwner()->GetObject( ref );
        if( !pObj )
        {
            PdfError::DebugMessage( "Error looking up object %i %i R\n",
                                    ref.ObjectNumber(), ref.GenerationNumber() );
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }

        pAnnot = new PdfAnnotation( pObj, this );
        m_mapAnnotations[ref] = pAnnot;
    }

    return pAnnot;
}
/**
 * Build a tokenizer over the content stream(s) of a canvas.
 *
 * The canvas' contents object may be a single stream or an array of
 * references to streams. All streams are queued in m_lstContents; the first
 * one is made current immediately and removed from the queue.
 *
 * \param pCanvas canvas whose contents are tokenized; must not be NULL
 *
 * Raises ePdfError_InvalidHandle for a NULL canvas and
 * ePdfError_InvalidDataType if the contents are neither a stream nor an
 * array of resolvable references.
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer()
{
    // Fail with a proper error instead of dereferencing a NULL canvas.
    if( !pCanvas )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();

    if( pContents && pContents->IsArray() )
    {
        PdfArray& a = pContents->GetArray();
        for( PdfArray::iterator it = a.begin(); it != a.end(); ++it )
        {
            if( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );
            }

            // A reference that cannot be resolved must not end up in the
            // queue as a NULL pointer, where it would be handed to
            // SetCurrentContentsStream() later on.
            PdfObject* pStream = pContents->GetOwner()->GetObject( (*it).GetReference() );
            if( !pStream )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStream );
        }
    }
    else if( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // Start reading from the first queued stream, if there is one.
    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
/**
 * Collect type and embedding information for a font dictionary.
 *
 * The font's "Subtype" name selects PDFFont::fontType. For "Type0" fonts the
 * first entry of "DescendantFonts" supplies both the CID font type and the
 * font descriptor. A font counts as embedded when its descriptor has a
 * "FontFile", "FontFile2" or "FontFile3" entry with a stream ("Type3" fonts
 * are always marked embedded and their descriptor is ignored). isOpenType is
 * set when "FontFile3" has the subtype "OpenType".
 *
 * @param fontObj the font dictionary to inspect
 * @return a PDFFont; fields not determined here keep whatever PDFFont's
 *         default construction gives them
 */
PDFFont PDFAnalyzer::getFontInfo(PdfObject* fontObj)
{
    PDFFont currFont;

    PdfObject* subtype = fontObj->GetIndirectKey("Subtype");
    if (!subtype || !subtype->IsName())
        return currFont;

    PdfObject* fontDesc = fontObj->GetIndirectKey("FontDescriptor");

    if (subtype->GetName() == "Type1")
        currFont.fontType = F_Type1;
    else if (subtype->GetName() == "MMType1")
        currFont.fontType = F_MMType1;
    else if (subtype->GetName() == "TrueType")
        currFont.fontType = F_TrueType;
    else if (subtype->GetName() == "Type3")
    {
        currFont.fontType = F_Type3;
        currFont.isEmbedded = true;
        fontDesc = NULL;
    }
    else if (subtype->GetName() == "Type0")
    {
        PdfObject* descendantFonts = fontObj->GetIndirectKey("DescendantFonts");
        // Indexing element 0 is only valid for a non-empty array.
        if (descendantFonts && descendantFonts->IsArray() && descendantFonts->GetArray().GetSize() > 0)
        {
            // Work on the real object, not on a stack copy: fontDesc is used
            // after this scope ends, so it must never point into a temporary.
            PdfObject* descendantFont = &descendantFonts->GetArray()[0];
            if (descendantFont->IsReference())
            {
                // The array entry is only a reference; fetch the object it
                // names from the owner of the array (NULL if unavailable).
                descendantFont = descendantFonts->GetOwner()
                    ? descendantFonts->GetOwner()->GetObject(descendantFont->GetReference())
                    : NULL;
            }
            else
            {
                // Direct entry: give it the array's owner, as was done before
                // on the copy, so indirect keys can be resolved.
                descendantFont->SetOwner(descendantFonts->GetOwner());
            }

            if (descendantFont)
            {
                PdfObject* subtypeDescFont = descendantFont->GetIndirectKey("Subtype");
                fontDesc = descendantFont->MustGetIndirectKey("FontDescriptor");
                if (subtypeDescFont && subtypeDescFont->IsName())
                {
                    if (subtypeDescFont->GetName() == "CIDFontType0")
                        currFont.fontType = F_CIDFontType0;
                    else if (subtypeDescFont->GetName() == "CIDFontType2")
                        currFont.fontType = F_CIDFontType2;
                }
            }
        }
    }

    if (fontDesc)
    {
        PdfObject* fontFile = fontDesc->GetIndirectKey("FontFile");
        PdfObject* fontFile2 = fontDesc->GetIndirectKey("FontFile2");
        PdfObject* fontFile3 = fontDesc->GetIndirectKey("FontFile3");

        if (fontFile && fontFile->HasStream())
            currFont.isEmbedded = true;
        if (fontFile2 && fontFile2->HasStream())
            currFont.isEmbedded = true;
        if (fontFile3 && fontFile3->HasStream())
        {
            currFont.isEmbedded = true;
            PdfObject* ff3Subtype = fontFile3->GetIndirectKey("Subtype");
            if (ff3Subtype && ff3Subtype->IsName() && ff3Subtype->GetName() == "OpenType")
                currFont.isOpenType = true;
        }
    }

    return currFont;
}
/**
 * Look up one of the page's boxes by key name.
 *
 * The key is resolved with GetInheritedKeyFromObject() starting at this
 * page's object.
 *
 * \param inBox name of the box key to look up
 * \returns a PdfRect filled from the array found under \a inBox, or a
 *          default-constructed PdfRect if no array was found
 */
const PdfRect PdfPage::GetPageBox( const char* inBox ) const
{
    PdfRect box;

    PdfObject* pBoxObj = GetInheritedKeyFromObject( inBox, m_pObject );
    if( !pBoxObj || !pBoxObj->IsArray() )
        return box;

    // Only an array can describe a rectangle.
    box.FromArray( pBoxObj->GetArray() );
    return box;
}
void PdfPage::DeleteAnnotation( const PdfReference & ref ) { PdfAnnotation* pAnnot; PdfArray::iterator it; PdfObject* pObj = this->GetAnnotationsArray( false ); bool bFound = false; // delete the annotation from the array if( !(pObj && pObj->IsArray()) ) { PODOFO_RAISE_ERROR( ePdfError_InvalidDataType ); } it = pObj->GetArray().begin(); while( it != pObj->GetArray().end() ) { if( (*it).GetReference() == ref ) { pObj->GetArray().erase( it ); bFound = true; break; } ++it; } // if no such annotation was found // throw an error instead of deleting // another object with this reference if( !bFound ) { PODOFO_RAISE_ERROR( ePdfError_NoObject ); } // delete any cached PdfAnnotations pAnnot = m_mapAnnotations[ref]; if( pAnnot ) { delete pAnnot; m_mapAnnotations.erase( ref ); } // delete the PdfObject in the file delete m_pObject->GetOwner()->RemoveObject( ref ); }
/**
 * Delete the annotation at position \a index of this page's annotations
 * array by forwarding its reference to DeleteAnnotation( const PdfReference & ).
 *
 * \param index zero-based position in the annotations array
 *
 * Raises ePdfError_InvalidDataType if the page has no annotations array and
 * ePdfError_ValueOutOfRange if \a index is outside the array.
 */
void PdfPage::DeleteAnnotation( int index )
{
    PdfObject* pObj = this->GetAnnotationsArray( false );
    if( !(pObj && pObj->IsArray()) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    // A negative index OR an index past the end is invalid. The previous &&
    // required both at once, so out-of-range indices reached the array access.
    if( index < 0 || static_cast<unsigned int>(index) >= pObj->GetArray().size() )
    {
        PODOFO_RAISE_ERROR( ePdfError_ValueOutOfRange );
    }

    PdfReference ref = pObj->GetArray()[index].GetReference();
    this->DeleteAnnotation( ref );
}
/**
 * Fetch the page's "Annots" array.
 *
 * \param bCreate if true and the page dictionary has no "Annots" key, an
 *                empty array is added under that key and returned
 * \returns the annotations array object, or NULL if the key is missing and
 *          \a bCreate is false, or if the key exists but does not resolve
 *          to an array
 */
PdfObject* PdfPage::GetAnnotationsArray( bool bCreate ) const
{
    if( !m_pObject->GetDictionary().HasKey( "Annots" ) )
    {
        if( !bCreate )
            return NULL;

        // The method is const, so constness is cast away to add the new key.
        PdfArray array;
        const_cast<PdfPage*>(this)->m_pObject->GetDictionary().AddKey( "Annots", array );
        return m_pObject->GetDictionary().GetKey( "Annots" );
    }

    // The key exists: it is only usable if it resolves to an array.
    PdfObject* pAnnots = m_pObject->GetIndirectKey( "Annots" );
    if( pAnnots && pAnnots->IsArray() )
        return pAnnots;

    return NULL;
}
/**
 * Initialise this destination from a PDF object.
 *
 * Depending on the data type of \a pObject:
 *  - array:  the array is used directly as the destination;
 *  - string: the value is looked up under "Dests" in the document's names tree;
 *  - name:   the value is looked up in the catalog's "Dests" dictionary
 *            (requires \a pDocument to be a PdfMemDocument);
 *  - other:  an error is logged and an empty array object is created so that
 *            later method calls have something to work on.
 *
 * A looked-up value may be the destination array itself or a dictionary
 * holding the array under "D".
 *
 * \param pObject   object describing the destination; must not be NULL
 * \param pDocument document used for the lookups and for object creation
 *
 * Raises ePdfError_InvalidHandle for a NULL \a pObject, for a NULL
 * \a pDocument where one is needed, or when a name lookup is attempted on a
 * document that is not a PdfMemDocument; ePdfError_NoObject if the names
 * tree or catalog is missing; ePdfError_InvalidKey if the catalog has no
 * "Dests" dictionary; ePdfError_InvalidName if the lookup yields nothing;
 * ePdfError_InvalidDataType if a destination dictionary has no "D" array.
 */
void PdfDestination::Init( PdfObject* pObject, PdfDocument* pDocument )
{
    if( !pObject )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    bool bValueExpected = false;
    PdfObject* pValue = NULL;

    if( pObject->GetDataType() == ePdfDataType_Array )
    {
        m_array = pObject->GetArray();
        m_pObject = pObject;
    }
    else if( pObject->GetDataType() == ePdfDataType_String )
    {
        if( !pDocument )
        {
            PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
        }

        PdfNamesTree* pNames = pDocument->GetNamesTree( ePdfDontCreateObject );
        if( !pNames )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }

        pValue = pNames->GetValue( "Dests", pObject->GetString() );
        bValueExpected = true;
    }
    else if( pObject->GetDataType() == ePdfDataType_Name )
    {
        // A NULL document also ends up here as a failed cast.
        PdfMemDocument* pMemDoc = dynamic_cast<PdfMemDocument*>(pDocument);
        if( !pMemDoc )
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle,
                "For reading from a document, only use PdfMemDocument." );
        }

        PdfObject* pCatalog = pMemDoc->GetCatalog();
        if( !pCatalog )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }

        PdfObject* pDests = pCatalog->GetIndirectKey( PdfName( "Dests" ) );
        if( !pDests )
        {
            // The error code has been chosen for its distinguishability.
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidKey,
                "No PDF-1.1-compatible destination dictionary found." );
        }

        pValue = pDests->GetIndirectKey( pObject->GetName() );
        bValueExpected = true;
    }
    else
    {
        PdfError::LogMessage( eLogSeverity_Error, "Unsupported object given to"
            " PdfDestination::Init of type %s", pObject->GetDataTypeString() );
        m_array = PdfArray(); // needed to prevent crash on method calls

        if( !pDocument )
        {
            PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
        }
        // needed for GetObject() use w/o checking its return value for NULL
        m_pObject = pDocument->GetObjects()->CreateObject( m_array );
    }

    if( bValueExpected )
    {
        if( !pValue )
        {
            PODOFO_RAISE_ERROR( ePdfError_InvalidName );
        }

        if( pValue->IsArray() )
        {
            m_array = pValue->GetArray();
        }
        else if( pValue->IsDictionary() )
        {
            // The dictionary form keeps the destination array under "D".
            // A missing or non-array entry is reported as an error instead
            // of dereferencing a NULL lookup result.
            PdfObject* pDestArray = pValue->GetIndirectKey( "D" );
            if( !pDestArray || !pDestArray->IsArray() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType,
                    "Destination dictionary has no /D array." );
            }
            m_array = pDestArray->GetArray();
        }

        m_pObject = pValue;
    }
}
void PDFProcessor::start () { int nNum = 0; try { PdfObject* pObj = NULL; // open document qDebug() << "Opening file: " << filename.toStdString().c_str(); PdfMemDocument document( filename.toStdString().c_str() ); // m_pszOutputDirectory = const_cast<char*>(pszOutput); TCIVecObjects it = document.GetObjects().begin(); while( it != document.GetObjects().end() ) { if( (*it)->IsDictionary() ) { PdfObject* pObjType = (*it)->GetDictionary().GetKey( PdfName::KeyType ); PdfObject* pObjSubType = (*it)->GetDictionary().GetKey( PdfName::KeySubtype ); if( ( pObjType && pObjType->IsName() && ( pObjType->GetName().GetName() == "XObject" ) ) || ( pObjSubType && pObjSubType->IsName() && ( pObjSubType->GetName().GetName() == "Image" ) ) ) { pObj = (*it)->GetDictionary().GetKey( PdfName::KeyFilter ); if( pObj && pObj->IsArray() && pObj->GetArray().GetSize() == 1 && pObj->GetArray()[0].IsName() && (pObj->GetArray()[0].GetName().GetName() == "DCTDecode") ) pObj = &pObj->GetArray()[0]; std::string filterName = pObj->GetName().GetName(); bool processed = 0; if( pObj && pObj->IsName() && ( filterName == "DCTDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "JPXDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "FlateDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } // else we found something strange, we do not care about it for now. 
if (processed == false) { qDebug() << "Unknown image type found:" << QString::fromStdString(filterName) << "\n"; nNum++; } document.FreeObjectMemory( *it ); } } ++it; } } catch( PdfError & e ) { qDebug() << "Error: An error ocurred during processing the pdf file:" << e.GetError(); e.PrintErrorMsg(); return;// e.GetError(); } // TODO: statistics of no of images etc // nNum = extractor.GetNumImagesExtracted(); qDebug() << "Extracted " << nNum << " images successfully from the PDF file.\n"; }
void PdfMemStream::FlateCompress() { PdfObject* pObj; PdfVariant vFilter( PdfName("FlateDecode" ) ); PdfVariant vFilterList; PdfArray tFilters; PdfArray::const_iterator tciFilters; if( !m_lLength ) return; // ePdfError_ErrOk // TODO: Handle DecodeParms if( m_pParent->GetDictionary().HasKey( "Filter" ) ) { pObj = m_pParent->GetIndirectKey( "Filter" ); if( pObj->IsName() ) { if( pObj->GetName() != "DCTDecode" && pObj->GetName() != "FlateDecode" ) { tFilters.push_back( vFilter ); tFilters.push_back( *pObj ); } } else if( pObj->IsArray() ) { tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { if( (*tciFilters).IsName() ) { // do not compress DCTDecoded are already FlateDecoded streams again if( (*tciFilters).GetName() == "DCTDecode" || (*tciFilters).GetName() == "FlateDecode" ) { return; } } ++tciFilters; } tFilters.push_back( vFilter ); tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { tFilters.push_back( (*tciFilters) ); ++tciFilters; } } else return; vFilterList = PdfVariant( tFilters ); m_pParent->GetDictionary().AddKey( "Filter", vFilterList ); FlateCompressStreamData(); // throws an exception on error } else { m_pParent->GetDictionary().AddKey( "Filter", PdfName( "FlateDecode" ) ); FlateCompressStreamData(); } }
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS) { PdfObject* bmObj = extGStateObj->GetIndirectKey("BM"); if (bmObj && bmObj->IsName()) { currGS.blendModes.clear(); currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str()); if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible")) hasTransparency = true; } else if (bmObj && bmObj->IsArray()) { PdfArray arr = bmObj->GetArray(); currGS.blendModes.clear(); for(int i=0; i<arr.GetSize(); ++i) currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str()); if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible")) hasTransparency = true; } PdfObject* caObj = extGStateObj->GetIndirectKey("ca"); if (caObj && (caObj->IsReal() || caObj->IsNumber())) { currGS.fillAlphaConstant = caObj->GetReal(); if (caObj->GetReal() < 1) hasTransparency = true; } PdfObject* cAObj = extGStateObj->GetIndirectKey("CA"); if (cAObj && (cAObj->IsReal() || cAObj->IsNumber())) { if (cAObj->GetReal() < 1) hasTransparency = true; } PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask"); if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None")) hasTransparency = true; PdfObject* fontObj = extGStateObj->GetIndirectKey("Font"); if (fontObj && fontObj->IsArray()) { PdfArray arr = fontObj->GetArray(); if (arr[0].IsReference()) { PdfReference ref = arr[0].GetReference(); PdfObject* fontObject = m_doc->GetObjects().GetObject(ref); if (fontObject) { PDFFont font = getFontInfo(fontObject); usedFonts.append(font); currGS.font.first = font; currGS.font.second = arr[1].GetReal(); } } } PdfObject* lwObj = extGStateObj->GetIndirectKey("LW"); if (lwObj) currGS.lineWidth = lwObj->GetReal(); PdfObject* lcObj = extGStateObj->GetIndirectKey("LC"); if (lcObj) currGS.lineCap = lcObj->GetNumber(); PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ"); if (ljObj) 
currGS.lineJoin = ljObj->GetNumber(); PdfObject* mlObj = extGStateObj->GetIndirectKey("ML"); if (mlObj) currGS.miterLimit = mlObj->GetReal(); PdfObject* dObj = extGStateObj->GetIndirectKey("D"); if (dObj) { PdfArray dashArr = dObj->GetArray()[0]; currGS.dashPattern.first.clear(); for (int i=0; i<dashArr.GetSize(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = dObj->GetArray()[1].GetNumber(); } }
/**
 * Construct a tokenizer that reads the content stream(s) of \a pCanvas.
 *
 * Accepted forms of the canvas' contents object:
 *  - an array of references, each of which must resolve to an object;
 *  - a single object with a stream;
 *  - a dictionary without a stream, which is treated as an empty page.
 * All collected objects are queued in m_lstContents; the first one becomes
 * the current contents stream and is taken off the queue.
 *
 * \param pCanvas the canvas to tokenize
 *
 * Raises ePdfError_InvalidHandle if \a pCanvas is NULL and
 * ePdfError_InvalidDataType if the contents are missing, of another kind,
 * or an array with non-reference or unresolvable entries.
 */
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer(), m_readingInlineImgData(false)
{
    if( !pCanvas )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();
    if( !pContents )
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    if( pContents->IsArray() )
    {
        PdfArray& entries = pContents->GetArray();
        PdfArray::iterator cur = entries.begin();
        while( cur != entries.end() )
        {
            if( !(*cur).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );
            }

            PdfObject* pStreamObj = pContents->GetOwner()->GetObject( (*cur).GetReference() );
            if( !pStreamObj )
            {
                // some damaged PDFs may have dangling references
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pStreamObj );
            ++cur;
        }
    }
    else if( pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else if( pContents->IsDictionary() )
    {
        // OC 18.09.2010 BugFix: an empty page was found in a PDF document,
        // i.e. a page dictionary without any content stream:
        //   103 0 obj
        //   <<
        //     /Type /Page
        //     /MediaBox [ 0 0 595 842 ]
        //     /Parent 3 0 R
        //     /Resources << /ProcSet [ /PDF ] >>
        //     /Rotate 0
        //   >>
        //   endobj
        m_lstContents.push_back( pContents );
        PdfError::LogMessage(eLogSeverity_Information,
                  "PdfContentsTokenizer: found canvas-dictionary without stream => empty page");
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    // Begin with the first queued object, when there is one.
    if( !m_lstContents.empty() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}