/**
 * Load the named destinations of a PDF document into this link layer.
 *
 * Reads the "Dests" entry of the document's names tree into a dictionary and
 * calls addTarget() once per entry whose (reference-resolved) value is either
 * a destination array or a dictionary carrying the destination under "D".
 * Entries of any other type, entries that cannot be resolved and dictionaries
 * without a "D" key are logged and skipped.
 *
 * A PoDoFo::PdfError raised while handling a single entry is logged and the
 * loop continues with the next entry; an error raised while reading the names
 * tree itself is logged and ends the loading.
 *
 * @param doc document to read the names tree from; it is dereferenced
 *            unconditionally, so it must not be null.
 */
void linkLayer::loadFromDoc(PoDoFo::PdfMemDocument* doc)
{
    qDebug() << "linkLayer: Loading named destinations ...";
    try {
        PoDoFo::PdfNamesTree* pNames = doc->GetNamesTree( PoDoFo::ePdfDontCreateObject );
        if( ! pNames )
            return;

        PoDoFo::PdfDictionary destsDict;
        pNames->ToDictionary( PoDoFo::PdfName("Dests"), destsDict );

        // destsDict outlives the loop, so the key map can be read in place
        // instead of being copied.
        const PoDoFo::TKeyMap& keyMap = destsDict.GetKeys();

        // Declared outside the inner try block because the catch handler
        // reports the name of the entry that failed.
        QString tName;
        for( PoDoFo::TKeyMap::const_iterator it = keyMap.begin(); it != keyMap.end(); ++it ) {
            try {
                tName = QString::fromUtf8( it->first.GetName().c_str() );
                qDebug() << "Processing "<< tName;

                PoDoFo::PdfObject* obj = pdfUtil::resolveRefs( doc, it->second );
                if ( !obj ) {
                    qDebug() << "Element could not be resolved, skipping:" << tName;
                    continue;
                }

                PoDoFo::PdfDestination* dest = 0;
                if ( obj->IsArray() ) {
                    dest = new PoDoFo::PdfDestination( obj );
                } else if ( obj->IsDictionary() ) {
                    // GetKey() yields a null pointer when the key is absent,
                    // so it has to be checked before it is used.
                    PoDoFo::PdfObject* target = obj->GetDictionary().GetKey( PoDoFo::PdfName("D") );
                    if ( !target ) {
                        qDebug() << "Dictionary has no /D entry, skipping:" << tName;
                        continue;
                    }
                    target->SetOwner( &doc->GetObjects() );
                    dest = new PoDoFo::PdfDestination( target );
                } else {
                    qDebug() << "Element is neither an array, nor a dictionary:"<< obj->GetDataTypeString();
                    continue;
                }

                // NOTE(review): addTarget() presumably takes ownership of
                // dest - confirm, otherwise the destination leaks.
                addTarget( tName, dest );
            } catch ( PoDoFo::PdfError & e ) {
                qDebug() << "linkLayer: Error adding named destination ("<<tName<<"):"<<e.what();
            }
        }
    } catch ( PoDoFo::PdfError & e ) {
        qDebug() << "linkLayer: Error processing names tree:" << e.what();
    }
    qDebug() << "linkLayer: Done loading named destinations.";
}
bool parsepdf(const char* filename) { PoDoFo::PdfVecObjects objects; PoDoFo::PdfParser parser(&objects, filename); PoDoFo::TIVecObjects it = objects.begin(); bool result = false; do { PoDoFo::PdfObject *obj = (*it); #if (PODOFO_VERSION_MAJOR > 0) || (PODOFO_VERSION_MINOR > 8) || (PODOFO_VERSION_PATCH >= 3) if (obj->HasStream() && (obj->GetObjectLength(PoDoFo::ePdfWriteMode_Compact) > 10000)) { #else if (obj->HasStream() && (obj->GetObjectLength() > 10000)) { #endif PoDoFo::PdfStream *stream = obj->GetStream(); char *buffer; PoDoFo::pdf_long bufferLen; stream->GetFilteredCopy(&buffer, &bufferLen); //std::cerr << "Buffer length : " << bufferLen << std::endl; if (bufferLen > 1000) result = parseStream(buffer, bufferLen); free(buffer); } it++; } while (it != objects.end()); return result; } int main(int argc, char** argv) { if (argc != 2) { std::cerr << "ERROR: wrong number of argument" << std::endl; return -1; } else { parsepdf(argv[1]); } }
int main (int argc, char *argv[]) { using namespace PoDoFo; PoDoFo::PdfMemDocument *doc = NULL; int result = 0; try { PoDoFo::PdfError::EnableDebug(false); if (argc != 2 && argc != 4) { cout << "Syntax" << endl; cout << " " << argv[0] << " <pdf file> - display the XMP in a file (use \"-\" to specify stdin)" << endl; cout << "or" << endl; cout << " " << argv[0] << " <src pdf file> <xmp file> <new pdf file> - create a new PDF with the XMP in" << endl; return EXIT_FAILURE; } if ( string("-") == argv[1] ) { cin >> std::noskipws; #ifdef _MSC_VER _setmode(_fileno(stdin), _O_BINARY); // @TODO: MSVC specific binary setmode -- not sure if other platforms need it cin.sync_with_stdio(); #endif istream_iterator<char> it(std::cin); istream_iterator<char> end; string buffer(it, end); doc = new PoDoFo::PdfMemDocument(); doc->Load( buffer.c_str(), (long)buffer.size() ); } else { doc = new PoDoFo::PdfMemDocument(argv[1]); } if (argc == 2) { PoDoFo::PdfObject *metadata; if ((metadata = doc->GetMetadata()) == NULL) cout << "No metadata" << endl; else { PoDoFo::PdfStream *str = metadata->GetStream(); if (str != NULL) { char *buf; PoDoFo::pdf_long len; str->GetFilteredCopy(&buf, &len); for (PoDoFo::pdf_long i = 0; i < len; ++i) printf("%c", buf[i]); printf("\n"); fflush(stdout); free(buf); } } } if (argc == 4) { char *xmpBuf; FILE *fp; if ((fp = fopen(argv[2], "rb")) == NULL) cout << "Cannot open " << argv[2] << endl; else { if( fseek( fp, 0, SEEK_END ) == -1 ) { fclose( fp ); PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDeviceOperation, "Failed to seek to the end of the file" ); } long xmpLen = ftell(fp); if( xmpLen == -1 ) { fclose( fp ); PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDeviceOperation, "Failed to read size of the file" ); } xmpBuf = new char[xmpLen]; if( !xmpBuf ) { fclose( fp ); PODOFO_RAISE_ERROR( ePdfError_OutOfMemory ); } if( fseek( fp, 0, SEEK_SET ) == -1 ) { delete [] xmpBuf; fclose( fp ); PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDeviceOperation, "Failed to seek to the 
beginning of the file" ); } if( static_cast<long>( fread( xmpBuf, 1, xmpLen, fp ) ) != xmpLen ) { delete [] xmpBuf; fclose( fp ); PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDeviceOperation, "Failed to read whole file into the memory" ); } PoDoFo::PdfObject *metadata; if ((metadata = doc->GetMetadata()) != NULL) metadata->GetStream()->Set(xmpBuf, xmpLen, PoDoFo::TVecFilters()); else { metadata = doc->GetObjects().CreateObject("Metadata"); metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML")); metadata->GetStream()->Set(xmpBuf, xmpLen, PoDoFo::TVecFilters()); doc->GetCatalog()->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference()); } delete[] xmpBuf; doc->Write(argv[3]); } } } catch( PdfError & e ) {
std::list<FormImage> extract(const std::string& filename, Form& form) { std::list<FormImage> images; ColorSpace colorspace; PoDoFo::pdf_int64 componentbits; PoDoFo::PdfObject* obj = nullptr; PoDoFo::PdfObject* color = nullptr; PoDoFo::PdfObject* component = nullptr; PoDoFo::PdfMemDocument document(filename.c_str()); PoDoFo::TCIVecObjects it = document.GetObjects().begin(); while (it != document.GetObjects().end()) { if ((*it)->IsDictionary()) { PoDoFo::PdfObject* objType = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeyType); PoDoFo::PdfObject* objSubType = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeySubtype); if ((objType && objType->IsName() && objType->GetName().GetName() == "XObject") || (objSubType && objSubType->IsName() && objSubType->GetName().GetName() == "Image" )) { // Colorspace color = (*it)->GetDictionary().GetKey(PoDoFo::PdfName("ColorSpace")); colorspace = ColorSpace::Unknown; if (color && color->IsReference()) color = document.GetObjects().GetObject(color->GetReference()); // Follow ICCBased reference to the Alternate colorspace if (color && color->IsArray() && color->GetArray().GetSize() == 2 && // First item is ICCBased color->GetArray()[0].IsName() && color->GetArray()[0].GetName().GetName() == "ICCBased" && // Second item is reference to color space color->GetArray()[1].IsReference()) { color = document.GetObjects().GetObject(color->GetArray()[1].GetReference()); if (color) color = color->GetDictionary().GetKey(PoDoFo::PdfName("Alternate")); } // Check if either RGB or Grayscale (either the specified // colorspace or the alternate if using an ICCBased colorspace) if (color && color->IsName()) { std::string col = color->GetName().GetName(); if (col == "DeviceRGB") colorspace = ColorSpace::RGB; else if (col == "DeviceGray") colorspace = ColorSpace::Gray; } // Bits per component component = (*it)->GetDictionary().GetKey(PoDoFo::PdfName("BitsPerComponent")); componentbits = 8; if (component && component->IsNumber()) componentbits = 
component->GetNumber(); // Stream obj = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeyFilter); // JPEG and Flate are in another array if (obj && obj->IsArray() && obj->GetArray().GetSize() == 1 && ((obj->GetArray()[0].IsName() && obj->GetArray()[0].GetName().GetName() == "DCTDecode") || (obj->GetArray()[0].IsName() && obj->GetArray()[0].GetName().GetName() == "FlateDecode"))) obj = &obj->GetArray()[0]; Pixels pixels; if (obj && obj->IsName()) { std::string name = obj->GetName().GetName(); if (name == "DCTDecode") pixels = readPDFImage(*it, PixelType::JPG, colorspace, componentbits, filename, form); else if (name == "CCITTFaxDecode") pixels = readPDFImage(*it, PixelType::TIF, colorspace, componentbits, filename, form); // PNM is the default //else if (name == "FlateDecode") // pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form); else pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form); } else { pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form); } document.FreeObjectMemory(*it); if (pixels.isLoaded()) images.push_back(FormImage(form, std::move(pixels))); } } ++it; } return images; }
int main (int argc, char *argv[]) { PoDoFo::PdfError::EnableDebug(false); if (argc != 2 && argc != 4) { cout << "Syntax" << endl; cout << " " << argv[0] << " <pdf file> - display the XMP in a file" << endl; cout << "or" << endl; cout << " " << argv[0] << " <src pdf file> <xmp file> <new pdf file> - create a new PDF with the XMP in" << endl; return EXIT_FAILURE; } PoDoFo::PdfMemDocument *doc = new PoDoFo::PdfMemDocument(argv[1]); if (argc == 2) { PoDoFo::PdfObject *metadata; if ((metadata = doc->GetMetadata()) == NULL) cout << "No metadata" << endl; else { PoDoFo::PdfStream *str = metadata->GetStream(); if (str != NULL) { char *buf; PoDoFo::pdf_long len; str->GetFilteredCopy(&buf, &len); for (PoDoFo::pdf_long i = 0; i < len; ++i) printf("%c", buf[i]); printf("\n"); fflush(stdout); free(buf); } } } if (argc == 4) { char *xmpBuf; FILE *fp; if ((fp = fopen(argv[2], "rb")) == NULL) cout << "Cannot open " << argv[2] << endl; else { fseek(fp, 0, SEEK_END); long xmpLen = ftell(fp); xmpBuf = new char[xmpLen]; fseek(fp, 0, SEEK_SET); fread(xmpBuf, 1, xmpLen, fp); fclose(fp); PoDoFo::PdfObject *metadata; if ((metadata = doc->GetMetadata()) != NULL) metadata->GetStream()->Set(xmpBuf, xmpLen, PoDoFo::TVecFilters()); else { metadata = doc->GetObjects().CreateObject("Metadata"); metadata->GetDictionary().AddKey(PoDoFo::PdfName("Subtype"), PoDoFo::PdfName("XML")); metadata->GetStream()->Set(xmpBuf, xmpLen, PoDoFo::TVecFilters()); doc->GetCatalog()->GetDictionary().AddKey(PoDoFo::PdfName("Metadata"), metadata->Reference()); } delete[] xmpBuf; doc->Write(argv[3]); } } delete doc; return EXIT_SUCCESS; }
std::ostream& ossimGeoPdfInfo::print(std::ostream& out) const { static const char MODULE[] = "ossimGeoPdfInfo::print"; int count = m_PdfMemDocument->GetPageCount(); PoDoFo::PdfString author = m_PdfMemDocument->GetInfo()->GetAuthor(); PoDoFo::PdfString creator = m_PdfMemDocument->GetInfo()->GetCreator(); PoDoFo::PdfString title = m_PdfMemDocument->GetInfo()->GetTitle(); PoDoFo::PdfString subject = m_PdfMemDocument->GetInfo()->GetSubject(); PoDoFo::PdfString keywords = m_PdfMemDocument->GetInfo()->GetKeywords(); PoDoFo::PdfString producer = m_PdfMemDocument->GetInfo()->GetProducer(); ossimString createDate; ossimString modifyDate; PoDoFo::PdfObject* obj = m_PdfMemDocument->GetInfo()->GetObject(); if (obj->IsDictionary()) { PoDoFo::PdfDictionary pdfDictionary = obj->GetDictionary(); PoDoFo::TKeyMap keyMap = pdfDictionary.GetKeys(); PoDoFo::TKeyMap::iterator it = keyMap.begin(); while (it != keyMap.end()) { ossimString refName = ossimString(it->first.GetName()); PoDoFo::PdfObject* refObj = it->second; std::string objStr; refObj->ToString(objStr); if (refName == "CreationDate") { createDate = ossimString(objStr); createDate = createDate.substitute("(", "", true).trim(); createDate = createDate.substitute(")", "", true).trim(); createDate = createDate.substitute("D:", "", true).trim(); } else if (refName == "ModDate") { modifyDate = ossimString(objStr); modifyDate = modifyDate.substitute("(", "", true).trim(); modifyDate = modifyDate.substitute(")", "", true).trim(); modifyDate = modifyDate.substitute("D:", "", true).trim(); } it++; } } try { m_PdfMemDocument->FreeObjectMemory(obj); } catch (...) 
{ } ossimString authorStr = author.GetString(); ossimString creatorStr = creator.GetString(); ossimString titleStr = title.GetString(); ossimString producerStr = producer.GetString(); ossimString subjectStr = subject.GetString(); ossimString keywordsStr = keywords.GetString(); ossimString prefix = "geopdf."; out << prefix << "pagecount: " << ossimString::toString(count).c_str() << "\n"; if (!authorStr.empty()) { out << prefix << "author: " << authorStr.c_str() << "\n"; } if (!creatorStr.empty()) { out << prefix << "creator: " << creatorStr.c_str() << "\n"; } if (!titleStr.empty()) { out << prefix << "title: " << titleStr.c_str() << "\n"; } if (!producerStr.empty()) { out << prefix << "producer: " << producerStr.c_str() << "\n"; } if (!subjectStr.empty()) { out << prefix << "subject: " << subjectStr.c_str() << "\n"; } if (!keywordsStr.empty()) { out << prefix << "keywords: " << keywordsStr.c_str() << "\n"; } if (!createDate.empty()) { out << prefix << "creationdate: " << createDate.c_str() << "\n"; } if (!modifyDate.empty()) { out << prefix << "modificationdate: " << modifyDate.c_str() << "\n"; } if (traceDebug()) { ossimNotify(ossimNotifyLevel_DEBUG) << MODULE << " DEBUG Entered...\n"; } return out; }
/**
 * List the names of the fonts embedded in the document.
 *
 * On the first call with a document set, every object of the document is
 * scanned for dictionaries with /Type Font and /Subtype Type1 or TrueType
 * whose font descriptor carries a FontFile or FontFile3 entry. For each such
 * font the font file object is stored in mfont and the extension ("pfb" for
 * Type1, "ttf" otherwise) in mType, both keyed by the descriptor's FontName.
 * Fonts without an embedded file or without a FontName are reported and
 * skipped. Later calls return the cached keys without scanning again.
 *
 * @return the keys of mfont.
 */
QStringList FMPDFFontExtractor::list()
{
    if (!document || cachedList)
        return mfont.keys();
    cachedList = true;

    const PoDoFo::PdfName pType("Type");
    const PoDoFo::PdfName pSubtype("Subtype");
    const PoDoFo::PdfName pFont("Font");
    const PoDoFo::PdfName pType1("Type1");
    const PoDoFo::PdfName pTrueType("TrueType");
    const PoDoFo::PdfName pFontDescriptor("FontDescriptor");
    const PoDoFo::PdfName pFontFile("FontFile");
    const PoDoFo::PdfName pFontFile3("FontFile3");
    const PoDoFo::PdfName pFontName("FontName");

    for (PoDoFo::TCIVecObjects objIt = document->GetObjects().begin();
         objIt != document->GetObjects().end(); ++objIt)
    {
        PoDoFo::PdfObject* obj = *objIt;
        if (!obj->IsDictionary())
            continue;

        // Only /Type /Font dictionaries are of interest; the value is
        // checked to be a name before it is read as one.
        PoDoFo::PdfObject* typeObj = obj->GetIndirectKey(pType);
        if (!typeObj || !typeObj->IsName() || !(typeObj->GetName() == pFont))
            continue;

        PoDoFo::PdfObject* subtypeObj = obj->GetIndirectKey(pSubtype);
        if (!subtypeObj || !subtypeObj->IsName())
            continue;
        const bool isType1 = (subtypeObj->GetName() == pType1);
        const bool isTrueType = (subtypeObj->GetName() == pTrueType);
        if (!isType1 && !isTrueType)
            continue;

        PoDoFo::PdfObject* fontDescriptor = obj->GetIndirectKey(pFontDescriptor);
        if (!fontDescriptor)
            continue;

        // FontFile is tried first, FontFile3 is the fallback.
        PoDoFo::PdfObject* fontFile = fontDescriptor->GetIndirectKey(pFontFile);
        if (!fontFile)
            fontFile = fontDescriptor->GetIndirectKey(pFontFile3);
        if (!fontFile)
        {
            qWarning("Font not embedded not supported yet");
            continue;
        }

        // The font name is the map key, so a descriptor without a usable
        // FontName cannot be registered.
        PoDoFo::PdfObject* nameObj = fontDescriptor->GetIndirectKey(pFontName);
        if (!nameObj || !nameObj->IsName())
        {
            qDebug()<<"Error: no /FontName key";
            continue;
        }

        const QString n(QString::fromStdString(nameObj->GetName().GetName()));
        mfont[n] = fontFile;
        // we know naming it pfb is wrong
        mType[n] = isType1 ? "pfb" : "ttf";
    }
    return mfont.keys();
}