/**
 * Classifies a PDF colour-space object.
 *
 * A colour space is either a name (the device spaces) or an array whose first
 * element names the colour-space family. For /Indexed the type of the base
 * colour space (array element 1) is reported instead.
 *
 * @param cs  colour-space object to classify; may be NULL.
 * @return the matching PDFColorSpace value, or CS_Unknown when cs is NULL,
 *         malformed, not recognised, or a PdfError is raised while reading it.
 */
PDFColorSpace PDFAnalyzer::getCSType(PdfObject* cs)
{
	if (!cs)
		return CS_Unknown;

	try
	{
		// colorspace is either a name or an array
		if (cs->IsName())
		{
			const PdfName & csName = cs->GetName();
			if (csName == "DeviceGray")
				return CS_DeviceGray;
			if (csName == "DeviceRGB")
				return CS_DeviceRGB;
			if (csName == "DeviceCMYK")
				return CS_DeviceCMYK;
		}
		else if (cs->IsArray())
		{
			PdfArray & csArr = cs->GetArray();
			// An empty array has no family name to inspect.
			if (csArr.GetSize() == 0)
				return CS_Unknown;

			const PdfObject & csTypeObj = csArr[0];
			if (!csTypeObj.IsName())
				return CS_Unknown;

			const PdfName & csTypeName = csTypeObj.GetName();
			if (csTypeName == "ICCBased")
				return CS_ICCBased;
			if (csTypeName == "CalGray")
				return CS_CalGray;
			if (csTypeName == "CalRGB")
				return CS_CalRGB;
			if (csTypeName == "Lab")
				return CS_Lab;
			if (csTypeName == "Indexed")
			{
				// The base colour space is the second array element.
				if (csArr.GetSize() < 2)
					return CS_Unknown;

				// Work on a copy so the recursive call gets an object that
				// lives for the duration of the call.
				PdfObject base = csArr[1];
				PdfObject* pBase = &base;
				if (base.IsReference())
				{
					// A reference can only be resolved through the owning
					// object list; give up if it is missing or the lookup fails.
					if (!cs->GetOwner())
						return CS_Unknown;
					pBase = cs->GetOwner()->GetObject(base.GetReference());
					if (!pBase)
						return CS_Unknown;
				}
				if (cs->GetOwner())
					pBase->SetOwner(cs->GetOwner());
				return getCSType(pBase);
			}
			if (csTypeName == "Separation")
				return CS_Separation;
			if (csTypeName == "DeviceN")
				return CS_DeviceN;
			if (csTypeName == "Pattern")
				return CS_Pattern;
		}
	}
	catch (PdfError & e)
	{
		qDebug() << "Error in identifying the color type";
		e.PrintErrorMsg();
		return CS_Unknown;
	}
	return CS_Unknown;
}
/**
 * Collects the font type and embedding state of a PDF font dictionary.
 *
 * The font type comes from /Subtype (for Type0 fonts, from the /Subtype of the
 * first descendant font). A font is reported as embedded when its font
 * descriptor has a /FontFile, /FontFile2 or /FontFile3 entry carrying a
 * stream; Type3 fonts are always reported as embedded.
 *
 * @param fontObj  font dictionary object; may be NULL.
 * @return a PDFFont describing the font. Fields are left as default-constructed
 *         when fontObj is NULL or has no name-valued /Subtype.
 *
 * For Type0 fonts the descendant's /FontDescriptor is fetched with
 * MustGetIndirectKey(), as before; presumably that raises a PdfError when the
 * key is missing, so callers should keep catching PdfError.
 */
PDFFont PDFAnalyzer::getFontInfo(PdfObject* fontObj)
{
	PDFFont currFont;
	if (!fontObj)
		return currFont;

	PdfObject* subtype = fontObj->GetIndirectKey("Subtype");
	if (!subtype || !subtype->IsName())
		return currFont;

	const PdfName & subtypeName = subtype->GetName();
	PdfObject* fontDesc = fontObj->GetIndirectKey("FontDescriptor");

	if (subtypeName == "Type1")
		currFont.fontType = F_Type1;
	else if (subtypeName == "MMType1")
		currFont.fontType = F_MMType1;
	else if (subtypeName == "TrueType")
		currFont.fontType = F_TrueType;
	else if (subtypeName == "Type3")
	{
		currFont.fontType = F_Type3;
		currFont.isEmbedded = true;
		fontDesc = NULL;
	}
	else if (subtypeName == "Type0")
	{
		PdfObject* descendantFonts = fontObj->GetIndirectKey("DescendantFonts");
		if (descendantFonts && descendantFonts->IsArray() && descendantFonts->GetArray().GetSize() > 0)
		{
			// Point at the array element itself rather than a local copy:
			// fontDesc may end up pointing inside this object and is used
			// after this scope ends.
			PdfObject* descendantFont = &descendantFonts->GetArray()[0];
			if (descendantFont->IsReference())
			{
				// Resolve an indirect descendant font through the owning object list.
				descendantFont = descendantFonts->GetOwner()
					? descendantFonts->GetOwner()->GetObject(descendantFont->GetReference())
					: NULL;
			}
			else if (descendantFonts->GetOwner())
			{
				// A direct dictionary needs an owner so that its own
				// indirect keys can be resolved.
				descendantFont->SetOwner(descendantFonts->GetOwner());
			}

			if (descendantFont)
			{
				PdfObject* subtypeDescFont = descendantFont->GetIndirectKey("Subtype");
				fontDesc = descendantFont->MustGetIndirectKey("FontDescriptor");
				if (subtypeDescFont && subtypeDescFont->IsName())
				{
					if (subtypeDescFont->GetName() == "CIDFontType0")
						currFont.fontType = F_CIDFontType0;
					else if (subtypeDescFont->GetName() == "CIDFontType2")
						currFont.fontType = F_CIDFontType2;
				}
			}
		}
	}

	if (fontDesc)
	{
		PdfObject* fontFile = fontDesc->GetIndirectKey("FontFile");
		PdfObject* fontFile2 = fontDesc->GetIndirectKey("FontFile2");
		PdfObject* fontFile3 = fontDesc->GetIndirectKey("FontFile3");
		if (fontFile && fontFile->HasStream())
			currFont.isEmbedded = true;
		if (fontFile2 && fontFile2->HasStream())
			currFont.isEmbedded = true;
		if (fontFile3 && fontFile3->HasStream())
		{
			currFont.isEmbedded = true;
			// A FontFile3 stream with /Subtype /OpenType marks an OpenType font program.
			PdfObject* ff3Subtype = fontFile3->GetIndirectKey("Subtype");
			if (ff3Subtype && ff3Subtype->IsName() && ff3Subtype->GetName() == "OpenType")
				currFont.isOpenType = true;
		}
	}
	return currFont;
}
/**
 * Builds the list of filter types that apply to an object.
 *
 * pObject may be a dictionary (its /Filter key is used), or directly the
 * filter name or the array of filters.
 *
 * @param pObject  dictionary, name or array describing the filters.
 * @return the filter types in the order they are listed; empty when a
 *         dictionary has no /Filter key or pObject is of another type.
 * @throws PdfError (ePdfError_InvalidDataType) when a filter array contains
 *         a reference that cannot be resolved to a name, or an element that
 *         is neither a name nor a reference.
 */
TVecFilters PdfFilterFactory::CreateFilterList( const PdfObject* pObject )
{
    TVecFilters filters;

    const PdfObject* pObj = NULL;

    if( pObject->IsDictionary() && pObject->GetDictionary().HasKey( "Filter" ) )
        pObj = pObject->GetDictionary().GetKey( "Filter" );
    else if( pObject->IsArray() || pObject->IsName() )
        pObj = pObject;

    if( !pObj )
        // Object had no /Filter key. Return an empty filter list.
        return filters;

    if( pObj->IsName() )
    {
        filters.push_back( PdfFilterFactory::FilterNameToType( pObj->GetName() ) );
    }
    else if( pObj->IsArray() )
    {
        for( TCIVariantList it = pObj->GetArray().begin(); it != pObj->GetArray().end(); ++it )
        {
            if( (*it).IsName() )
            {
                filters.push_back( PdfFilterFactory::FilterNameToType( (*it).GetName() ) );
            }
            else if( (*it).IsReference() )
            {
                // A reference can only be resolved through the owning object
                // list; an object without an owner is treated like an
                // unresolvable reference instead of being dereferenced.
                PdfObject* pFilter = pObject->GetOwner()
                    ? pObject->GetOwner()->GetObject( (*it).GetReference() )
                    : NULL;
                if( pFilter == NULL )
                {
                    PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Filter array contained unexpected reference" );
                }
                if( !pFilter->IsName() )
                {
                    PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Filter array contained unexpected non-name type" );
                }

                filters.push_back( PdfFilterFactory::FilterNameToType( pFilter->GetName() ) );
            }
            else
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Filter array contained unexpected non-name type" );
            }
        }
    }

    return filters;
}
void PDFProcessor::start () { int nNum = 0; try { PdfObject* pObj = NULL; // open document qDebug() << "Opening file: " << filename.toStdString().c_str(); PdfMemDocument document( filename.toStdString().c_str() ); // m_pszOutputDirectory = const_cast<char*>(pszOutput); TCIVecObjects it = document.GetObjects().begin(); while( it != document.GetObjects().end() ) { if( (*it)->IsDictionary() ) { PdfObject* pObjType = (*it)->GetDictionary().GetKey( PdfName::KeyType ); PdfObject* pObjSubType = (*it)->GetDictionary().GetKey( PdfName::KeySubtype ); if( ( pObjType && pObjType->IsName() && ( pObjType->GetName().GetName() == "XObject" ) ) || ( pObjSubType && pObjSubType->IsName() && ( pObjSubType->GetName().GetName() == "Image" ) ) ) { pObj = (*it)->GetDictionary().GetKey( PdfName::KeyFilter ); if( pObj && pObj->IsArray() && pObj->GetArray().GetSize() == 1 && pObj->GetArray()[0].IsName() && (pObj->GetArray()[0].GetName().GetName() == "DCTDecode") ) pObj = &pObj->GetArray()[0]; std::string filterName = pObj->GetName().GetName(); bool processed = 0; if( pObj && pObj->IsName() && ( filterName == "DCTDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "JPXDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "FlateDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } // else we found something strange, we do not care about it for now. 
if (processed == false) { qDebug() << "Unknown image type found:" << QString::fromStdString(filterName) << "\n"; nNum++; } document.FreeObjectMemory( *it ); } } ++it; } } catch( PdfError & e ) { qDebug() << "Error: An error ocurred during processing the pdf file:" << e.GetError(); e.PrintErrorMsg(); return;// e.GetError(); } // TODO: statistics of no of images etc // nNum = extractor.GetNumImagesExtracted(); qDebug() << "Extracted " << nNum << " images successfully from the PDF file.\n"; }
/**
 * Builds font metrics from an existing font dictionary and its descriptor.
 *
 * For Type1/TrueType fonts the widths are taken from the font's /Widths array
 * (with /FirstChar and /LastChar), or, when /Widths is absent, from the
 * descriptor's /MissingWidth. For CIDFontType0/CIDFontType2 fonts the width
 * table is built from /DW and the /W array of the font dictionary.
 *
 * @param pFont        the font dictionary (for CID fonts: the descendant font).
 * @param pDescriptor  the font descriptor dictionary.
 * @param pEncoding    encoding stored alongside the metrics.
 * @throws PdfError  ePdfError_InvalidHandle when pFont or pDescriptor is NULL,
 *                   ePdfError_NoObject when /Subtype is missing or neither
 *                   /Widths nor /MissingWidth is defined,
 *                   ePdfError_UnsupportedFontFormat for any other subtype.
 */
PdfFontMetricsObject::PdfFontMetricsObject( PdfObject* pFont, PdfObject* pDescriptor, const PdfEncoding* const pEncoding )
    : PdfFontMetrics( ePdfFontType_Unknown, "", NULL ),
      m_pEncoding( pEncoding ), m_dDefWidth(0.0)
{
    if( !pFont || !pDescriptor )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    // Give the pointer a defined value on every path (the CID branch never sets it).
    m_missingWidth = NULL;

    const PdfObject* pSubType = pFont->GetDictionary().GetKey( PdfName::KeySubtype );
    if( !pSubType )
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines no Subtype!" );
    }
    const PdfName & rSubType = pSubType->GetName();

    // OC 15.08.2010 BugFix: /FirstChar /LastChar /Widths are in the Font dictionary and not in the FontDescriptor
    if ( rSubType == PdfName("Type1") || rSubType == PdfName("TrueType") )
    {
        PdfObject* pObj = pDescriptor->GetIndirectKey( "FontName" );
        if( pObj )
            m_sName = pObj->GetName();

        pObj = pDescriptor->GetIndirectKey( "FontBBox" );
        if( pObj )
            m_bbox = pObj->GetArray();

        m_nFirst = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "FirstChar", 0L ));
        m_nLast  = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "LastChar", 0L ));

        // OC 15.08.2010 BugFix: GetIndirectKey() instead of GetDictionary().GetKey() and "Widths" instead of "Width"
        PdfObject* widths = pFont->GetIndirectKey( "Widths" );
        if( widths != NULL )
        {
            m_width = widths->GetArray();
        }
        else
        {
            widths = pDescriptor->GetDictionary().GetKey( "MissingWidth" );
            if( widths == NULL )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines neither Widths, nor MissingWidth values!" );
            }
            // Must happen outside the NULL check above, otherwise it is never reached.
            m_missingWidth = widths;
        }
    }
    else if ( rSubType == PdfName("CIDFontType0") || rSubType == PdfName("CIDFontType2") )
    {
        PdfObject* pObj = pDescriptor->GetIndirectKey( "FontName" );
        if( pObj )
            m_sName = pObj->GetName();

        pObj = pDescriptor->GetIndirectKey( "FontBBox" );
        if( pObj )
            m_bbox = pObj->GetArray();

        m_nFirst = 0;
        m_nLast  = 0;

        m_dDefWidth = static_cast<double>(pFont->GetDictionary().GetKeyAsLong( "DW", 1000L ));
        PdfVariant default_width( m_dDefWidth );

        // The table starts with a single default-width entry for CID 0.
        m_width.push_back( default_width );

        PdfObject* pw = pFont->GetIndirectKey( "W" );
        if( pw )
        {
            // /W holds a sequence of either
            //   c [w1 w2 ... wn]     (individual widths starting at CID c)
            //   c_first c_last w     (one width for a CID range)
            PdfArray w = pw->GetArray();
            const int nEntries = static_cast<int>(w.GetSize());
            int pos = 0;
            while( pos < nEntries )
            {
                const int start = static_cast<int>(w[pos++].GetNumber());
                PODOFO_ASSERT (start >= 0);

                // Stop on a truncated trailing entry instead of reading past the array.
                if( pos >= nEntries )
                    break;

                if( w[pos].IsArray() )
                {
                    PdfArray widths = w[pos++].GetArray();
                    const int length = start + static_cast<int>(widths.GetSize());
                    PODOFO_ASSERT (length >= start);
                    if( length > static_cast<int>(m_width.GetSize()) )
                    {
                        m_width.resize( length, default_width );
                    }
                    for( int i = 0; i < static_cast<int>(widths.GetSize()); ++i )
                    {
                        m_width[start + i] = widths[i];
                    }
                }
                else
                {
                    // The range form needs both c_last and the width.
                    if( pos + 1 >= nEntries )
                        break;

                    const int end = static_cast<int>(w[pos++].GetNumber());
                    // Indices start..end are written below, so the table must hold end + 1 entries.
                    const int length = end + 1;
                    PODOFO_ASSERT (length >= start);
                    if( length > static_cast<int>(m_width.GetSize()) )
                    {
                        m_width.resize( length, default_width );
                    }
                    pdf_int64 width = w[pos++].GetNumber();
                    for( int i = start; i <= end; ++i )
                        m_width[i] = PdfVariant( width );
                }
            }
        }
        m_nLast = m_width.GetSize() - 1;
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_UnsupportedFontFormat, rSubType.GetEscapedName().c_str() );
    }

    m_nWeight      = static_cast<unsigned int>(pDescriptor->GetDictionary().GetKeyAsLong( "FontWeight", 400L ));
    m_nItalicAngle = static_cast<int>(pDescriptor->GetDictionary().GetKeyAsLong( "ItalicAngle", 0L ));

    // Ascent/Descent are stored as given and additionally scaled by 1/1000.
    m_dPdfAscent   = pDescriptor->GetDictionary().GetKeyAsReal( "Ascent", 0.0 );
    m_dAscent      = m_dPdfAscent / 1000.0;
    m_dPdfDescent  = pDescriptor->GetDictionary().GetKeyAsReal( "Descent", 0.0 );
    m_dDescent     = m_dPdfDescent / 1000.0;
    m_dLineSpacing = m_dAscent + m_dDescent;

    // Try to find some sensible values
    m_dUnderlineThickness = 1.0;
    m_dUnderlinePosition  = 0.0;
    m_dStrikeOutThickness = m_dUnderlinePosition;
    m_dStrikeOutPosition  = m_dAscent / 2.0;

    m_bSymbol = false; // TODO
}
/**
 * Creates a PdfFont from an existing font dictionary.
 *
 * Handles the subtypes Type0, Type1 (including the 14 standard fonts that
 * come without a FontDescriptor) and TrueType.
 *
 * @param pObject  the font dictionary; its /Type must be /Font.
 * @return the new font, or NULL when the subtype is not handled or the
 *         encoding/descriptor needed to build the font is unavailable.
 * @throws PdfError  ePdfError_InvalidHandle when pObject is NULL,
 *                   ePdfError_InvalidDataType when /Type is not /Font,
 *                   /Subtype is missing or a Type0 font has no usable
 *                   /DescendantFonts array,
 *                   ePdfError_NoObject when a Type1 font has neither a
 *                   FontDescriptor nor a /BaseFont name.
 */
PdfFont* PdfFontFactory::CreateFont( FT_Library*, PdfObject* pObject )
{
    PdfFontMetrics* pMetrics    = NULL;
    PdfFont*        pFont       = NULL;
    PdfObject*      pDescriptor = NULL;
    PdfObject*      pEncoding   = NULL;

    if( !pObject )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    const PdfObject* pType = pObject->GetDictionary().GetKey( PdfName::KeyType );
    if( !pType || !pType->IsName() || pType->GetName() != PdfName("Font") )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    const PdfObject* pSubTypeObj = pObject->GetDictionary().GetKey( PdfName::KeySubtype );
    if( !pSubTypeObj || !pSubTypeObj->IsName() )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }
    const PdfName & rSubType = pSubTypeObj->GetName();

    if( rSubType == PdfName("Type0") )
    {
        // The PDF reference states that DescendantFonts must be an array,
        // some applications (e.g. MS Word) put the array into an indirect object though.
        PdfObject* const pDescendantObj = pObject->GetIndirectKey( "DescendantFonts" );
        if( !pDescendantObj || !pDescendantObj->IsArray() )
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Type0 Font: No DescendantFonts" );
        }

        PdfArray & descendants = pDescendantObj->GetArray();
        PdfObject* pFontObject = NULL;

        if( descendants.GetSize() >= 1 )
        {
            // Only the first element of DescendantFonts is used.
            PdfObject & descendant = descendants[0];
            if( descendant.IsReference() )
            {
                if( pObject->GetOwner() )
                    pFontObject = pObject->GetOwner()->GetObject( descendant.GetReference() );
            }
            else
            {
                // Direct CIDFont dictionary: use it in place and hand it the
                // owner so that its indirect keys can be resolved.
                pFontObject = &descendant;
                if( pObject->GetOwner() )
                    pFontObject->SetOwner( pObject->GetOwner() );
            }

            if( pFontObject )
                pDescriptor = pFontObject->GetIndirectKey( "FontDescriptor" );
        }
        pEncoding = pObject->GetIndirectKey( "Encoding" );

        if( pEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
            const PdfEncoding* const pPdfEncoding =
                PdfEncodingObjectFactory::CreateEncoding( pEncoding );

            // OC 15.08.2010 BugFix: Parameter pFontObject added: TODO: untested
            pMetrics = new PdfFontMetricsObject( pFontObject, pDescriptor, pPdfEncoding );
            pFont    = new PdfFontCID( pMetrics, pPdfEncoding, pObject, false );
        }
    }
    else if( rSubType == PdfName("Type1") )
    {
        // TODO: Old documents do not have a FontDescriptor for
        //       the 14 standard fonts. This suggestions is
        //       deprecated now, but give us problems with old documents.
        pDescriptor = pObject->GetIndirectKey( "FontDescriptor" );
        pEncoding   = pObject->GetIndirectKey( "Encoding" );

        // OC 13.08.2010: Handle missing FontDescriptor for the 14 standard fonts:
        if( !pDescriptor )
        {
            // Check if its a PdfFontType1Base14
            PdfObject* pBaseFont = pObject->GetIndirectKey( "BaseFont" );
            if( !pBaseFont || !pBaseFont->IsName() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "No BaseFont object found"
                                         " by reference in given object" );
            }

            const char* pszBaseFontName = pBaseFont->GetName().GetName().c_str();
            // Named differently from the outer pMetrics to avoid shadowing it.
            PdfFontMetricsBase14* pMetrics14 = PODOFO_Base14FontDef_FindBuiltinData( pszBaseFontName );
            if( pMetrics14 != NULL )
            {
                // pEncoding may be undefined, found a valid pdf with
                //   20 0 obj
                //   <<
                //   /Type /Font
                //   /BaseFont /ZapfDingbats
                //   /Subtype /Type1
                //   >>
                //   endobj
                // If pEncoding is null then
                // use StandardEncoding for Courier, Times, Helvetica font families
                // and special encodings for Symbol and ZapfDingbats
                const PdfEncoding* pPdfEncoding = NULL;
                if( pEncoding != NULL )
                    pPdfEncoding = PdfEncodingObjectFactory::CreateEncoding( pEncoding );
                else if( !pMetrics14->IsSymbol() )
                    pPdfEncoding = PdfEncodingFactory::GlobalStandardEncodingInstance();
                else if( strcmp(pszBaseFontName, "Symbol") == 0 )
                    pPdfEncoding = PdfEncodingFactory::GlobalSymbolEncodingInstance();
                else if( strcmp(pszBaseFontName, "ZapfDingbats") == 0 )
                    pPdfEncoding = PdfEncodingFactory::GlobalZapfDingbatsEncodingInstance();

                return new PdfFontType1Base14( pMetrics14, pPdfEncoding, pObject );
            }
        }

        const PdfEncoding* pPdfEncoding = NULL;
        if( pEncoding != NULL )
            pPdfEncoding = PdfEncodingObjectFactory::CreateEncoding( pEncoding );
        else if( pDescriptor )
        {
            // OC 18.08.2010 TODO: Encoding has to be taken from the font's built-in encoding
            // Its extremely complicated to interpret the type1 font programs
            // so i try to determine if its a symbolic font by reading the FontDescriptor Flags
            // Flags & 4 --> Symbolic, Flags & 32 --> Nonsymbolic
            pdf_int32 lFlags = static_cast<pdf_int32>(pDescriptor->GetDictionary().GetKeyAsLong( "Flags", 0L ));
            if( lFlags & 32 ) // Nonsymbolic, otherwise pEncoding remains NULL
                pPdfEncoding = PdfEncodingFactory::GlobalStandardEncodingInstance();
        }

        if( pPdfEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
            // OC 15.08.2010 BugFix: Parameter pObject added:
            pMetrics = new PdfFontMetricsObject( pObject, pDescriptor, pPdfEncoding );
            pFont    = new PdfFontType1( pMetrics, pPdfEncoding, pObject );
        }
    }
    else if( rSubType == PdfName("TrueType") )
    {
        pDescriptor = pObject->GetIndirectKey( "FontDescriptor" );
        pEncoding   = pObject->GetIndirectKey( "Encoding" );

        if( pEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
            const PdfEncoding* const pPdfEncoding =
                PdfEncodingObjectFactory::CreateEncoding( pEncoding );

            // OC 15.08.2010 BugFix: Parameter pObject added:
            pMetrics = new PdfFontMetricsObject( pObject, pDescriptor, pPdfEncoding );
            pFont    = new PdfFontTrueType( pMetrics, pPdfEncoding, pObject );
        }
    }

    return pFont;
}
void PdfMemStream::FlateCompress() { PdfObject* pObj; PdfVariant vFilter( PdfName("FlateDecode" ) ); PdfVariant vFilterList; PdfArray tFilters; PdfArray::const_iterator tciFilters; if( !m_lLength ) return; // ePdfError_ErrOk // TODO: Handle DecodeParms if( m_pParent->GetDictionary().HasKey( "Filter" ) ) { pObj = m_pParent->GetIndirectKey( "Filter" ); if( pObj->IsName() ) { if( pObj->GetName() != "DCTDecode" && pObj->GetName() != "FlateDecode" ) { tFilters.push_back( vFilter ); tFilters.push_back( *pObj ); } } else if( pObj->IsArray() ) { tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { if( (*tciFilters).IsName() ) { // do not compress DCTDecoded are already FlateDecoded streams again if( (*tciFilters).GetName() == "DCTDecode" || (*tciFilters).GetName() == "FlateDecode" ) { return; } } ++tciFilters; } tFilters.push_back( vFilter ); tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { tFilters.push_back( (*tciFilters) ); ++tciFilters; } } else return; vFilterList = PdfVariant( tFilters ); m_pParent->GetDictionary().AddKey( "Filter", vFilterList ); FlateCompressStreamData(); // throws an exception on error } else { m_pParent->GetDictionary().AddKey( "Filter", PdfName( "FlateDecode" ) ); FlateCompressStreamData(); } }
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS) { PdfObject* bmObj = extGStateObj->GetIndirectKey("BM"); if (bmObj && bmObj->IsName()) { currGS.blendModes.clear(); currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str()); if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible")) hasTransparency = true; } else if (bmObj && bmObj->IsArray()) { PdfArray arr = bmObj->GetArray(); currGS.blendModes.clear(); for(int i=0; i<arr.GetSize(); ++i) currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str()); if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible")) hasTransparency = true; } PdfObject* caObj = extGStateObj->GetIndirectKey("ca"); if (caObj && (caObj->IsReal() || caObj->IsNumber())) { currGS.fillAlphaConstant = caObj->GetReal(); if (caObj->GetReal() < 1) hasTransparency = true; } PdfObject* cAObj = extGStateObj->GetIndirectKey("CA"); if (cAObj && (cAObj->IsReal() || cAObj->IsNumber())) { if (cAObj->GetReal() < 1) hasTransparency = true; } PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask"); if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None")) hasTransparency = true; PdfObject* fontObj = extGStateObj->GetIndirectKey("Font"); if (fontObj && fontObj->IsArray()) { PdfArray arr = fontObj->GetArray(); if (arr[0].IsReference()) { PdfReference ref = arr[0].GetReference(); PdfObject* fontObject = m_doc->GetObjects().GetObject(ref); if (fontObject) { PDFFont font = getFontInfo(fontObject); usedFonts.append(font); currGS.font.first = font; currGS.font.second = arr[1].GetReal(); } } } PdfObject* lwObj = extGStateObj->GetIndirectKey("LW"); if (lwObj) currGS.lineWidth = lwObj->GetReal(); PdfObject* lcObj = extGStateObj->GetIndirectKey("LC"); if (lcObj) currGS.lineCap = lcObj->GetNumber(); PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ"); if (ljObj) 
currGS.lineJoin = ljObj->GetNumber(); PdfObject* mlObj = extGStateObj->GetIndirectKey("ML"); if (mlObj) currGS.miterLimit = mlObj->GetReal(); PdfObject* dObj = extGStateObj->GetIndirectKey("D"); if (dObj) { PdfArray dashArr = dObj->GetArray()[0]; currGS.dashPattern.first.clear(); for (int i=0; i<dashArr.GetSize(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = dObj->GetArray()[1].GetNumber(); } }
bool PDFAnalyzer::inspectCanvas(PdfCanvas* canvas, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, QList<PDFImage> & imgs) { // this method can be used to get used color spaces, detect transparency, and get used fonts in either PdfPage or PdfXObject PdfObject* colorSpaceRes; PdfObject* xObjects; PdfObject* transGroup; PdfObject* extGState; PdfObject* fontRes; QMap<PdfName, PDFColorSpace> processedNamedCS; QMap<PdfName, PDFFont> processedNamedFont; QList<PdfName> processedNamedXObj; QList<PdfName> processedNamedGS; try { // get hold of a PdfObject pointer of this canvas // needed for the finding resources code below to work PdfPage* page = dynamic_cast<PdfPage*>(canvas); PdfObject* canvasObject = page?(page->GetObject()):((dynamic_cast<PdfXObject*>(canvas))->GetObject()); // find a resource with ColorSpace entry PdfObject* resources = canvas->GetResources(); for (PdfObject* par = canvasObject; par && !resources; par = par->GetIndirectKey("Parent")) { resources = par->GetIndirectKey("Resources"); } colorSpaceRes = resources?resources->GetIndirectKey("ColorSpace"):NULL; xObjects = resources?resources->GetIndirectKey("XObject"):NULL; extGState = resources?resources->GetIndirectKey("ExtGState"):NULL; fontRes = resources?resources->GetIndirectKey("Font"):NULL; // getting the transparency group of this content stream (if available) transGroup = canvasObject?canvasObject->GetIndirectKey("Group"):NULL; if (transGroup) { PdfObject* subtype = transGroup->GetIndirectKey("S"); if (subtype && subtype->GetName() == "Transparency") { // having transparency group means there's transparency in the PDF hasTransparency = true; // reporting the color space used in transparency group (Section 7.5.5, PDF 1.6 Spec) PdfObject* cs = transGroup->GetIndirectKey("CS"); if (cs) { PDFColorSpace retval = getCSType(cs); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); } } } } catch (PdfError & e) { qDebug() 
<< "Error in analyzing stream's resources."; e.PrintErrorMsg(); return false; } try { // start parsing the content stream PdfContentsTokenizer tokenizer(canvas); EPdfContentsType t; const char * kwText; PdfVariant var; bool readToken; int tokenNumber = 0; QList<PdfVariant> args; bool inlineImgDict = false; QStack<PDFGraphicState> gsStack; PDFGraphicState currGS; while ((readToken = tokenizer.ReadNext(t, kwText, var))) { ++tokenNumber; if (t == ePdfContentsType_Variant) { args.append(var); } else if (t == ePdfContentsType_Keyword) { QString kw(kwText); switch(kwNameMap.value(kw, KW_Undefined)) { case KW_q: gsStack.push(currGS); break; case KW_Q: currGS = gsStack.pop(); break; case KW_cm: { if (args.size() == 6) { double mt[6]; for (int i=0; i<6; ++i) { mt[i] = args[i].GetReal(); } QMatrix transMatrix(mt[0], mt[1], mt[2], mt[3], mt[4], mt[5]); currGS.ctm = transMatrix*currGS.ctm; } } break; case KW_w: currGS.lineWidth = args[0].GetReal(); break; case KW_J: currGS.lineCap = args[0].GetNumber(); break; case KW_j: currGS.lineJoin = args[0].GetNumber(); break; case KW_M: currGS.lineJoin = args[0].GetReal(); break; case KW_d: { currGS.dashPattern.first.clear(); PdfArray dashArr = args[0].GetArray(); for (int i=0; i<dashArr.size(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = args[0].GetNumber(); } break; case KW_g: if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); currGS.fillCS = CS_DeviceGray; currGS.fillColor.clear(); currGS.fillColor.append(args[0].GetReal()); break; case KW_G: if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); currGS.strokeCS = CS_DeviceGray; currGS.strokeColor.clear(); currGS.strokeColor.append(args[0].GetReal()); break; case KW_rg: if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); currGS.fillCS = CS_DeviceRGB; currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) 
currGS.fillColor.append(args[i].GetReal()); break; case KW_RG: if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); currGS.strokeCS = CS_DeviceRGB; currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_k: if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); currGS.fillCS = CS_DeviceCMYK; currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) currGS.fillColor.append(args[i].GetReal()); break; case KW_K: if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); currGS.strokeCS = CS_DeviceCMYK; currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_cs: { if (args.size() == 1 && args[0].IsName()) { if (args[0].GetName() == "DeviceGray") { currGS.fillCS = CS_DeviceGray; currGS.fillColor.clear(); currGS.fillColor.append(0); if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); } else if (args[0].GetName() == "DeviceRGB") { currGS.fillCS = CS_DeviceRGB; currGS.fillColor.clear(); for (int i=0; i<3; ++i) currGS.fillColor.append(0); if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); } else if (args[0].GetName() == "DeviceCMYK") { currGS.fillCS = CS_DeviceCMYK; currGS.fillColor.clear(); for (int i=0; i<3; ++i) currGS.fillColor.append(0); currGS.fillColor.append(1); if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); } else if (args[0].GetName() == "Pattern") { currGS.fillCS = CS_Pattern; if (!usedColorSpaces.contains(CS_Pattern)) usedColorSpaces.append(CS_Pattern); } else { if (processedNamedCS.contains(args[0].GetName())) { currGS.fillCS = processedNamedCS.value(args[0].GetName()); } else { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName())) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName()); PDFColorSpace retval = 
getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); currGS.fillCS = retval; processedNamedCS.insert(args[0].GetName(), retval); } else { qDebug() << "Supplied colorspace is undefined!"; return false; } } } } else { qDebug() << "Wrong syntax in specifying color space!"; return false; } } break; case KW_CS: { if (args.size() == 1 && args[0].IsName()) { if (args[0].GetName() == "DeviceGray") { currGS.strokeCS = CS_DeviceGray; currGS.strokeColor.clear(); currGS.strokeColor.append(0); if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); } else if (args[0].GetName() == "DeviceRGB") { currGS.fillCS = CS_DeviceRGB; currGS.strokeColor.clear(); for (int i=0; i<3; ++i) currGS.strokeColor.append(0); if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); } else if (args[0].GetName() == "DeviceCMYK") { currGS.fillCS = CS_DeviceCMYK; currGS.strokeColor.clear(); for (int i=0; i<3; ++i) currGS.strokeColor.append(0); currGS.strokeColor.append(1); if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); } else if (args[0].GetName() == "Pattern") { currGS.fillCS = CS_Pattern; if (!usedColorSpaces.contains(CS_Pattern)) usedColorSpaces.append(CS_Pattern); } else { if (processedNamedCS.contains(args[0].GetName())) { currGS.strokeCS = processedNamedCS.value(args[0].GetName()); } else { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName())) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName()); PDFColorSpace retval = getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); currGS.strokeCS = retval; processedNamedCS.insert(args[0].GetName(), retval); } else { qDebug() << "Supplied colorspace is undefined!"; return false; } } } } else { qDebug() << "Wrong syntax in specifying color space!"; return false; } } break; case KW_sc: currGS.fillColor.clear(); 
for (int i=0; i<args.size(); ++i) currGS.fillColor.append(args[i].GetReal()); break; case KW_SC: currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_scn: currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) { if (args[i].IsReal() || args[i].IsNumber()) currGS.fillColor.append(args[i].GetReal()); } break; case KW_SCN: currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) { if (args[i].IsReal() || args[i].IsNumber()) currGS.strokeColor.append(args[i].GetReal()); } break; case KW_Do: // image or form XObject { if (!processedNamedXObj.contains(args[0].GetName())) { if (args.size() == 1 && args[0].IsName() && xObjects) { PdfObject* xObject = xObjects->GetIndirectKey(args[0].GetName()); PdfObject* subtypeObject = xObject?xObject->GetIndirectKey("Subtype"):NULL; if (subtypeObject && subtypeObject->IsName()) { if (subtypeObject->GetName() == "Image") { PdfObject* imgColorSpace = xObject->GetIndirectKey("ColorSpace"); if (imgColorSpace) { PDFColorSpace retval = getCSType(imgColorSpace); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); } PdfObject* sMaskObj = xObject->GetIndirectKey("SMask"); if (sMaskObj) hasTransparency = true; PDFImage img; img.imgName = args[0].GetName().GetEscapedName().c_str(); double width = xObject->GetIndirectKey("Width")->GetReal(); double height = xObject->GetIndirectKey("Height")->GetReal(); img.dpiX = qRound(width/(currGS.ctm.m11()/72)); img.dpiY = qRound(height/(currGS.ctm.m22()/72)); imgs.append(img); } else if (subtypeObject->GetName() == "Form") { PdfXObject xObj(xObject); inspectCanvas(&xObj, usedColorSpaces, hasTransparency, usedFonts, imgs); // recursive call } } else { qDebug() << "Supplied external object is undefined!"; return false; } processedNamedXObj.append(args[0].GetName()); } else { qDebug() << "Wrong syntax for Do operator or there's no XObject defined!"; return false; } } } break; case KW_BI: 
inlineImgDict = true; break; case KW_ID: if (inlineImgDict) { PdfName colorspace("ColorSpace"); PdfName cs("CS"); if (args.contains(colorspace) || args.contains(cs)) { int csIdx = args.contains(colorspace)?args.indexOf(colorspace):args.indexOf(cs); if (args[csIdx+1].IsName()) { PdfName csName = args[csIdx+1].GetName(); if ((csName == "G" || csName == "DeviceGray") && !usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); else if ((csName == "RGB" || csName == "DeviceRGB") && !usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); else if ((csName == "CMYK" || csName == "DeviceCMYK") && !usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); else if (!processedNamedCS.contains(csName)) { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(csName)) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(csName); if (csEntry) { PDFColorSpace retval = getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); processedNamedCS.insert(csName, retval); } } else { qDebug() << "Supplied colorspace for inline image is undefined!"; return false; } } } } PdfName height("Height"); PdfName h("H"); PdfName width("Width"); PdfName w("W"); if ((args.contains(height) || args.contains(h)) && (args.contains(width) || args.contains(w))) { int heightIdx = args.contains(height)?args.indexOf(height):args.indexOf(h); int widthIdx = args.contains(width)?args.indexOf(width):args.indexOf(w); double height = args[heightIdx+1].GetReal(); double width = args[widthIdx+1].GetReal(); PDFImage img; img.imgName = "Inline Image"; img.dpiX = qRound(width/(currGS.ctm.m11()/72)); img.dpiY = qRound(height/(currGS.ctm.m22()/72)); imgs.append(img); } inlineImgDict = false; } break; case KW_gs: { if (!processedNamedGS.contains(args[0].GetName())) { if (args.size() == 1 && args[0].IsName() && extGState) { PdfObject* extGStateObj = extGState->GetIndirectKey(args[0].GetName()); if 
(extGStateObj) { inspectExtGStateObj(extGStateObj, usedColorSpaces, hasTransparency, usedFonts, currGS); } else { qDebug() << "Named graphic state used with gs operator is undefined in current ExtGState"; return false; } processedNamedGS.append(args[0].GetName()); } else { qDebug() << "Wrong syntax in applying extended graphic state (gs operator) or there's no ExtGState defined!"; return false; } } } break; case KW_Tf: { if (processedNamedFont.contains(args[0].GetName())) { currGS.font.first = processedNamedFont.value(args[0].GetName()); currGS.font.second = args[1].GetReal(); } else { if (args.size() == 2 && args[0].IsName() && fontRes) { PdfObject* fontObj = fontRes->GetIndirectKey(args[0].GetName()); if (fontObj) { PDFFont retval = getFontInfo(fontObj); usedFonts.append(retval); processedNamedFont.insert(args[0].GetName(), retval); currGS.font.first = retval; currGS.font.second = args[1].GetReal(); } else { qDebug() << "The specified font cannot be found in current Resources!"; return false; } } else { qDebug() << "Wrong syntax in use of Tf operator or there's no Font defined in current Resources dictionary!"; return false; } } } break; case KW_Undefined: default: break; } args.clear(); } } } catch (PdfError & e) { qDebug() << "Error in parsing content stream"; e.PrintErrorMsg(); return false; } return true; }
/**
 * Reads a cross-reference stream located at lOffset.
 *
 * The stream dictionary is merged into the trailer first. Unless
 * bReadOnlyTrailer is set, the /W field widths and the optional /Index
 * subsections are read, every complete entry of the decoded stream is passed
 * to ReadXRefStreamEntry(), and a /Prev cross-reference stream is then read
 * recursively.
 *
 * @param lOffset           file offset of the cross-reference stream object.
 * @param bReadOnlyTrailer  when true, return right after merging the trailer.
 * @throws PdfError (ePdfError_NoXRef) when the object is not a valid
 *         cross-reference stream: wrong or missing /Type, missing /Size or
 *         /W, malformed /W or /Index, negative or all-zero field widths, or
 *         no stream data.
 */
void PdfParser::ReadXRefStreamContents( pdf_long lOffset, bool bReadOnlyTrailer )
{
    char*      pBuffer    = NULL;
    pdf_long   lBufferLen = 0;
    long long  lSize      = 0;
    PdfVariant vWArray;
    PdfObject* pObj;

    long       nW[W_ARRAY_SIZE] = { 0, 0, 0 };

    m_device.Device()->Seek( lOffset );

    PdfParserObject xrefObject( m_vecObjects, m_device, m_buffer );
    // Ignore the encryption in the XREF as the XREF stream must not be encrypted (see PDF Reference 3.4.7)
    xrefObject.ParseFile( NULL );

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeyType ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    pObj = xrefObject.GetDictionary().GetKey( PdfName::KeyType );
    if( !pObj->IsName() || ( pObj->GetName() != "XRef" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !m_pTrailer )
        m_pTrailer = new PdfParserObject( m_vecObjects, m_device, m_buffer );

    MergeTrailer( &xrefObject );

    if( bReadOnlyTrailer )
        return;

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeySize ) ||
        !xrefObject.GetDictionary().HasKey( "W" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    lSize   = xrefObject.GetDictionary().GetKeyAsLong( PdfName::KeySize, 0 );
    vWArray = *(xrefObject.GetDictionary().GetKey( "W" ));

    // The pdf reference states that W is always an array with 3 entries
    // all of them have to be integers
    if( !vWArray.IsArray() || vWArray.GetArray().size() != 3 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    long lEntryLen = 0; // bytes per cross-reference entry
    for( int i = 0; i < W_ARRAY_SIZE; i++ )
    {
        if( !vWArray.GetArray()[i].IsNumber() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        nW[i] = static_cast<long>(vWArray.GetArray()[i].GetNumber());
        // A negative width would move the read pointer backwards.
        if( nW[i] < 0 )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }
        lEntryLen += nW[i];
    }

    // With zero-length entries the read pointer would never advance.
    if( lEntryLen == 0 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    std::vector<long long> vecIndeces;
    // get the first object number in this crossref stream.
    // it is not required to have an index key though.
    if( xrefObject.GetDictionary().HasKey( "Index" ) )
    {
        // reuse vWArray!!
        vWArray = *(xrefObject.GetDictionary().GetKey( "Index" ));
        if( !vWArray.IsArray() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        for( TCIVariantList it = vWArray.GetArray().begin(); it != vWArray.GetArray().end(); ++it )
        {
            vecIndeces.push_back( (*it).GetNumber() );
        }
    }
    else
    {
        // Default: one subsection covering objects 0 .. Size-1.
        vecIndeces.push_back( 0 );
        vecIndeces.push_back( lSize );
    }

    // /Index consists of (first object, count) pairs.
    if( vecIndeces.size() % 2 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !xrefObject.HasStreamToParse() )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    xrefObject.GetStream()->GetFilteredCopy( &pBuffer, &lBufferLen );

    // The decoded copy is released with free() below, also when parsing an entry throws.
    char* const pStart = pBuffer;
    try
    {
        size_t nCurIndex = 0;
        // Only whole entries are read: at least lEntryLen bytes must remain.
        while( nCurIndex < vecIndeces.size() && lBufferLen - (pBuffer - pStart) >= lEntryLen )
        {
            int       nFirstObj = static_cast<int>(vecIndeces[nCurIndex]);
            long long nCount    = vecIndeces[nCurIndex+1];

            while( nCount > 0 && lBufferLen - (pBuffer - pStart) >= lEntryLen )
            {
                ReadXRefStreamEntry( pBuffer, lBufferLen, nW, nFirstObj++ );
                pBuffer += lEntryLen;
                --nCount;
            }

            nCurIndex += 2;
        }
    }
    catch( ... )
    {
        free( pStart );
        throw;
    }
    free( pStart );

    if( xrefObject.GetDictionary().HasKey("Prev") )
    {
        const pdf_long lPrev = static_cast<pdf_long>(xrefObject.GetDictionary().GetKeyAsLong( "Prev", 0 ));
        // A /Prev pointing back at this very stream would recurse without end;
        // longer cycles are not detected here.
        if( lPrev != lOffset )
            ReadXRefStreamContents( lPrev, bReadOnlyTrailer );
    }
}