/**
 * Build a PDFFont record (font type, embedded flag, OpenType flag) from a
 * PDF font dictionary.
 *
 * For Type0 fonts the first entry of DescendantFonts is inspected: its
 * Subtype selects CIDFontType0 / CIDFontType2 and its FontDescriptor is the
 * one checked for embedded font programs.
 *
 * @param fontObj the font dictionary; a NULL pointer yields a
 *                default-constructed PDFFont.
 * @return the collected information; fields that could not be determined keep
 *         whatever value PDFFont's default construction gives them.
 * @note MustGetIndirectKey() is still used for the descendant font's
 *       FontDescriptor, so a missing descriptor propagates as an exception to
 *       the caller, as before.
 */
PDFFont PDFAnalyzer::getFontInfo(PdfObject* fontObj)
{
	PDFFont currFont;
	if (!fontObj)
		return currFont;

	PdfObject* subtype = fontObj->GetIndirectKey("Subtype");
	if (!subtype || !subtype->IsName())
		return currFont;

	PdfObject* fontDesc = fontObj->GetIndirectKey("FontDescriptor");
	const PdfName& subtypeName = subtype->GetName();

	if (subtypeName == "Type1")
		currFont.fontType = F_Type1;
	else if (subtypeName == "MMType1")
		currFont.fontType = F_MMType1;
	else if (subtypeName == "TrueType")
		currFont.fontType = F_TrueType;
	else if (subtypeName == "Type3")
	{
		// Type3 glyphs are defined inside the PDF itself, so there is no
		// font file to look for.
		currFont.fontType = F_Type3;
		currFont.isEmbedded = true;
		fontDesc = NULL;
	}
	else if (subtypeName == "Type0")
	{
		PdfObject* descendantFonts = fontObj->GetIndirectKey("DescendantFonts");
		if (descendantFonts && descendantFonts->IsArray() && descendantFonts->GetArray().GetSize() > 0)
		{
			// Work on the array element itself (or on the object it refers
			// to) rather than on a local copy: pointers obtained from it
			// must stay valid for the FontFile checks further down.
			PdfObject* descendantFont = &(descendantFonts->GetArray()[0]);
			if (descendantFont->IsReference())
			{
				descendantFont = descendantFonts->GetOwner()
					? descendantFonts->GetOwner()->GetObject(descendantFont->GetReference())
					: NULL;
			}
			else
			{
				descendantFont->SetOwner(descendantFonts->GetOwner());
			}

			if (descendantFont)
			{
				PdfObject* subtypeDescFont = descendantFont->GetIndirectKey("Subtype");
				fontDesc = descendantFont->MustGetIndirectKey("FontDescriptor");
				if (subtypeDescFont && subtypeDescFont->IsName())
				{
					if (subtypeDescFont->GetName() == "CIDFontType0")
						currFont.fontType = F_CIDFontType0;
					else if (subtypeDescFont->GetName() == "CIDFontType2")
						currFont.fontType = F_CIDFontType2;
				}
			}
		}
	}

	if (fontDesc)
	{
		// Any of the three FontFile* keys carrying a stream means the font
		// program is embedded.
		PdfObject* fontFile = fontDesc->GetIndirectKey("FontFile");
		PdfObject* fontFile2 = fontDesc->GetIndirectKey("FontFile2");
		PdfObject* fontFile3 = fontDesc->GetIndirectKey("FontFile3");
		if (fontFile && fontFile->HasStream())
			currFont.isEmbedded = true;
		if (fontFile2 && fontFile2->HasStream())
			currFont.isEmbedded = true;
		if (fontFile3 && fontFile3->HasStream())
		{
			currFont.isEmbedded = true;
			PdfObject* ff3Subtype = fontFile3->GetIndirectKey("Subtype");
			if (ff3Subtype && ff3Subtype->IsName() && ff3Subtype->GetName() == "OpenType")
				currFont.isOpenType = true;
		}
	}
	return currFont;
}
/**
 * Classify a PDF colour space object.
 *
 * A colour space is either a name (DeviceGray, DeviceRGB, DeviceCMYK) or an
 * array whose first element names the family. For an Indexed space the base
 * colour space (second array element, resolved if it is a reference) is
 * classified instead.
 *
 * @param cs colour space object; may be NULL.
 * @return the matching PDFColorSpace value, or CS_Unknown when the object is
 *         NULL, malformed, of an unrecognised family, or when PoDoFo raises
 *         a PdfError while it is examined.
 */
PDFColorSpace PDFAnalyzer::getCSType(PdfObject* cs)
{
	try
	{
		if (!cs)
			return CS_Unknown;

		if (cs->IsName())
		{
			const PdfName& csName = cs->GetName();
			if (csName == "DeviceGray")
				return CS_DeviceGray;
			if (csName == "DeviceRGB")
				return CS_DeviceRGB;
			if (csName == "DeviceCMYK")
				return CS_DeviceCMYK;
			return CS_Unknown;
		}

		if (cs->IsArray())
		{
			PdfArray& csArr = cs->GetArray();
			// An empty array, or one not starting with a name, cannot
			// identify a colour space family.
			if (csArr.GetSize() == 0 || !csArr[0].IsName())
				return CS_Unknown;

			const PdfName& csTypeName = csArr[0].GetName();
			if (csTypeName == "ICCBased")
				return CS_ICCBased;
			if (csTypeName == "CalGray")
				return CS_CalGray;
			if (csTypeName == "CalRGB")
				return CS_CalRGB;
			if (csTypeName == "Lab")
				return CS_Lab;
			if (csTypeName == "Indexed")
			{
				if (csArr.GetSize() < 2)
					return CS_Unknown;
				PdfObject* pBase = &csArr[1];
				if (pBase->IsReference())
				{
					pBase = cs->GetOwner()
						? cs->GetOwner()->GetObject(pBase->GetReference())
						: NULL;
				}
				if (!pBase)
					return CS_Unknown; // dangling reference
				if (cs->GetOwner())
					pBase->SetOwner(cs->GetOwner());
				return getCSType(pBase);
			}
			if (csTypeName == "Separation")
				return CS_Separation;
			if (csTypeName == "DeviceN")
				return CS_DeviceN;
			if (csTypeName == "Pattern")
				return CS_Pattern;
		}
	}
	catch (PdfError & e)
	{
		qDebug() << "Error in identifying the color type";
		e.PrintErrorMsg();
		return CS_Unknown;
	}
	return CS_Unknown;
}
void PDFProcessor::start () { int nNum = 0; try { PdfObject* pObj = NULL; // open document qDebug() << "Opening file: " << filename.toStdString().c_str(); PdfMemDocument document( filename.toStdString().c_str() ); // m_pszOutputDirectory = const_cast<char*>(pszOutput); TCIVecObjects it = document.GetObjects().begin(); while( it != document.GetObjects().end() ) { if( (*it)->IsDictionary() ) { PdfObject* pObjType = (*it)->GetDictionary().GetKey( PdfName::KeyType ); PdfObject* pObjSubType = (*it)->GetDictionary().GetKey( PdfName::KeySubtype ); if( ( pObjType && pObjType->IsName() && ( pObjType->GetName().GetName() == "XObject" ) ) || ( pObjSubType && pObjSubType->IsName() && ( pObjSubType->GetName().GetName() == "Image" ) ) ) { pObj = (*it)->GetDictionary().GetKey( PdfName::KeyFilter ); if( pObj && pObj->IsArray() && pObj->GetArray().GetSize() == 1 && pObj->GetArray()[0].IsName() && (pObj->GetArray()[0].GetName().GetName() == "DCTDecode") ) pObj = &pObj->GetArray()[0]; std::string filterName = pObj->GetName().GetName(); bool processed = 0; if( pObj && pObj->IsName() && ( filterName == "DCTDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "JPXDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } if( pObj && pObj->IsName() && ( filterName == "FlateDecode" ) ) { // The only filter is JPEG -> create a JPEG file qDebug() << "JPG found.\n"; processed = true; nNum++; } // else we found something strange, we do not care about it for now. 
if (processed == false) { qDebug() << "Unknown image type found:" << QString::fromStdString(filterName) << "\n"; nNum++; } document.FreeObjectMemory( *it ); } } ++it; } } catch( PdfError & e ) { qDebug() << "Error: An error ocurred during processing the pdf file:" << e.GetError(); e.PrintErrorMsg(); return;// e.GetError(); } // TODO: statistics of no of images etc // nNum = extractor.GetNumImagesExtracted(); qDebug() << "Extracted " << nNum << " images successfully from the PDF file.\n"; }
void PdfMemStream::FlateCompress() { PdfObject* pObj; PdfVariant vFilter( PdfName("FlateDecode" ) ); PdfVariant vFilterList; PdfArray tFilters; PdfArray::const_iterator tciFilters; if( !m_lLength ) return; // ePdfError_ErrOk // TODO: Handle DecodeParms if( m_pParent->GetDictionary().HasKey( "Filter" ) ) { pObj = m_pParent->GetIndirectKey( "Filter" ); if( pObj->IsName() ) { if( pObj->GetName() != "DCTDecode" && pObj->GetName() != "FlateDecode" ) { tFilters.push_back( vFilter ); tFilters.push_back( *pObj ); } } else if( pObj->IsArray() ) { tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { if( (*tciFilters).IsName() ) { // do not compress DCTDecoded are already FlateDecoded streams again if( (*tciFilters).GetName() == "DCTDecode" || (*tciFilters).GetName() == "FlateDecode" ) { return; } } ++tciFilters; } tFilters.push_back( vFilter ); tciFilters = pObj->GetArray().begin(); while( tciFilters != pObj->GetArray().end() ) { tFilters.push_back( (*tciFilters) ); ++tciFilters; } } else return; vFilterList = PdfVariant( tFilters ); m_pParent->GetDictionary().AddKey( "Filter", vFilterList ); FlateCompressStreamData(); // throws an exception on error } else { m_pParent->GetDictionary().AddKey( "Filter", PdfName( "FlateDecode" ) ); FlateCompressStreamData(); } }
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS) { PdfObject* bmObj = extGStateObj->GetIndirectKey("BM"); if (bmObj && bmObj->IsName()) { currGS.blendModes.clear(); currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str()); if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible")) hasTransparency = true; } else if (bmObj && bmObj->IsArray()) { PdfArray arr = bmObj->GetArray(); currGS.blendModes.clear(); for(int i=0; i<arr.GetSize(); ++i) currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str()); if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible")) hasTransparency = true; } PdfObject* caObj = extGStateObj->GetIndirectKey("ca"); if (caObj && (caObj->IsReal() || caObj->IsNumber())) { currGS.fillAlphaConstant = caObj->GetReal(); if (caObj->GetReal() < 1) hasTransparency = true; } PdfObject* cAObj = extGStateObj->GetIndirectKey("CA"); if (cAObj && (cAObj->IsReal() || cAObj->IsNumber())) { if (cAObj->GetReal() < 1) hasTransparency = true; } PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask"); if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None")) hasTransparency = true; PdfObject* fontObj = extGStateObj->GetIndirectKey("Font"); if (fontObj && fontObj->IsArray()) { PdfArray arr = fontObj->GetArray(); if (arr[0].IsReference()) { PdfReference ref = arr[0].GetReference(); PdfObject* fontObject = m_doc->GetObjects().GetObject(ref); if (fontObject) { PDFFont font = getFontInfo(fontObject); usedFonts.append(font); currGS.font.first = font; currGS.font.second = arr[1].GetReal(); } } } PdfObject* lwObj = extGStateObj->GetIndirectKey("LW"); if (lwObj) currGS.lineWidth = lwObj->GetReal(); PdfObject* lcObj = extGStateObj->GetIndirectKey("LC"); if (lcObj) currGS.lineCap = lcObj->GetNumber(); PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ"); if (ljObj) 
currGS.lineJoin = ljObj->GetNumber(); PdfObject* mlObj = extGStateObj->GetIndirectKey("ML"); if (mlObj) currGS.miterLimit = mlObj->GetReal(); PdfObject* dObj = extGStateObj->GetIndirectKey("D"); if (dObj) { PdfArray dashArr = dObj->GetArray()[0]; currGS.dashPattern.first.clear(); for (int i=0; i<dashArr.GetSize(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = dObj->GetArray()[1].GetNumber(); } }
bool PDFAnalyzer::inspectCanvas(PdfCanvas* canvas, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, QList<PDFImage> & imgs) { // this method can be used to get used color spaces, detect transparency, and get used fonts in either PdfPage or PdfXObject PdfObject* colorSpaceRes; PdfObject* xObjects; PdfObject* transGroup; PdfObject* extGState; PdfObject* fontRes; QMap<PdfName, PDFColorSpace> processedNamedCS; QMap<PdfName, PDFFont> processedNamedFont; QList<PdfName> processedNamedXObj; QList<PdfName> processedNamedGS; try { // get hold of a PdfObject pointer of this canvas // needed for the finding resources code below to work PdfPage* page = dynamic_cast<PdfPage*>(canvas); PdfObject* canvasObject = page?(page->GetObject()):((dynamic_cast<PdfXObject*>(canvas))->GetObject()); // find a resource with ColorSpace entry PdfObject* resources = canvas->GetResources(); for (PdfObject* par = canvasObject; par && !resources; par = par->GetIndirectKey("Parent")) { resources = par->GetIndirectKey("Resources"); } colorSpaceRes = resources?resources->GetIndirectKey("ColorSpace"):NULL; xObjects = resources?resources->GetIndirectKey("XObject"):NULL; extGState = resources?resources->GetIndirectKey("ExtGState"):NULL; fontRes = resources?resources->GetIndirectKey("Font"):NULL; // getting the transparency group of this content stream (if available) transGroup = canvasObject?canvasObject->GetIndirectKey("Group"):NULL; if (transGroup) { PdfObject* subtype = transGroup->GetIndirectKey("S"); if (subtype && subtype->GetName() == "Transparency") { // having transparency group means there's transparency in the PDF hasTransparency = true; // reporting the color space used in transparency group (Section 7.5.5, PDF 1.6 Spec) PdfObject* cs = transGroup->GetIndirectKey("CS"); if (cs) { PDFColorSpace retval = getCSType(cs); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); } } } } catch (PdfError & e) { qDebug() 
<< "Error in analyzing stream's resources."; e.PrintErrorMsg(); return false; } try { // start parsing the content stream PdfContentsTokenizer tokenizer(canvas); EPdfContentsType t; const char * kwText; PdfVariant var; bool readToken; int tokenNumber = 0; QList<PdfVariant> args; bool inlineImgDict = false; QStack<PDFGraphicState> gsStack; PDFGraphicState currGS; while ((readToken = tokenizer.ReadNext(t, kwText, var))) { ++tokenNumber; if (t == ePdfContentsType_Variant) { args.append(var); } else if (t == ePdfContentsType_Keyword) { QString kw(kwText); switch(kwNameMap.value(kw, KW_Undefined)) { case KW_q: gsStack.push(currGS); break; case KW_Q: currGS = gsStack.pop(); break; case KW_cm: { if (args.size() == 6) { double mt[6]; for (int i=0; i<6; ++i) { mt[i] = args[i].GetReal(); } QMatrix transMatrix(mt[0], mt[1], mt[2], mt[3], mt[4], mt[5]); currGS.ctm = transMatrix*currGS.ctm; } } break; case KW_w: currGS.lineWidth = args[0].GetReal(); break; case KW_J: currGS.lineCap = args[0].GetNumber(); break; case KW_j: currGS.lineJoin = args[0].GetNumber(); break; case KW_M: currGS.lineJoin = args[0].GetReal(); break; case KW_d: { currGS.dashPattern.first.clear(); PdfArray dashArr = args[0].GetArray(); for (int i=0; i<dashArr.size(); ++i) currGS.dashPattern.first.append(dashArr[i].GetNumber()); currGS.dashPattern.second = args[0].GetNumber(); } break; case KW_g: if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); currGS.fillCS = CS_DeviceGray; currGS.fillColor.clear(); currGS.fillColor.append(args[0].GetReal()); break; case KW_G: if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); currGS.strokeCS = CS_DeviceGray; currGS.strokeColor.clear(); currGS.strokeColor.append(args[0].GetReal()); break; case KW_rg: if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); currGS.fillCS = CS_DeviceRGB; currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) 
currGS.fillColor.append(args[i].GetReal()); break; case KW_RG: if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); currGS.strokeCS = CS_DeviceRGB; currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_k: if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); currGS.fillCS = CS_DeviceCMYK; currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) currGS.fillColor.append(args[i].GetReal()); break; case KW_K: if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); currGS.strokeCS = CS_DeviceCMYK; currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_cs: { if (args.size() == 1 && args[0].IsName()) { if (args[0].GetName() == "DeviceGray") { currGS.fillCS = CS_DeviceGray; currGS.fillColor.clear(); currGS.fillColor.append(0); if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); } else if (args[0].GetName() == "DeviceRGB") { currGS.fillCS = CS_DeviceRGB; currGS.fillColor.clear(); for (int i=0; i<3; ++i) currGS.fillColor.append(0); if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); } else if (args[0].GetName() == "DeviceCMYK") { currGS.fillCS = CS_DeviceCMYK; currGS.fillColor.clear(); for (int i=0; i<3; ++i) currGS.fillColor.append(0); currGS.fillColor.append(1); if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); } else if (args[0].GetName() == "Pattern") { currGS.fillCS = CS_Pattern; if (!usedColorSpaces.contains(CS_Pattern)) usedColorSpaces.append(CS_Pattern); } else { if (processedNamedCS.contains(args[0].GetName())) { currGS.fillCS = processedNamedCS.value(args[0].GetName()); } else { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName())) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName()); PDFColorSpace retval = 
getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); currGS.fillCS = retval; processedNamedCS.insert(args[0].GetName(), retval); } else { qDebug() << "Supplied colorspace is undefined!"; return false; } } } } else { qDebug() << "Wrong syntax in specifying color space!"; return false; } } break; case KW_CS: { if (args.size() == 1 && args[0].IsName()) { if (args[0].GetName() == "DeviceGray") { currGS.strokeCS = CS_DeviceGray; currGS.strokeColor.clear(); currGS.strokeColor.append(0); if (!usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); } else if (args[0].GetName() == "DeviceRGB") { currGS.fillCS = CS_DeviceRGB; currGS.strokeColor.clear(); for (int i=0; i<3; ++i) currGS.strokeColor.append(0); if (!usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); } else if (args[0].GetName() == "DeviceCMYK") { currGS.fillCS = CS_DeviceCMYK; currGS.strokeColor.clear(); for (int i=0; i<3; ++i) currGS.strokeColor.append(0); currGS.strokeColor.append(1); if (!usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); } else if (args[0].GetName() == "Pattern") { currGS.fillCS = CS_Pattern; if (!usedColorSpaces.contains(CS_Pattern)) usedColorSpaces.append(CS_Pattern); } else { if (processedNamedCS.contains(args[0].GetName())) { currGS.strokeCS = processedNamedCS.value(args[0].GetName()); } else { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName())) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName()); PDFColorSpace retval = getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); currGS.strokeCS = retval; processedNamedCS.insert(args[0].GetName(), retval); } else { qDebug() << "Supplied colorspace is undefined!"; return false; } } } } else { qDebug() << "Wrong syntax in specifying color space!"; return false; } } break; case KW_sc: currGS.fillColor.clear(); 
for (int i=0; i<args.size(); ++i) currGS.fillColor.append(args[i].GetReal()); break; case KW_SC: currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) currGS.strokeColor.append(args[i].GetReal()); break; case KW_scn: currGS.fillColor.clear(); for (int i=0; i<args.size(); ++i) { if (args[i].IsReal() || args[i].IsNumber()) currGS.fillColor.append(args[i].GetReal()); } break; case KW_SCN: currGS.strokeColor.clear(); for (int i=0; i<args.size(); ++i) { if (args[i].IsReal() || args[i].IsNumber()) currGS.strokeColor.append(args[i].GetReal()); } break; case KW_Do: // image or form XObject { if (!processedNamedXObj.contains(args[0].GetName())) { if (args.size() == 1 && args[0].IsName() && xObjects) { PdfObject* xObject = xObjects->GetIndirectKey(args[0].GetName()); PdfObject* subtypeObject = xObject?xObject->GetIndirectKey("Subtype"):NULL; if (subtypeObject && subtypeObject->IsName()) { if (subtypeObject->GetName() == "Image") { PdfObject* imgColorSpace = xObject->GetIndirectKey("ColorSpace"); if (imgColorSpace) { PDFColorSpace retval = getCSType(imgColorSpace); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); } PdfObject* sMaskObj = xObject->GetIndirectKey("SMask"); if (sMaskObj) hasTransparency = true; PDFImage img; img.imgName = args[0].GetName().GetEscapedName().c_str(); double width = xObject->GetIndirectKey("Width")->GetReal(); double height = xObject->GetIndirectKey("Height")->GetReal(); img.dpiX = qRound(width/(currGS.ctm.m11()/72)); img.dpiY = qRound(height/(currGS.ctm.m22()/72)); imgs.append(img); } else if (subtypeObject->GetName() == "Form") { PdfXObject xObj(xObject); inspectCanvas(&xObj, usedColorSpaces, hasTransparency, usedFonts, imgs); // recursive call } } else { qDebug() << "Supplied external object is undefined!"; return false; } processedNamedXObj.append(args[0].GetName()); } else { qDebug() << "Wrong syntax for Do operator or there's no XObject defined!"; return false; } } } break; case KW_BI: 
inlineImgDict = true; break; case KW_ID: if (inlineImgDict) { PdfName colorspace("ColorSpace"); PdfName cs("CS"); if (args.contains(colorspace) || args.contains(cs)) { int csIdx = args.contains(colorspace)?args.indexOf(colorspace):args.indexOf(cs); if (args[csIdx+1].IsName()) { PdfName csName = args[csIdx+1].GetName(); if ((csName == "G" || csName == "DeviceGray") && !usedColorSpaces.contains(CS_DeviceGray)) usedColorSpaces.append(CS_DeviceGray); else if ((csName == "RGB" || csName == "DeviceRGB") && !usedColorSpaces.contains(CS_DeviceRGB)) usedColorSpaces.append(CS_DeviceRGB); else if ((csName == "CMYK" || csName == "DeviceCMYK") && !usedColorSpaces.contains(CS_DeviceCMYK)) usedColorSpaces.append(CS_DeviceCMYK); else if (!processedNamedCS.contains(csName)) { if (colorSpaceRes && colorSpaceRes->GetIndirectKey(csName)) { PdfObject* csEntry = colorSpaceRes->GetIndirectKey(csName); if (csEntry) { PDFColorSpace retval = getCSType(csEntry); if (retval != CS_Unknown && !usedColorSpaces.contains(retval)) usedColorSpaces.append(retval); processedNamedCS.insert(csName, retval); } } else { qDebug() << "Supplied colorspace for inline image is undefined!"; return false; } } } } PdfName height("Height"); PdfName h("H"); PdfName width("Width"); PdfName w("W"); if ((args.contains(height) || args.contains(h)) && (args.contains(width) || args.contains(w))) { int heightIdx = args.contains(height)?args.indexOf(height):args.indexOf(h); int widthIdx = args.contains(width)?args.indexOf(width):args.indexOf(w); double height = args[heightIdx+1].GetReal(); double width = args[widthIdx+1].GetReal(); PDFImage img; img.imgName = "Inline Image"; img.dpiX = qRound(width/(currGS.ctm.m11()/72)); img.dpiY = qRound(height/(currGS.ctm.m22()/72)); imgs.append(img); } inlineImgDict = false; } break; case KW_gs: { if (!processedNamedGS.contains(args[0].GetName())) { if (args.size() == 1 && args[0].IsName() && extGState) { PdfObject* extGStateObj = extGState->GetIndirectKey(args[0].GetName()); if 
(extGStateObj) { inspectExtGStateObj(extGStateObj, usedColorSpaces, hasTransparency, usedFonts, currGS); } else { qDebug() << "Named graphic state used with gs operator is undefined in current ExtGState"; return false; } processedNamedGS.append(args[0].GetName()); } else { qDebug() << "Wrong syntax in applying extended graphic state (gs operator) or there's no ExtGState defined!"; return false; } } } break; case KW_Tf: { if (processedNamedFont.contains(args[0].GetName())) { currGS.font.first = processedNamedFont.value(args[0].GetName()); currGS.font.second = args[1].GetReal(); } else { if (args.size() == 2 && args[0].IsName() && fontRes) { PdfObject* fontObj = fontRes->GetIndirectKey(args[0].GetName()); if (fontObj) { PDFFont retval = getFontInfo(fontObj); usedFonts.append(retval); processedNamedFont.insert(args[0].GetName(), retval); currGS.font.first = retval; currGS.font.second = args[1].GetReal(); } else { qDebug() << "The specified font cannot be found in current Resources!"; return false; } } else { qDebug() << "Wrong syntax in use of Tf operator or there's no Font defined in current Resources dictionary!"; return false; } } } break; case KW_Undefined: default: break; } args.clear(); } } } catch (PdfError & e) { qDebug() << "Error in parsing content stream"; e.PrintErrorMsg(); return false; } return true; }
/**
 * Read a cross-reference stream located at lOffset, merge its dictionary into
 * the trailer and, unless bReadOnlyTrailer is set, decode its entries.
 * A "Prev" entry is followed recursively.
 *
 * @param lOffset          file offset of the cross-reference stream object.
 * @param bReadOnlyTrailer when true, return right after the trailer has been
 *                         merged without decoding any entry.
 *
 * Raises ePdfError_NoXRef when the object is not a usable XRef stream
 * (missing/invalid Type, Size, W or Index, no stream data, negative or
 * all-zero field widths, or a Prev entry pointing back at this very offset).
 * Exceptions raised while decoding entries propagate after the decoded buffer
 * has been released.
 */
void PdfParser::ReadXRefStreamContents( pdf_long lOffset, bool bReadOnlyTrailer )
{
    char*       pBuffer;
    char*       pStart;
    pdf_long    lBufferLen;
    long long   lSize   = 0;
    PdfVariant  vWArray;
    PdfObject*  pObj;

    long        nW[W_ARRAY_SIZE] = { 0, 0, 0 };
    int         i;

    m_device.Device()->Seek( lOffset );

    PdfParserObject xrefObject( m_vecObjects, m_device, m_buffer );
    // Ignore the encryption in the XREF as the XREF stream must not be encrypted (see PDF Reference 3.4.7)
    xrefObject.ParseFile( NULL );

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeyType ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    pObj = xrefObject.GetDictionary().GetKey( PdfName::KeyType );
    if( !pObj->IsName() || ( pObj->GetName() != "XRef" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !m_pTrailer )
        m_pTrailer = new PdfParserObject( m_vecObjects, m_device, m_buffer );

    MergeTrailer( &xrefObject );

    if( bReadOnlyTrailer )
        return;

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeySize ) ||
        !xrefObject.GetDictionary().HasKey( "W" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    lSize   = xrefObject.GetDictionary().GetKeyAsLong( PdfName::KeySize, 0 );
    vWArray = *(xrefObject.GetDictionary().GetKey( "W" ));

    // The pdf reference states that W is always an array with 3 entries
    // all of them have to be integers
    if( !vWArray.IsArray() || vWArray.GetArray().size() != 3 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    for( i=0;i<W_ARRAY_SIZE;i++ )
    {
        if( !vWArray.GetArray()[i].IsNumber() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        nW[i] = static_cast<long>(vWArray.GetArray()[i].GetNumber());

        // a negative width would move the read pointer backwards
        if( nW[i] < 0 )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }
    }

    // entries of zero total width carry no data and would never advance
    if( nW[0] == 0 && nW[1] == 0 && nW[2] == 0 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    std::vector<long long> vecIndeces;
    // get the first object number in this crossref stream.
    // it is not required to have an index key though.
    if( xrefObject.GetDictionary().HasKey( "Index" ) )
    {
        // reuse vWArray!!
        vWArray = *(xrefObject.GetDictionary().GetKey( "Index" ));
        if( !vWArray.IsArray() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        TCIVariantList it = vWArray.GetArray().begin();
        while ( it != vWArray.GetArray().end() )
        {
            if( !(*it).IsNumber() )
            {
                PODOFO_RAISE_ERROR( ePdfError_NoXRef );
            }
            vecIndeces.push_back( (*it).GetNumber() );
            ++it;
        }
    }
    else
    {
        vecIndeces.push_back( 0 );
        vecIndeces.push_back( lSize );
    }

    // Index holds (first object number, count) pairs
    if( vecIndeces.size() % 2 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !xrefObject.HasStreamToParse() )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    xrefObject.GetStream()->GetFilteredCopy( &pBuffer, &lBufferLen );
    pStart = pBuffer;

    // Total width of one entry, computed without overflowing: as soon as the
    // running sum would exceed the buffer no complete entry can exist.
    pdf_long lEntryLen = 0;
    bool     bEntryFits = true;
    for( i=0;i<W_ARRAY_SIZE;i++ )
    {
        if( nW[i] > lBufferLen - lEntryLen )
        {
            bEntryFits = false;
            break;
        }
        lEntryLen += nW[i];
    }

    try
    {
        int nCurIndex = 0;
        while( bEntryFits &&
               nCurIndex < static_cast<pdf_long>(vecIndeces.size()) &&
               pBuffer - pStart < lBufferLen )
        {
            int       nFirstObj = static_cast<int>(vecIndeces[nCurIndex]);
            long long nCount    = vecIndeces[nCurIndex+1];

            // only decode entries that lie completely inside the buffer
            while( nCount-- > 0 && lBufferLen - (pBuffer - pStart) >= lEntryLen )
            {
                ReadXRefStreamEntry( pBuffer, lBufferLen, nW, nFirstObj++ );
                pBuffer += lEntryLen;
            }

            nCurIndex += 2;
        }
    }
    catch( ... )
    {
        // do not leak the decoded stream when an entry cannot be parsed
        free( pStart );
        throw;
    }
    free( pStart );

    if( xrefObject.GetDictionary().HasKey("Prev") )
    {
        const pdf_long lPrev = static_cast<pdf_long>(xrefObject.GetDictionary().GetKeyAsLong( "Prev", 0 ));
        // A Prev entry pointing at this same stream would recurse forever.
        // Longer cycles are not detected here.
        if( lPrev == lOffset )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }
        ReadXRefStreamContents( lPrev, bReadOnlyTrailer );
    }
}