Example #1
0
PDFColorSpace PDFAnalyzer::getCSType(PdfObject* cs)
{
	try {
		// colorspace is either a name or an array
		if (cs && cs->IsName())
		{
			PdfName csName = cs->GetName();
			if (csName == "DeviceGray")
				return CS_DeviceGray;
			else if (csName == "DeviceRGB")
				return CS_DeviceRGB;
			else if (csName == "DeviceCMYK")
				return CS_DeviceCMYK;
		}
		else if (cs && cs->IsArray())
		{
			PdfArray csArr = cs->GetArray();
			PdfObject csTypePdfName = csArr[0];
			if (csTypePdfName.IsName())
			{
				PdfName csTypeName = csTypePdfName.GetName();
				if (csTypeName == "ICCBased")
					return CS_ICCBased;
				else if (csTypeName == "CalGray")
					return CS_CalGray;
				else if (csTypeName == "CalRGB")
					return CS_CalRGB;
				else if (csTypeName == "Lab")
					return CS_Lab;
				else if (csTypeName == "Indexed")
				{
					PdfObject base = cs->GetArray()[1];
					PdfObject* pBase = &base;
					if (base.IsReference())
					{
						pBase = cs->GetOwner()->GetObject(base.GetReference());
					}
					pBase->SetOwner(cs->GetOwner());
					return getCSType(pBase);
				}
				else if (csTypeName == "Separation")
					return CS_Separation;
				else if (csTypeName == "DeviceN")
					return CS_DeviceN;
				else if (csTypeName == "Pattern")
					return CS_Pattern;
			}
		}
	}
	catch (PdfError & e)
	{
		qDebug() << "Error in identifying the color type";
		e.PrintErrorMsg();
		return CS_Unknown;
	}
	return CS_Unknown;
}
Example #2
0
PDFFont PDFAnalyzer::getFontInfo(PdfObject* fontObj)
{
	PDFFont currFont;
	PdfObject* subtype = fontObj->GetIndirectKey("Subtype");
	if (subtype && subtype->IsName())
	{
		PdfObject* fontDesc = fontObj->GetIndirectKey("FontDescriptor");
		if (subtype->GetName() == "Type1")
			currFont.fontType = F_Type1;
		else if (subtype->GetName() == "MMType1")
			currFont.fontType = F_MMType1;
		else if (subtype->GetName() == "TrueType")
			currFont.fontType = F_TrueType;
		else if (subtype->GetName() == "Type3")
		{
			currFont.fontType = F_Type3;
			currFont.isEmbedded = true;
			fontDesc = NULL;
		}
		else if (subtype->GetName() == "Type0")
		{
			PdfObject* descendantFonts = fontObj->GetIndirectKey("DescendantFonts");
			if (descendantFonts && descendantFonts->IsArray())
			{
				PdfObject descendantFont = descendantFonts->GetArray()[0];
				descendantFont.SetOwner(descendantFonts->GetOwner());
				PdfObject* subtypeDescFont = descendantFont.GetIndirectKey("Subtype");
				fontDesc = descendantFont.MustGetIndirectKey("FontDescriptor");
				if (subtypeDescFont && subtypeDescFont->IsName())
				{
					if (subtypeDescFont->GetName() == "CIDFontType0")
						currFont.fontType = F_CIDFontType0;
					else if (subtypeDescFont->GetName() == "CIDFontType2")
						currFont.fontType = F_CIDFontType2;
				}
			}
		}
		if (fontDesc)
		{
			PdfObject* fontFile = fontDesc->GetIndirectKey("FontFile");
			PdfObject* fontFile2 = fontDesc->GetIndirectKey("FontFile2");
			PdfObject* fontFile3 = fontDesc->GetIndirectKey("FontFile3");
			if (fontFile && fontFile->HasStream())
				currFont.isEmbedded = true;
			if (fontFile2 && fontFile2->HasStream())
				currFont.isEmbedded = true;
			if (fontFile3 && fontFile3->HasStream())
			{
				currFont.isEmbedded = true;
				PdfObject* ff3Subtype = fontFile3->GetIndirectKey("Subtype");
				if (ff3Subtype && ff3Subtype->IsName() && ff3Subtype->GetName() == "OpenType")
					currFont.isOpenType = true;
			}
		}
	}
	return currFont;
}
Example #3
0
TVecFilters PdfFilterFactory::CreateFilterList( const PdfObject* pObject )
{
    TVecFilters filters;

    const PdfObject* pObj    = NULL;

    if( pObject->IsDictionary() && pObject->GetDictionary().HasKey( "Filter" ) )
        pObj = pObject->GetDictionary().GetKey( "Filter" );
    else if( pObject->IsArray() )
        pObj = pObject;
    else if( pObject->IsName() ) 
        pObj = pObject;


    if (!pObj)
	// Object had no /Filter key . Return a null filter list.
	return filters;

    if( pObj->IsName() ) 
        filters.push_back( PdfFilterFactory::FilterNameToType( pObj->GetName() ) );
    else if( pObj->IsArray() ) 
    {
        TCIVariantList it = pObj->GetArray().begin();

        while( it != pObj->GetArray().end() )
        {
            if ( (*it).IsName() )
			{
                filters.push_back( PdfFilterFactory::FilterNameToType( (*it).GetName() ) );
            }
            else if ( (*it).IsReference() )
            {
                PdfObject* pFilter = pObject->GetOwner()->GetObject( (*it).GetReference() );
                if( pFilter == NULL ) 
                {
                    PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Filter array contained unexpected reference" );
                }

                filters.push_back( PdfFilterFactory::FilterNameToType( pFilter->GetName() ) );
            }
            else 
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Filter array contained unexpected non-name type" );
			}
                
            ++it;
        }
    }

    return filters;
}
Example #4
0
    void PDFProcessor::start ()
    {
        int nNum = 0;
        try
        {
            PdfObject*  pObj  = NULL;

            // open document
            qDebug() << "Opening file: " << filename.toStdString().c_str();
            PdfMemDocument document( filename.toStdString().c_str() );

//    	    m_pszOutputDirectory = const_cast<char*>(pszOutput);

            TCIVecObjects it = document.GetObjects().begin();

            while( it != document.GetObjects().end() )
            {
                if( (*it)->IsDictionary() )
                {
                    PdfObject* pObjType = (*it)->GetDictionary().GetKey( PdfName::KeyType );
                    PdfObject* pObjSubType = (*it)->GetDictionary().GetKey( PdfName::KeySubtype );
                    if( ( pObjType && pObjType->IsName() && ( pObjType->GetName().GetName() == "XObject" ) ) ||
                            ( pObjSubType && pObjSubType->IsName() && ( pObjSubType->GetName().GetName() == "Image" ) ) )
                    {
                        pObj = (*it)->GetDictionary().GetKey( PdfName::KeyFilter );
                        if( pObj && pObj->IsArray() && pObj->GetArray().GetSize() == 1 &&
                                pObj->GetArray()[0].IsName() && (pObj->GetArray()[0].GetName().GetName() == "DCTDecode") )
                            pObj = &pObj->GetArray()[0];

                        std::string filterName = pObj->GetName().GetName();
                        bool processed = 0;

                        if( pObj && pObj->IsName() && ( filterName == "DCTDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }

                        if( pObj && pObj->IsName() && ( filterName == "JPXDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }

                        if( pObj && pObj->IsName() && ( filterName == "FlateDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }


                        // else we found something strange, we do not care about it for now.
                        if (processed == false)
                        {
                            qDebug() << "Unknown image type found:" << QString::fromStdString(filterName) << "\n";
                            nNum++;
                        }

                        document.FreeObjectMemory( *it );
                    }
                }

                ++it;
            }


        }
        catch( PdfError & e )
        {
            qDebug() << "Error: An error ocurred during processing the pdf file:" << e.GetError();
            e.PrintErrorMsg();
            return;// e.GetError();
        }

        // TODO: statistics of no of images etc
//      nNum = extractor.GetNumImagesExtracted();

        qDebug() << "Extracted " << nNum << " images successfully from the PDF file.\n";
    }
PdfFontMetricsObject::PdfFontMetricsObject( PdfObject* pFont, PdfObject* pDescriptor, const PdfEncoding* const pEncoding )
    : PdfFontMetrics( ePdfFontType_Unknown, "", NULL ),
      m_pEncoding( pEncoding ), m_dDefWidth(0.0)
{
    if( !pDescriptor )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

	const PdfName & rSubType = pFont->GetDictionary().GetKey( PdfName::KeySubtype )->GetName();

    // OC 15.08.2010 BugFix: /FirstChar /LastChar /Widths are in the Font dictionary and not in the FontDescriptor
	if ( rSubType == PdfName("Type1") || rSubType == PdfName("TrueType") ) {
		m_sName        = pDescriptor->GetIndirectKey( "FontName" )->GetName();
		m_bbox         = pDescriptor->GetIndirectKey( "FontBBox" )->GetArray();
    m_nFirst       = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "FirstChar", 0L ));
    m_nLast        = static_cast<int>(pFont->GetDictionary().GetKeyAsLong( "LastChar", 0L ));
	 // OC 15.08.2010 BugFix: GetIndirectKey() instead of GetDictionary().GetKey() and "Widths" instead of "Width"
    PdfObject* widths = pFont->GetIndirectKey( "Widths" );
    if( widths != NULL )
    {
        m_width        = widths->GetArray();
        m_missingWidth = NULL;
    }
    else
    {
        widths = pDescriptor->GetDictionary().GetKey( "MissingWidth" );
        if( widths == NULL ) 
        {
            PODOFO_RAISE_ERROR_INFO( ePdfError_NoObject, "Font object defines neither Widths, nor MissingWidth values!" );
            m_missingWidth = widths;
        }
    }
	} else if ( rSubType == PdfName("CIDFontType0") || rSubType == PdfName("CIDFontType2") ) {
		PdfObject *pObj = pDescriptor->GetIndirectKey( "FontName" );
		if (pObj) {
			m_sName = pObj->GetName();
		}
		pObj = pDescriptor->GetIndirectKey( "FontBBox" );
		if (pObj) {
			m_bbox = pObj->GetArray();
		}
		m_nFirst = 0;
		m_nLast = 0;

		m_dDefWidth = static_cast<double>(pFont->GetDictionary().GetKeyAsLong( "DW", 1000L ));
		PdfVariant default_width(m_dDefWidth);
		PdfObject * pw = pFont->GetIndirectKey( "W" );

		for (int i = m_nFirst; i <= m_nLast; ++i) {
			m_width.push_back(default_width);
		}
		if (pw) {
			PdfArray w = pw->GetArray();
			int pos = 0;
			while (pos < static_cast<int>(w.GetSize())) {
				int start = static_cast<int>(w[pos++].GetNumber());
				PODOFO_ASSERT (start >= 0);
				if (w[pos].IsArray()) {
					PdfArray widths = w[pos++].GetArray();
					int length = start + static_cast<int>(widths.GetSize());
					PODOFO_ASSERT (length >= start);
					if (length > static_cast<int>(m_width.GetSize())) {
						m_width.resize(length, default_width);
					}
					for (int i = 0; i < static_cast<int>(widths.GetSize()); ++i) {
						m_width[start + i] = widths[i];
					}
				} else {
					int end = static_cast<int>(w[pos++].GetNumber());
					int length = start + end;
					PODOFO_ASSERT (length >= start);
					if (length > static_cast<int>(m_width.GetSize())) {
						m_width.resize(length, default_width);
					}
					pdf_int64 width = w[pos++].GetNumber();
					for (int i = start; i <= end; ++i)
						m_width[i] = PdfVariant(width);
				}
			}
		}
		m_nLast = m_width.GetSize() - 1;
	} else {
        PODOFO_RAISE_ERROR_INFO( ePdfError_UnsupportedFontFormat, rSubType.GetEscapedName().c_str() );
	}


    m_nWeight      = static_cast<unsigned int>(pDescriptor->GetDictionary().GetKeyAsLong( "FontWeight", 400L ));
    m_nItalicAngle = static_cast<int>(pDescriptor->GetDictionary().GetKeyAsLong( "ItalicAngle", 0L ));

    m_dPdfAscent   = pDescriptor->GetDictionary().GetKeyAsReal( "Ascent", 0.0 );
    m_dAscent      = m_dPdfAscent / 1000.0;
    m_dPdfDescent  = pDescriptor->GetDictionary().GetKeyAsReal( "Descent", 0.0 );
    m_dDescent     = m_dPdfDescent / 1000.0;
    m_dLineSpacing = m_dAscent + m_dDescent;
    
    // Try to fine some sensible values
    m_dUnderlineThickness = 1.0;
    m_dUnderlinePosition  = 0.0;
    m_dStrikeOutThickness = m_dUnderlinePosition;
    m_dStrikeOutPosition  = m_dAscent / 2.0;

    m_bSymbol = false; // TODO
}
Example #6
0
PdfFont* PdfFontFactory::CreateFont( FT_Library*, PdfObject* pObject )
{
    PdfFontMetrics* pMetrics    = NULL;
    PdfFont*        pFont       = NULL;
    PdfObject*      pDescriptor = NULL;
    PdfObject*      pEncoding   = NULL;

    if( pObject->GetDictionary().GetKey( PdfName::KeyType )->GetName() != PdfName("Font") )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    const PdfName & rSubType = pObject->GetDictionary().GetKey( PdfName::KeySubtype )->GetName();
    if( rSubType == PdfName("Type0") ) 
    {
        // The PDF reference states that DescendantFonts must be an array,
        // some applications (e.g. MS Word) put the array into an indirect object though.
        const PdfArray & descendant  = 
            pObject->GetIndirectKey( "DescendantFonts" )->GetArray();
        PdfObject* pFontObject = pObject->GetOwner()->GetObject( descendant[0].GetReference() );

        pDescriptor = pFontObject->GetIndirectKey( "FontDescriptor" );
        pEncoding   = pObject->GetIndirectKey( "Encoding" );

        if ( pEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
           const PdfEncoding* const pPdfEncoding = 
               PdfEncodingObjectFactory::CreateEncoding( pEncoding );

           // OC 15.08.2010 BugFix: Parameter pFontObject added: TODO: untested
           pMetrics    = new PdfFontMetricsObject( pFontObject, pDescriptor, pPdfEncoding );
           pFont       = new PdfFontCID( pMetrics, pPdfEncoding, pObject, false );
        }
    }
    else if( rSubType == PdfName("Type1") ) 
    {
        // TODO: Old documents do not have a FontDescriptor for 
        //       the 14 standard fonts. This suggestions is 
        //       deprecated now, but give us problems with old documents.
        pDescriptor = pObject->GetIndirectKey( "FontDescriptor" );
        pEncoding   = pObject->GetIndirectKey( "Encoding" );

        // OC 13.08.2010: Handle missing FontDescriptor for the 14 standard fonts:
        if( !pDescriptor )
        {
           // Check if its a PdfFontType1Base14
           PdfObject* pBaseFont = NULL;
           pBaseFont = pObject->GetIndirectKey( "BaseFont" );
           const char* pszBaseFontName = pBaseFont->GetName().GetName().c_str();
           PdfFontMetricsBase14* pMetrics = PODOFO_Base14FontDef_FindBuiltinData(pszBaseFontName);
           if ( pMetrics != NULL )
           {
               // pEncoding may be undefined, found a valid pdf with
               //   20 0 obj
               //   <<
               //   /Type /Font
               //   /BaseFont /ZapfDingbats
               //   /Subtype /Type1
               //   >>
               //   endobj 
               // If pEncoding is null then
               // use StandardEncoding for Courier, Times, Helvetica font families
               // and special encodings for Symbol and ZapfDingbats
               const PdfEncoding* pPdfEncoding = NULL;
               if ( pEncoding!= NULL )
                   pPdfEncoding = PdfEncodingObjectFactory::CreateEncoding( pEncoding );
               else if ( !pMetrics->IsSymbol() )
                   pPdfEncoding = PdfEncodingFactory::GlobalStandardEncodingInstance();
               else if ( strcmp(pszBaseFontName, "Symbol") == 0 )
                   pPdfEncoding = PdfEncodingFactory::GlobalSymbolEncodingInstance();
               else if ( strcmp(pszBaseFontName, "ZapfDingbats") == 0 )
                   pPdfEncoding = PdfEncodingFactory::GlobalZapfDingbatsEncodingInstance();
               return new PdfFontType1Base14(pMetrics, pPdfEncoding, pObject);
           }
        }
        const PdfEncoding* pPdfEncoding = NULL;
        if ( pEncoding != NULL )
            pPdfEncoding = PdfEncodingObjectFactory::CreateEncoding( pEncoding );
        else if ( pDescriptor )
        {
           // OC 18.08.2010 TODO: Encoding has to be taken from the font's built-in encoding
           // Its extremely complicated to interpret the type1 font programs
           // so i try to determine if its a symbolic font by reading the FontDescriptor Flags
           // Flags & 4 --> Symbolic, Flags & 32 --> Nonsymbolic
            pdf_int32 lFlags = static_cast<pdf_int32>(pDescriptor->GetDictionary().GetKeyAsLong( "Flags", 0L ));
            if ( lFlags & 32 ) // Nonsymbolic, otherwise pEncoding remains NULL
                pPdfEncoding = PdfEncodingFactory::GlobalStandardEncodingInstance();
        }
        if ( pPdfEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
            // OC 15.08.2010 BugFix: Parameter pObject added:
            pMetrics    = new PdfFontMetricsObject( pObject, pDescriptor, pPdfEncoding );
            pFont       = new PdfFontType1( pMetrics, pPdfEncoding, pObject );
        }
    }
    else if( rSubType == PdfName("TrueType") ) 
    {
        pDescriptor = pObject->GetIndirectKey( "FontDescriptor" );
        pEncoding   = pObject->GetIndirectKey( "Encoding" );

        if ( pEncoding && pDescriptor ) // OC 18.08.2010: Avoid sigsegv
        {
           const PdfEncoding* const pPdfEncoding = 
               PdfEncodingObjectFactory::CreateEncoding( pEncoding );

           // OC 15.08.2010 BugFix: Parameter pObject added:
           pMetrics    = new PdfFontMetricsObject( pObject, pDescriptor, pPdfEncoding );
           pFont       = new PdfFontTrueType( pMetrics, pPdfEncoding, pObject );
        }
    }

    return pFont;
}
void PdfMemStream::FlateCompress()
{
    PdfObject*        pObj;
    PdfVariant        vFilter( PdfName("FlateDecode" ) );
    PdfVariant        vFilterList;
    PdfArray          tFilters;

    PdfArray::const_iterator tciFilters;
    
    if( !m_lLength )
        return; // ePdfError_ErrOk

    // TODO: Handle DecodeParms
    if( m_pParent->GetDictionary().HasKey( "Filter" ) )
    {
        pObj = m_pParent->GetIndirectKey( "Filter" );

        if( pObj->IsName() )
        {
            if( pObj->GetName() != "DCTDecode" && pObj->GetName() != "FlateDecode" )
            {
                tFilters.push_back( vFilter );
                tFilters.push_back( *pObj );
            }
        }
        else if( pObj->IsArray() )
        {
            tciFilters = pObj->GetArray().begin();

            while( tciFilters != pObj->GetArray().end() )
            {
                if( (*tciFilters).IsName() )
                {
                    // do not compress DCTDecoded are already FlateDecoded streams again
                    if( (*tciFilters).GetName() == "DCTDecode" || (*tciFilters).GetName() == "FlateDecode" )
                    {
                        return;
                    }
                }

                ++tciFilters;
            }

            tFilters.push_back( vFilter );

            tciFilters = pObj->GetArray().begin();

            while( tciFilters != pObj->GetArray().end() )
            {
                tFilters.push_back( (*tciFilters) );
                
                ++tciFilters;
            }
        }
        else
            return;

        vFilterList = PdfVariant( tFilters );
        m_pParent->GetDictionary().AddKey( "Filter", vFilterList );

        FlateCompressStreamData(); // throws an exception on error
    }
    else
    {
        m_pParent->GetDictionary().AddKey( "Filter", PdfName( "FlateDecode" ) );
        FlateCompressStreamData();
    }
}
Example #8
0
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS)
{
	PdfObject* bmObj = extGStateObj->GetIndirectKey("BM");
	if (bmObj && bmObj->IsName())
	{
		currGS.blendModes.clear();
		currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str());
		if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible"))
			hasTransparency = true;
	}
	else if (bmObj && bmObj->IsArray())
	{
		PdfArray arr = bmObj->GetArray();
		currGS.blendModes.clear();
		for(int i=0; i<arr.GetSize(); ++i)
			currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str());
		if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible"))
			hasTransparency = true;
	}
	PdfObject* caObj = extGStateObj->GetIndirectKey("ca");
	if (caObj && (caObj->IsReal() || caObj->IsNumber()))
	{
		currGS.fillAlphaConstant = caObj->GetReal();
		if (caObj->GetReal() < 1)
			hasTransparency = true;
	}
	PdfObject* cAObj = extGStateObj->GetIndirectKey("CA");
	if (cAObj && (cAObj->IsReal() || cAObj->IsNumber()))
	{
		if (cAObj->GetReal() < 1)
		hasTransparency = true;
	}
	PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask");
	if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None"))
		hasTransparency = true;
	PdfObject* fontObj = extGStateObj->GetIndirectKey("Font");
	if (fontObj && fontObj->IsArray())
	{
		PdfArray arr = fontObj->GetArray();
		if (arr[0].IsReference())
		{
			PdfReference ref = arr[0].GetReference();
			PdfObject* fontObject = m_doc->GetObjects().GetObject(ref);
			if (fontObject)
			{
				PDFFont font = getFontInfo(fontObject);
				usedFonts.append(font);
				currGS.font.first = font;
				currGS.font.second = arr[1].GetReal();
			}

		}
	}
	PdfObject* lwObj = extGStateObj->GetIndirectKey("LW");
	if (lwObj)
		currGS.lineWidth = lwObj->GetReal();
	PdfObject* lcObj = extGStateObj->GetIndirectKey("LC");
	if (lcObj)
		currGS.lineCap = lcObj->GetNumber();
	PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ");
	if (ljObj)
		currGS.lineJoin = ljObj->GetNumber();
	PdfObject* mlObj = extGStateObj->GetIndirectKey("ML");
	if (mlObj)
		currGS.miterLimit = mlObj->GetReal();
	PdfObject* dObj = extGStateObj->GetIndirectKey("D");
	if (dObj)
	{
		PdfArray dashArr = dObj->GetArray()[0];
		currGS.dashPattern.first.clear();
		for (int i=0; i<dashArr.GetSize(); ++i)
			currGS.dashPattern.first.append(dashArr[i].GetNumber());
		currGS.dashPattern.second = dObj->GetArray()[1].GetNumber();
	}
}
Example #9
0
bool PDFAnalyzer::inspectCanvas(PdfCanvas* canvas, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, QList<PDFImage> & imgs)
{
	// this method can be used to get used color spaces, detect transparency, and get used fonts in either PdfPage or PdfXObject
	PdfObject* colorSpaceRes;
	PdfObject* xObjects;
	PdfObject* transGroup;
	PdfObject* extGState;
	PdfObject* fontRes;
	QMap<PdfName, PDFColorSpace> processedNamedCS;
	QMap<PdfName, PDFFont> processedNamedFont;
	QList<PdfName> processedNamedXObj;
	QList<PdfName> processedNamedGS;
	try {
		// get hold of a PdfObject pointer of this canvas
		// needed for the finding resources code below to work
		PdfPage* page = dynamic_cast<PdfPage*>(canvas);
		PdfObject* canvasObject = page?(page->GetObject()):((dynamic_cast<PdfXObject*>(canvas))->GetObject());

		// find a resource with ColorSpace entry
		PdfObject* resources = canvas->GetResources();
		for (PdfObject* par = canvasObject; par && !resources; par = par->GetIndirectKey("Parent"))
		{
			resources = par->GetIndirectKey("Resources");
		}
		colorSpaceRes = resources?resources->GetIndirectKey("ColorSpace"):NULL;
		xObjects = resources?resources->GetIndirectKey("XObject"):NULL;
		extGState = resources?resources->GetIndirectKey("ExtGState"):NULL;
		fontRes = resources?resources->GetIndirectKey("Font"):NULL;

		// getting the transparency group of this content stream (if available)
		transGroup = canvasObject?canvasObject->GetIndirectKey("Group"):NULL;
		if (transGroup)
		{
			PdfObject* subtype = transGroup->GetIndirectKey("S");
			if (subtype && subtype->GetName() == "Transparency")
			{
				// having transparency group means there's transparency in the PDF
				hasTransparency = true;

				// reporting the color space used in transparency group (Section 7.5.5, PDF 1.6 Spec)
				PdfObject* cs = transGroup->GetIndirectKey("CS");
				if (cs)
				{
					PDFColorSpace retval = getCSType(cs);
					if (retval != CS_Unknown && !usedColorSpaces.contains(retval))
						usedColorSpaces.append(retval);
				}
			}
		}
	}
	catch (PdfError & e)
	{
		qDebug() << "Error in analyzing stream's resources.";
		e.PrintErrorMsg();
		return false;
	}

	try {
		// start parsing the content stream
		PdfContentsTokenizer tokenizer(canvas);
		EPdfContentsType t;
		const char * kwText;
		PdfVariant var;
		bool readToken;

		int tokenNumber = 0;
		QList<PdfVariant> args;
		bool inlineImgDict = false;
		QStack<PDFGraphicState> gsStack;
		PDFGraphicState currGS;
		while ((readToken = tokenizer.ReadNext(t, kwText, var)))
		{
			++tokenNumber;
			if (t == ePdfContentsType_Variant)
			{
				args.append(var);
			}
			else if (t == ePdfContentsType_Keyword)
			{
				QString kw(kwText);
				switch(kwNameMap.value(kw, KW_Undefined))
				{
				case KW_q:
					gsStack.push(currGS);
					break;
				case KW_Q:
					currGS = gsStack.pop();
					break;
				case KW_cm:
					{
					if (args.size() == 6)
					{
						double mt[6];
						for (int i=0; i<6; ++i)
						{
							mt[i] = args[i].GetReal();
						}
						QMatrix transMatrix(mt[0], mt[1], mt[2], mt[3], mt[4], mt[5]);
						currGS.ctm = transMatrix*currGS.ctm;
					}
					}
					break;
				case KW_w:
					currGS.lineWidth = args[0].GetReal();
					break;
				case KW_J:
					currGS.lineCap = args[0].GetNumber();
					break;
				case KW_j:
					currGS.lineJoin = args[0].GetNumber();
					break;
				case KW_M:
					currGS.lineJoin = args[0].GetReal();
					break;
				case KW_d:
					{
					currGS.dashPattern.first.clear();
					PdfArray dashArr = args[0].GetArray();
					for (int i=0; i<dashArr.size(); ++i)
						currGS.dashPattern.first.append(dashArr[i].GetNumber());
					currGS.dashPattern.second = args[0].GetNumber();
					}
					break;
				case KW_g:
					if (!usedColorSpaces.contains(CS_DeviceGray))
						usedColorSpaces.append(CS_DeviceGray);
					currGS.fillCS = CS_DeviceGray;
					currGS.fillColor.clear();
					currGS.fillColor.append(args[0].GetReal());
					break;
				case KW_G:
					if (!usedColorSpaces.contains(CS_DeviceGray))
						usedColorSpaces.append(CS_DeviceGray);
					currGS.strokeCS = CS_DeviceGray;
					currGS.strokeColor.clear();
					currGS.strokeColor.append(args[0].GetReal());
					break;
				case KW_rg:
					if (!usedColorSpaces.contains(CS_DeviceRGB))
						usedColorSpaces.append(CS_DeviceRGB);
					currGS.fillCS = CS_DeviceRGB;
					currGS.fillColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.fillColor.append(args[i].GetReal());
					break;
				case KW_RG:
					if (!usedColorSpaces.contains(CS_DeviceRGB))
						usedColorSpaces.append(CS_DeviceRGB);
					currGS.strokeCS = CS_DeviceRGB;
					currGS.strokeColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.strokeColor.append(args[i].GetReal());
					break;
				case KW_k:
					if (!usedColorSpaces.contains(CS_DeviceCMYK))
						usedColorSpaces.append(CS_DeviceCMYK);
					currGS.fillCS = CS_DeviceCMYK;
					currGS.fillColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.fillColor.append(args[i].GetReal());
					break;
				case KW_K:
					if (!usedColorSpaces.contains(CS_DeviceCMYK))
						usedColorSpaces.append(CS_DeviceCMYK);
					currGS.strokeCS = CS_DeviceCMYK;
					currGS.strokeColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.strokeColor.append(args[i].GetReal());
					break;
				case KW_cs:
					{
					if (args.size() == 1 && args[0].IsName())
					{
						if (args[0].GetName() == "DeviceGray")
						{
							currGS.fillCS = CS_DeviceGray;
							currGS.fillColor.clear();
							currGS.fillColor.append(0);
							if (!usedColorSpaces.contains(CS_DeviceGray))
								usedColorSpaces.append(CS_DeviceGray);
						}
						else if (args[0].GetName() == "DeviceRGB")
						{
							currGS.fillCS = CS_DeviceRGB;
							currGS.fillColor.clear();
							for (int i=0; i<3; ++i)
								currGS.fillColor.append(0);
							if (!usedColorSpaces.contains(CS_DeviceRGB))
								usedColorSpaces.append(CS_DeviceRGB);
						}
						else if (args[0].GetName() == "DeviceCMYK")
						{
							currGS.fillCS = CS_DeviceCMYK;
							currGS.fillColor.clear();
							for (int i=0; i<3; ++i)
								currGS.fillColor.append(0);
							currGS.fillColor.append(1);
							if (!usedColorSpaces.contains(CS_DeviceCMYK))
								usedColorSpaces.append(CS_DeviceCMYK);
						}
						else if (args[0].GetName() == "Pattern")
						{
							currGS.fillCS = CS_Pattern;
							if (!usedColorSpaces.contains(CS_Pattern))
								usedColorSpaces.append(CS_Pattern);
						}
						else
						{
							if (processedNamedCS.contains(args[0].GetName()))
							{
								currGS.fillCS = processedNamedCS.value(args[0].GetName());
							}
							else
							{
								if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName()))
								{
									PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName());
									PDFColorSpace retval = getCSType(csEntry);
									if (retval != CS_Unknown && !usedColorSpaces.contains(retval))
										usedColorSpaces.append(retval);
									currGS.fillCS = retval;
									processedNamedCS.insert(args[0].GetName(), retval);
								}
								else
								{
									qDebug() << "Supplied colorspace is undefined!";
									return false;
								}
							}
						}
					}
					else
					{
						qDebug() << "Wrong syntax in specifying color space!";
						return false;
					}
					}
					break;
				case KW_CS:
					{
					if (args.size() == 1 && args[0].IsName())
					{
						if (args[0].GetName() == "DeviceGray")
						{
							currGS.strokeCS = CS_DeviceGray;
							currGS.strokeColor.clear();
							currGS.strokeColor.append(0);
							if (!usedColorSpaces.contains(CS_DeviceGray))
								usedColorSpaces.append(CS_DeviceGray);
						}
						else if (args[0].GetName() == "DeviceRGB")
						{
							currGS.fillCS = CS_DeviceRGB;
							currGS.strokeColor.clear();
							for (int i=0; i<3; ++i)
								currGS.strokeColor.append(0);
							if (!usedColorSpaces.contains(CS_DeviceRGB))
								usedColorSpaces.append(CS_DeviceRGB);
						}
						else if (args[0].GetName() == "DeviceCMYK")
						{
							currGS.fillCS = CS_DeviceCMYK;
							currGS.strokeColor.clear();
							for (int i=0; i<3; ++i)
								currGS.strokeColor.append(0);
							currGS.strokeColor.append(1);
							if (!usedColorSpaces.contains(CS_DeviceCMYK))
								usedColorSpaces.append(CS_DeviceCMYK);
						}
						else if (args[0].GetName() == "Pattern")
						{
							currGS.fillCS = CS_Pattern;
							if (!usedColorSpaces.contains(CS_Pattern))
								usedColorSpaces.append(CS_Pattern);
						}
						else
						{
							if (processedNamedCS.contains(args[0].GetName()))
							{
								currGS.strokeCS = processedNamedCS.value(args[0].GetName());
							}
							else
							{
								if (colorSpaceRes && colorSpaceRes->GetIndirectKey(args[0].GetName()))
								{
									PdfObject* csEntry = colorSpaceRes->GetIndirectKey(args[0].GetName());
									PDFColorSpace retval = getCSType(csEntry);
									if (retval != CS_Unknown && !usedColorSpaces.contains(retval))
										usedColorSpaces.append(retval);
									currGS.strokeCS = retval;
									processedNamedCS.insert(args[0].GetName(), retval);
								}
								else
								{
									qDebug() << "Supplied colorspace is undefined!";
									return false;
								}
							}
						}
					}
					else
					{
						qDebug() << "Wrong syntax in specifying color space!";
						return false;
					}
					}
					break;
				case KW_sc:
					currGS.fillColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.fillColor.append(args[i].GetReal());
					break;
				case KW_SC:
					currGS.strokeColor.clear();
					for (int i=0; i<args.size(); ++i)
						currGS.strokeColor.append(args[i].GetReal());
					break;
				case KW_scn:
					currGS.fillColor.clear();
					for (int i=0; i<args.size(); ++i)
					{
						if (args[i].IsReal() || args[i].IsNumber())
							currGS.fillColor.append(args[i].GetReal());
					}
					break;
				case KW_SCN:
					currGS.strokeColor.clear();
					for (int i=0; i<args.size(); ++i)
					{
						if (args[i].IsReal() || args[i].IsNumber())
							currGS.strokeColor.append(args[i].GetReal());
					}
					break;
				case KW_Do: // image or form XObject
					{
					if (!processedNamedXObj.contains(args[0].GetName()))
					{
						if (args.size() == 1 && args[0].IsName() && xObjects)
						{
							PdfObject* xObject = xObjects->GetIndirectKey(args[0].GetName());
							PdfObject* subtypeObject = xObject?xObject->GetIndirectKey("Subtype"):NULL;
							if (subtypeObject && subtypeObject->IsName())
							{
								if (subtypeObject->GetName() == "Image")
								{
									PdfObject* imgColorSpace = xObject->GetIndirectKey("ColorSpace");
									if (imgColorSpace)
									{
										PDFColorSpace retval = getCSType(imgColorSpace);
										if (retval != CS_Unknown && !usedColorSpaces.contains(retval))
											usedColorSpaces.append(retval);
									}
									PdfObject* sMaskObj = xObject->GetIndirectKey("SMask");
									if (sMaskObj)
										hasTransparency = true;
									PDFImage img;
									img.imgName = args[0].GetName().GetEscapedName().c_str();
									double width = xObject->GetIndirectKey("Width")->GetReal();
									double height = xObject->GetIndirectKey("Height")->GetReal();
									img.dpiX = qRound(width/(currGS.ctm.m11()/72));
									img.dpiY = qRound(height/(currGS.ctm.m22()/72));
									imgs.append(img);
								}
								else if (subtypeObject->GetName() == "Form")
								{
									PdfXObject xObj(xObject);
									inspectCanvas(&xObj, usedColorSpaces, hasTransparency, usedFonts, imgs); // recursive call
								}
							}
							else
							{
								qDebug() << "Supplied external object is undefined!";
								return false;
							}
							processedNamedXObj.append(args[0].GetName());
						}
						else
						{
							qDebug() << "Wrong syntax for Do operator or there's no XObject defined!";
							return false;
						}

					}
					}
					break;
				case KW_BI:
					inlineImgDict = true;
					break;
				case KW_ID:
					if (inlineImgDict)
					{
						PdfName colorspace("ColorSpace");
						PdfName cs("CS");
						if (args.contains(colorspace) || args.contains(cs))
						{
							int csIdx = args.contains(colorspace)?args.indexOf(colorspace):args.indexOf(cs);
							if (args[csIdx+1].IsName())
							{
								PdfName csName = args[csIdx+1].GetName();
								if ((csName == "G" || csName == "DeviceGray") && !usedColorSpaces.contains(CS_DeviceGray))
									usedColorSpaces.append(CS_DeviceGray);
								else if ((csName == "RGB" || csName == "DeviceRGB") && !usedColorSpaces.contains(CS_DeviceRGB))
									usedColorSpaces.append(CS_DeviceRGB);
								else if ((csName == "CMYK" || csName == "DeviceCMYK") && !usedColorSpaces.contains(CS_DeviceCMYK))
									usedColorSpaces.append(CS_DeviceCMYK);
								else if (!processedNamedCS.contains(csName))
								{
									if (colorSpaceRes && colorSpaceRes->GetIndirectKey(csName))
									{
										PdfObject* csEntry = colorSpaceRes->GetIndirectKey(csName);
										if (csEntry)
										{
											PDFColorSpace retval = getCSType(csEntry);
											if (retval != CS_Unknown && !usedColorSpaces.contains(retval))
												usedColorSpaces.append(retval);
											processedNamedCS.insert(csName, retval);
										}
									}
									else
									{
										qDebug() << "Supplied colorspace for inline image is undefined!";
										return false;
									}
								}
							}
						}
						PdfName height("Height");
						PdfName h("H");
						PdfName width("Width");
						PdfName w("W");
						if ((args.contains(height) || args.contains(h)) && (args.contains(width) || args.contains(w)))
						{
							int heightIdx = args.contains(height)?args.indexOf(height):args.indexOf(h);
							int widthIdx = args.contains(width)?args.indexOf(width):args.indexOf(w);
							double height = args[heightIdx+1].GetReal();
							double width = args[widthIdx+1].GetReal();
							PDFImage img;
							img.imgName = "Inline Image";
							img.dpiX = qRound(width/(currGS.ctm.m11()/72));
							img.dpiY = qRound(height/(currGS.ctm.m22()/72));
							imgs.append(img);
						}
						inlineImgDict = false;
					}
					break;
				case KW_gs:
					{
					if (!processedNamedGS.contains(args[0].GetName()))
					{
						if (args.size() == 1 && args[0].IsName() && extGState)
						{
							PdfObject* extGStateObj = extGState->GetIndirectKey(args[0].GetName());
							if (extGStateObj)
							{
								inspectExtGStateObj(extGStateObj, usedColorSpaces, hasTransparency, usedFonts, currGS);
							}
							else
							{
								qDebug() << "Named graphic state used with gs operator is undefined in current ExtGState";
								return false;
							}
							processedNamedGS.append(args[0].GetName());
						}
						else
						{
							qDebug() << "Wrong syntax in applying extended graphic state (gs operator) or there's no ExtGState defined!";
							return false;
						}
					}
					}
					break;
				case KW_Tf:
					{
					if (processedNamedFont.contains(args[0].GetName()))
					{
						currGS.font.first = processedNamedFont.value(args[0].GetName());
						currGS.font.second = args[1].GetReal();
					}
					else
					{
						if (args.size() == 2 && args[0].IsName() && fontRes)
						{
							PdfObject* fontObj = fontRes->GetIndirectKey(args[0].GetName());
							if (fontObj)
							{
								PDFFont retval = getFontInfo(fontObj);
								usedFonts.append(retval);
								processedNamedFont.insert(args[0].GetName(), retval);
								currGS.font.first = retval;
								currGS.font.second = args[1].GetReal();
							}
							else
							{
								qDebug() << "The specified font cannot be found in current Resources!";
								return false;
							}
						}
						else
						{
							qDebug() << "Wrong syntax in use of Tf operator or there's no Font defined in current Resources dictionary!";
							return false;
						}
					}
					}
					break;
				case KW_Undefined:
				default:
					break;
				}
				args.clear();
			}
		}
	}
	catch (PdfError & e)
	{
		qDebug() << "Error in parsing content stream";
		e.PrintErrorMsg();
		return false;
	}
	return true;
}
Example #10
0
void PdfParser::ReadXRefStreamContents( pdf_long lOffset, bool bReadOnlyTrailer )
{
    char*       pBuffer;
    char*       pStart;
    pdf_long        lBufferLen;
    long long        lSize     = 0;
    PdfVariant  vWArray;
    PdfObject*  pObj;

    long        nW[W_ARRAY_SIZE] = { 0, 0, 0 };
    int         i;

    m_device.Device()->Seek( lOffset );

    PdfParserObject xrefObject( m_vecObjects, m_device, m_buffer );
    // Ignore the encryption in the XREF as the XREF stream must no be encrypted (see PDF Reference 3.4.7)
    xrefObject.ParseFile( NULL );

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeyType ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    pObj = xrefObject.GetDictionary().GetKey( PdfName::KeyType );
    if( !pObj->IsName() || ( pObj->GetName() != "XRef" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !m_pTrailer )
        m_pTrailer = new PdfParserObject( m_vecObjects, m_device, m_buffer );

    MergeTrailer( &xrefObject );

    if( bReadOnlyTrailer )
        return;

    if( !xrefObject.GetDictionary().HasKey( PdfName::KeySize ) || !xrefObject.GetDictionary().HasKey( "W" ) )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    lSize   = xrefObject.GetDictionary().GetKeyAsLong( PdfName::KeySize, 0 );
    vWArray = *(xrefObject.GetDictionary().GetKey( "W" ));

    // The pdf reference states that W is always an array with 3 entries
    // all of them have to be integeres
    if( !vWArray.IsArray() || vWArray.GetArray().size() != 3 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    for( i=0;i<W_ARRAY_SIZE;i++ )
    {
        if( !vWArray.GetArray()[i].IsNumber() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        nW[i] = static_cast<long>(vWArray.GetArray()[i].GetNumber());
    }

    std::vector<long long> vecIndeces;
    // get the first object number in this crossref stream.
    // it is not required to have an index key though.
    if( xrefObject.GetDictionary().HasKey( "Index" ) )
    {
        // reuse vWArray!!
        vWArray = *(xrefObject.GetDictionary().GetKey( "Index" ));
        if( !vWArray.IsArray() )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoXRef );
        }

        TCIVariantList it = vWArray.GetArray().begin();
        while ( it != vWArray.GetArray().end() )
        {
            vecIndeces.push_back( (*it).GetNumber() );
            ++it;
        }
    }
    else
    {
        vecIndeces.push_back( 0 );
        vecIndeces.push_back( lSize );
    }

    if( vecIndeces.size() % 2 )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    if( !xrefObject.HasStreamToParse() )
    {
        PODOFO_RAISE_ERROR( ePdfError_NoXRef );
    }

    xrefObject.GetStream()->GetFilteredCopy( &pBuffer, &lBufferLen );

    pStart        = pBuffer;
    int nCurIndex = 0;
    while( nCurIndex < static_cast<pdf_long>(vecIndeces.size()) && pBuffer - pStart < lBufferLen )
    {
        int nFirstObj = static_cast<int>(vecIndeces[nCurIndex]);
        long long nCount    = vecIndeces[nCurIndex+1];

        while( nCount-- && pBuffer - pStart < lBufferLen )
        {
            ReadXRefStreamEntry( pBuffer, lBufferLen, nW, nFirstObj++ );
            pBuffer += (nW[0] + nW[1] + nW[2]);
        }

        nCurIndex += 2;
    }
    free( pStart );

    if( xrefObject.GetDictionary().HasKey("Prev") )
    {
        lOffset = static_cast<pdf_long>(xrefObject.GetDictionary().GetKeyAsLong( "Prev", 0 ));
        ReadXRefStreamContents( lOffset, bReadOnlyTrailer );
    }
}