Example #1
0
PdfAnnotation* PdfPage::GetAnnotation( int index )
{
    PdfAnnotation* pAnnot;
    PdfReference   ref;

    PdfObject*     pObj   = this->GetAnnotationsArray( false );

    if( !(pObj && pObj->IsArray()) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }
    
    if( index < 0 && static_cast<unsigned int>(index) >= pObj->GetArray().size() )
    {
        PODOFO_RAISE_ERROR( ePdfError_ValueOutOfRange );
    }

    ref    = pObj->GetArray()[index].GetReference();
    pAnnot = m_mapAnnotations[ref];
    if( !pAnnot )
    {
        pObj = m_pObject->GetOwner()->GetObject( ref );
        if( !pObj )
        {
            PdfError::DebugMessage( "Error looking up object %i %i R\n", ref.ObjectNumber(), ref.GenerationNumber() );
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }
     
        pAnnot = new PdfAnnotation( pObj, this );
        m_mapAnnotations[ref] = pAnnot;
    }

    return pAnnot;
}
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer()
{
    PdfObject* pContents = pCanvas->GetContents();
    if( pContents && pContents->IsArray()  ) 
    {
        PdfArray& a = pContents->GetArray();
        for ( PdfArray::iterator it = a.begin(); it != a.end() ; ++it )
        {
            if ( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );

            }
            
            m_lstContents.push_back( pContents->GetOwner()->GetObject( (*it).GetReference() ) );
        }
    }
    else if ( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}
Example #3
0
PDFFont PDFAnalyzer::getFontInfo(PdfObject* fontObj)
{
	PDFFont currFont;
	PdfObject* subtype = fontObj->GetIndirectKey("Subtype");
	if (subtype && subtype->IsName())
	{
		PdfObject* fontDesc = fontObj->GetIndirectKey("FontDescriptor");
		if (subtype->GetName() == "Type1")
			currFont.fontType = F_Type1;
		else if (subtype->GetName() == "MMType1")
			currFont.fontType = F_MMType1;
		else if (subtype->GetName() == "TrueType")
			currFont.fontType = F_TrueType;
		else if (subtype->GetName() == "Type3")
		{
			currFont.fontType = F_Type3;
			currFont.isEmbedded = true;
			fontDesc = NULL;
		}
		else if (subtype->GetName() == "Type0")
		{
			PdfObject* descendantFonts = fontObj->GetIndirectKey("DescendantFonts");
			if (descendantFonts && descendantFonts->IsArray())
			{
				PdfObject descendantFont = descendantFonts->GetArray()[0];
				descendantFont.SetOwner(descendantFonts->GetOwner());
				PdfObject* subtypeDescFont = descendantFont.GetIndirectKey("Subtype");
				fontDesc = descendantFont.MustGetIndirectKey("FontDescriptor");
				if (subtypeDescFont && subtypeDescFont->IsName())
				{
					if (subtypeDescFont->GetName() == "CIDFontType0")
						currFont.fontType = F_CIDFontType0;
					else if (subtypeDescFont->GetName() == "CIDFontType2")
						currFont.fontType = F_CIDFontType2;
				}
			}
		}
		if (fontDesc)
		{
			PdfObject* fontFile = fontDesc->GetIndirectKey("FontFile");
			PdfObject* fontFile2 = fontDesc->GetIndirectKey("FontFile2");
			PdfObject* fontFile3 = fontDesc->GetIndirectKey("FontFile3");
			if (fontFile && fontFile->HasStream())
				currFont.isEmbedded = true;
			if (fontFile2 && fontFile2->HasStream())
				currFont.isEmbedded = true;
			if (fontFile3 && fontFile3->HasStream())
			{
				currFont.isEmbedded = true;
				PdfObject* ff3Subtype = fontFile3->GetIndirectKey("Subtype");
				if (ff3Subtype && ff3Subtype->IsName() && ff3Subtype->GetName() == "OpenType")
					currFont.isOpenType = true;
			}
		}
	}
	return currFont;
}
Example #4
0
const PdfRect PdfPage::GetPageBox( const char* inBox ) const
{
    PdfRect	 pageBox;
    PdfObject*   pObj;
        
    // Take advantage of inherited values - walking up the tree if necessary
    pObj = GetInheritedKeyFromObject( inBox, m_pObject );
    
    // assign the value of the box from the array
    if ( pObj && pObj->IsArray() )
        pageBox.FromArray( pObj->GetArray() );
    
    return pageBox;
}
Example #5
0
void PdfPage::DeleteAnnotation( const PdfReference & ref )
{
    PdfAnnotation*     pAnnot;
    PdfArray::iterator it;
    PdfObject*         pObj   = this->GetAnnotationsArray( false );
    bool               bFound = false;

    // delete the annotation from the array

    if( !(pObj && pObj->IsArray()) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }

    it = pObj->GetArray().begin();
    while( it != pObj->GetArray().end() ) 
    {
        if( (*it).GetReference() == ref ) 
        {
            pObj->GetArray().erase( it );
            bFound = true;
            break;
        }

        ++it;
    }

    // if no such annotation was found
    // throw an error instead of deleting
    // another object with this reference
    if( !bFound ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_NoObject );
    }

    // delete any cached PdfAnnotations
    pAnnot = m_mapAnnotations[ref];
    if( pAnnot )
    {
        delete pAnnot;
        m_mapAnnotations.erase( ref );
    }

    // delete the PdfObject in the file
    delete m_pObject->GetOwner()->RemoveObject( ref );
}
Example #6
0
void PdfPage::DeleteAnnotation( int index )
{
    PdfReference   ref;
    PdfObject*     pObj   = this->GetAnnotationsArray( false );
    
    if( !(pObj && pObj->IsArray()) )
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidDataType );
    }
    
    if( index < 0 && static_cast<unsigned int>(index) >= pObj->GetArray().size() )
    {
        PODOFO_RAISE_ERROR( ePdfError_ValueOutOfRange );
    }

    ref    = pObj->GetArray()[index].GetReference();

    this->DeleteAnnotation( ref );
}
Example #7
0
PdfObject* PdfPage::GetAnnotationsArray( bool bCreate ) const
{
    PdfObject* pObj;

    // check for it in the object itself
    if ( m_pObject->GetDictionary().HasKey( "Annots" ) ) 
    {
        pObj = m_pObject->GetIndirectKey( "Annots" );
        if( pObj && pObj->IsArray() )
            return pObj;
    }
    else if( bCreate ) 
    {
        PdfArray array;
        const_cast<PdfPage*>(this)->m_pObject->GetDictionary().AddKey( "Annots", array );
        return m_pObject->GetDictionary().GetKey( "Annots" );
    }

    return NULL;
}
void PdfDestination::Init( PdfObject* pObject, PdfDocument* pDocument )
{
    bool bValueExpected = false;
    PdfObject* pValue = NULL;

    if ( pObject->GetDataType() == ePdfDataType_Array ) 
    {
        m_array = pObject->GetArray();
        m_pObject = pObject;
    }
    else if( pObject->GetDataType() == ePdfDataType_String ) 
    {
        PdfNamesTree* pNames = pDocument->GetNamesTree( ePdfDontCreateObject );
        if( !pNames ) 
        {
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }
            
        pValue = pNames->GetValue( "Dests", pObject->GetString() );
        bValueExpected = true;
    }
    else if( pObject->GetDataType() == ePdfDataType_Name )
    {
        PdfMemDocument* pMemDoc = dynamic_cast<PdfMemDocument*>(pDocument);
        if ( !pMemDoc )
        { 
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle,
                "For reading from a document, only use PdfMemDocument." );
        }

        PdfObject* pCatalog = pMemDoc->GetCatalog();
        if ( !pCatalog )
        {
            PODOFO_RAISE_ERROR( ePdfError_NoObject );
        }
 
        PdfObject* pDests = pCatalog->GetIndirectKey( PdfName( "Dests" ) );
        if( !pDests )
        {
            // The error code has been chosen for its distinguishability.
            PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidKey,
                "No PDF-1.1-compatible destination dictionary found." );
        }
        pValue = pDests->GetIndirectKey( pObject->GetName() );
        bValueExpected = true;
    } 
    else
    {
        PdfError::LogMessage( eLogSeverity_Error, "Unsupported object given to"
            " PdfDestination::Init of type %s", pObject->GetDataTypeString() );
        m_array = PdfArray(); // needed to prevent crash on method calls
        // needed for GetObject() use w/o checking its return value for NULL
        m_pObject = pDocument->GetObjects()->CreateObject( m_array );
    }
    if ( bValueExpected )
    {
        if( !pValue ) 
        {
            PODOFO_RAISE_ERROR( ePdfError_InvalidName );
        }

        if( pValue->IsArray() ) 
            m_array = pValue->GetArray();
        else if( pValue->IsDictionary() )
            m_array = pValue->GetDictionary().GetKey( "D" )->GetArray();
        m_pObject = pValue;
    }
}
Example #9
0
    void PDFProcessor::start ()
    {
        int nNum = 0;
        try
        {
            PdfObject*  pObj  = NULL;

            // open document
            qDebug() << "Opening file: " << filename.toStdString().c_str();
            PdfMemDocument document( filename.toStdString().c_str() );

//    	    m_pszOutputDirectory = const_cast<char*>(pszOutput);

            TCIVecObjects it = document.GetObjects().begin();

            while( it != document.GetObjects().end() )
            {
                if( (*it)->IsDictionary() )
                {
                    PdfObject* pObjType = (*it)->GetDictionary().GetKey( PdfName::KeyType );
                    PdfObject* pObjSubType = (*it)->GetDictionary().GetKey( PdfName::KeySubtype );
                    if( ( pObjType && pObjType->IsName() && ( pObjType->GetName().GetName() == "XObject" ) ) ||
                            ( pObjSubType && pObjSubType->IsName() && ( pObjSubType->GetName().GetName() == "Image" ) ) )
                    {
                        pObj = (*it)->GetDictionary().GetKey( PdfName::KeyFilter );
                        if( pObj && pObj->IsArray() && pObj->GetArray().GetSize() == 1 &&
                                pObj->GetArray()[0].IsName() && (pObj->GetArray()[0].GetName().GetName() == "DCTDecode") )
                            pObj = &pObj->GetArray()[0];

                        std::string filterName = pObj->GetName().GetName();
                        bool processed = 0;

                        if( pObj && pObj->IsName() && ( filterName == "DCTDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }

                        if( pObj && pObj->IsName() && ( filterName == "JPXDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }

                        if( pObj && pObj->IsName() && ( filterName == "FlateDecode" ) )
                        {
                            // The only filter is JPEG -> create a JPEG file
                            qDebug() << "JPG found.\n";
                            processed = true;
                            nNum++;
                        }


                        // else we found something strange, we do not care about it for now.
                        if (processed == false)
                        {
                            qDebug() << "Unknown image type found:" << QString::fromStdString(filterName) << "\n";
                            nNum++;
                        }

                        document.FreeObjectMemory( *it );
                    }
                }

                ++it;
            }


        }
        catch( PdfError & e )
        {
            qDebug() << "Error: An error ocurred during processing the pdf file:" << e.GetError();
            e.PrintErrorMsg();
            return;// e.GetError();
        }

        // TODO: statistics of no of images etc
//      nNum = extractor.GetNumImagesExtracted();

        qDebug() << "Extracted " << nNum << " images successfully from the PDF file.\n";
    }
void PdfMemStream::FlateCompress()
{
    PdfObject*        pObj;
    PdfVariant        vFilter( PdfName("FlateDecode" ) );
    PdfVariant        vFilterList;
    PdfArray          tFilters;

    PdfArray::const_iterator tciFilters;
    
    if( !m_lLength )
        return; // ePdfError_ErrOk

    // TODO: Handle DecodeParms
    if( m_pParent->GetDictionary().HasKey( "Filter" ) )
    {
        pObj = m_pParent->GetIndirectKey( "Filter" );

        if( pObj->IsName() )
        {
            if( pObj->GetName() != "DCTDecode" && pObj->GetName() != "FlateDecode" )
            {
                tFilters.push_back( vFilter );
                tFilters.push_back( *pObj );
            }
        }
        else if( pObj->IsArray() )
        {
            tciFilters = pObj->GetArray().begin();

            while( tciFilters != pObj->GetArray().end() )
            {
                if( (*tciFilters).IsName() )
                {
                    // do not compress DCTDecoded are already FlateDecoded streams again
                    if( (*tciFilters).GetName() == "DCTDecode" || (*tciFilters).GetName() == "FlateDecode" )
                    {
                        return;
                    }
                }

                ++tciFilters;
            }

            tFilters.push_back( vFilter );

            tciFilters = pObj->GetArray().begin();

            while( tciFilters != pObj->GetArray().end() )
            {
                tFilters.push_back( (*tciFilters) );
                
                ++tciFilters;
            }
        }
        else
            return;

        vFilterList = PdfVariant( tFilters );
        m_pParent->GetDictionary().AddKey( "Filter", vFilterList );

        FlateCompressStreamData(); // throws an exception on error
    }
    else
    {
        m_pParent->GetDictionary().AddKey( "Filter", PdfName( "FlateDecode" ) );
        FlateCompressStreamData();
    }
}
Example #11
0
void PDFAnalyzer::inspectExtGStateObj(PdfObject* extGStateObj, QList<PDFColorSpace> & usedColorSpaces, bool & hasTransparency, QList<PDFFont> & usedFonts, PDFGraphicState & currGS)
{
	PdfObject* bmObj = extGStateObj->GetIndirectKey("BM");
	if (bmObj && bmObj->IsName())
	{
		currGS.blendModes.clear();
		currGS.blendModes.append(bmObj->GetName().GetEscapedName().c_str());
		if (!(bmObj->GetName() == "Normal" || bmObj->GetName() == "Compatible"))
			hasTransparency = true;
	}
	else if (bmObj && bmObj->IsArray())
	{
		PdfArray arr = bmObj->GetArray();
		currGS.blendModes.clear();
		for(int i=0; i<arr.GetSize(); ++i)
			currGS.blendModes.append(arr[i].GetName().GetEscapedName().c_str());
		if (arr[0].IsName() && !(arr[0].GetName() == "Normal" || arr[0].GetName() == "Compatible"))
			hasTransparency = true;
	}
	PdfObject* caObj = extGStateObj->GetIndirectKey("ca");
	if (caObj && (caObj->IsReal() || caObj->IsNumber()))
	{
		currGS.fillAlphaConstant = caObj->GetReal();
		if (caObj->GetReal() < 1)
			hasTransparency = true;
	}
	PdfObject* cAObj = extGStateObj->GetIndirectKey("CA");
	if (cAObj && (cAObj->IsReal() || cAObj->IsNumber()))
	{
		if (cAObj->GetReal() < 1)
		hasTransparency = true;
	}
	PdfObject* sMaskObj = extGStateObj->GetIndirectKey("SMask");
	if (sMaskObj && !(sMaskObj->IsName() && sMaskObj->GetName() == "None"))
		hasTransparency = true;
	PdfObject* fontObj = extGStateObj->GetIndirectKey("Font");
	if (fontObj && fontObj->IsArray())
	{
		PdfArray arr = fontObj->GetArray();
		if (arr[0].IsReference())
		{
			PdfReference ref = arr[0].GetReference();
			PdfObject* fontObject = m_doc->GetObjects().GetObject(ref);
			if (fontObject)
			{
				PDFFont font = getFontInfo(fontObject);
				usedFonts.append(font);
				currGS.font.first = font;
				currGS.font.second = arr[1].GetReal();
			}

		}
	}
	PdfObject* lwObj = extGStateObj->GetIndirectKey("LW");
	if (lwObj)
		currGS.lineWidth = lwObj->GetReal();
	PdfObject* lcObj = extGStateObj->GetIndirectKey("LC");
	if (lcObj)
		currGS.lineCap = lcObj->GetNumber();
	PdfObject* ljObj = extGStateObj->GetIndirectKey("LJ");
	if (ljObj)
		currGS.lineJoin = ljObj->GetNumber();
	PdfObject* mlObj = extGStateObj->GetIndirectKey("ML");
	if (mlObj)
		currGS.miterLimit = mlObj->GetReal();
	PdfObject* dObj = extGStateObj->GetIndirectKey("D");
	if (dObj)
	{
		PdfArray dashArr = dObj->GetArray()[0];
		currGS.dashPattern.first.clear();
		for (int i=0; i<dashArr.GetSize(); ++i)
			currGS.dashPattern.first.append(dashArr[i].GetNumber());
		currGS.dashPattern.second = dObj->GetArray()[1].GetNumber();
	}
}
PdfContentsTokenizer::PdfContentsTokenizer( PdfCanvas* pCanvas )
    : PdfTokenizer(), m_readingInlineImgData(false)
{
    if( !pCanvas ) 
    {
        PODOFO_RAISE_ERROR( ePdfError_InvalidHandle );
    }

    PdfObject* pContents = pCanvas->GetContents();
    if( pContents && pContents->IsArray()  )
    {
        PdfArray& a = pContents->GetArray();
        for ( PdfArray::iterator it = a.begin(); it != a.end() ; ++it )
        {
            if ( !(*it).IsReference() )
            {
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array contained non-references" );

            }

            if ( !pContents->GetOwner()->GetObject( (*it).GetReference() ) )
            {
                // some damaged PDFs may have dangling references
                PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "/Contents array NULL reference" );
            }

            m_lstContents.push_back( pContents->GetOwner()->GetObject( (*it).GetReference() ) );
        }
    }
    else if ( pContents && pContents->HasStream() )
    {
        m_lstContents.push_back( pContents );
    }
    else if ( pContents && pContents->IsDictionary() )
    {
        m_lstContents.push_back( pContents );
        PdfError::LogMessage(eLogSeverity_Information,
                  "PdfContentsTokenizer: found canvas-dictionary without stream => empty page");
        // OC 18.09.2010 BugFix: Found an empty page in a PDF document:
        //    103 0 obj
        //    <<
        //    /Type /Page
        //    /MediaBox [ 0 0 595 842 ]
        //    /Parent 3 0 R
        //    /Resources <<
        //    /ProcSet [ /PDF ]
        //    >>
        //    /Rotate 0
        //    >>
        //    endobj
    }
    else
    {
        PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidDataType, "Page /Contents not stream or array of streams" );
    }

    if( m_lstContents.size() )
    {
        SetCurrentContentsStream( m_lstContents.front() );
        m_lstContents.pop_front();
    }
}