Beispiel #1
0
std::list<FormImage> extract(const std::string& filename, Form& form)
{
    std::list<FormImage> images;
    ColorSpace colorspace;
    PoDoFo::pdf_int64 componentbits;
    PoDoFo::PdfObject* obj = nullptr;
    PoDoFo::PdfObject* color = nullptr;
    PoDoFo::PdfObject* component = nullptr;
    PoDoFo::PdfMemDocument document(filename.c_str());
    PoDoFo::TCIVecObjects it = document.GetObjects().begin();

    while (it != document.GetObjects().end())
    {
        if ((*it)->IsDictionary())
        {
            PoDoFo::PdfObject* objType = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeyType);
            PoDoFo::PdfObject* objSubType = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeySubtype);

            if ((objType    && objType->IsName()    && objType->GetName().GetName() == "XObject") ||
                (objSubType && objSubType->IsName() && objSubType->GetName().GetName() == "Image" ))
            {
                // Colorspace
                color = (*it)->GetDictionary().GetKey(PoDoFo::PdfName("ColorSpace"));
                colorspace = ColorSpace::Unknown;

                if (color && color->IsReference())
                    color = document.GetObjects().GetObject(color->GetReference());

                // Follow ICCBased reference to the Alternate colorspace
                if (color && color->IsArray() && color->GetArray().GetSize() == 2 &&
                        // First item is ICCBased
                        color->GetArray()[0].IsName() &&
                        color->GetArray()[0].GetName().GetName() == "ICCBased" &&
                        // Second item is reference to color space
                        color->GetArray()[1].IsReference())
                {
                    color = document.GetObjects().GetObject(color->GetArray()[1].GetReference());

                    if (color)
                        color = color->GetDictionary().GetKey(PoDoFo::PdfName("Alternate"));
                }

                // Check if either RGB or Grayscale (either the specified
                // colorspace or the alternate if using an ICCBased colorspace)
                if (color && color->IsName())
                {
                    std::string col = color->GetName().GetName();

                    if (col == "DeviceRGB")
                        colorspace = ColorSpace::RGB;
                    else if (col == "DeviceGray")
                        colorspace = ColorSpace::Gray;
                }

                // Bits per component
                component = (*it)->GetDictionary().GetKey(PoDoFo::PdfName("BitsPerComponent"));
                componentbits = 8;

                if (component && component->IsNumber())
                    componentbits = component->GetNumber();

                // Stream
                obj = (*it)->GetDictionary().GetKey(PoDoFo::PdfName::KeyFilter);

                // JPEG and Flate are in another array
                if (obj && obj->IsArray() && obj->GetArray().GetSize() == 1 &&
                    ((obj->GetArray()[0].IsName() && obj->GetArray()[0].GetName().GetName() == "DCTDecode") ||
                     (obj->GetArray()[0].IsName() && obj->GetArray()[0].GetName().GetName() == "FlateDecode")))
                    obj = &obj->GetArray()[0];

                Pixels pixels;

                if (obj && obj->IsName())
                {
                    std::string name = obj->GetName().GetName();

                    if (name == "DCTDecode")
                        pixels = readPDFImage(*it, PixelType::JPG, colorspace, componentbits, filename, form);
                    else if (name == "CCITTFaxDecode")
                        pixels = readPDFImage(*it, PixelType::TIF, colorspace, componentbits, filename, form);
                    // PNM is the default
                    //else if (name == "FlateDecode")
                    //  pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form);
                    else
                        pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form);
                }
                else
                {
                    pixels = readPDFImage(*it, PixelType::PNM, colorspace, componentbits, filename, form);
                }

                document.FreeObjectMemory(*it);

                if (pixels.isLoaded())
                    images.push_back(FormImage(form, std::move(pixels)));
            }
        }

        ++it;
    }

    return images;
}