/*
    Writes a modified copy of a resources dictionary to the output.

    Every entry of inResourcesDictionary other than "XObject" is copied as is. The
    "XObject" entry is rebuilt: the existing xobjects (if any) are copied, and one
    entry is added per form held in mContenxts, each referencing the form's object ID.

    inParser                - parser used to resolve the existing "XObject" dictionary
    inResourcesDictionary   - the source resources dictionary
    inObjectContext         - objects context used for the xobject sub-dictionary and for closing both dictionaries
    inCopyingContext        - copying context used to copy the existing values

    Returns the resource names given to the added forms, in the order of mContenxts.
*/
vector<string> PDFModifiedPage::WriteModifiedResourcesDict(PDFParser* inParser,PDFDictionary* inResourcesDictionary,ObjectsContext& inObjectContext,PDFDocumentCopyingContext* inCopyingContext)
{
    vector<string> newFormNames;

    MapIterator<PDFNameToPDFObjectMap> entryIt = inResourcesDictionary->GetIterator();

    // open the replacement resources dictionary
    DictionaryContext* resourcesContext = mWriter->GetObjectsContext().StartDictionary();

    // carry over every entry except "XObject", which is rebuilt below
    while(entryIt.MoveNext())
    {
        if(entryIt.GetKey()->GetValue() == "XObject")
            continue;

        resourcesContext->WriteKey(entryIt.GetKey()->GetValue());
        inCopyingContext->CopyDirectObjectAsIs(entryIt.GetValue());
    }

    // the rebuilt xobject dictionary
    resourcesContext->WriteKey("XObject");
    DictionaryContext* xobjectContext = inObjectContext.StartDictionary();

    PDFObjectCastPtr<PDFDictionary> sourceXObjects(inParser->QueryDictionaryObject(inResourcesDictionary,"XObject"));
    string namePrefix;
    if(sourceXObjects.GetPtr())
    {
        // Build a prefix that cannot equal any existing name (diagonal argument):
        // its character at position k is derived from the k'th existing name's
        // character at position k via GetDifferentChar (presumably yielding a
        // different character - confirm against its definition). Names that are
        // too short to have a position k contribute a fixed 0x39 seed instead.
        MapIterator<PDFNameToPDFObjectMap> xobjectIt = sourceXObjects->GetIterator();
        for(unsigned long position = 0; xobjectIt.MoveNext(); ++position)
        {
            string existingName = xobjectIt.GetKey()->GetValue();

            xobjectContext->WriteKey(existingName);
            inCopyingContext->CopyDirectObjectAsIs(xobjectIt.GetValue());

            namePrefix.push_back((char)(GetDifferentChar((position < existingName.length()) ? existingName[position]:0x39)));
        }
        inObjectContext.EndLine();
    }

    // new form names are "<prefix>_<running index>"
    namePrefix.push_back('_');

    int formIndex = 0;
    for(PDFFormXObjectVector::iterator formIt = mContenxts.begin(); formIt != mContenxts.end(); ++formIt, ++formIndex)
    {
        string formName = namePrefix + Int(formIndex).ToString();

        xobjectContext->WriteKey(formName);
        xobjectContext->WriteObjectReferenceValue((*formIt)->GetObjectID());
        newFormNames.push_back(formName);
    }

    inObjectContext.EndDictionary(xobjectContext);
    inObjectContext.EndDictionary(resourcesContext);

    return newFormNames;
}
/*
    Prints one line to cout per entry of an XObject resources dictionary, in the form
        XObject named <name> is object <id> of type <Subtype>

    parser   - parser used to load the object each entry refers to
    xobjects - the XObject dictionary to list; a null pointer prints nothing

    Entries that cannot be fully resolved (value is not an indirect reference, the
    referenced object is not a stream, or the stream dictionary has no "Subtype"
    name) are reported on their line instead of being dereferenced.
*/
void showXObjectsPerPageInfo(PDFParser& parser,PDFObjectCastPtr<PDFDictionary> xobjects)
{
    if(!xobjects)
        return;

    RefCountPtr<PDFName> key;
    PDFObjectCastPtr<PDFIndirectObjectReference> value;

    MapIterator<PDFNameToPDFObjectMap> it = xobjects->GetIterator();
    while(it.MoveNext())
    {
        key = it.GetKey();
        value = it.GetValue();

        cout << "XObject named " << key->GetValue().c_str();

        // the cast pointer is null when the entry is not an indirect reference
        if(!value)
        {
            cout << " is not an indirect object reference\n";
            continue;
        }

        cout << " is object " << value->mObjectID << " of type ";

        PDFObjectCastPtr<PDFStreamInput> xobject(parser.ParseNewObject(value->mObjectID));
        if(!xobject)
        {
            cout << "unknown (object is not a stream)\n";
            continue;
        }

        PDFObjectCastPtr<PDFDictionary> xobjectDictionary(xobject->QueryStreamDictionary());
        if(!xobjectDictionary)
        {
            cout << "unknown (no stream dictionary)\n";
            continue;
        }

        PDFObjectCastPtr<PDFName> typeOfXObject = xobjectDictionary->QueryDirectObject("Subtype");
        if(!typeOfXObject)
        {
            cout << "unknown (no Subtype name)\n";
            continue;
        }

        cout << typeOfXObject->GetValue().c_str() << "\n";
    }
}
/*
    JS binding: returns an object mapping each key of the copying context's
    copied-objects mapping (as a string property name) to its mapped value
    (as a number).

    Throws a JS TypeError and returns undefined when the wrapped copying
    context was not initialized.
*/
Handle<Value> DocumentCopyingContextDriver::GetCopiedObjects(const Arguments& args)
{
    HandleScope scope;

    DocumentCopyingContextDriver* self = ObjectWrap::Unwrap<DocumentCopyingContextDriver>(args.This());

    if(!self->CopyingContext)
    {
        ThrowException(Exception::TypeError(String::New("copying context object not initialized, create using pdfWriter.createPDFCopyingContext or PDFWriter.createPDFCopyingContextForModifiedFile")));
        return scope.Close(Undefined());
    }

    // plain JS object used as the map
    Local<Object> idMap = Object::New();

    MapIterator<ObjectIDTypeToObjectIDTypeMap> mappingIt = self->CopyingContext->GetCopiedObjectsMappingIterator();
    while(mappingIt.MoveNext())
    {
        Local<String> propertyName = String::New(ObjectIDTypeObject(mappingIt.GetKey()).ToString().c_str());
        Local<Number> propertyValue = Number::New(mappingIt.GetValue());

        idMap->Set(propertyName,propertyValue);
    }

    return scope.Close(idMap);
}
/*
    Rewrites image object inImageObject of the file being modified as an
    unfiltered stream.

    The image's stream dictionary is copied except for "Filter" and "Length",
    the stream content is read through the parser's stream reader and written
    to a new unfiltered stream, and finally any indirect objects referenced
    from the copied dictionary entries are copied.

    inWriter      - writer set up for modifying a file
    inImageObject - object ID of the image stream to rewrite

    Returns eSuccess, or the first failure status encountered. eFailure is
    returned when the object cannot be parsed as a stream or its content
    cannot be read.
*/
EStatusCode DCTDecodeFilterTest::ModifyImageObject(PDFWriter* inWriter,ObjectIDType inImageObject)
{
    EStatusCode status = eSuccess;
    PDFDocumentCopyingContext* modifiedFileContext = inWriter->CreatePDFCopyingContextForModifiedFile();

    do
    {
        // get image source dictionary
        PDFObjectCastPtr<PDFStreamInput> imageStream(inWriter->GetModifiedFileParser().ParseNewObject(inImageObject));
        if(!imageStream)
        {
            // parse failed or the object is not a stream; bail out before anything is written
            cout<<"failed to parse image object as a stream\n";
            status = eFailure;
            break;
        }
        RefCountPtr<PDFDictionary> imageDictionary(imageStream->QueryStreamDictionary());

        // start object for modified image
        inWriter->GetObjectsContext().StartModifiedIndirectObject(inImageObject);
        DictionaryContext* newImageDictionary = inWriter->GetObjectsContext().StartDictionary();

        MapIterator<PDFNameToPDFObjectMap> it = imageDictionary->GetIterator();

        // copy all but "Filter" and "Length", collecting the indirect objects
        // the copied values refer to, so they can be copied once the stream is done
        ObjectIDTypeList indirectObjects;
        while (it.MoveNext())
        {
            if(it.GetKey()->GetValue() == "Filter" || it.GetKey()->GetValue() == "Length")
                continue;

            newImageDictionary->WriteKey(it.GetKey()->GetValue());
            EStatusCodeAndObjectIDTypeList result = modifiedFileContext->CopyDirectObjectWithDeepCopy(it.GetValue());
            if(result.first != eSuccess)
            {
                status = result.first;
                break;
            }
            indirectObjects.insert(indirectObjects.end(),result.second.begin(),result.second.end());
        }
        if(status != eSuccess)
            break;

        // start image stream for this dictionary (make sure it's unfiltered)
        PDFStream* newImageStream = inWriter->GetObjectsContext().StartUnfilteredPDFStream(newImageDictionary);

        // copy source stream through read filter
        IByteReader* sourceImage = modifiedFileContext->GetSourceDocumentParser()->StartReadingFromStream(imageStream.GetPtr());
        if(!sourceImage)
        {
            cout<<"failed to read DCT stream\n";
            status = eFailure;
            // the stream object is owned here (see the delete on the regular path), don't leak it
            delete newImageStream;
            break;
        }

        OutputStreamTraits traits(newImageStream->GetWriteStream());
        status = traits.CopyToOutputStream(sourceImage);
        // NOTE(review): sourceImage is never deleted in this function. Confirm whether
        // StartReadingFromStream hands ownership of the reader to the caller; if so
        // it should be deleted here.

        // finalize stream
        inWriter->GetObjectsContext().EndPDFStream(newImageStream);
        delete newImageStream;

        // late check for status so stream is deleted
        if(status != eSuccess)
            break;

        // copy remaining indirect objects from image stream dictionary
        status = modifiedFileContext->CopyNewObjectsForDirectObject(indirectObjects);
    } while (false);

    delete modifiedFileContext;
    return status;
}
/*
    Searches the XObject resources of the first page for an image whose
    "Filter" is DCTDecode - either the name itself, or an array holding that
    single name.

    inParser - parser of the document to search

    Returns the object ID of the image found, or 0 when there is none (including
    when the page, its "Resources" or its "XObject" dictionary is missing).
*/
ObjectIDType DCTDecodeFilterTest::FindDCTDecodedImageObject(PDFParser* inParser)
{
    ObjectIDType imageObject = 0;

    do
    {
        RefCountPtr<PDFDictionary> firstPage = inParser->ParsePage(0);
        if(!firstPage)
            break;

        PDFObjectCastPtr<PDFDictionary> resourceDictionary(inParser->QueryDictionaryObject(firstPage.GetPtr(),"Resources"));
        if(!resourceDictionary)
            break;

        PDFObjectCastPtr<PDFDictionary> xobjectDictionary(inParser->QueryDictionaryObject(resourceDictionary.GetPtr(), "XObject"));
        if(!xobjectDictionary)
            break;

        MapIterator<PDFNameToPDFObjectMap> it = xobjectDictionary->GetIterator();
        while(it.MoveNext())
        {
            // only indirectly referenced xobjects are examined
            if(it.GetValue()->GetType() != PDFObject::ePDFObjectIndirectObjectReference)
                continue;

            const ObjectIDType candidateID = static_cast<PDFIndirectObjectReference*>(it.GetValue())->mObjectID;

            // skip references that do not resolve to a stream, rather than dereferencing null
            PDFObjectCastPtr<PDFStreamInput> image(inParser->ParseNewObject(candidateID));
            if(!image)
                continue;

            RefCountPtr<PDFDictionary> imageDictionary(image->QueryStreamDictionary());
            if(!imageDictionary)
                continue;

            PDFObjectCastPtr<PDFName> objectType(imageDictionary->QueryDirectObject("Subtype"));
            if(!objectType || objectType->GetValue() != "Image")
                continue;

            RefCountPtr<PDFObject> filters(imageDictionary->QueryDirectObject("Filter"));
            // NOTE(review): an image with no Filter ends the whole search instead of
            // moving on to the next xobject. Kept as is; confirm this is intended.
            if(!filters)
                break;

            if(filters->GetType() == PDFObject::ePDFObjectName)
            {
                if(static_cast<PDFName*>(filters.GetPtr())->GetValue() == "DCTDecode")
                {
                    imageObject = candidateID;
                    break;
                }
                // a different single filter: not a match. Must not fall through to the
                // array handling, which would treat a PDFName as a PDFArray.
                continue;
            }

            if(filters->GetType() != PDFObject::ePDFObjectArray)
                continue;

            PDFArray* filtersArray = static_cast<PDFArray*>(filters.GetPtr());
            if(filtersArray->GetLength() == 1)
            {
                // the cast pointer is null if the array element is not a name
                PDFObjectCastPtr<PDFName> firstDecoder(filtersArray->QueryObject(0));
                if(!!firstDecoder && firstDecoder->GetValue() == "DCTDecode")
                {
                    imageObject = candidateID;
                    break;
                }
            }
        }
    } while (false);

    return imageObject;
}
/*
    Writes the modified version of page mPageIndex to the output.

    Steps, in order:
      1. End the current content context. Stop if that fails or if the page is not dirty.
      2. Write a modified page object: all original entries except "Resources",
         "Contents" and "Annots" are copied as is.
      3. "Annots": the existing annotations followed by the annotations registered
         in the document context (which are then cleared).
      4. "Contents": the existing content stream reference(s) followed by a new
         content stream. When mEnsureContentEncapsulation is set, an extra leading
         stream holding "q" is added, and the new stream starts with the matching "Q".
      5. "Resources": a new or modified resources dictionary that includes the form
         xobjects. An indirectly referenced resources dictionary is written after the
         page object, as a new object if the original was already modified.
      6. The new content stream itself, which places each form with an identity matrix.

    Returns the status of EndContentContext(), or eFailure when the page cannot be parsed.
*/
PDFHummus::EStatusCode PDFModifiedPage::WritePage()
{
    EStatusCode status = EndContentContext(); // just in case someone forgot to close the latest content context

    // declared outside the block so it can be released after every smart pointer
    // holding parsed objects has gone out of scope
    PDFDocumentCopyingContext* copyingContext = NULL;

    do
    {
        if (status != eSuccess || !mIsDirty)
        {
            break;
        }

        // allocate an object ID for the new contents stream (for placing the form)
        // we first create the modified page object, so that we can define a name for the new form xobject
        // that is unique
        ObjectsContext& objectContext = mWriter->GetObjectsContext();
        ObjectIDType newContentObjectID = objectContext.GetInDirectObjectsRegistry().AllocateNewObjectID();
        ObjectIDType newEncapsulatingObjectID = 0;

        // create a copying context, so we can copy the page dictionary, and modify its contents + resources dict
        copyingContext = mWriter->CreatePDFCopyingContextForModifiedFile();

        // get the page object
        ObjectIDType pageObjectID = copyingContext->GetSourceDocumentParser()->GetPageObjectID(mPageIndex);
        PDFObjectCastPtr<PDFDictionary> pageDictionaryObject = copyingContext->GetSourceDocumentParser()->ParsePage(mPageIndex);
        if (!pageDictionaryObject)
        {
            // page could not be parsed; nothing was written yet, so just fail
            status = PDFHummus::eFailure;
            break;
        }
        MapIterator<PDFNameToPDFObjectMap> pageDictionaryObjectIt = pageDictionaryObject->GetIterator();

        // create modified page object
        objectContext.StartModifiedIndirectObject(pageObjectID);
        DictionaryContext* modifiedPageObject = mWriter->GetObjectsContext().StartDictionary();

        // copy all elements of the page to the new page object, but the "Contents", "Resources" and "Annots" elements
        while (pageDictionaryObjectIt.MoveNext())
        {
            if (pageDictionaryObjectIt.GetKey()->GetValue() != "Resources" &&
                pageDictionaryObjectIt.GetKey()->GetValue() != "Contents" &&
                pageDictionaryObjectIt.GetKey()->GetValue() != "Annots")
            {
                modifiedPageObject->WriteKey(pageDictionaryObjectIt.GetKey()->GetValue());
                copyingContext->CopyDirectObjectAsIs(pageDictionaryObjectIt.GetValue());
            }
        }

        // Write new annotations entry, joining existing annotations, and new ones (from links attaching or what not)
        if (pageDictionaryObject->Exists("Annots") || mWriter->GetDocumentContext().GetAnnotations().size() > 0)
        {
            modifiedPageObject->WriteKey("Annots");
            objectContext.StartArray();

            // write old annots, if any exist
            if (pageDictionaryObject->Exists("Annots"))
            {
                PDFObjectCastPtr<PDFArray> anArray(copyingContext->GetSourceDocumentParser()->QueryDictionaryObject(pageDictionaryObject.GetPtr(), "Annots"));
                // the cast pointer is null when "Annots" does not resolve to an array
                if (anArray.GetPtr())
                {
                    SingleValueContainerIterator<PDFObjectVector> refs = anArray->GetIterator();
                    while (refs.MoveNext())
                        copyingContext->CopyDirectObjectAsIs(refs.GetItem());
                }
            }

            // write new annots from links
            ObjectIDTypeSet& annotations = mWriter->GetDocumentContext().GetAnnotations();
            if (annotations.size() > 0)
            {
                ObjectIDTypeSet::iterator it = annotations.begin();
                for (; it != annotations.end(); ++it)
                    objectContext.WriteNewIndirectObjectReference(*it);
            }
            annotations.clear();
            objectContext.EndArray(eTokenSeparatorEndLine);
        }

        // Write new contents entry, joining the existing contents with the new one. take care of various scenarios of the existing Contents
        modifiedPageObject->WriteKey("Contents");
        if (!pageDictionaryObject->Exists("Contents"))
        { // no contents
            objectContext.WriteIndirectObjectReference(newContentObjectID);
        }
        else
        {
            objectContext.StartArray();
            if (mEnsureContentEncapsulation)
            {
                newEncapsulatingObjectID = objectContext.GetInDirectObjectsRegistry().AllocateNewObjectID();
                objectContext.WriteNewIndirectObjectReference(newEncapsulatingObjectID);
            }

            RefCountPtr<PDFObject> pageContent(copyingContext->GetSourceDocumentParser()->QueryDictionaryObject(pageDictionaryObject.GetPtr(), "Contents"));
            if (!pageContent)
            {
                // "Contents" exists but could not be resolved. treat as no content.
            }
            else if (pageContent->GetType() == PDFObject::ePDFObjectStream)
            {
                // single content stream. must be a reference which points to it
                PDFObjectCastPtr<PDFIndirectObjectReference> ref(pageDictionaryObject->QueryDirectObject("Contents"));
                objectContext.WriteIndirectObjectReference(ref->mObjectID, ref->mVersion);
            }
            else if (pageContent->GetType() == PDFObject::ePDFObjectArray)
            {
                PDFArray* anArray = (PDFArray*)pageContent.GetPtr();

                // multiple content streams
                SingleValueContainerIterator<PDFObjectVector> refs = anArray->GetIterator();
                PDFObjectCastPtr<PDFIndirectObjectReference> ref;
                while (refs.MoveNext())
                {
                    ref = refs.GetItem();
                    // skip array items that are not indirect references
                    if (!ref)
                        continue;
                    objectContext.WriteIndirectObjectReference(ref->mObjectID, ref->mVersion);
                }
            }
            else
            {
                // this basically means no content...or whatever. just ignore.
            }

            objectContext.WriteNewIndirectObjectReference(newContentObjectID);
            objectContext.EndArray();
            objectContext.EndLine();
        }

        // Write a new resource entry. copy all but the "XObject" entry, which needs to be modified. Just for kicks i'm keeping the original
        // form (either direct dictionary, or indirect object)
        ObjectIDType resourcesIndirect = 0;
        ObjectIDType newResourcesIndirect = 0;
        vector<string> formResourcesNames;

        modifiedPageObject->WriteKey("Resources");
        if (!pageDictionaryObject->Exists("Resources"))
        {
            // check if there's inherited dict. if so - write directly as a modified version
            PDFObjectCastPtr<PDFDictionary> parentDict(
                pageDictionaryObject->Exists("Parent") ?
                copyingContext->GetSourceDocumentParser()->QueryDictionaryObject(pageDictionaryObject.GetPtr(), "Parent"):
                NULL);
            if(!parentDict)
            {
                formResourcesNames = WriteNewResourcesDictionary(objectContext);
            }
            else
            {
                PDFObjectCastPtr<PDFDictionary> inheritedResources = findInheritedResources(copyingContext->GetSourceDocumentParser(),parentDict.GetPtr());
                if(!inheritedResources)
                {
                    formResourcesNames = WriteNewResourcesDictionary(objectContext);
                }
                else
                {
                    formResourcesNames = WriteModifiedResourcesDict(copyingContext->GetSourceDocumentParser(), inheritedResources.GetPtr(), objectContext, copyingContext);
                }
            }
        }
        else
        {
            // resources may be direct, or indirect. if direct, write as is, adding the new form xobject, otherwise wait till page object ends and write then
            PDFObjectCastPtr<PDFIndirectObjectReference> resourceDictRef(pageDictionaryObject->QueryDirectObject("Resources"));
            if (!resourceDictRef)
            {
                PDFObjectCastPtr<PDFDictionary> resourceDict(pageDictionaryObject->QueryDirectObject("Resources"));
                formResourcesNames = WriteModifiedResourcesDict(copyingContext->GetSourceDocumentParser(), resourceDict.GetPtr(), objectContext, copyingContext);
            }
            else
            {
                resourcesIndirect = resourceDictRef->mObjectID;
                // later will write a modified version of the resources dictionary, with the new form.
                // only modify the resources dict object if wasn't already modified (can happen when sharing resources dict between multiple pages).
                // in the case where it was already modified, create a new resources dictionary that's a copy, and use it instead, to avoid overwriting
                // the previous modification
                GetObjectWriteInformationResult res = objectContext.GetInDirectObjectsRegistry().GetObjectWriteInformation(resourcesIndirect);
                if (res.first && res.second.mIsDirty)
                {
                    newResourcesIndirect = objectContext.GetInDirectObjectsRegistry().AllocateNewObjectID();
                    modifiedPageObject->WriteObjectReferenceValue(newResourcesIndirect);
                }
                else
                    modifiedPageObject->WriteObjectReferenceValue(resourcesIndirect);
            }
        }

        objectContext.EndDictionary(modifiedPageObject);
        objectContext.EndIndirectObject();

        // indirect resources dictionary: written now that the page object is closed
        if (resourcesIndirect != 0)
        {
            if (newResourcesIndirect != 0)
                objectContext.StartNewIndirectObject(newResourcesIndirect);
            else
                objectContext.StartModifiedIndirectObject(resourcesIndirect);
            PDFObjectCastPtr<PDFDictionary> resourceDict(copyingContext->GetSourceDocumentParser()->ParseNewObject(resourcesIndirect));
            formResourcesNames = WriteModifiedResourcesDict(copyingContext->GetSourceDocumentParser(), resourceDict.GetPtr(), objectContext, copyingContext);
            objectContext.EndIndirectObject();
        }

        // if required write encapsulation code, so that new stream is independent of graphic context of original
        PDFStream* newStream = NULL;
        PrimitiveObjectsWriter primitivesWriter;
        if (newEncapsulatingObjectID != 0)
        {
            objectContext.StartNewIndirectObject(newEncapsulatingObjectID);
            newStream = objectContext.StartPDFStream();
            primitivesWriter.SetStreamForWriting(newStream->GetWriteStream());
            primitivesWriter.WriteKeyword("q");
            objectContext.EndPDFStream(newStream);
            // the stream object is not released by EndPDFStream; release it here
            delete newStream;
            newStream = NULL;
        }

        // last but not least, create the actual content stream object, placing the form
        objectContext.StartNewIndirectObject(newContentObjectID);
        newStream = objectContext.StartPDFStream();
        primitivesWriter.SetStreamForWriting(newStream->GetWriteStream());

        if (newEncapsulatingObjectID != 0)
        {
            primitivesWriter.WriteKeyword("Q");
        }

        vector<string>::iterator it = formResourcesNames.begin();
        for (; it != formResourcesNames.end(); ++it)
        {
            // q 1 0 0 1 0 0 cm /<name> Do Q
            primitivesWriter.WriteKeyword("q");
            primitivesWriter.WriteInteger(1);
            primitivesWriter.WriteInteger(0);
            primitivesWriter.WriteInteger(0);
            primitivesWriter.WriteInteger(1);
            primitivesWriter.WriteInteger(0);
            primitivesWriter.WriteInteger(0);
            primitivesWriter.WriteKeyword("cm");
            primitivesWriter.WriteName(*it);
            primitivesWriter.WriteKeyword("Do");
            primitivesWriter.WriteKeyword("Q");
        }

        objectContext.EndPDFStream(newStream);
        delete newStream;
    } while (false);

    // the copying context is owned by this function; deleting NULL is a no-op
    delete copyingContext;

    return status;
}
/*
    JS binding: sets as the function's return value an object mapping each key
    of the copying context's copied-objects mapping (as a string property name)
    to its mapped value (as a number).

    Throws a JS exception and returns undefined when the wrapped copying
    context was not initialized.
*/
METHOD_RETURN_TYPE DocumentCopyingContextDriver::GetCopiedObjects(const ARGS_TYPE& args)
{
    CREATE_ISOLATE_CONTEXT;
    CREATE_ESCAPABLE_SCOPE;

    DocumentCopyingContextDriver* self = ObjectWrap::Unwrap<DocumentCopyingContextDriver>(args.This());

    if(!self->CopyingContext)
    {
        THROW_EXCEPTION("copying context object not initialized, create using pdfWriter.createPDFCopyingContext or PDFWriter.createPDFCopyingContextForModifiedFile");
        SET_FUNCTION_RETURN_VALUE(UNDEFINED);
    }

    // plain JS object used as the map
    Local<Object> idMap = NEW_OBJECT;

    MapIterator<ObjectIDTypeToObjectIDTypeMap> mappingIt = self->CopyingContext->GetCopiedObjectsMappingIterator();
    while(mappingIt.MoveNext())
    {
        const ObjectIDType keyObjectID = mappingIt.GetKey();
        const ObjectIDType mappedObjectID = mappingIt.GetValue();

        idMap->Set(NEW_STRING(ObjectIDTypeObject(keyObjectID).ToString().c_str()),NEW_NUMBER(mappedObjectID));
    }

    SET_FUNCTION_RETURN_VALUE(idMap);
}
/*
    JS binding: sets as the function's return value a JS object with one
    property per entry of the wrapped dictionary. The property name is the
    entry's key name, and the property value is whatever
    PDFObjectDriver::CreateDriver produces for the entry's value.
*/
METHOD_RETURN_TYPE PDFDictionaryDriver::ToJSObject(const ARGS_TYPE& args)
{
    CREATE_ISOLATE_CONTEXT;
    CREATE_ESCAPABLE_SCOPE;

    PDFDictionaryDriver* self = ObjectWrap::Unwrap<PDFDictionaryDriver>(args.This());

    Local<Object> jsObject = NEW_OBJECT;

    MapIterator<PDFNameToPDFObjectMap> entryIt = self->TheObject->GetIterator();
    while(entryIt.MoveNext())
    {
        jsObject->Set(NEW_STRING(entryIt.GetKey()->GetValue().c_str()),
                      PDFObjectDriver::CreateDriver(entryIt.GetValue()));
    }

    SET_FUNCTION_RETURN_VALUE(jsObject);
}