/**
 *  Returns a copy of |input| in which XML-significant characters are replaced
 *  by entity references, optionally wrapped in a prefix and a suffix.
 *
 *      "&" --> "&amp;"    and    "<" --> "&lt;"
 *
 *  The text is assumed to be UTF-8 and to be used as XML element content, not
 *  as an attribute value (quote characters are therefore left untouched).
 *
 *  @param input   Text to escape.  When empty it is returned unchanged and
 *                 neither |before| nor |after| is emitted.
 *  @param before  Optional NUL-terminated prefix, copied verbatim (not escaped).
 *  @param after   Optional NUL-terminated suffix, copied verbatim (not escaped).
 *  @return        before + escaped(input) + after, or |input| itself if empty.
 */
const SkString escape_xml(const SkString& input,
                          const char* before = nullptr,
                          const char* after = nullptr) {
    if (input.size() == 0) {
        return input;
    }

    // Entity references substituted for the raw characters.  These must be the
    // real escapes: the output buffer below is sized with the growth reported
    // by count_xml_escape_size(), and the closing assert checks that exactly
    // that many bytes were produced.
    static const char kAmp[] = "&amp;";
    static const char kLt[] = "&lt;";
    constexpr size_t kAmpLen = sizeof(kAmp) - 1;  // exclude the trailing NUL
    constexpr size_t kLtLen = sizeof(kLt) - 1;

    const size_t beforeLen = before ? strlen(before) : 0;
    const size_t afterLen = after ? strlen(after) : 0;
    // NOTE(review): count_xml_escape_size() is defined elsewhere; it is
    // presumed to return the number of extra bytes escaping adds
    // (4 per '&', 3 per '<').  Confirm it agrees with kAmp / kLt above.
    const int extra = count_xml_escape_size(input);

    SkString output(input.size() + extra + beforeLen + afterLen);
    char* out = output.writable_str();

    // Lengths are known exactly, so memcpy is used rather than strncpy; no
    // NUL padding or truncation semantics are wanted here.
    if (before) {
        memcpy(out, before, beforeLen);
        out += beforeLen;
    }
    for (size_t i = 0; i < input.size(); ++i) {
        const char c = input[i];
        if (c == '&') {
            memcpy(out, kAmp, kAmpLen);
            out += kAmpLen;
        } else if (c == '<') {
            memcpy(out, kLt, kLtLen);
            out += kLtLen;
        } else {
            *out++ = c;
        }
    }
    if (after) {
        memcpy(out, after, afterLen);
        out += afterLen;
    }

    // Validate that we haven't written outside of our string.
    SkASSERT(out == &output.writable_str()[output.size()]);
    *out = '\0';
    return output;
}
/**
 *  Builds the XMP metadata packet for a PDF document and wraps it in a
 *  PDFXMLObject.
 *
 *  Optional fields (dates, title, author, subject, keywords, creator) are
 *  rendered as complete XML elements, or as an empty string when the field is
 *  absent, and substituted into a fixed packet template.
 *
 *  @param metadata  Document metadata supplying the text fields and dates.
 *  @param doc       UUID written to xmpMM:DocumentID.
 *  @param instance  UUID written to xmpMM:InstanceID.
 *  @return          A new PDFXMLObject holding the formatted packet.
 */
sk_sp<SkPDFObject> SkPDFMetadata::MakeXMPObject(
        const SkDocument::PDFMetadata& metadata,
        const UUID& doc,
        const UUID& instance) {
    // Each "%s" slot receives either a whole element (with trailing newline)
    // or an empty string.
    static const char templateString[] =
            "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
            "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
            " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
            "2012/08/23-13:03:03\">\n"
            "<rdf:RDF "
            "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
            "<rdf:Description rdf:about=\"\"\n"
            " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
            " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
            " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
            " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
            " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
            "<pdfaid:part>2</pdfaid:part>\n"
            "<pdfaid:conformance>B</pdfaid:conformance>\n"
            "%s"  // ModifyDate
            "%s"  // CreateDate
            "%s"  // xmp:CreatorTool
            "<dc:format>application/pdf</dc:format>\n"
            "%s"  // dc:title
            "%s"  // dc:description
            "%s"  // author
            "%s"  // keywords
            "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
            "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
            "%s"  // pdf:Producer
            "%s"  // pdf:Keywords
            "</rdf:Description>\n"
            "</rdf:RDF>\n"
            "</x:xmpmeta>\n"  // Note: the standard suggests 4k of padding.
            "<?xpacket end=\"w\"?>\n";

    // --- Identifiers: UUID text is expected to need no escaping. ---
    const SkString docIdText = uuid_to_string(doc);
    SkASSERT(0 == count_xml_escape_size(docIdText));
    const SkString instanceIdText = uuid_to_string(instance);
    SkASSERT(0 == count_xml_escape_size(instanceIdText));

    // --- Dates: YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape. ---
    SkString modifyElem;
    if (metadata.fModified.fEnabled) {
        SkString iso;
        metadata.fModified.fDateTime.toISO8601(&iso);
        SkASSERT(0 == count_xml_escape_size(iso));
        modifyElem = SkStringPrintf("<xmp:ModifyDate>%s</xmp:ModifyDate>\n",
                                    iso.c_str());
    }
    SkString createElem;
    if (metadata.fCreation.fEnabled) {
        SkString iso;
        metadata.fCreation.fDateTime.toISO8601(&iso);
        SkASSERT(0 == count_xml_escape_size(iso));
        createElem = SkStringPrintf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
                                    iso.c_str());
    }

    // --- Free-text fields, escaped and wrapped in their elements. ---
    const SkString creatorElem = escape_xml(metadata.fCreator,
                                            "<xmp:CreatorTool>",
                                            "</xmp:CreatorTool>\n");
    const SkString titleElem = escape_xml(
            metadata.fTitle,
            "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">",
            "</rdf:li></rdf:Alt></dc:title>\n");
    const SkString subjectElem = escape_xml(
            metadata.fSubject,
            "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">",
            "</rdf:li></rdf:Alt></dc:description>\n");
    // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
    const SkString authorElem = escape_xml(metadata.fAuthor,
                                           "<dc:creator><rdf:Bag><rdf:li>",
                                           "</rdf:li></rdf:Bag></dc:creator>\n");
    // Keywords are emitted twice: once as dc:subject, once as pdf:Keywords.
    // TODO: in theory, keywords can be a list too.
    const SkString dcKeywordsElem = escape_xml(metadata.fKeywords,
                                               "<dc:subject><rdf:Bag><rdf:li>",
                                               "</rdf:li></rdf:Bag></dc:subject>\n");
    const SkString pdfKeywordsElem = escape_xml(metadata.fKeywords,
                                                "<pdf:Keywords>",
                                                "</pdf:Keywords>\n");

    // --- Producer: the built-in name unless the caller supplied one, in which
    // case the built-in name is preserved inside an XML comment.
    // TODO: register a developer prefix to make
    // <skia:SKPDF_CUSTOM_PRODUCER_KEY> a real XML tag.
    const SkString producerElem =
            metadata.fProducer.isEmpty()
                    ? SkString("<pdf:Producer>" SKPDF_PRODUCER "</pdf:Producer>\n")
                    : escape_xml(metadata.fProducer,
                                 "<pdf:Producer>",
                                 "</pdf:Producer>\n<!-- <skia:"
                                 SKPDF_CUSTOM_PRODUCER_KEY ">"
                                 SKPDF_PRODUCER "</skia:"
                                 SKPDF_CUSTOM_PRODUCER_KEY "> -->\n");

    // Argument order must follow the "%s" slots in templateString.
    return sk_make_sp<PDFXMLObject>(SkStringPrintf(templateString,
                                                   modifyElem.c_str(),
                                                   createElem.c_str(),
                                                   creatorElem.c_str(),
                                                   titleElem.c_str(),
                                                   subjectElem.c_str(),
                                                   authorElem.c_str(),
                                                   dcKeywordsElem.c_str(),
                                                   docIdText.c_str(),
                                                   instanceIdText.c_str(),
                                                   producerElem.c_str(),
                                                   pdfKeywordsElem.c_str()));
}
/**
 *  Builds the XMP metadata packet for this document and returns it as a newly
 *  allocated PDFXMLObject.
 *
 *  Optional fields are rendered as complete XML elements, or as an empty
 *  string when absent, and substituted into a fixed packet template.  The
 *  xmp:MetadataDate element reuses the modification timestamp.
 *
 *  @param doc       UUID written to xmpMM:DocumentID.
 *  @param instance  UUID written to xmpMM:InstanceID.
 *  @return          A PDFXMLObject created with `new`; the caller receives the
 *                   raw pointer.
 */
SkPDFObject* SkPDFMetadata::createXMPObject(const UUID& doc,
                                            const UUID& instance) const {
    // Each "%s" slot receives either a whole element (with trailing newline)
    // or an empty string.
    static const char templateString[] =
            "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
            "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n"
            " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, "
            "2012/08/23-13:03:03\">\n"
            "<rdf:RDF "
            "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
            "<rdf:Description rdf:about=\"\"\n"
            " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n"
            " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
            " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n"
            " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n"
            " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n"
            "<pdfaid:part>2</pdfaid:part>\n"
            "<pdfaid:conformance>B</pdfaid:conformance>\n"
            "%s"  // ModifyDate
            "%s"  // CreateDate
            "%s"  // MetadataDate
            "%s"  // xmp:CreatorTool
            "<dc:format>application/pdf</dc:format>\n"
            "%s"  // dc:title
            "%s"  // dc:description
            "%s"  // author
            "%s"  // keywords
            "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n"
            "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n"
            "<pdf:Producer>Skia/PDF</pdf:Producer>\n"
            "%s"  // pdf:Keywords
            "</rdf:Description>\n"
            "</rdf:RDF>\n"
            "</x:xmpmeta>\n"  // Note: the standard suggests 4k of padding.
            "<?xpacket end=\"w\"?>\n";

    SkString creationDate;
    SkString modificationDate;
    SkString metadataDate;
    if (fCreation) {
        SkString tmp;
        fCreation->toISO8601(&tmp);
        SkASSERT(0 == count_xml_escape_size(tmp));
        // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape
        creationDate = sk_string_printf("<xmp:CreateDate>%s</xmp:CreateDate>\n",
                                        tmp.c_str());
    }
    if (fModified) {
        SkString tmp;
        fModified->toISO8601(&tmp);
        SkASSERT(0 == count_xml_escape_size(tmp));
        modificationDate = sk_string_printf(
                "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str());
        // The metadata date is taken from the same modification timestamp.
        metadataDate = sk_string_printf(
                "<xmp:MetadataDate>%s</xmp:MetadataDate>\n", tmp.c_str());
    }

    // dc:title and dc:description are language alternatives (rdf:Alt); each
    // item must carry an xml:lang qualifier, and "x-default" marks the default
    // entry.
    SkString title = escape_xml(
            get(fInfo, "Title"),
            "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">",
            "</rdf:li></rdf:Alt></dc:title>\n");
    SkString author = escape_xml(get(fInfo, "Author"),
                                 "<dc:creator><rdf:Bag><rdf:li>",
                                 "</rdf:li></rdf:Bag></dc:creator>\n");
    // TODO: in theory, XMP can support multiple authors.  Split on a delimiter?
    SkString subject = escape_xml(
            get(fInfo, "Subject"),
            "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">",
            "</rdf:li></rdf:Alt></dc:description>\n");
    // Keywords are emitted twice: once as dc:subject, once as pdf:Keywords.
    SkString keywords1 = escape_xml(get(fInfo, "Keywords"),
                                    "<dc:subject><rdf:Bag><rdf:li>",
                                    "</rdf:li></rdf:Bag></dc:subject>\n");
    SkString keywords2 = escape_xml(get(fInfo, "Keywords"),
                                    "<pdf:Keywords>",
                                    "</pdf:Keywords>\n");
    // TODO: in theory, keywords can be a list too.
    SkString creator = escape_xml(get(fInfo, "Creator"),
                                  "<xmp:CreatorTool>",
                                  "</xmp:CreatorTool>\n");

    SkString documentID = uuid_to_string(doc);  // no need to escape
    SkASSERT(0 == count_xml_escape_size(documentID));
    SkString instanceID = uuid_to_string(instance);
    SkASSERT(0 == count_xml_escape_size(instanceID));

    // Argument order must follow the "%s" slots in templateString.
    return new PDFXMLObject(sk_string_printf(templateString,
                                             modificationDate.c_str(),
                                             creationDate.c_str(),
                                             metadataDate.c_str(),
                                             creator.c_str(),
                                             title.c_str(),
                                             subject.c_str(),
                                             author.c_str(),
                                             keywords1.c_str(),
                                             documentID.c_str(),
                                             instanceID.c_str(),
                                             keywords2.c_str()));
}