/**
 * Creates an XMP object from an RDF string. The string is used to simulate
 * creating an XMP object from multiple input buffers: it is fed to the parser
 * in fixed-size chunks, each flagged with kXMP_ParseMoreBuffers.
 * The last call to ParseFromBuffer has no kXMP_ParseMoreBuffers option,
 * thereby indicating this is the last input buffer.
 *
 * @return the SXMPMeta object populated from the embedded RDF.
 */
SXMPMeta createXMPFromRDF()
{
    const char * rdf =
        "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"
        "<rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/'>"
        "<dc:subject>"
        "<rdf:Bag>"
        "<rdf:li>XMP</rdf:li>"
        "<rdf:li>SDK</rdf:li>"
        "<rdf:li>Sample</rdf:li>"
        "</rdf:Bag>"
        "</dc:subject>"
        "<dc:format>image/tiff</dc:format>"
        "</rdf:Description>"
        "</rdf:RDF>";

    SXMPMeta meta;

    // Measure the string once instead of on every loop iteration.
    const long kChunkSize = 10;
    const long rdfLen = (long) strlen ( rdf );

    // Loop over the rdf string and create the XMP object
    // 10 characters at a time. The loop stops while at least one
    // character is still unparsed so the final call below always has
    // the closing part of the string to deliver.
    long pos;
    for ( pos = 0; pos < rdfLen - kChunkSize; pos += kChunkSize )
    {
        meta.ParseFromBuffer ( &rdf[pos], kChunkSize, kXMP_ParseMoreBuffers );
    }

    // The last call has no kXMP_ParseMoreBuffers options, signifying
    // this is the last input buffer
    meta.ParseFromBuffer ( &rdf[pos], (XMP_StringLen) ( rdfLen - pos ) );

    return meta;
}
static void FullUnicodeParse ( FILE * log, const char * encoding, size_t bufferSize, const std::string & packet, const std::string & fullUnicode ) { if ( bufferSize > sizeof(sU32) ) { fprintf ( log, "#ERROR: FullUnicodeParse buffer overrun for %s, %d byte buffers\n", encoding, bufferSize ); return; } SXMPMeta meta; try { memset ( sU32, -1, sizeof(sU32) ); for ( size_t i = 0; i < packet.size(); i += bufferSize ) { size_t count = bufferSize; if ( count > (packet.size() - i) ) count = packet.size() - i; memcpy ( sU32, &packet[i], count ); meta.ParseFromBuffer ( XMP_StringPtr(sU32), count, kXMP_ParseMoreBuffers ); } meta.ParseFromBuffer ( XMP_StringPtr(sU32), 0 ); } catch ( XMP_Error& excep ) { char message [200]; sprintf ( message, "#ERROR: Full Unicode parsing error for %s, %d byte buffers", encoding, bufferSize ); PrintXMPErrorInfo ( excep, message ); return; } std::string value; bool found = meta.GetProperty ( kNS1, "FullUnicode", &value, 0 ); if ( (! found) || (value != fullUnicode) ) fprintf ( log, "#ERROR: Failed to get full Unicode value for %s, %d byte buffers\n", encoding, bufferSize ); } // FullUnicodeParse
/** Parse a buffer into an existing XMP object.
 *
 * Hands @p len bytes starting at @p buffer to SXMPMeta::ParseFromBuffer with
 * the kXMP_RequireXMPMeta option.
 *
 * @param xmp    the XMP object to fill; used as an SXMPMeta.
 * @param buffer the serialized data to parse.
 * @param len    the number of bytes in @p buffer.
 * @return true when parsing completes; false when an XMP_Error is thrown, in
 *         which case the error is recorded through set_error(). Both pointers
 *         are first run through CHECK_PTR (presumably returning false on NULL
 *         -- confirm against the macro definition).
 */
bool xmp_parse(XmpPtr xmp, const char *buffer, size_t len)
{
    CHECK_PTR(xmp, false);
    CHECK_PTR(buffer, false);

    SXMPMeta *meta = (SXMPMeta *)xmp;
    bool parsed = true;

    try {
        meta->ParseFromBuffer(buffer, len, kXMP_RequireXMPMeta);
    }
    catch (const XMP_Error &err) {
        set_error(err);
        parsed = false;
    }

    return parsed;
}
static void ProcessPacket ( const char * fileName, FILE * inFile, size_t offset, size_t length ) { std::string xmlString; xmlString.append ( length, ' ' ); fseek ( inFile, offset, SEEK_SET ); fread ( (void*)xmlString.data(), 1, length, inFile ); char title [1000]; sprintf ( title, "// Dumping raw input for \"%s\" (%d..%d)", fileName, offset, (offset + length - 1) ); printf ( "// " ); for ( size_t i = 3; i < strlen(title); ++i ) printf ( "=" ); printf ( "\n\n%s\n\n%.*s\n\n", title, length, xmlString.c_str() ); fflush ( stdout ); SXMPMeta xmpObj; try { xmpObj.ParseFromBuffer ( xmlString.c_str(), length ); } catch ( ... ) { printf ( "## Parse failed\n\n" ); return; } xmpObj.DumpObject ( DumpCallback, stdout ); fflush ( stdout ); string xmpString; xmpObj.SerializeToBuffer ( &xmpString, kXMP_OmitPacketWrapper ); printf ( "\nPretty serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); fflush ( stdout ); xmpObj.SerializeToBuffer ( &xmpString, (kXMP_OmitPacketWrapper | kXMP_UseCompactFormat) ); printf ( "Compact serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); fflush ( stdout ); } // ProcessPacket
void Scanner_MetaHandler::CacheFileData() { LFA_FileRef fileRef = this->parent->fileRef; bool beLenient = XMP_OptionIsClear ( this->parent->openFlags, kXMPFiles_OpenStrictly ); int pkt; XMP_Int64 bufPos; size_t bufLen; SXMPMeta * newMeta; XMP_AbortProc abortProc = this->parent->abortProc; void * abortArg = this->parent->abortArg; const bool checkAbort = (abortProc != 0); std::vector<CandidateInfo> candidates; // ! These have SXMPMeta* fields, don't leak on exceptions. this->containsXMP = false; try { // ------------------------------------------------------ // Scan the entire file to find all of the valid packets. XMP_Int64 fileLen = LFA_Measure ( fileRef ); XMPScanner scanner ( fileLen ); enum { kBufferSize = 64*1024 }; XMP_Uns8 buffer [kBufferSize]; LFA_Seek ( fileRef, 0, SEEK_SET ); for ( bufPos = 0; bufPos < fileLen; bufPos += bufLen ) { if ( checkAbort && abortProc(abortArg) ) { XMP_Throw ( "Scanner_MetaHandler::LocateXMP - User abort", kXMPErr_UserAbort ); } bufLen = LFA_Read ( fileRef, buffer, kBufferSize ); if ( bufLen == 0 ) XMP_Throw ( "Scanner_MetaHandler::LocateXMP: Read failure", kXMPErr_ExternalFailure ); scanner.Scan ( buffer, bufPos, bufLen ); } // -------------------------------------------------------------- // Parse the valid packet snips, building a vector of candidates. long snipCount = scanner.GetSnipCount(); XMPScanner::SnipInfoVector snips ( snipCount ); scanner.Report ( snips ); for ( pkt = 0; pkt < snipCount; ++pkt ) { if ( checkAbort && abortProc(abortArg) ) { XMP_Throw ( "Scanner_MetaHandler::LocateXMP - User abort", kXMPErr_UserAbort ); } // Seek to the packet then try to parse it. 
if ( snips[pkt].fState != XMPScanner::eValidPacketSnip ) continue; LFA_Seek ( fileRef, snips[pkt].fOffset, SEEK_SET ); newMeta = new SXMPMeta(); std::string xmpPacket; xmpPacket.reserve ( (size_t)snips[pkt].fLength ); try { for ( bufPos = 0; bufPos < snips[pkt].fLength; bufPos += bufLen ) { bufLen = kBufferSize; if ( (bufPos + bufLen) > (size_t)snips[pkt].fLength ) bufLen = size_t ( snips[pkt].fLength - bufPos ); (void) LFA_Read ( fileRef, buffer, (XMP_Int32)bufLen, kLFA_RequireAll ); xmpPacket.append ( (const char *)buffer, bufLen ); newMeta->ParseFromBuffer ( (char *)buffer, (XMP_StringLen)bufLen, kXMP_ParseMoreBuffers ); } newMeta->ParseFromBuffer ( 0, 0, kXMP_NoOptions ); } catch ( ... ) { delete newMeta; if ( beLenient ) continue; // Skip if we're being lenient, else rethrow. throw; } // It parsed OK, add it to the array of candidates. candidates.push_back ( CandidateInfo() ); CandidateInfo & newInfo = candidates.back(); newInfo.xmpObj = newMeta; newInfo.xmpPacket.swap ( xmpPacket ); newInfo.packetInfo.offset = snips[pkt].fOffset; newInfo.packetInfo.length = (XMP_Int32)snips[pkt].fLength; newInfo.packetInfo.charForm = snips[pkt].fCharForm; newInfo.packetInfo.writeable = (snips[pkt].fAccess == 'w'); } // ---------------------------------------- // Figure out which packet is the main one. int main = PickMainPacket ( candidates, beLenient ); if ( main != -1 ) { this->packetInfo = candidates[main].packetInfo; this->xmpPacket.swap ( candidates[main].xmpPacket ); this->xmpObj = *candidates[main].xmpObj; this->containsXMP = true; this->processedXMP = true; } for ( pkt = 0; pkt < (int)candidates.size(); ++pkt ) { if ( candidates[pkt].xmpObj != 0 ) delete candidates[pkt].xmpObj; } } catch ( ... ) { // Clean up the SXMPMeta* fields from the vector of candidates. for ( pkt = 0; pkt < (int)candidates.size(); ++pkt ) { if ( candidates[pkt].xmpObj != 0 ) delete candidates[pkt].xmpObj; } throw; } } // Scanner_MetaHandler::CacheFileData
static void DoTest ( FILE * log ) { SXMPMeta meta; size_t u8Count, u32Count; SXMPMeta meta8, meta16b, meta16l, meta32b, meta32l; std::string u8Packet, u16bPacket, u16lPacket, u32bPacket, u32lPacket; InitializeUnicodeConversions(); // --------------------------------------------------------------------------------------------- fprintf ( log, "// ------------------------------------------------\n" ); fprintf ( log, "// Test basic serialization and parsing using ASCII\n\n" ); // ---------------------------------------------------- // Create basic ASCII packets in each of the encodings. meta.ParseFromBuffer ( kSimpleRDF, kXMP_UseNullTermination ); meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) ); meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) ); meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) ); meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) ); meta.SerializeToBuffer ( &u32lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) ); #if 0 FILE* dump; dump = fopen ( "u8Packet.txt", "w" ); fwrite ( u8Packet.c_str(), 1, u8Packet.size(), dump ); fclose ( dump ); dump = fopen ( "u16bPacket.txt", "w" ); fwrite ( u16bPacket.c_str(), 1, u16bPacket.size(), dump ); fclose ( dump ); dump = fopen ( "u16lPacket.txt", "w" ); fwrite ( u16lPacket.c_str(), 1, u16lPacket.size(), dump ); fclose ( dump ); dump = fopen ( "u32bPacket.txt", "w" ); fwrite ( u32bPacket.c_str(), 1, u32bPacket.size(), dump ); fclose ( dump ); dump = fopen ( "u32lPacket.txt", "w" ); fwrite ( u32lPacket.c_str(), 1, u32lPacket.size(), dump ); fclose ( dump ); #endif // Verify the character form. The conversion functions are tested separately. 
const char * ptr; ptr = u8Packet.c_str(); fprintf ( log, "UTF-8 : %d : %.2X %.2X \"%.10s...\"\n", u8Packet.size(), *ptr, *(ptr+1), ptr ); ptr = u16bPacket.c_str(); fprintf ( log, "UTF-16BE : %d : %.2X %.2X %.2X\n", u16bPacket.size(), *ptr, *(ptr+1), *(ptr+2) ); ptr = u16lPacket.c_str(); fprintf ( log, "UTF-16LE : %d : %.2X %.2X %.2X\n", u16lPacket.size(), *ptr, *(ptr+1), *(ptr+2) ); ptr = u32bPacket.c_str(); fprintf ( log, "UTF-32BE : %d : %.2X %.2X %.2X %.2X %.2X\n", u32bPacket.size(), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) ); ptr = u32lPacket.c_str(); fprintf ( log, "UTF-32LE : %d : %.2X %.2X %.2X %.2X %.2X\n", u32lPacket.size(), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) ); fprintf ( log, "\nBasic serialization tests done\n" ); // ------------------------------------------------- // Verify round trip reparsing of the basic packets. std::string origDump, rtDump; meta.DumpObject ( DumpToString, &origDump ); fprintf ( log, "Original dump\n%s\n", origDump.c_str() ); try { meta8.ParseFromBuffer ( u8Packet.c_str(), u8Packet.size() ); meta16b.ParseFromBuffer ( u16bPacket.c_str(), u16bPacket.size() ); meta16l.ParseFromBuffer ( u16lPacket.c_str(), u16lPacket.size() ); meta32b.ParseFromBuffer ( u32bPacket.c_str(), u32bPacket.size() ); meta32l.ParseFromBuffer ( u32lPacket.c_str(), u32lPacket.size() ); } catch ( XMP_Error& excep ) { PrintXMPErrorInfo ( excep, "## Caught reparsing exception" ); fprintf ( log, "\n" ); } #if 0 fprintf ( log, "After UTF-8 roundtrip\n" ); meta8.DumpObject ( DumpToFile, log ); fprintf ( log, "\nAfter UTF-16 BE roundtrip\n" ); meta16b.DumpObject ( DumpToFile, log ); fprintf ( log, "\nAfter UTF-16 LE roundtrip\n" ); meta16l.DumpObject ( DumpToFile, log ); fprintf ( log, "\nAfter UTF-32 BE roundtrip\n" ); meta32b.DumpObject ( DumpToFile, log ); fprintf ( log, "\nAfter UTF-32 LE roundtrip\n" ); meta32l.DumpObject ( DumpToFile, log ); #endif rtDump.clear(); meta8.DumpObject ( DumpToString, &rtDump ); if ( rtDump != origDump ) fprintf ( log, 
"#ERROR: Roundtrip failure for UTF-8\n%s\n", rtDump.c_str() ); rtDump.clear(); meta16b.DumpObject ( DumpToString, &rtDump ); if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16BE\n%s\n", rtDump.c_str() ); rtDump.clear(); meta16l.DumpObject ( DumpToString, &rtDump ); if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16LE\n%s\n", rtDump.c_str() ); #if IncludeUTF32 rtDump.clear(); meta32b.DumpObject ( DumpToString, &rtDump ); if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32BE\n%s\n", rtDump.c_str() ); rtDump.clear(); meta32l.DumpObject ( DumpToString, &rtDump ); if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32LE\n%s\n", rtDump.c_str() ); #endif fprintf ( log, "Basic round-trip parsing tests done\n\n" ); // --------------------------------------------------------------------------------------------- fprintf ( log, "// --------------------------------------------------\n" ); fprintf ( log, "// Test parse buffering logic using full Unicode data\n\n" ); // -------------------------------------------------------------------------------------------- // Construct the packets to parse in all encodings. There is just one property with a value // containing all of the Unicode representations. This isn't all of the Unicode characters, but // is more than enough to establish correctness of the buffering logic. It is almost everything // in the BMP, plus the range U+100000..U+10FFFF beyond the BMP. Doing all Unicode characters // takes far to long to execute and does not provide additional confidence. Skip ASCII controls, // they are not allowed in XML and get changed to spaces by SetProperty. Skip U+FFFE and U+FFFF, // the expat parser rejects them. 
#define kTab 0x09 #define kLF 0x0A #define kCR 0x0D size_t i; UTF32Unit cp; sU32[0] = kTab; sU32[1] = kLF; sU32[2] = kCR; for ( i = 3, cp = 0x20; cp < 0x7F; ++i, ++cp ) sU32[i] = cp; for ( cp = 0x80; cp < 0xD800; ++i, ++cp ) sU32[i] = cp; for ( cp = 0xE000; cp < 0xFFFE; ++i, ++cp ) sU32[i] = cp; for ( cp = 0x100000; cp < 0x110000; ++i, ++cp ) sU32[i] = cp; u32Count = i; assert ( u32Count == (3 + (0x7F-0x20) + (0xD800-0x80) + (0xFFFE - 0xE000) + (0x110000-0x100000)) ); if ( kBigEndianHost ) { UTF32BE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count ); } else { UTF32LE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count ); } if ( i != u32Count ) fprintf ( log, "#ERROR: Failed to convert full UTF-32 buffer\n" ); assert ( u8Count == (3 + (0x7F-0x20) + 2*(0x800-0x80) + 3*(0xD800-0x800) + 3*(0xFFFE - 0xE000) + 4*(0x110000-0x100000)) ); sU8[u8Count] = 0; std::string fullUnicode; SXMPUtils::RemoveProperties ( &meta, "", "", kXMPUI_DoAllProperties ); meta.SetProperty ( kNS1, "FullUnicode", XMP_StringPtr(sU8) ); meta.GetProperty ( kNS1, "FullUnicode", &fullUnicode, 0 ); if ( (fullUnicode.size() != u8Count) || (fullUnicode != XMP_StringPtr(sU8)) ) { fprintf ( log, "#ERROR: Failed to set full UTF-8 value\n" ); if ( (fullUnicode.size() != u8Count) ) { fprintf ( log, " Size mismatch, want %d, got %d\n", u8Count, fullUnicode.size() ); } else { for ( size_t b = 0; b < u8Count; ++b ) { if ( fullUnicode[b] != sU8[b] ) fprintf ( log, " Byte mismatch at %d\n", b ); } } } u8Packet.clear(); u16bPacket.clear(); u16lPacket.clear(); u32bPacket.clear(); u32lPacket.clear(); meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) ); meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) ); meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) ); #if IncludeUTF32 meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) ); meta.SerializeToBuffer ( &u32lPacket, 
(kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) ); #endif // --------------------------------------------------------------------- // Parse the whole packet as a sanity check, then at a variety of sizes. FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode ); #endif fprintf ( log, "Full packet, no BOM, buffered parsing tests done\n" ); #if 0 // Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers. for ( i = 1; i <= 3; ++i ) { FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); #endif fprintf ( log, "%d byte buffers, no BOM, buffered parsing tests done\n", i ); } for ( i = 4; i <= 16; i *= 2 ) { FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); #endif fprintf ( log, "%d byte buffers, no BOM, buffered parsing tests done\n", i ); } #endif fprintf ( log, "\n" ); // ----------------------------------------------------------------------- // Redo the buffered parsing tests, now with a leading BOM in the packets. 
u8Packet.insert ( 0, "\xEF\xBB\xBF", 3 ); UTF32Unit NatBOM = 0x0000FEFF; UTF32Unit SwapBOM = 0xFFFE0000; if ( kBigEndianHost ) { u16bPacket.insert ( 0, XMP_StringPtr(&NatBOM)+2, 2 ); u16lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 2 ); u32bPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 ); u32lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 ); } else { u16lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 2 ); u16bPacket.insert ( 0, XMP_StringPtr(&SwapBOM)+2, 2 ); u32lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 ); u32bPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 ); } FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode ); #endif fprintf ( log, "Full packet, leading BOM, buffered parsing tests done\n" ); #if 0 // Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers. 
for ( i = 1; i <= 3; ++i ) { FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); #endif fprintf ( log, "%d byte buffers, leading BOM, buffered parsing tests done\n", i ); } for ( i = 4; i <= 16; i *= 2 ) { FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); #if IncludeUTF32 FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); #endif fprintf ( log, "%d byte buffers, leading BOM, buffered parsing tests done\n", i ); } #endif fprintf ( log, "\n" ); } // DoTest