// -------------------------------------------------------------------------------------------------
// CodePointToUTF8
// ---------------
//
// Encode one code point as UTF-8 and assign the resulting bytes to utf8Str, replacing whatever the
// string held before.
//
//   uniChar - The code point to encode.
//   utf8Str - Receives the encoded bytes.
//
// Values up to 0x7F become a single byte. Larger values are built backwards in a local 8 byte
// buffer: the data bits are peeled off 6 at a time into bytes tagged with 0x80, then the first byte
// gets one high order 1 bit for each byte of the sequence. If the highest data byte has too many
// bits to leave room for that mask, the sequence is extended by one byte that holds only the mask.

static void
CodePointToUTF8 ( UniCodePoint uniChar, XMP_VarString & utf8Str )
{
	size_t i, byteCount;
	UniCodePoint cpTemp;

	// ! The buffer must start out zeroed. When the sequence is extended by an extra lead byte (see
	// ! below), that byte is never written by the copy loop, it is only OR-ed with the length mask.
	// ! Without the zeroing, stack garbage would leak into the lead byte (for example for U+0800).
	XMP_Uns8 buffer [8] = { 0 };

	if ( uniChar <= 0x7F ) {

		// Plain ASCII, a single byte stored at the end of the buffer.
		i = 7;
		byteCount = 1;
		buffer[7] = char(uniChar);

	} else {

		// ---------------------------------------------------------------------------------------
		// Copy the data bits from the low order end to the high order end, include the 0x80 mask.

		i = 8;
		cpTemp = uniChar;
		while ( cpTemp != 0 ) {
			-- i;	// Exit with i pointing to the last byte stored.
			buffer[i] = UnsByte(0x80) | (UnsByte(cpTemp) & 0x3F);
			cpTemp = cpTemp >> 6;
		}
		byteCount = 8 - i;	// The total number of bytes needed.
		XMP_Assert ( (2 <= byteCount) && (byteCount <= 6) );

		// -------------------------------------------------------------------------------------
		// Make sure the high order byte can hold the byte count mask, compute and set the mask.

		size_t bitCount = 0;	// The number of data bits in the first byte.
		for ( cpTemp = (buffer[i] & UnsByte(0x3F)); cpTemp != 0; cpTemp = cpTemp >> 1 ) bitCount += 1;

		// The first byte needs byteCount 1 bits plus a 0 bit, leaving 8-(byteCount+1) data bits. If
		// the data does not fit, prepend one more byte that carries only the (longer) mask.
		if ( bitCount > (8 - (byteCount + 1)) ) byteCount += 1;

		i = 8 - byteCount;	// First byte index and mask shift count.
		XMP_Assert ( i <= 6 );
		buffer[i] |= (UnsByte(0xFF) << i) & UnsByte(0xFF);	// AUDIT: Safe, i is between 0 and 6.

	}

	utf8Str.assign ( (char*)(&buffer[i]), byteCount );

}	// CodePointToUTF8
/* class static */ void XMPUtils::SeparateArrayItems ( XMPMeta * xmpObj, XMP_StringPtr schemaNS, XMP_StringPtr arrayName, XMP_OptionBits options, XMP_StringPtr catedStr ) { XMP_Assert ( (schemaNS != 0) && (arrayName != 0) && (catedStr != 0) ); // ! Enforced by wrapper. XMP_VarString itemValue; size_t itemStart, itemEnd; size_t nextSize, charSize = 0; // Avoid VS uninit var warnings. UniCharKind nextKind, charKind = UCK_normal; UniCodePoint nextChar, uniChar = 0; // Extract "special" option bits, verify and normalize the others. bool preserveCommas = false; if ( options & kXMPUtil_AllowCommas ) { preserveCommas = true; options ^= kXMPUtil_AllowCommas; } options = VerifySetOptions ( options, 0 ); // Keep a zero value, has special meaning below. if ( options & ~kXMP_PropArrayFormMask ) XMP_Throw ( "Options can only provide array form", kXMPErr_BadOptions ); // Find the array node, make sure it is OK. Move the current children aside, to be readded later if kept. XMP_ExpandedXPath arrayPath; ExpandXPath ( schemaNS, arrayName, &arrayPath ); XMP_Node * arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_ExistingOnly ); if ( arrayNode != 0 ) { // The array exists, make sure the form is compatible. Zero arrayForm means take what exists. XMP_OptionBits arrayForm = arrayNode->options & kXMP_PropArrayFormMask; if ( (arrayForm == 0) || (arrayForm & kXMP_PropArrayIsAlternate) ) { XMP_Throw ( "Named property must be non-alternate array", kXMPErr_BadXPath ); } if ( (options != 0) && (options != arrayForm) ) XMP_Throw ( "Mismatch of specified and existing array form", kXMPErr_BadXPath ); // *** Right error? } else { // The array does not exist, try to create it. 
arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_CreateNodes, (options | kXMP_PropValueIsArray) ); if ( arrayNode == 0 ) XMP_Throw ( "Failed to create named array", kXMPErr_BadXPath ); } XMP_NodeOffspring oldChildren ( arrayNode->children ); size_t oldChildCount = oldChildren.size(); arrayNode->children.clear(); // Extract the item values one at a time, until the whole input string is done. Be very careful // in the extraction about the string positions. They are essentially byte pointers, while the // contents are UTF-8. Adding or subtracting 1 does not necessarily move 1 Unicode character! size_t endPos = strlen ( catedStr ); itemEnd = 0; while ( itemEnd < endPos ) { // Skip any leading spaces and separation characters. Always skip commas here. They can be // kept when within a value, but not when alone between values. for ( itemStart = itemEnd; itemStart < endPos; itemStart += charSize ) { ClassifyCharacter ( catedStr, itemStart, &charKind, &charSize, &uniChar ); if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) break; } if ( itemStart >= endPos ) break; if ( charKind != UCK_quote ) { // This is not a quoted value. Scan for the end, create an array item from the substring. for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) { ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar ); if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) continue; if ( (charKind == UCK_comma) && preserveCommas ) continue; if ( charKind != UCK_space ) break; if ( (itemEnd + charSize) >= endPos ) break; // Anything left? ClassifyCharacter ( catedStr, (itemEnd+charSize), &nextKind, &nextSize, &nextChar ); if ( (nextKind == UCK_normal) || (nextKind == UCK_quote) ) continue; if ( (nextKind == UCK_comma) && preserveCommas ) continue; break; // Have multiple spaces, or a space followed by a separator. 
} itemValue.assign ( catedStr, itemStart, (itemEnd - itemStart) ); } else { // Accumulate quoted values into a local string, undoubling internal quotes that // match the surrounding quotes. Do not undouble "unmatching" quotes. UniCodePoint openQuote = uniChar; UniCodePoint closeQuote = GetClosingQuote ( openQuote ); itemStart += charSize; // Skip the opening quote; itemValue.erase(); for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) { ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar ); if ( (charKind != UCK_quote) || (! IsSurroundingQuote ( uniChar, openQuote, closeQuote)) ) { // This is not a matching quote, just append it to the item value. itemValue.append ( catedStr, itemEnd, charSize ); } else { // This is a "matching" quote. Is it doubled, or the final closing quote? Tolerate // various edge cases like undoubled opening (non-closing) quotes, or end of input. if ( (itemEnd + charSize) < endPos ) { ClassifyCharacter ( catedStr, itemEnd+charSize, &nextKind, &nextSize, &nextChar ); } else { nextKind = UCK_semicolon; nextSize = 0; nextChar = 0x3B; } if ( uniChar == nextChar ) { // This is doubled, copy it and skip the double. itemValue.append ( catedStr, itemEnd, charSize ); itemEnd += nextSize; // Loop will add in charSize. } else if ( ! IsClosingingQuote ( uniChar, openQuote, closeQuote ) ) { // This is an undoubled, non-closing quote, copy it. itemValue.append ( catedStr, itemEnd, charSize ); } else { // This is an undoubled closing quote, skip it and exit the loop. itemEnd += charSize; break; } } } // Loop to accumulate the quoted value. } // Add the separated item to the array. Keep a matching old value in case it had separators. 
size_t oldChild; for ( oldChild = 0; oldChild < oldChildCount; ++oldChild ) { if ( (oldChildren[oldChild] != 0) && (itemValue == oldChildren[oldChild]->value) ) break; } XMP_Node * newItem = 0; if ( oldChild == oldChildCount ) { newItem = new XMP_Node ( arrayNode, kXMP_ArrayItemName, itemValue.c_str(), 0 ); } else { newItem = oldChildren[oldChild]; oldChildren[oldChild] = 0; // ! Don't match again, let duplicates be seen. } arrayNode->children.push_back ( newItem ); } // Loop through all of the returned items. // Delete any of the old children that were not kept. for ( size_t i = 0; i < oldChildCount; ++i ) { if ( oldChildren[i] != 0 ) delete oldChildren[i]; } } // SeparateArrayItems
void ExpandXPath ( XMP_StringPtr schemaNS, XMP_StringPtr propPath, XMP_ExpandedXPath * expandedXPath ) { XMP_Assert ( (schemaNS != 0) && (propPath != 0) && (*propPath != 0) && (expandedXPath != 0) ); XMP_StringPtr stepBegin, stepEnd; XMP_StringPtr qualName, nameEnd; XMP_VarString currStep; size_t resCount = 2; // Guess at the number of steps. At least 2, plus 1 for each '/' or '['. for ( stepEnd = propPath; *stepEnd != 0; ++stepEnd ) { if ( (*stepEnd == '/') || (*stepEnd == '[') ) ++resCount; } expandedXPath->clear(); expandedXPath->reserve ( resCount ); // ------------------------------------------------------------------------------------------- // Pull out the first component and do some special processing on it: add the schema namespace // prefix and see if it is an alias. The start must be a qualName. stepBegin = propPath; stepEnd = stepBegin; while ( (*stepEnd != 0) && (*stepEnd != '/') && (*stepEnd != '[') && (*stepEnd != '*') ) ++stepEnd; if ( stepEnd == stepBegin ) XMP_Throw ( "Empty initial XPath step", kXMPErr_BadXPath ); currStep.assign ( stepBegin, (stepEnd - stepBegin) ); VerifyXPathRoot ( schemaNS, currStep.c_str(), expandedXPath ); XMP_OptionBits stepFlags = kXMP_StructFieldStep; if ( sRegisteredAliasMap->find ( (*expandedXPath)[kRootPropStep].step ) != sRegisteredAliasMap->end() ) { stepFlags |= kXMP_StepIsAlias; } (*expandedXPath)[kRootPropStep].options |= stepFlags; // ----------------------------------------------------- // Now continue to process the rest of the XPath string. while ( *stepEnd != 0 ) { stepBegin = stepEnd; if ( *stepBegin == '/' ) ++stepBegin; if ( *stepBegin == '*' ) { ++stepBegin; if ( *stepBegin != '[' ) XMP_Throw ( "Missing '[' after '*'", kXMPErr_BadXPath ); } stepEnd = stepBegin; if ( *stepBegin != '[' ) { // A struct field or qualifier. qualName = stepBegin; while ( (*stepEnd != 0) && (*stepEnd != '/') && (*stepEnd != '[') && (*stepEnd != '*') ) ++stepEnd; nameEnd = stepEnd; stepFlags = kXMP_StructFieldStep; // ! 
Touch up later, also changing '@' to '?'. } else { // One of the array forms. ++stepEnd; // Look at the character after the leading '['. if ( ('0' <= *stepEnd) && (*stepEnd <= '9') ) { // A numeric (decimal integer) array index. while ( (*stepEnd != 0) && ('0' <= *stepEnd) && (*stepEnd <= '9') ) ++stepEnd; if ( *stepEnd != ']' ) XMP_Throw ( "Missing ']' for integer array index", kXMPErr_BadXPath ); stepFlags = kXMP_ArrayIndexStep; } else { // Could be "[last()]" or one of the selector forms. Find the ']' or '='. while ( (*stepEnd != 0) && (*stepEnd != ']') && (*stepEnd != '=') ) ++stepEnd; if ( *stepEnd == 0 ) XMP_Throw ( "Missing ']' or '=' for array index", kXMPErr_BadXPath ); if ( *stepEnd == ']' ) { if ( strncmp ( "[last()", stepBegin, (stepEnd - stepBegin) ) != 0 ) { XMP_Throw ( "Invalid non-numeric array index", kXMPErr_BadXPath ); } stepFlags = kXMP_ArrayLastStep; } else { qualName = stepBegin+1; nameEnd = stepEnd; ++stepEnd; // Absorb the '=', remember the quote. const char quote = *stepEnd; if ( (quote != '\'') && (quote != '"') ) { XMP_Throw ( "Invalid quote in array selector", kXMPErr_BadXPath ); } ++stepEnd; // Absorb the leading quote. while ( *stepEnd != 0 ) { if ( *stepEnd == quote ) { if ( *(stepEnd+1) != quote ) break; ++stepEnd; } ++stepEnd; } if ( *stepEnd == 0 ) { XMP_Throw ( "No terminating quote for array selector", kXMPErr_BadXPath ); } ++stepEnd; // Absorb the trailing quote. stepFlags = kXMP_FieldSelectorStep; // ! Touch up later, also changing '@' to '?'. } } if ( *stepEnd != ']' ) XMP_Throw ( "Missing ']' for array index", kXMPErr_BadXPath ); ++stepEnd; } if ( stepEnd == stepBegin ) XMP_Throw ( "Empty XPath step", kXMPErr_BadXPath ); currStep.assign ( stepBegin, (stepEnd - stepBegin) ); if ( GetStepKind ( stepFlags ) == kXMP_StructFieldStep ) { if ( currStep[0] == '@' ) { currStep[0] = '?'; if ( currStep != "?xml:lang" ) XMP_Throw ( "Only xml:lang allowed with '@'", kXMPErr_BadXPath ); } if ( currStep[0] == '?' 
) { ++qualName; stepFlags = kXMP_QualifierStep; } VerifyQualName ( qualName, nameEnd ); } else if ( GetStepKind ( stepFlags ) == kXMP_FieldSelectorStep ) { if ( currStep[1] == '@' ) { currStep[1] = '?'; if ( strncmp ( currStep.c_str(), "[?xml:lang=", 11 ) != 0 ) { XMP_Throw ( "Only xml:lang allowed with '@'", kXMPErr_BadXPath ); } } if ( currStep[1] == '?' ) { ++qualName; stepFlags = kXMP_QualSelectorStep; } VerifyQualName ( qualName, nameEnd ); } expandedXPath->push_back ( XPathStepInfo ( currStep, stepFlags ) ); } } // ExpandXPath