// -------------------------------------------------------------------------------------------------
// CodePointToUTF8
// ---------------
//
// Encode one Unicode code point as UTF-8 and store the bytes in utf8Str, replacing its previous
// content. Values up to 0x7F become a single byte. Larger values are built right-aligned in an
// 8 byte scratch buffer: the 6-bit data groups are stored from the low order end, then the lead
// byte gets the byte count mask. If the highest data group leaves no room for that mask, one more
// (data free) lead byte is added in front.
//
// uniChar - the code point to encode.
// utf8Str - receives the encoded bytes.

static void
CodePointToUTF8 ( UniCodePoint uniChar, XMP_VarString & utf8Str )
{
	size_t i, byteCount;
	XMP_Uns8 buffer [8] = { 0 };	// ! Zero filled, an added lead byte must start with no data bits.
	UniCodePoint cpTemp;
	
	if ( uniChar <= 0x7F ) {

		// Plain ASCII, a single byte in the last buffer slot.
		i = 7;
		byteCount = 1;
		buffer[7] = char(uniChar);
	
	} else {

		// ---------------------------------------------------------------------------------------
		// Copy the data bits from the low order end to the high order end, include the 0x80 mask.
		
		i = 8;
		cpTemp = uniChar;
		while ( cpTemp != 0 ) {
			-- i;	// Exit with i pointing to the last byte stored.
			buffer[i] = UnsByte(0x80) | (UnsByte(cpTemp) & 0x3F);
			cpTemp = cpTemp >> 6;
		}
		byteCount = 8 - i;	// The total number of bytes needed.
		XMP_Assert ( (2 <= byteCount) && (byteCount <= 6) );

		// -------------------------------------------------------------------------------------
		// Make sure the high order byte can hold the byte count mask, compute and set the mask.
		// A lead byte of an N byte sequence has room for 8-(N+1) data bits. If the top data group
		// uses more than that, it stays a continuation byte and a new lead byte is put in front.
		// That new byte was never written by the loop above, it relies on the zero filled buffer.
		
		size_t bitCount = 0;	// The number of data bits in the first byte.
		for ( cpTemp = (buffer[i] & UnsByte(0x3F)); cpTemp != 0; cpTemp = cpTemp >> 1 ) bitCount += 1;
		if ( bitCount > (8 - (byteCount + 1)) ) byteCount += 1;
		
		i = 8 - byteCount;	// First byte index and mask shift count.
		XMP_Assert ( (0 <= i) && (i <= 6) );
		buffer[i] |= (UnsByte(0xFF) << i) & UnsByte(0xFF);	// AUDIT: Safe, i is between 0 and 6.
	
	}
	
	utf8Str.assign ( (char*)(&buffer[i]), byteCount );
	
}	// CodePointToUTF8
// -------------------------------------------------------------------------------------------------
// SeparateArrayItems
// ------------------
//
// Split catedStr into individual item values and rebuild the children of the array property named
// by schemaNS/arrayName in xmpObj from those values.
//
// xmpObj    - the object whose tree holds (or will hold) the array.
// schemaNS  - namespace of the array property, must not be null.
// arrayName - path of the array property, must not be null.
// options   - may contain kXMPUtil_AllowCommas (commas are then kept inside unquoted values) and
//             array form bits. A zero array form means "use whatever form the array already has".
// catedStr  - the concatenated UTF-8 input string, must not be null.
//
// Throws kXMPErr_BadOptions if options holds bits outside kXMP_PropArrayFormMask after
// normalization. Throws kXMPErr_BadXPath if the existing node is not a non-alternate array, if the
// requested form differs from the existing one, or if the array cannot be created.
//
// An existing child whose value equals an extracted item is reused for that item (each old child
// at most once). Old children that match nothing are deleted at the end.
//
// NOTE(review): between clearing arrayNode->children and the final cleanup loop the old children
// are referenced only from the local oldChildren copy. If something in the extraction loop throws
// they appear to be neither restored nor deleted - confirm against the callers' expectations.

/* class static */ void
XMPUtils::SeparateArrayItems ( XMPMeta *	  xmpObj,
							   XMP_StringPtr  schemaNS,
							   XMP_StringPtr  arrayName,
							   XMP_OptionBits options,
							   XMP_StringPtr  catedStr )
{
	XMP_Assert ( (schemaNS != 0) && (arrayName != 0) && (catedStr != 0) );	// ! Enforced by wrapper.
	
	XMP_VarString itemValue;
	size_t itemStart, itemEnd;	// Byte offsets into catedStr, not character counts.
	size_t nextSize, charSize = 0;	// Avoid VS uninit var warnings.
	UniCharKind	  nextKind, charKind = UCK_normal;
	UniCodePoint  nextChar, uniChar = 0;
	
	// Extract "special" option bits, verify and normalize the others.
	
	bool preserveCommas = false;
	if ( options & kXMPUtil_AllowCommas ) {
		preserveCommas = true;
		options ^= kXMPUtil_AllowCommas;	// The bit is known to be set, so this clears it.
	}

	options = VerifySetOptions ( options, 0 );	// Keep a zero value, has special meaning below.
	if ( options & ~kXMP_PropArrayFormMask ) XMP_Throw ( "Options can only provide array form", kXMPErr_BadOptions );
	
	// Find the array node, make sure it is OK. Move the current children aside, to be readded later if kept.
	
	XMP_ExpandedXPath arrayPath;
	ExpandXPath ( schemaNS, arrayName, &arrayPath );
	XMP_Node * arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_ExistingOnly );
	
	if ( arrayNode != 0 ) {
		// The array exists, make sure the form is compatible. Zero arrayForm means take what exists.
		XMP_OptionBits arrayForm = arrayNode->options & kXMP_PropArrayFormMask;
		if ( (arrayForm == 0) || (arrayForm & kXMP_PropArrayIsAlternate) ) {
			XMP_Throw ( "Named property must be non-alternate array", kXMPErr_BadXPath );
		}
		if ( (options != 0) && (options != arrayForm) ) XMP_Throw ( "Mismatch of specified and existing array form", kXMPErr_BadXPath );	// *** Right error?
	} else {
		// The array does not exist, try to create it.
		arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_CreateNodes, (options | kXMP_PropValueIsArray) );
		if ( arrayNode == 0 ) XMP_Throw ( "Failed to create named array", kXMPErr_BadXPath );
	}

	// Keep a copy of the current child pointers and empty the array. Kept children are pushed back
	// below, their slot in oldChildren is zeroed so the cleanup loop at the end skips them.
	XMP_NodeOffspring oldChildren ( arrayNode->children );
	size_t oldChildCount = oldChildren.size();
	arrayNode->children.clear();
	
	// Extract the item values one at a time, until the whole input string is done. Be very careful
	// in the extraction about the string positions. They are essentially byte pointers, while the
	// contents are UTF-8. Adding or subtracting 1 does not necessarily move 1 Unicode character!
	
	size_t endPos = strlen ( catedStr );
	
	itemEnd = 0;
	while ( itemEnd < endPos ) {
		
		// Skip any leading spaces and separation characters. Always skip commas here. They can be
		// kept when within a value, but not when alone between values.
		
		for ( itemStart = itemEnd; itemStart < endPos; itemStart += charSize ) {
			ClassifyCharacter ( catedStr, itemStart, &charKind, &charSize, &uniChar );
			if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) break;
		}
		if ( itemStart >= endPos ) break;	// Only separators were left.
		
		if ( charKind != UCK_quote ) {
		
			// This is not a quoted value. Scan for the end, create an array item from the substring.
			// A single space stays part of the value only when the character after it is normal, a
			// quote, or (with preserveCommas) a comma. Any other non-normal character ends the value.

			for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) {

				ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar );

				if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) continue;
				if ( (charKind == UCK_comma) && preserveCommas ) continue;
				if ( charKind != UCK_space ) break;

				if ( (itemEnd + charSize) >= endPos ) break;	// Anything left?
				ClassifyCharacter ( catedStr, (itemEnd+charSize), &nextKind, &nextSize, &nextChar );
				if ( (nextKind == UCK_normal) || (nextKind == UCK_quote) ) continue;
				if ( (nextKind == UCK_comma) && preserveCommas ) continue;
				break;	// Have multiple spaces, or a space followed by a separator.

			}		

			itemValue.assign ( catedStr, itemStart, (itemEnd - itemStart) );
		
		} else {
		
			// Accumulate quoted values into a local string, undoubling internal quotes that
			// match the surrounding quotes. Do not undouble "unmatching" quotes.
		
			UniCodePoint openQuote = uniChar;
			UniCodePoint closeQuote = GetClosingQuote ( openQuote );

			itemStart += charSize;	// Skip the opening quote;
			itemValue.erase();
			
			for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) {

				ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar );

				if ( (charKind != UCK_quote) || (! IsSurroundingQuote ( uniChar, openQuote, closeQuote)) ) {
				
					// This is not a matching quote, just append it to the item value.
					itemValue.append ( catedStr, itemEnd, charSize );
					
				} else {
				
					// This is a "matching" quote. Is it doubled, or the final closing quote? Tolerate
					// various edge cases like undoubled opening (non-closing) quotes, or end of input.
					
					if ( (itemEnd + charSize) < endPos ) {
						ClassifyCharacter ( catedStr, itemEnd+charSize, &nextKind, &nextSize, &nextChar );
					} else {
						// At the end of the input, act as if a semicolon (0x3B) of size 0 follows.
						nextKind = UCK_semicolon; nextSize = 0; nextChar = 0x3B;
					}
					
					if ( uniChar == nextChar ) {
						// This is doubled, copy it and skip the double.
						itemValue.append ( catedStr, itemEnd, charSize );
						itemEnd += nextSize;	// Loop will add in charSize.
					} else if ( ! IsClosingingQuote ( uniChar, openQuote, closeQuote ) ) {
						// This is an undoubled, non-closing quote, copy it.
						itemValue.append ( catedStr, itemEnd, charSize );
					} else {
						// This is an undoubled closing quote, skip it and exit the loop.
						itemEnd += charSize;
						break;
					}

				}

			}	// Loop to accumulate the quoted value.
		
		}

		// Add the separated item to the array. Keep a matching old value in case it had separators.
		
		size_t oldChild;
		for ( oldChild = 0; oldChild < oldChildCount; ++oldChild ) {
			if ( (oldChildren[oldChild] != 0) && (itemValue == oldChildren[oldChild]->value) ) break;
		}
		
		XMP_Node * newItem = 0;
		if ( oldChild == oldChildCount ) {
			// No unused old child has this value, create a fresh item node.
			newItem = new XMP_Node ( arrayNode, kXMP_ArrayItemName, itemValue.c_str(), 0 );
		} else {
			newItem = oldChildren[oldChild];
			oldChildren[oldChild] = 0;	// ! Don't match again, let duplicates be seen.
		}
		arrayNode->children.push_back ( newItem );
		
	}	// Loop through all of the returned items.

	// Delete any of the old children that were not kept.
	for ( size_t i = 0; i < oldChildCount; ++i ) {
		if ( oldChildren[i] != 0 ) delete oldChildren[i];
	}
	
}	// SeparateArrayItems
// Example #3
// 0
// -------------------------------------------------------------------------------------------------
// ExpandXPath
// -----------
//
// Split propPath into its individual steps and store them, in order, in *expandedXPath, which is
// cleared first. The first step is handed to VerifyXPathRoot together with schemaNS, and the entry
// at kRootPropStep is then flagged as a struct field step, plus kXMP_StepIsAlias if its name is in
// sRegisteredAliasMap. (Presumably VerifyXPathRoot is what appends the schema and root entries -
// confirm there.) Every following step is appended as an XPathStepInfo holding the step text and
// one of these kinds:
//
//   name or /name        kXMP_StructFieldStep
//   ?name or @xml:lang   kXMP_QualifierStep, a leading '@' is stored as '?'
//   [123]                kXMP_ArrayIndexStep
//   [last()]             kXMP_ArrayLastStep
//   [name="value"]       kXMP_FieldSelectorStep, quotes may be ' or ", doubled to embed one
//   [?name="value"]      kXMP_QualSelectorStep, "[@xml:lang=" is stored as "[?xml:lang="
//
// A '*' may precede an array step and is not stored as part of the step text.
//
// schemaNS      - namespace of the root property, must not be null.
// propPath      - the path to expand, must not be null or empty.
// expandedXPath - receives the steps, must not be null.
//
// Throws kXMPErr_BadXPath for any malformed step (empty step, missing ']' or quote, '@' used with
// something other than xml:lang, non-numeric index other than exactly "[last()]").

void
ExpandXPath	( XMP_StringPtr			schemaNS,
			  XMP_StringPtr			propPath,
			  XMP_ExpandedXPath *	expandedXPath )
{
	XMP_Assert ( (schemaNS != 0) && (propPath != 0) && (*propPath != 0) && (expandedXPath != 0) );
	
	XMP_StringPtr	stepBegin, stepEnd;
	XMP_StringPtr	qualName, nameEnd;	// Only set (and used) for struct field and selector steps.
	XMP_VarString	currStep;
		
	size_t resCount = 2;	// Guess at the number of steps. At least 2, plus 1 for each '/' or '['.
	for ( stepEnd = propPath; *stepEnd != 0; ++stepEnd ) {
		if ( (*stepEnd == '/') || (*stepEnd == '[') ) ++resCount;
	}
	
	expandedXPath->clear();
	expandedXPath->reserve ( resCount );
	
	// -------------------------------------------------------------------------------------------
	// Pull out the first component and do some special processing on it: add the schema namespace
	// prefix and see if it is an alias. The start must be a qualName.
	
	stepBegin = propPath;
	stepEnd = stepBegin;
	while ( (*stepEnd != 0) && (*stepEnd != '/') && (*stepEnd != '[') && (*stepEnd != '*') ) ++stepEnd;
	if ( stepEnd == stepBegin ) XMP_Throw ( "Empty initial XPath step", kXMPErr_BadXPath );
	currStep.assign ( stepBegin, (stepEnd - stepBegin) );
	
	VerifyXPathRoot ( schemaNS, currStep.c_str(), expandedXPath );

	XMP_OptionBits stepFlags = kXMP_StructFieldStep;	
	if ( sRegisteredAliasMap->find ( (*expandedXPath)[kRootPropStep].step ) != sRegisteredAliasMap->end() ) {
		stepFlags |= kXMP_StepIsAlias;
	}
	(*expandedXPath)[kRootPropStep].options |= stepFlags;
		
	// -----------------------------------------------------
	// Now continue to process the rest of the XPath string.

	while ( *stepEnd != 0 ) {

		// Skip the separator in front of the step: an optional '/', then an optional '*' that
		// must be followed by an array step.
		stepBegin = stepEnd;
		if ( *stepBegin == '/' ) ++stepBegin;
		if ( *stepBegin == '*' ) {
			++stepBegin;
			if ( *stepBegin != '[' ) XMP_Throw ( "Missing '[' after '*'", kXMPErr_BadXPath );
		}
		stepEnd = stepBegin;

		if ( *stepBegin != '[' ) {
		
			// A struct field or qualifier.
			qualName = stepBegin;
			while ( (*stepEnd != 0) && (*stepEnd != '/') && (*stepEnd != '[') && (*stepEnd != '*') ) ++stepEnd;
			nameEnd = stepEnd;
			stepFlags = kXMP_StructFieldStep;	// ! Touch up later, also changing '@' to '?'.
			
		} else {
		
			// One of the array forms.
		
			++stepEnd;	// Look at the character after the leading '['.
			
			if ( ('0' <= *stepEnd) && (*stepEnd <= '9') ) {

				// A numeric (decimal integer) array index.
				while ( (*stepEnd != 0) && ('0' <= *stepEnd) && (*stepEnd <= '9') ) ++stepEnd;
				if ( *stepEnd != ']' ) XMP_Throw ( "Missing ']' for integer array index", kXMPErr_BadXPath );
				stepFlags = kXMP_ArrayIndexStep;

			} else {

				// Could be "[last()]" or one of the selector forms. Find the ']' or '='.
				
				while ( (*stepEnd != 0) && (*stepEnd != ']') && (*stepEnd != '=') ) ++stepEnd;
				if ( *stepEnd == 0 ) XMP_Throw ( "Missing ']' or '=' for array index", kXMPErr_BadXPath );

				if ( *stepEnd == ']' ) {

					// ! The text before the ']' must be exactly "[last()". Comparing only as many
					// ! characters as the step has would accept any prefix, like "[]" or "[la]".
					static const char kLastStepText[] = "[last()";
					const size_t kLastStepLen = sizeof(kLastStepText) - 1;
					const size_t stepLen = size_t(stepEnd - stepBegin);
					if ( (stepLen != kLastStepLen) || (strncmp ( kLastStepText, stepBegin, kLastStepLen ) != 0) ) {
						XMP_Throw ( "Invalid non-numeric array index", kXMPErr_BadXPath );
					}
					stepFlags = kXMP_ArrayLastStep;

				} else {

					qualName = stepBegin+1;
					nameEnd = stepEnd;
					++stepEnd;	// Absorb the '=', remember the quote.
					const char quote = *stepEnd;
					if ( (quote != '\'') && (quote != '"') ) {
						XMP_Throw ( "Invalid quote in array selector", kXMPErr_BadXPath );
					}

					++stepEnd;	// Absorb the leading quote.
					while ( *stepEnd != 0 ) {
						if ( *stepEnd == quote ) {
							if ( *(stepEnd+1) != quote ) break;	// An undoubled quote ends the value.
							++stepEnd;	// Skip the first of a doubled quote.
						}
						++stepEnd;
					}
					if ( *stepEnd == 0 ) {
						XMP_Throw ( "No terminating quote for array selector", kXMPErr_BadXPath );
					}
					++stepEnd;	// Absorb the trailing quote.
					
					stepFlags = kXMP_FieldSelectorStep;	// ! Touch up later, also changing '@' to '?'.

				}

			}

			if ( *stepEnd != ']' ) XMP_Throw ( "Missing ']' for array index", kXMPErr_BadXPath );
			++stepEnd;
			
		}

		if ( stepEnd == stepBegin ) XMP_Throw ( "Empty XPath step", kXMPErr_BadXPath );
		currStep.assign ( stepBegin, (stepEnd - stepBegin) );

		// Touch up the qualifier forms: '@' is only an alias of '?' for xml:lang, and the name
		// that gets verified starts after the '?'.

		if ( GetStepKind ( stepFlags ) == kXMP_StructFieldStep ) {

			if ( currStep[0] == '@' ) {
				currStep[0] = '?';
				if ( currStep != "?xml:lang" ) XMP_Throw ( "Only xml:lang allowed with '@'", kXMPErr_BadXPath );
			}
			if ( currStep[0] == '?' ) {
				++qualName;
				stepFlags = kXMP_QualifierStep;
			}
			VerifyQualName ( qualName, nameEnd );

		} else if ( GetStepKind ( stepFlags ) == kXMP_FieldSelectorStep ) {

			if ( currStep[1] == '@' ) {
				currStep[1] = '?';
				if ( strncmp ( currStep.c_str(), "[?xml:lang=", 11 ) != 0 ) {
					XMP_Throw ( "Only xml:lang allowed with '@'", kXMPErr_BadXPath );
				}
			}
			if ( currStep[1] == '?' ) {
				++qualName;
				stepFlags = kXMP_QualSelectorStep;
			}
			VerifyQualName ( qualName, nameEnd );

		}

		expandedXPath->push_back ( XPathStepInfo ( currStep, stepFlags ) );

	}

}	// ExpandXPath