// -------------------------------------------------------------------------------------------------
// AppendNodeValue
// ---------------
//
// Append 'value' to 'outputStr', escaping the characters that must not appear literally in
// serialized XML.
//
//   outputStr     The string being built; the escaped value is appended to it.
//   value         The raw value to append.
//   forAttribute  When true the value is going inside a double-quoted XML attribute, so '"' is
//                 escaped as well. When false, '"' is copied through unchanged.
//
// The value is processed as alternating "runs" of ordinary bytes, which are copied in one append,
// and single special bytes, which are replaced:
//   - bytes below 0x20 become a numeric character reference of the form "&#xN;"
//   - '&', '<', '>' become "&amp;", "&lt;", "&gt;"
//   - '"' becomes "&quot;" (attribute values only)

static void
AppendNodeValue ( XMP_VarString & outputStr, const XMP_VarString & value, bool forAttribute )
{
    const unsigned char * runStart = (const unsigned char *) value.c_str();
    const unsigned char * runLimit = runStart + value.size();
    const unsigned char * runEnd;
    unsigned char ch = 0;   // Always assigned by the scan before use; initialized to quiet warnings.

    while ( runStart < runLimit ) {

        // Find the end of the current run of bytes that need no escaping.
        for ( runEnd = runStart; runEnd < runLimit; ++runEnd ) {
            ch = *runEnd;
            if ( forAttribute && (ch == '"') ) break;
            if ( (ch < 0x20) || (ch == '&') || (ch == '<') || (ch == '>') ) break;
        }

        outputStr.append ( (const char *) runStart, (runEnd - runStart) );

        if ( runEnd < runLimit ) {

            // The scan stopped on a byte that needs escaping, 'ch' still holds that byte.

            if ( ch < 0x20 ) {

                // Only the low nibble is written into the single-digit template below, so this
                // is only exact for values under 0x10. The assert documents the expected set.
                XMP_Assert ( (ch == kTab) || (ch == kLF) || (ch == kCR) );

                char hexBuf[16];
                memcpy ( hexBuf, "&#xn;", 6 );  // AUDIT: Length of "&#xn;" is 5 (6 with the nul), hexBuf size is 16.
                hexBuf[3] = kHexDigits[ch&0xF]; // Overwrite the 'n' placeholder with the hex digit.
                outputStr.append ( hexBuf, 5 );

            } else {

                if ( ch == '"' ) {
                    outputStr += "&quot;";
                } else if ( ch == '<' ) {
                    outputStr += "&lt;";
                } else if ( ch == '>' ) {
                    outputStr += "&gt;";
                } else {
                    XMP_Assert ( ch == '&' );
                    outputStr += "&amp;";
                }

            }

            ++runEnd;   // Step over the escaped byte.

        }

        runStart = runEnd;

    }

}   // AppendNodeValue
static void RDF_ResourcePropertyElement ( XMP_Node * xmpParent, const XML_Node & xmlNode, bool isTopLevel ) { if ( isTopLevel && (xmlNode.name == "iX:changes") ) return; // Strip old "punchcard" chaff. XMP_Node * newCompound = AddChildNode ( xmpParent, xmlNode, "", isTopLevel ); XML_cNodePos currAttr = xmlNode.attrs.begin(); XML_cNodePos endAttr = xmlNode.attrs.end(); for ( ; currAttr != endAttr; ++currAttr ) { XMP_VarString & attrName = (*currAttr)->name; if ( attrName == "xml:lang" ) { AddQualifierNode ( newCompound, **currAttr ); } else if ( attrName == "rdf:ID" ) { continue; // Ignore all rdf:ID attributes. } else { XMP_Throw ( "Invalid attribute for resource property element", kXMPErr_BadRDF ); } } XML_cNodePos currChild = xmlNode.content.begin(); XML_cNodePos endChild = xmlNode.content.end(); for ( ; currChild != endChild; ++currChild ) { if ( ! (*currChild)->IsWhitespaceNode() ) break; } if ( currChild == endChild ) XMP_Throw ( "Missing child of resource property element", kXMPErr_BadRDF ); if ( (*currChild)->kind != kElemNode ) XMP_Throw ( "Children of resource property element must be XML elements", kXMPErr_BadRDF ); if ( (*currChild)->name == "rdf:Bag" ) { newCompound->options |= kXMP_PropValueIsArray; } else if ( (*currChild)->name == "rdf:Seq" ) { newCompound->options |= kXMP_PropValueIsArray | kXMP_PropArrayIsOrdered; } else if ( (*currChild)->name == "rdf:Alt" ) { newCompound->options |= kXMP_PropValueIsArray | kXMP_PropArrayIsOrdered | kXMP_PropArrayIsAlternate; } else { newCompound->options |= kXMP_PropValueIsStruct; if ( (*currChild)->name != "rdf:Description" ) { XMP_VarString typeName ( (*currChild)->ns ); size_t colonPos = (*currChild)->name.find_first_of(':'); if ( colonPos == XMP_VarString::npos ) XMP_Throw ( "All XML elements must be in a namespace", kXMPErr_BadXMP ); typeName.append ( (*currChild)->name, colonPos, XMP_VarString::npos ); AddQualifierNode ( newCompound, XMP_VarString("rdf:type"), typeName ); } } RDF_NodeElement ( newCompound, 
**currChild, kNotTopLevel ); if ( newCompound->options & kRDF_HasValueElem ) { FixupQualifiedNode ( newCompound ); } else if ( newCompound->options & kXMP_PropArrayIsAlternate ) { DetectAltText ( newCompound ); } for ( ++currChild; currChild != endChild; ++currChild ) { if ( ! (*currChild)->IsWhitespaceNode() ) XMP_Throw ( "Invalid child of resource property element", kXMPErr_BadRDF ); } } // RDF_ResourcePropertyElement
// -------------------------------------------------------------------------------------------------
// SeparateArrayItems
// ------------------
//
// Split a single concatenated string into individual values and make them the items of an array
// property.
//
//   xmpObj     The XMP object whose tree holds (or will hold) the array.
//   schemaNS   Namespace of the array property, must be non-null.
//   arrayName  Name/path of the array property, must be non-null.
//   options    May contain kXMPUtil_AllowCommas (commas inside a value are kept instead of acting
//              as separators) plus an array form. A zero array form means "accept whatever form
//              an existing array already has".
//   catedStr   The nul-terminated string to split, must be non-null.
//
// If the array exists it must be a non-alternate array whose form matches any form given in
// 'options'; otherwise the array is created. The array's current children are set aside; each
// parsed value that equals the value of a set-aside child reuses that child node, other parsed
// values get new nodes, and set-aside children that were never matched are deleted at the end.
//
// Values may be unquoted (ended by a separator, or by a space followed by anything other than a
// normal/quote character) or quoted (doubled matching quotes inside are undoubled).
//
// Throws (via XMP_Throw) kXMPErr_BadOptions for options other than an array form, and
// kXMPErr_BadXPath when the named property is unusable or cannot be created.

/* class static */ void
XMPUtils::SeparateArrayItems ( XMPMeta * xmpObj, XMP_StringPtr schemaNS, XMP_StringPtr arrayName, XMP_OptionBits options, XMP_StringPtr catedStr )
{
    XMP_Assert ( (schemaNS != 0) && (arrayName != 0) && (catedStr != 0) ); // ! Enforced by wrapper.

    XMP_VarString itemValue;
    size_t itemStart, itemEnd;          // Byte offsets into catedStr, not character counts.
    size_t nextSize, charSize = 0;      // Avoid VS uninit var warnings.
    UniCharKind nextKind, charKind = UCK_normal;
    UniCodePoint nextChar, uniChar = 0;

    // Extract "special" option bits, verify and normalize the others.

    bool preserveCommas = false;
    if ( options & kXMPUtil_AllowCommas ) {
        preserveCommas = true;
        options ^= kXMPUtil_AllowCommas;    // The bit is known to be set, so this clears it.
    }

    options = VerifySetOptions ( options, 0 ); // Keep a zero value, has special meaning below.
    if ( options & ~kXMP_PropArrayFormMask ) XMP_Throw ( "Options can only provide array form", kXMPErr_BadOptions );

    // Find the array node, make sure it is OK. Move the current children aside, to be readded later if kept.

    XMP_ExpandedXPath arrayPath;
    ExpandXPath ( schemaNS, arrayName, &arrayPath );
    XMP_Node * arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_ExistingOnly );

    if ( arrayNode != 0 ) {
        // The array exists, make sure the form is compatible. A zero 'options' value means take
        // what exists; a zero existing form means the node is not an array at all.
        XMP_OptionBits arrayForm = arrayNode->options & kXMP_PropArrayFormMask;
        if ( (arrayForm == 0) || (arrayForm & kXMP_PropArrayIsAlternate) ) {
            XMP_Throw ( "Named property must be non-alternate array", kXMPErr_BadXPath );
        }
        if ( (options != 0) && (options != arrayForm) ) XMP_Throw ( "Mismatch of specified and existing array form", kXMPErr_BadXPath ); // *** Right error?
    } else {
        // The array does not exist, try to create it.
        arrayNode = FindNode ( &xmpObj->tree, arrayPath, kXMP_CreateNodes, (options | kXMP_PropValueIsArray) );
        if ( arrayNode == 0 ) XMP_Throw ( "Failed to create named array", kXMPErr_BadXPath );
    }

    // Copy the child list aside and empty the array. From here until the final loop the old
    // children are reachable only through oldChildren.
    // NOTE(review): if anything below throws (e.g. the 'new XMP_Node'), the old children that have
    // not been re-added are not deleted by this function - confirm whether a caller cleans up.

    XMP_NodeOffspring oldChildren ( arrayNode->children );
    size_t oldChildCount = oldChildren.size();

    arrayNode->children.clear();

    // Extract the item values one at a time, until the whole input string is done. Be very careful
    // in the extraction about the string positions. They are essentially byte pointers, while the
    // contents are UTF-8. Adding or subtracting 1 does not necessarily move 1 Unicode character!

    size_t endPos = strlen ( catedStr );

    itemEnd = 0;
    while ( itemEnd < endPos ) {

        // Skip any leading spaces and separation characters. Always skip commas here. They can be
        // kept when within a value, but not when alone between values.

        for ( itemStart = itemEnd; itemStart < endPos; itemStart += charSize ) {
            ClassifyCharacter ( catedStr, itemStart, &charKind, &charSize, &uniChar );
            if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) break;
        }
        if ( itemStart >= endPos ) break;   // Only separators were left.

        // On exit charKind, charSize and uniChar describe the character at itemStart. The quoted
        // branch below relies on that to learn the opening quote and its byte length.

        if ( charKind != UCK_quote ) {

            // This is not a quoted value. Scan for the end, create an array item from the substring.

            for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) {

                ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar );

                if ( (charKind == UCK_normal) || (charKind == UCK_quote) ) continue;
                if ( (charKind == UCK_comma) && preserveCommas ) continue;
                if ( charKind != UCK_space ) break;     // Any other separator ends the value.

                // A single space is part of the value only if a value character follows it.
                if ( (itemEnd + charSize) >= endPos ) break; // Anything left?
                ClassifyCharacter ( catedStr, (itemEnd+charSize), &nextKind, &nextSize, &nextChar );
                if ( (nextKind == UCK_normal) || (nextKind == UCK_quote) ) continue;
                if ( (nextKind == UCK_comma) && preserveCommas ) continue;
                break; // Have multiple spaces, or a space followed by a separator.

            }

            // NOTE(review): if XMP_VarString is std::string and XMP_StringPtr is const char*, this
            // 3-argument form converts all of catedStr to a temporary string first - confirm.
            itemValue.assign ( catedStr, itemStart, (itemEnd - itemStart) );

        } else {

            // Accumulate quoted values into a local string, undoubling internal quotes that
            // match the surrounding quotes. Do not undouble "unmatching" quotes.

            UniCodePoint openQuote = uniChar;
            UniCodePoint closeQuote = GetClosingQuote ( openQuote );

            itemStart += charSize; // Skip the opening quote;
            itemValue.erase();

            for ( itemEnd = itemStart; itemEnd < endPos; itemEnd += charSize ) {

                ClassifyCharacter ( catedStr, itemEnd, &charKind, &charSize, &uniChar );

                if ( (charKind != UCK_quote) || (! IsSurroundingQuote ( uniChar, openQuote, closeQuote)) ) {

                    // This is not a matching quote, just append it to the item value.
                    // NOTE(review): same 3-argument string form as the assign above, here once
                    // per character - confirm the cost is acceptable for long inputs.
                    itemValue.append ( catedStr, itemEnd, charSize );

                } else {

                    // This is a "matching" quote. Is it doubled, or the final closing quote? Tolerate
                    // various edge cases like undoubled opening (non-closing) quotes, or end of input.

                    if ( (itemEnd + charSize) < endPos ) {
                        ClassifyCharacter ( catedStr, itemEnd+charSize, &nextKind, &nextSize, &nextChar );
                    } else {
                        // At end of input: pretend a semicolon follows so the quote is not seen as doubled.
                        nextKind = UCK_semicolon;
                        nextSize = 0;
                        nextChar = 0x3B;
                    }

                    if ( uniChar == nextChar ) {
                        // This is doubled, copy it and skip the double.
                        itemValue.append ( catedStr, itemEnd, charSize );
                        itemEnd += nextSize; // Loop will add in charSize.
                    } else if ( ! IsClosingingQuote ( uniChar, openQuote, closeQuote ) ) {
                        // This is an undoubled, non-closing quote, copy it.
                        itemValue.append ( catedStr, itemEnd, charSize );
                    } else {
                        // This is an undoubled closing quote, skip it and exit the loop.
                        itemEnd += charSize;
                        break;
                    }

                }

            } // Loop to accumulate the quoted value.

        }

        // Add the separated item to the array. Keep a matching old value in case it had separators.

        size_t oldChild;
        for ( oldChild = 0; oldChild < oldChildCount; ++oldChild ) {
            if ( (oldChildren[oldChild] != 0) && (itemValue == oldChildren[oldChild]->value) ) break;
        }

        XMP_Node * newItem = 0;
        if ( oldChild == oldChildCount ) {
            // No set-aside child has this value, make a fresh item node.
            newItem = new XMP_Node ( arrayNode, kXMP_ArrayItemName, itemValue.c_str(), 0 );
        } else {
            // Reuse the existing node and remove it from the set-aside list.
            newItem = oldChildren[oldChild];
            oldChildren[oldChild] = 0; // ! Don't match again, let duplicates be seen.
        }
        arrayNode->children.push_back ( newItem );

    } // Loop through all of the returned items.

    // Delete any of the old children that were not kept.

    for ( size_t i = 0; i < oldChildCount; ++i ) {
        if ( oldChildren[i] != 0 ) delete oldChildren[i];
    }

} // SeparateArrayItems
static void ApplyQuotes ( XMP_VarString * item, UniCodePoint openQuote, UniCodePoint closeQuote, bool allowCommas ) { bool prevSpace = false; size_t charOffset, charLen; UniCharKind charKind; UniCodePoint uniChar; // ----------------------------------------------------------------------------------------- // See if there are any separators in the value. Stop at the first occurrance. This is a bit // tricky in order to make typical typing work conveniently. The purpose of applying quotes // is to preserve the values when splitting them back apart. That is CatenateContainerItems // and SeparateContainerItems must round trip properly. For the most part we only look for // separators here. Internal quotes, as in -- Irving "Bud" Jones -- won't cause problems in // the separation. An initial quote will though, it will make the value look quoted. charOffset = 0; ClassifyCharacter ( item->c_str(), charOffset, &charKind, &charLen, &uniChar ); if ( charKind != UCK_quote ) { for ( charOffset = 0; size_t(charOffset) < item->size(); charOffset += charLen ) { ClassifyCharacter ( item->c_str(), charOffset, &charKind, &charLen, &uniChar ); if ( charKind == UCK_space ) { if ( prevSpace ) break; // Multiple spaces are a separator. prevSpace = true; } else { prevSpace = false; if ( (charKind == UCK_semicolon) || (charKind == UCK_control) ) break; if ( (charKind == UCK_comma) && (! allowCommas) ) break; } } } if ( size_t(charOffset) < item->size() ) { // -------------------------------------------------------------------------------------- // Create a quoted copy, doubling any internal quotes that match the outer ones. Internal // quotes did not stop the "needs quoting" search, but they do need doubling. So we have // to rescan the front of the string for quotes. Handle the special case of U+301D being // closed by either U+301E or U+301F. 
XMP_VarString newItem; size_t splitPoint; for ( splitPoint = 0; splitPoint <= charOffset; ++splitPoint ) { ClassifyCharacter ( item->c_str(), splitPoint, &charKind, &charLen, &uniChar ); if ( charKind == UCK_quote ) break; } CodePointToUTF8 ( openQuote, newItem ); newItem.append ( *item, 0, splitPoint ); // Copy the leading "normal" portion. for ( charOffset = splitPoint; size_t(charOffset) < item->size(); charOffset += charLen ) { ClassifyCharacter ( item->c_str(), charOffset, &charKind, &charLen, &uniChar ); newItem.append ( *item, charOffset, charLen ); if ( (charKind == UCK_quote) && IsSurroundingQuote ( uniChar, openQuote, closeQuote ) ) { newItem.append ( *item, charOffset, charLen ); } } XMP_VarString closeStr; CodePointToUTF8 ( closeQuote, closeStr ); newItem.append ( closeStr ); *item = newItem; } } // ApplyQuotes