static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) { UChar outChar; if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { const UChar* source = &outChar; *err = U_ZERO_ERROR; ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); return; } UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err); }
U_CAPI void U_EXPORT2 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode * err) { UConverter *converter; int32_t length; if(U_FAILURE(*err)) { return; } converter = args->converter; length = converter->subCharLen; if(length == 0) { return; } if(length < 0) { /* * Write/convert the substitution string. Its real length is -length. * Unlike the escape callback, we need not change the converter's * callback function because ucnv_setSubstString() verified that * the string can be converted, so we will not get a conversion error * and will not recurse. * At worst we should get a U_BUFFER_OVERFLOW_ERROR. */ const UChar *source = (const UChar *)converter->subChars; ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); return; } if(converter->sharedData->impl->writeSub!=NULL) { converter->sharedData->impl->writeSub(args, offsetIndex, err); } else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { /* TODO: Is this untestable because the MBCS converter has a writeSub function to call and the other converters don't use subChar1? */ ucnv_cbFromUWriteBytes(args, (const char *)&converter->subChar1, 1, offsetIndex, err); } else { ucnv_cbFromUWriteBytes(args, (const char *)converter->subChars, length, offsetIndex, err); } }
// Combines both gbkUrlEscapedEntityCallback and GBK character substitution. static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) { if (reason == UCNV_UNASSIGNED) { if (UChar outChar = fallbackForGBK(codePoint)) { const UChar* source = &outChar; *err = U_ZERO_ERROR; ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); return; } urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err); return; } UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); }
/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, *uses a clean copy (resetted) of the converter, to convert that unicode *escape sequence to the target codepage (if conversion failure happens then *we revert to substituting with subchar) */ U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( const void *context, UConverterFromUnicodeArgs *fromArgs, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode * err) { UChar valueString[VALUE_STRING_LENGTH]; int32_t valueStringLength = 0; int32_t i = 0; const UChar *myValueSource = NULL; UErrorCode err2 = U_ZERO_ERROR; UConverterFromUCallback original = NULL; const void *originalContext; UConverterFromUCallback ignoredCallback = NULL; const void *ignoredContext; if (reason > UCNV_IRREGULAR) { return; } ucnv_setFromUCallBack (fromArgs->converter, (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, &original, &originalContext, &err2); if (U_FAILURE (err2)) { *err = err2; return; } if(context==NULL) { while (i < length) { valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); } } else { switch(*((char*)context)) { case UCNV_PRV_ESCAPE_JAVA: while (i < length) { valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); } break; case UCNV_PRV_ESCAPE_C: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ if(length==2){ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); } else{ valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); } break; case UCNV_PRV_ESCAPE_XML_DEC: valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ if(length==2){ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); } else{ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); } valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ break; case UCNV_PRV_ESCAPE_XML_HEX: valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ if(length==2){ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); } else{ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); } valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ break; case UCNV_PRV_ESCAPE_UNICODE: valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ if (length == 2) { valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); } else { valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); } valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ break; case UCNV_PRV_ESCAPE_CSS2: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); /* Always add space character, becase the next character might be whitespace, which would erroneously be considered the termination of the escape sequence. */ valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; break; default: while (i < length) { valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); } } } myValueSource = valueString; /* reset the error */ *err = U_ZERO_ERROR; ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); ucnv_setFromUCallBack (fromArgs->converter, original, originalContext, &ignoredCallback, &ignoredContext, &err2); if (U_FAILURE (err2)) { *err = err2; return; } return; }