/*
 * Sample 12: converts a short NUL-terminated Shift-JIS byte string to UChars
 * with ucnv_toUChars() and prints the input bytes followed by the output.
 *
 * Always returns U_ZERO_ERROR; the converter status is checked with
 * assert()/U_ASSERT() along the way.
 */
UErrorCode convsample_12()
{
    printf("\n\n==============================================\n"
           "Sample 12: C: simple sjis -> unicode conversion\n");

    // **************************** START SAMPLE *******************

    char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
    UChar target[100];
    UErrorCode status = U_ZERO_ERROR;

    // Open the Shift-JIS converter.
    UConverter *sjisCnv = ucnv_open("shift_jis", &status);
    assert(U_SUCCESS(status));

    // One slot of the output buffer is pre-filled before converting.
    // NOTE(review): presumably a marker to observe what the converter writes - confirm.
    target[6] = 0xFDCA;

    // The source is an 8-bit, NUL-terminated codepage string, so strlen()
    // is a valid way to obtain its length in bytes.
    const int32_t targetLen =
        ucnv_toUChars(sjisCnv, target, 100, source, strlen(source), &status);
    U_ASSERT(status);

    // The converter is no longer needed once the text is converted.
    ucnv_close(sjisCnv);

    // ***************************** END SAMPLE ********************

    // Dump the input bytes, then the converted UChars.
    printBytes("src", source, strlen(source) );
    printf("\n");
    printUChars("targ", target, targetLen);

    return U_ZERO_ERROR;
}
/*
 * Sample 02: converts a NUL-terminated UChar string (Cyrillic letters followed
 * by '!') to koi8-r bytes with ucnv_fromUChars() and prints source and result.
 *
 * Always returns U_ZERO_ERROR; the converter status is checked with assert().
 */
UErrorCode convsample_02()
{
    printf("\n\n==============================================\n"
           "Sample 02: C: simple Unicode -> koi8-r conversion\n");

    // **************************** START SAMPLE *******************

    // U+041C U+043E U+0441 U+043A U+0432 U+0430 U+0021, NUL-terminated
    UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432, 0x0430, 0x0021, 0x0000 };
    char target[100];
    UErrorCode status = U_ZERO_ERROR;

    // Open the koi8-r converter.
    UConverter *koi8Cnv = ucnv_open("koi8-r", &status);
    assert(U_SUCCESS(status));

    // Convert to koi8-r. The source length is passed as -1; the source
    // array above ends in a NUL UChar.
    const int32_t targetLen = ucnv_fromUChars(koi8Cnv, target, 100, source, -1, &status);
    assert(U_SUCCESS(status));

    // The converter is no longer needed once the text is converted.
    ucnv_close(koi8Cnv);

    // ***************************** END SAMPLE ********************

    // Dump the source UChars, then the converted bytes.
    printUChars("src", source);
    printf("\n");
    printBytes("targ", target, targetLen);

    return U_ZERO_ERROR;
}
/*
 * Prints a single code point.
 *
 * Values that are not above 0xFFFF are narrowed to one UChar and handed to
 * printUChars() under the label "C"; anything larger is printed directly as
 * "ch: U+XXXXXX".
 */
void printUChar(UChar32 ch32)
{
    if (ch32 <= 0xFFFF) {
        UChar unit = (UChar)ch32;
        printUChars("C", &unit, 1);
        return;
    }

    printf("ch: U+%06X\n", ch32);
}
/*
 * Table-driven test for UTF8_APPEND_CHAR_UNSAFE and UTF8_APPEND_CHAR_SAFE.
 *
 * Each entry of test[] is a pair (append position, code point).  For every
 * pair a fresh copy of s is made, the code point is appended at the given
 * position, and both the resulting offset and the first sizeof(s) bytes of
 * the buffer are compared with movedOffset[] and result[].  The first 13
 * pairs go through the UNSAFE macro, the remaining ones through the SAFE
 * macro.  Mismatches are reported with log_err().
 */
static void TestAppendChar(){
    /* Template text copied into the work buffer before each case. */
    static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};

    static const uint32_t test[]={
        /* append-position (unsafe), code point to append */
        0,  0x10401,
        2,  0x0028,
        2,  0x007f,
        3,  0xd801,
        1,  0x20402,
        8,  0x10401,
        5,  0xc0,
        5,  0xc1,
        5,  0xfd,
        6,  0x80,
        6,  0x81,
        6,  0xbf,
        7,  0xfe,

        /* append-position (safe), code point to append */
        0,  0x10401,
        2,  0x0028,
        3,  0x7f,
        3,  0xd801,   /* illegal for UTF-8 starting with Unicode 3.2 */
        1,  0x20402,
        9,  0x10401,
        5,  0xc0,
        5,  0xc1,
        5,  0xfd,
        6,  0x80,
        6,  0x81,
        6,  0xbf,
        7,  0xfe,
    };

    static const uint16_t movedOffset[]={
        /* expected offset after the append (unsafe) */
        4,  /* append position 0, code point 0x10401 */
        3,
        3,
        6,
        5,
        12,
        7,
        7,
        7,
        8,
        8,
        8,
        9,

        /* expected offset after the append (safe) */
        4,  /* append position 0, code point 0x10401 */
        3,
        4,
        6,
        5,
        11,
        7,
        7,
        7,
        8,
        8,
        8,
        9,
    };

    static const uint8_t result[][11]={
        /* expected buffer contents (unsafe) */
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},

        /* expected buffer contents (safe) */
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /* expects UTF8_ERROR_VALUE_2, written here as the two bytes 0xc2, 0x9f */

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
    };

    /* Cases with an index below this value exercise the UNSAFE macro. */
    static const uint16_t unsafeCases=13;

    uint16_t pairIdx, caseNo=0;
    uint8_t str[12];
    uint16_t size=sizeof(s)/sizeof(s[0]);

    for(pairIdx=0; pairIdx<sizeof(test)/sizeof(test[0]); pairIdx=(uint16_t)(pairIdx+2), caseNo++){
        const uint32_t ch=test[pairIdx+1];
        const uint16_t wantOffset=movedOffset[caseNo];
        const uint8_t *want=result[caseNo];
        uint32_t offset=test[pairIdx];

        /* Every case starts from an untouched copy of the template. */
        uprv_memcpy(str, s, size);

        if(caseNo>=unsafeCases){
            UTF8_APPEND_CHAR_SAFE(str, offset, size, ch);
            if(offset != wantOffset){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n", caseNo, wantOffset, offset);
            }
            if(uprv_memcmp(str, want, size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", caseNo);
                printUChars(want, size);
                log_err("\nGot:     ");
                printUChars(str, size);
                log_err("\n");
            }
            /* Disabled variant that calls the API instead of the macro:
            uprv_memcpy(str, s, size);
            offset=test[i];
            c=test[i+1];
            if((uint32_t)(c)<=0x7f) {
                (str)[(offset)++]=(uint8_t)(c);
            } else {
                (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c);
            }
            if(offset != movedOffset[count]){
                log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n", count, movedOffset[count], offset);
            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count);
                printUChars(result[count], size);
                printf("\nGot:     ");
                printUChars(str, size);
                printf("\n");
            }
            */
        }else{
            UTF8_APPEND_CHAR_UNSAFE(str, offset, ch);
            if(offset != wantOffset){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n", caseNo, wantOffset, offset);
            }
            if(uprv_memcmp(str, want, size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", caseNo);
                printUChars(want, size);
                log_err("\nGot:      ");
                printUChars(str, size);
                log_err("\n");
            }
        }
    }
}
UBool convsample_21_didSubstitute(const char *source) { UChar uchars[100]; char bytes[100]; UConverter *conv = NULL, *cloneCnv = NULL; UErrorCode status = U_ZERO_ERROR; uint32_t len, len2; int32_t cloneLen; UBool flagVal = FALSE; UConverterFromUCallback junkCB; FromUFLAGContext *flagCtx = NULL, *cloneFlagCtx = NULL; debugCBContext *debugCtx1 = NULL, *debugCtx2 = NULL, *cloneDebugCtx = NULL; printf("\n\n==============================================\n" "Sample 21: C: Test for substitution w/ callbacks & clones \n"); /* print out the original source */ printBytes("src", source); printf("\n"); /* First, convert from UTF8 to unicode */ conv = ucnv_open("utf-8", &status); U_ASSERT(status); len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status); U_ASSERT(status); printUChars("uch", uchars, len); printf("\n"); /* Now, close the converter */ ucnv_close(conv); /* Now, convert to windows-1252 */ conv = ucnv_open("windows-1252", &status); U_ASSERT(status); /* Converter starts out with the SUBSTITUTE callback set. 
*/ /* initialize our callback */ /* from the 'bottom' innermost, out * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */ #if DEBUG_TMI printf("flagCB_fromU = %p\n", &flagCB_fromU); printf("debugCB_fromU = %p\n", &debugCB_fromU); #endif debugCtx1 = debugCB_openContext(); flagCtx = flagCB_fromU_openContext(); debugCtx2 = debugCB_openContext(); debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */ debugCtx1->subContext = flagCtx; flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */ flagCtx->subContext = debugCtx2; debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE; debugCtx2->subContext = NULL; /* Set our special callback */ ucnv_setFromUCallBack(conv, debugCB_fromU, debugCtx1, &(debugCtx2->subCallback), &(debugCtx2->subContext), &status); U_ASSERT(status); #if DEBUG_TMI printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n", conv, debugCtx1, debugCtx1->subCallback, debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback); #endif cloneLen = 1; /* but passing in null so it will clone */ cloneCnv = ucnv_safeClone(conv, NULL, &cloneLen, &status); U_ASSERT(status); #if DEBUG_TMI printf("Cloned converter from %p -> %p. 
Closing %p.\n", conv, cloneCnv, conv); #endif ucnv_close(conv); #if DEBUG_TMI printf("%p closed.\n", conv); #endif U_ASSERT(status); /* Now, we have to extract the context */ cloneDebugCtx = NULL; cloneFlagCtx = NULL; ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx); if(cloneDebugCtx != NULL) { cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext; } printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n", cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL ); len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status); U_ASSERT(status); if(cloneFlagCtx != NULL) { flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */ } else { printf("** Warning, couldn't get the subcallback \n"); } ucnv_close(cloneCnv); /* print out the original source */ printBytes("bytes", bytes, len2); return flagVal; /* true if callback was called */ }
/*
 * Sample 20: substitution detection with a flag callback.
 *
 * The UTF-8 input is converted to UChars, then converted to windows-1252
 * through a converter whose from-Unicode callback is flagCB_fromU.
 *
 * Returns the flag stored in the callback context after the conversion
 * (true if the callback was called).
 */
UBool convsample_20_didSubstitute(const char *source)
{
    UChar uchars[100];
    char bytes[100];
    UErrorCode status = U_ZERO_ERROR;
    uint32_t len, len2;
    UBool flagVal;
    UConverter *conv = NULL;
    FromUFLAGContext *flagCtx = NULL;

    printf("\n\n==============================================\n"
           "Sample 20: C: Test for substitution using callbacks\n");

    /* show the input bytes */
    printBytes("src", source);
    printf("\n");

    /* Step 1: UTF-8 -> UChars */
    conv = ucnv_open("utf-8", &status);
    U_ASSERT(status);

    len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
    U_ASSERT(status);

    printUChars("uch", uchars, len);
    printf("\n");

    /* done with the UTF-8 converter */
    ucnv_close(conv);

    /* Step 2: open the windows-1252 converter */
    conv = ucnv_open("windows-1252", &status);
    U_ASSERT(status);

    /* Converter starts out with the SUBSTITUTE callback set. */

    /* Create the flag context and install the flag callback; the previous
       callback and context are stored into the flag context. */
    flagCtx = flagCB_fromU_openContext();

    ucnv_setFromUCallBack(conv,
                          flagCB_fromU,
                          flagCtx,
                          &(flagCtx->subCallback),
                          &(flagCtx->subContext),
                          &status);
    U_ASSERT(status);

    /* Step 3: UChars -> windows-1252 */
    len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
    U_ASSERT(status);

    flagVal = flagCtx->flag; /* it's about to go away when we close the cnv */

    ucnv_close(conv);

    /* show the converted bytes */
    printBytes("bytes", bytes, len2);

    return flagVal; /* true if callback was called */
}
static void TestAppendChar(){ static UChar s[5]={0x0061, 0x0062, 0x0063, 0x0064, 0x0000}; static uint32_t test[]={ /*append-position(unsafe), CHAR to be appended */ 0, 0x20441, 2, 0x0028, 2, 0xdc00, 3, 0xd800, 1, 0x20402, /*append-position(safe), CHAR to be appended */ 0, 0x20441, 2, 0xdc00, 3, 0xd800, 1, 0x20402, 3, 0x20402, 3, 0x10402, 2, 0x10402, }; static uint16_t movedOffset[]={ /*offset-moved-to(unsafe)*/ 2, /*for append-pos: 0 , CHAR 0x20441*/ 3, 3, 4, 3, /*offse-moved-to(safe)*/ 2, /*for append-pos: 0, CHAR 0x20441*/ 3, 4, 3, 4, 4, 4 }; static UChar result[][5]={ /*unsafe*/ {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000}, {0x0061, 0x0062, 0x0028, 0x0064, 0x0000}, {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000}, {0x0061, 0x0062, 0x0063, 0xd800, 0x0000}, {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000}, /*safe*/ {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000}, {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000}, {0x0061, 0x0062, 0x0063, 0xd800, 0x0000}, {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000}, {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000}, {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000}, {0x0061, 0x0062, 0xd801, 0xdc02, 0x0000}, }; uint16_t i, count=0; UChar *str=(UChar*)malloc(sizeof(UChar) * (u_strlen(s)+1)); uint16_t offset; for(i=0; i<sizeof(test)/sizeof(test[0]); i=(uint16_t)(i+2)){ if(count<5){ u_strcpy(str, s); offset=(uint16_t)test[i]; UTF16_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); if(offset != movedOffset[count]){ log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", count, movedOffset[count], offset); } if(u_strcmp(str, result[count]) !=0){ log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed for count=%d. 
Expected:", count); printUChars(result[count]); printf("\nGot:"); printUChars(str); printf("\n"); } }else{ u_strcpy(str, s); offset=(uint16_t)test[i]; UTF16_APPEND_CHAR_SAFE(str, offset, (uint16_t)u_strlen(str), test[i+1]); if(offset != movedOffset[count]){ log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", count, movedOffset[count], offset); } if(u_strcmp(str, result[count]) !=0){ log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed for count=%d. Expected:", count); printUChars(result[count]); printf("\nGot:"); printUChars(str); printf("\n"); } } count++; } free(str); }