static void demoUnicodeStringInit() { // *** Make sure to read about invariant characters in utypes.h! *** // Initialization of Unicode strings from C literals works _only_ for // invariant characters! printf("\n* demoUnicodeStringInit() ---------- ***\n\n"); // the string literal is 32 chars long - this must be counted for the macro UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32); /* * In C, we need two macros: one to declare the UChar[] array, and * one to populate it; the second one is a noop on platforms where * wchar_t is compatible with UChar and ASCII-based. * The length of the string literal must be counted for both macros. */ /* declare the invString array for the string */ U_STRING_DECL(invString, "such characters are safe 123 %-.", 32); /* populate it with the characters */ U_STRING_INIT(invString, "such characters are safe 123 %-.", 32); // compare the C and C++ strings printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32)); /* * convert between char * and UChar * strings that * contain only invariant characters */ static const char *cs1="such characters are safe 123 %-."; static UChar us1[40]; static char cs2[40]; u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */ u_UCharsToChars(us1, cs2, 33); printf("char * -> UChar * -> char * with only " "invariant characters: \"%s\"\n", cs2); // initialize a UnicodeString from a string literal that contains // escape sequences written with invariant characters // do not forget to duplicate the backslashes for ICU to see them // then, count each double backslash only once! UnicodeString german=UNICODE_STRING( "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64). unescape(); printUnicodeString("german UnicodeString from unescaping:\n ", german); /* * C: convert and unescape a char * string with only invariant * characters to fill a UChar * string */ UChar buffer[200]; int32_t length; length=u_unescape( "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", buffer, UPRV_LENGTHOF(buffer)); printf("german C Unicode string from char * unescaping: (length %d)\n ", length); printUnicodeString("", UnicodeString(buffer)); }
static void demoCaseMapInCPlusPlus() { /* * input= * "aB<capital sigma>" * "iI<small dotless i><capital dotted I> " * "<sharp s> <small lig. ffi>" * "<small final sigma><small sigma><capital sigma>" */ static const UChar input[]={ 0x61, 0x42, 0x3a3, 0x69, 0x49, 0x131, 0x130, 0x20, 0xdf, 0x20, 0xfb03, 0x3c2, 0x3c3, 0x3a3, 0 }; printf("\n* demoCaseMapInCPlusPlus() --------- ***\n\n"); UnicodeString s(input), t; const Locale &en=Locale::getEnglish(); Locale tr("tr"); /* * Full case mappings as in demoCaseMapInC(), using UnicodeString functions. * These functions modify the string object itself. * Since we want to keep the input string around, we copy it each time * and case-map the copy. */ printUnicodeString("input string: ", s); /* lowercase/English */ printUnicodeString("full-lowercased/en: ", (t=s).toLower(en)); /* lowercase/Turkish */ printUnicodeString("full-lowercased/tr: ", (t=s).toLower(tr)); /* uppercase/English */ printUnicodeString("full-uppercased/en: ", (t=s).toUpper(en)); /* uppercase/Turkish */ printUnicodeString("full-uppercased/tr: ", (t=s).toUpper(tr)); /* titlecase/English */ printUnicodeString("full-titlecased/en: ", (t=s).toTitle(NULL, en)); /* titlecase/Turkish */ printUnicodeString("full-titlecased/tr: ", (t=s).toTitle(NULL, tr)); /* case-folde/default */ printUnicodeString("full-case-folded/default: ", (t=s).foldCase(U_FOLD_CASE_DEFAULT)); /* case-folde/Turkic */ printUnicodeString("full-case-folded/Turkic: ", (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I)); }
int DoConvert ( char * lpInBuffer, int nInLen, char * lpOutBuffer, int& rnOutLen ) { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "IcuTranslitEC.CppDoConvert() BEGIN\n"); #endif int hr = 0; UnicodeString sInOut; #ifdef _MSC_VER sInOut.setTo((UChar *)lpInBuffer, nInLen / 2); #else sInOut.setTo(lpInBuffer); #endif printUnicodeString("Will transliterate: ", sInOut); if( m_bLTR ) { if( m_pTForwards ) { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "Doing forwards transliteration...\n"); #endif m_pTForwards->transliterate(sInOut); #ifdef VERBOSE_DEBUGGING fprintf(stderr, "Did forwards transliteration.\n"); #endif } else { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "There is no forward transliterator.\n"); #endif hr = -1; } } else // !m_bLTR { if( m_pTBackwards ) { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "Doing backwards transliteration...\n"); #endif m_pTBackwards->transliterate(sInOut); #ifdef VERBOSE_DEBUGGING fprintf(stderr, "Did backwards transliteration.\n"); #endif } else { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "There is no backwards transliterator.\n"); #endif hr = -1; } } if (hr == 0) { printUnicodeString("Result of transliteration: ", sInOut); #ifdef VERBOSE_DEBUGGING fprintf(stderr, "sInOut.length %d\n", sInOut.length()); #endif #ifdef _MSC_VER UErrorCode err = U_ZERO_ERROR; int nLen = sInOut.extract((UChar *)lpOutBuffer, rnOutLen/sizeof(UChar) , err); if (nLen >= (int)rnOutLen || U_FAILURE(err)) #else int nLen = sInOut.extract(0, sInOut.length(), (char *)NULL); // "preflight" to get size if( nLen >= (int)rnOutLen ) #endif { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "Length %d more than output buffer size %d\n", nLen, rnOutLen); #endif hr = -1; } else { #ifdef VERBOSE_DEBUGGING fprintf(stderr, "nLen %d < original rnOutLen %d\n", nLen, rnOutLen); #endif #ifdef _MSC_VER rnOutLen = nLen * sizeof(UChar); #else nLen = sInOut.extract(0, sInOut.length(), lpOutBuffer); rnOutLen = nLen; #endif #ifdef VERBOSE_DEBUGGING fprintf(stderr, "lpOutBuffer length = %u (should be %d)\n", (unsigned)strlen(lpOutBuffer), rnOutLen); fprintf(stderr, "lpOutBuffer: '%s'\n", lpOutBuffer); #endif } } return hr; }
int main( void ) { UFILE *out; UErrorCode status = U_ZERO_ERROR; out = u_finit(stdout, NULL, NULL); if(!out) { fprintf(stderr, "Could not initialize (finit()) over stdout! \n"); return 1; } ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, NULL, NULL, NULL, &status); if(U_FAILURE(status)) { u_fprintf(out, "Warning- couldn't set the substitute callback - err %s\n", u_errorName(status)); } /* End Demo boilerplate */ u_fprintf(out,"ICU Case Mapping Sample Program\n\n"); u_fprintf(out, "C++ Case Mapping\n\n"); UnicodeString string("This is a test"); /* lowercase = "istanbul" */ UChar lowercase[] = {0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0}; /* uppercase = "LATIN CAPITAL I WITH DOT ABOVE STANBUL" */ UChar uppercase[] = {0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4C, 0}; UnicodeString upper(uppercase); UnicodeString lower(lowercase); u_fprintf(out, "\nstring: "); printUnicodeString(out, string); string.toUpper(); /* string = "THIS IS A TEST" */ u_fprintf(out, "\ntoUpper(): "); printUnicodeString(out, string); string.toLower(); /* string = "this is a test" */ u_fprintf(out, "\ntoLower(): "); printUnicodeString(out, string); u_fprintf(out, "\n\nlowercase=%S, uppercase=%S\n", lowercase, uppercase); string = upper; string.toLower(Locale("tr", "TR")); /* Turkish lower case map string = lowercase */ u_fprintf(out, "\nupper.toLower: "); printUnicodeString(out, string); string = lower; string.toUpper(Locale("tr", "TR")); /* Turkish upper case map string = uppercase */ u_fprintf(out, "\nlower.toUpper: "); printUnicodeString(out, string); u_fprintf(out, "\nEnd C++ sample\n\n"); // Call the C version int rc = c_main(out); u_fclose(out); return rc; }
static void demoUnicodeStringStorage() { // These sample code lines illustrate how to use UnicodeString, and the // comments tell what happens internally. There are no APIs to observe // most of this programmatically, except for stepping into the code // with a debugger. // This is by design to hide such details from the user. int32_t i; printf("\n* demoUnicodeStringStorage() ------- ***\n\n"); // * UnicodeString with internally stored contents // instantiate a UnicodeString from a single code point // the few (2) UChars will be stored in the object itself UnicodeString one((UChar32)0x24001); // this copies the few UChars into the "two" object UnicodeString two=one; printf("length of short string copy: %d\n", two.length()); // set "one" to contain the 3 UChars from readonly // this setTo() variant copies the characters one.setTo(readonly, UPRV_LENGTHOF(readonly)); // * UnicodeString with allocated contents // build a longer string that will not fit into the object's buffer one+=UnicodeString(writeable, UPRV_LENGTHOF(writeable)); one+=one; one+=one; printf("length of longer string: %d\n", one.length()); // copying will use the same allocated buffer and increment the reference // counter two=one; printf("length of longer string copy: %d\n", two.length()); // * UnicodeString using readonly-alias to a const UChar array // construct a string that aliases a readonly buffer UnicodeString three(FALSE, readonly, UPRV_LENGTHOF(readonly)); printUnicodeString("readonly-alias string: ", three); // copy-on-write: any modification to the string results in // a copy to either the internal buffer or to a newly allocated one three.setCharAt(1, 0x39); printUnicodeString("readonly-aliasing string after modification: ", three); // the aliased array is not modified for(i=0; i<three.length(); ++i) { printf("readonly buffer[%d] after modifying its string: 0x%lx\n", i, readonly[i]); } // setTo() readonly alias one.setTo(FALSE, writeable, UPRV_LENGTHOF(writeable)); // copying the readonly-alias object with fastCopyFrom() (new in ICU 2.4) // will readonly-alias the same buffer two.fastCopyFrom(one); printUnicodeString("fastCopyFrom(readonly alias of \"writeable\" array): ", two); printf("verify that a fastCopyFrom(readonly alias) uses the same buffer pointer: %d (should be 1)\n", one.getBuffer()==two.getBuffer()); // a normal assignment will clone the contents (new in ICU 2.4) two=one; printf("verify that a regular copy of a readonly alias uses a different buffer pointer: %d (should be 0)\n", one.getBuffer()==two.getBuffer()); // * UnicodeString using writeable-alias to a non-const UChar array UnicodeString four(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable)); printUnicodeString("writeable-alias string: ", four); // a modification writes through to the buffer four.setCharAt(1, 0x39); for(i=0; i<four.length(); ++i) { printf("writeable-alias backing buffer[%d]=0x%lx " "after modification\n", i, writeable[i]); } // a copy will not alias any more; // instead, it will get a copy of the contents into allocated memory two=four; two.setCharAt(1, 0x21); for(i=0; i<two.length(); ++i) { printf("writeable-alias backing buffer[%d]=0x%lx after " "modification of string copy\n", i, writeable[i]); } // setTo() writeable alias, capacity==length one.setTo(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable)); // grow the string - it will not fit into the backing buffer any more // and will get copied before modification one.append((UChar)0x40); // shrink it back so it would fit one.truncate(one.length()-1); // we still operate on the copy one.setCharAt(1, 0x25); printf("string after growing too much and then shrinking[1]=0x%lx\n" " backing store for this[1]=0x%lx\n", one.charAt(1), writeable[1]); // if we need it in the original buffer, then extract() to it // extract() does not do anything if the string aliases that same buffer // i=min(one.length(), length of array) if(one.length()<UPRV_LENGTHOF(writeable)) { i=one.length(); } else { i=UPRV_LENGTHOF(writeable); } one.extract(0, i, writeable); for(i=0; i<UPRV_LENGTHOF(writeable); ++i) { printf("writeable-alias backing buffer[%d]=0x%lx after re-extract\n", i, writeable[i]); } }