// character_name {{{ static PyObject * icu_character_name(PyObject *self, PyObject *args) { char name[512] = {0}; int32_t sz = 0, alias = 0; UChar *buf; UErrorCode status = U_ZERO_ERROR; PyObject *palias = NULL, *result = NULL, *input = NULL; UChar32 code = 0; if (!PyArg_ParseTuple(args, "O|O", &input, &palias)) return NULL; if (palias != NULL && PyObject_IsTrue(palias)) alias = 1; buf = python_to_icu(input, &sz, 1); if (buf == NULL) goto end; U16_GET(buf, 0, 0, sz, code); if (alias) { sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status); } else { sz = u_charName(code, U_UNICODE_CHAR_NAME, name, 511, &status); } if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "Failed to get name for code"); goto end; } result = PyUnicode_DecodeUTF8(name, sz, "strict"); end: if (buf != NULL) free(buf); return result; } // }}}
/*
 * Print a UChar if possible, in seven characters.
 *
 * - Printable ASCII is shown quoted.
 * - Code units above U+007F are shown as the first six characters of their
 *   ICU extended name, right-aligned in the seven-column cell, or as
 *   " ??????" when no name can be obtained.
 * - Space, tab and newline get a visible escape; any other non-graphic ASCII
 *   value is shown as an underscore.
 *
 * Every branch emits exactly seven columns so that consecutive calls line up.
 */
void prettyPrintUChar(UChar c)
{
    if ((c <= 0x007F) && (isgraph(c))) {
        printf(" '%c'   ", (char)(0x00FF & c));
    } else if (c > 0x007F) {
        char buf[1000];
        UErrorCode status = U_ZERO_ERROR;
        int32_t nameLength;

        nameLength = u_charName(c, U_EXTENDED_CHAR_NAME, buf, (int32_t)sizeof(buf), &status);
        if (U_SUCCESS(status) && (nameLength > 0)) {
            /* Keep at most six name characters; %7s pads to the cell width. */
            buf[6] = 0;
            printf("%7s", buf);
        } else {
            printf(" ??????");
        }
    } else {
        switch ((char)(c & 0x007F)) {
        case ' ':
            printf(" ' '   ");
            break;
        case '\t':
            printf(" \\t    ");
            break;
        case '\n':
            printf(" \\n    ");
            break;
        default:
            printf("  _    ");
            break;
        }
    }
}
size_t formatCharacterDescription (char *buffer, size_t size, int column, int row) { static char *const colours[] = { strtext("black"), strtext("blue"), strtext("green"), strtext("cyan"), strtext("red"), strtext("magenta"), strtext("brown"), strtext("light grey"), strtext("dark grey"), strtext("light blue"), strtext("light green"), strtext("light cyan"), strtext("light red"), strtext("light magenta"), strtext("yellow"), strtext("white") }; size_t length; ScreenCharacter character; readScreen(column, row, 1, 1, &character); STR_BEGIN(buffer, size); { uint32_t text = character.text; STR_PRINTF("char %" PRIu32 " (U+%04" PRIX32 "): %s on %s", text, text, gettext(colours[character.attributes & 0X0F]), gettext(colours[(character.attributes & 0X70) >> 4])); } if (character.attributes & SCR_ATTR_BLINK) { STR_PRINTF(" %s", gettext("blink")); } #ifdef HAVE_ICU { char name[0X40]; UErrorCode error = U_ZERO_ERROR; u_charName(character.text, U_EXTENDED_CHAR_NAME, name, sizeof(name), &error); if (U_SUCCESS(error)) { STR_PRINTF(" [%s]", name); } } #endif /* HAVE_ICU */ length = STR_LENGTH; STR_END; return length; }
/**
 * IntlChar::charName(arg, choice)
 *
 * Returns the ICU name of the code point described by `arg`, using `choice`
 * as the UCharNameChoice. On an ICU failure the error is recorded in
 * s_intl_error and null is returned.
 *
 * NOTE(review): GETCP is a macro defined elsewhere; presumably it declares
 * `cp` from `arg` and returns early when `arg` is not a valid code point.
 */
Variant HHVM_STATIC_METHOD(IntlChar, charName, const Variant& arg, int64_t choice) {
  GETCP(arg, cp);
  const UCharNameChoice nameChoice = static_cast<UCharNameChoice>(choice);

  // Pass 1: no output buffer, only the required length is of interest.
  UErrorCode icuStatus = U_ZERO_ERROR;
  int32_t nameLen = u_charName(cp, nameChoice, nullptr, 0, &icuStatus);

  // Pass 2: fetch the name into a string reserved for that length.
  // NOTE(review): the "+ 1" assumes the reserved String has room for a
  // terminator beyond nameLen - confirm against String(ReserveString).
  String name(nameLen, ReserveString);
  icuStatus = U_ZERO_ERROR;
  nameLen = u_charName(cp, nameChoice, name.bufferSlice().ptr, nameLen + 1,
                       &icuStatus);

  if (U_FAILURE(icuStatus)) {
    s_intl_error->setError(icuStatus, "Failure getting character name");
    return init_null();
  }

  name.setSize(nameLen);
  return name;
}
/** * Implements {@link Transliterator#handleTransliterate}. * Ignore isIncremental since we don't need the context, and * we work on codepoints. */ void UnicodeNameTransliterator::handleTransliterate(Replaceable & text, UTransPosition & offsets, UBool /*isIncremental*/) const { // The failure mode, here and below, is to behave like Any-Null, // if either there is no name data (max len == 0) or there is no // memory (malloc() => NULL). int32_t maxLen = uprv_getMaxCharNameLength(); if (maxLen == 0) { offsets.start = offsets.limit; return; } // Accomodate the longest possible name plus padding char * buf = (char *) uprv_malloc(maxLen); if (buf == NULL) { offsets.start = offsets.limit; return; } int32_t cursor = offsets.start; int32_t limit = offsets.limit; UnicodeString str(FALSE, OPEN_DELIM, OPEN_DELIM_LEN); UErrorCode status; int32_t len; while (cursor < limit) { UChar32 c = text.char32At(cursor); int32_t clen = UTF_CHAR_LENGTH(c); status = U_ZERO_ERROR; if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, maxLen, &status)) > 0 && !U_FAILURE(status)) { str.truncate(OPEN_DELIM_LEN); str.append(UnicodeString(buf, len, US_INV)).append(CLOSE_DELIM); text.handleReplaceBetween(cursor, cursor + clen, str); len += OPEN_DELIM_LEN + 1; // adjust for delimiters cursor += len; // advance cursor and adjust for new text limit += len - clen; // change in length } else { cursor += clen; } } offsets.contextLimit += limit - offsets.limit; offsets.limit = limit; offsets.start = cursor; uprv_free(buf); }
/*
 * Look up the ICU extended name of a character.
 *
 * character - the character to name
 * buffer    - receives the name
 * size      - capacity of buffer in bytes
 *
 * Returns 1 when a non-empty, NUL-terminated name was stored in buffer and
 * 0 otherwise (including always when built without HAVE_ICU).
 */
int
getCharacterName (wchar_t character, char *buffer, size_t size) {
#ifdef HAVE_ICU
  UErrorCode error = U_ZERO_ERROR;
  int32_t length = u_charName(character, U_EXTENDED_CHAR_NAME,
                              buffer, size, &error);

  /* Decide from the returned length rather than by reading the buffer:
   * - length > 0 rejects characters without a name and never touches a
   *   zero-sized buffer;
   * - length < size rejects the case where the name exactly fills the
   *   buffer, which ICU reports only as a warning and leaves unterminated.
   */
  if (U_SUCCESS(error) && (length > 0) && ((size_t)length < size)) return 1;
#endif /* HAVE_ICU */

  return 0;
}
// character_name_from_code {{{ static PyObject * icu_character_name_from_code(PyObject *self, PyObject *args) { char name[512] = {0}; int32_t sz, alias = 0; UErrorCode status = U_ZERO_ERROR; PyObject *palias = NULL, *result = NULL; UChar32 code = 0; if (!PyArg_ParseTuple(args, "I|O", &code, &palias)) return NULL; if (palias != NULL && PyObject_IsTrue(palias)) alias = 1; if (alias) { sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status); } else { sz = u_charName(code, U_UNICODE_CHAR_NAME, name, 511, &status); } if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "Failed to get name for code"); goto end; } result = PyUnicode_DecodeUTF8(name, sz, "strict"); end: return result; } // }}}
/**
 * Native implementation of the character-name lookup.
 *
 * Returns a new Java string holding the ICU name of codePoint, or NULL when
 * ICU reports a failure or the name is empty.
 */
static jstring Character_getNameImpl(JNIEnv* env, jclass, jint codePoint) {
    // Modern names come from U_UNICODE_CHAR_NAME. Control characters are the
    // exception: they need U_EXTENDED_CHAR_NAME to yield "NULL" rather than
    // "BASIC LATIN 0" and so on. U_EXTENDED_CHAR_NAME is not used across the
    // board because it also returns strings for characters that aren't
    // unassigned but have no name, and those strings aren't in the form Java
    // specifies.
    UCharNameChoice nameType = U_UNICODE_CHAR_NAME;
    if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) {
        nameType = U_EXTENDED_CHAR_NAME;
    }

    char buf[BUFSIZ]; // TODO: is there a more sensible upper bound?
    UErrorCode status = U_ZERO_ERROR;
    const int32_t byteCount = u_charName(codePoint, nameType, &buf[0], sizeof(buf), &status);

    if (U_FAILURE(status) || byteCount == 0) {
        return NULL;
    }
    return env->NewStringUTF(buf);
}
/*
 * Print the name and a few basic properties of one code point to stdout:
 * general category, lowercase flag and uppercase mapping, digit flag and
 * decimal digit value, and BiDi directional category.
 *
 * If the name cannot be retrieved an empty name is printed.
 */
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name; one byte is held back for the terminator */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, (int32_t)sizeof(buffer)-1, &errorCode);
    if(U_FAILURE(errorCode)) {
        buffer[0]=0;
    } else {
        buffer[sizeof(buffer)-1]=0;
    }

    /*
     * print the code point and the character name
     * (UChar32 is 32 bits wide, so cast to match the %lx conversion)
     */
    printf("U+%04lx\t%s\n", (unsigned long)codePoint, buffer);

    /* print some properties */
    printf(" general category (numeric enum value): %u\n", (unsigned int)u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf(" is lowercase: %d uppercase: U+%04lx\n",
           u_islower(codePoint), (unsigned long)u_toupper(codePoint));

    printf(" is digit: %d decimal digit value: %d\n",
           u_isdigit(codePoint), u_charDigitValue(codePoint));

    printf(" BiDi directional category (numeric enum value): %u\n",
           (unsigned int)u_charDirection(codePoint));
}
/*
 * Forwarding wrapper around ICU's u_charName(): every argument is passed
 * through unchanged and the callee's result is returned as is.
 * NOTE(review): the __hs_ prefix suggests this exists as a stable symbol for
 * a foreign-function binding - confirm.
 */
int32_t
__hs_u_charName(UChar32 code, UCharNameChoice nameChoice,
                char *buffer, int32_t bufferLength,
                UErrorCode *pErrorCode)
{
    const int32_t nameLength =
        u_charName(code, nameChoice, buffer, bufferLength, pErrorCode);

    return nameLength;
}
/**
 * Record per-character Unicode details for strings that contain "unusual"
 * characters.
 *
 * @param text   The string as a NUL-terminated byte sequence.
 * @param us     The same string as an ICU UnicodeString.
 * @param insert Statement that receives one row per code point, bound in this
 *               order: text, index of the code point, the character as UTF-8,
 *               "U+xxxx", Unicode block, category name, BiDi direction and
 *               character name.
 *
 * Nothing is recorded when every byte of text is an ASCII alphanumeric or one
 * of "_: .-", or when every code point is printable, left-to-right and in one
 * of the common letter/digit/space/punctuation categories.
 */
void get_unicode_info(const char* text, const icu::UnicodeString& us, Sqlite::Statement& insert) {
    // Cheap byte-level check first.
    bool allokay = true;
    for (const char* t = text; *t; ++t) {
        // std::isalnum() is undefined for negative values, which plain char
        // takes on for every non-ASCII UTF-8 byte.
        const unsigned char ch = static_cast<unsigned char>(*t);
        if (!(std::isalnum(ch) || *t == '_' || *t == ':' || *t == ' ' || *t == '.' || *t == '-')) {
            allokay = false;
            break;
        }
    }
    if (allokay) {
        return;
    }

    // Step by code point (next32) so that characters outside the BMP are
    // looked at once instead of once per UTF-16 code unit.
    bool unusual = false;
    for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32()) {
        const UChar32 codepoint = it.current32();
        const int8_t chartype = u_charType(codepoint);

        if (!u_isprint(codepoint)) {
            unusual = true;
            break;
        }
        if (u_charDirection(codepoint) != U_LEFT_TO_RIGHT) {
            unusual = true;
            break;
        }
        if (chartype != U_UPPERCASE_LETTER &&
            chartype != U_LOWERCASE_LETTER &&
            chartype != U_DECIMAL_DIGIT_NUMBER &&
            chartype != U_SPACE_SEPARATOR &&
            chartype != U_DASH_PUNCTUATION &&
            chartype != U_CONNECTOR_PUNCTUATION &&
            chartype != U_OTHER_PUNCTUATION) {
            unusual = true;
            break;
        }
    }
    if (!unusual) {
        return;
    }

    int num = 0;
    for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32(), ++num) {
        const UChar32 codepoint = it.current32();
        const int8_t chartype = u_charType(codepoint);

        // One byte is held back so the name is always terminated; a failed
        // lookup yields an empty name instead of indeterminate bytes.
        char buffer[100];
        UErrorCode errorCode = U_ZERO_ERROR;
        u_charName(codepoint, U_UNICODE_CHAR_NAME, buffer,
                   static_cast<int32_t>(sizeof(buffer)) - 1, &errorCode);
        if (U_FAILURE(errorCode)) {
            buffer[0] = '\0';
        } else {
            buffer[sizeof(buffer) - 1] = '\0';
        }

        const UCharDirection direction = u_charDirection(codepoint);
        const int32_t block = u_getIntPropertyValue(codepoint, UCHAR_BLOCK);

        icu::UnicodeString ustr(codepoint);
        std::string str;
        ustr.toUTF8String(str);

        char uplus[10];
        snprintf(uplus, sizeof(uplus), "U+%04x", static_cast<unsigned int>(codepoint));

        insert.
            bind_text(text).
            bind_int(num).
            bind_text(str.c_str()).
            bind_text(uplus).
            bind_int(block).
            bind_text(category_to_string(chartype)).
            bind_int(direction).
            bind_text(buffer).
            execute();
    }
}