Exemple #1
0
// character_name {{{
/*
 * Python-callable: (string[, alias]) -> unicode name.
 *
 * Converts `string` with python_to_icu(), takes the code point found at
 * offset 0 of the resulting UTF-16 buffer and looks up its name with
 * u_charName().  When the optional `alias` argument is truthy the
 * U_CHAR_NAME_ALIAS name is requested, otherwise U_UNICODE_CHAR_NAME.
 *
 * Returns a new Python unicode object, or NULL when argument parsing,
 * truth-testing of `alias`, the conversion, or the ICU lookup fails
 * (ValueError is raised for an ICU failure).
 */
static PyObject *
icu_character_name(PyObject *self, PyObject *args) {
    char name[512] = {0};
    int32_t sz = 0, alias = 0;
    UChar *buf = NULL;
    UErrorCode status = U_ZERO_ERROR;
    PyObject *palias = NULL, *result = NULL, *input = NULL;
    UChar32 code = 0;

    if (!PyArg_ParseTuple(args, "O|O", &input, &palias)) return NULL;

    if (palias != NULL) {
        /* PyObject_IsTrue() returns -1 with an exception set on failure;
         * that must not be mistaken for "true". */
        alias = PyObject_IsTrue(palias);
        if (alias < 0) return NULL;
    }

    buf = python_to_icu(input, &sz, 1);
    /* NOTE(review): presumably python_to_icu() sets a Python exception when
     * it returns NULL - confirm against its definition. */
    if (buf == NULL) goto end;

    /* NOTE(review): for an empty input (sz == 0) this still reads buf[0];
     * that relies on python_to_icu() allocating at least one unit - confirm. */
    U16_GET(buf, 0, 0, sz, code);

    /* Capacity is one less than the zero-filled buffer, so `name` always
     * ends in a NUL byte. */
    sz = u_charName(code, alias ? U_CHAR_NAME_ALIAS : U_UNICODE_CHAR_NAME,
                    name, 511, &status);
    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_ValueError, "Failed to get name for code");
        goto end;
    }
    result = PyUnicode_DecodeUTF8(name, sz, "strict");

end:
    free(buf);  /* free(NULL) is a no-op */

    return result;
} // }}}
Exemple #2
0
/*
 * Print a UChar in a fixed seven-column field on stdout.
 *
 *  - printable ASCII: the character itself in quotes
 *  - above U+007F:    the first six bytes of its extended ICU name,
 *                     or " ??????" when the lookup fails
 *  - other ASCII:     an escape for space, tab and newline, "_" otherwise
 */
void prettyPrintUChar(UChar c)
{
  const char *cell;

  if (c > 0x007F) {
    char name[1000];
    UErrorCode ec = U_ZERO_ERROR;
    int32_t nameLen = u_charName(c, U_EXTENDED_CHAR_NAME, name, 1000, &ec);

    if (U_FAILURE(ec) || nameLen <= 0) {
      fputs(" ??????", stdout);
      return;
    }

    /* At most six bytes of the name, right-justified in seven columns. */
    printf("%7.6s", name);
    return;
  }

  if (isgraph(c)) {
    printf(" '%c'   ", (char)(0x00FF&c));
    return;
  }

  /* Non-graphic ASCII: pick a fixed-width placeholder. */
  {
    const char low = (char)(c & 0x007F);

    if (low == ' ') {
      cell = " ' '   ";
    } else if (low == '\t') {
      cell = " \\t    ";
    } else if (low == '\n') {
      cell = " \\n    ";
    } else {
      cell = "  _    ";
    }
  }

  fputs(cell, stdout);
}
Exemple #3
0
/*
 * Format a human-readable description of the screen cell at (column, row)
 * into `buffer` (capacity `size`), using the STR_* formatting macros.
 *
 * The text has the form
 *   "char <decimal> (U+<hex>): <colour> on <colour>[ blink][ [<name>]]"
 * where the first colour is selected by the low four attribute bits and the
 * second by bits 4-6 (presumably foreground and background - confirm against
 * the attribute definitions).  The Unicode name is appended only when built
 * with HAVE_ICU and the lookup succeeds.
 *
 * Returns the value of STR_LENGTH after formatting.
 */
size_t
formatCharacterDescription (char *buffer, size_t size, int column, int row) {
  static char *const colours[] = {
    strtext("black"),
    strtext("blue"),
    strtext("green"),
    strtext("cyan"),
    strtext("red"),
    strtext("magenta"),
    strtext("brown"),
    strtext("light grey"),
    strtext("dark grey"),
    strtext("light blue"),
    strtext("light green"),
    strtext("light cyan"),
    strtext("light red"),
    strtext("light magenta"),
    strtext("yellow"),
    strtext("white")
  };

  size_t length;
  ScreenCharacter character;

  readScreen(column, row, 1, 1, &character);
  STR_BEGIN(buffer, size);

  {
    uint32_t text = character.text;

    STR_PRINTF("char %" PRIu32 " (U+%04" PRIX32 "): %s on %s",
               text, text,
               gettext(colours[character.attributes & 0X0F]),
               gettext(colours[(character.attributes & 0X70) >> 4]));
  }

  if (character.attributes & SCR_ATTR_BLINK) {
    STR_PRINTF(" %s", gettext("blink"));
  }

#ifdef HAVE_ICU
  {
    char name[0X40];
    UErrorCode error = U_ZERO_ERROR;

    u_charName(character.text, U_EXTENDED_CHAR_NAME, name, sizeof(name), &error);

    /* U_SUCCESS() is also true for warnings. When the name fills the buffer
     * exactly, ICU reports U_STRING_NOT_TERMINATED_WARNING and writes no NUL,
     * so the buffer must not be passed to a %s conversion in that case.
     */
    if (U_SUCCESS(error) && (error != U_STRING_NOT_TERMINATED_WARNING)) {
      STR_PRINTF(" [%s]", name);
    }
  }
#endif /* HAVE_ICU */

  length = STR_LENGTH;
  STR_END;
  return length;
}
// IntlChar::charName(arg, choice)
//
// Looks up the ICU name of the code point produced by GETCP(arg, cp), using
// `choice` as the UCharNameChoice.  Returns the name as a String; on an ICU
// failure the error is recorded in s_intl_error and null is returned.
Variant HHVM_STATIC_METHOD(IntlChar, charName,
                           const Variant& arg, int64_t choice) {
  GETCP(arg, cp);

  const auto nameChoice = (UCharNameChoice)choice;

  // First pass with no buffer: only the required length is wanted, so the
  // status from this call is discarded.
  UErrorCode status = U_ZERO_ERROR;
  int32_t nameLen = u_charName(cp, nameChoice, nullptr, 0, &status);

  // Second pass writes into the reserved string storage; the capacity
  // passed is the reported length plus one.
  String name(nameLen, ReserveString);
  status = U_ZERO_ERROR;
  nameLen = u_charName(cp, nameChoice,
                       name.bufferSlice().ptr, nameLen + 1, &status);

  if (U_FAILURE(status)) {
    s_intl_error->setError(status, "Failure getting character name");
    return init_null();
  }

  name.setSize(nameLen);
  return name;
}
Exemple #5
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Replaces every code point in [offsets.start, offsets.limit) that has an
 * extended character name with OPEN_DELIM + name + CLOSE_DELIM.  Code points
 * without a name are left in place.  isIncremental is ignored: no context is
 * needed because the work is done one code point at a time.
 */
void UnicodeNameTransliterator::handleTransliterate(Replaceable & text, UTransPosition & offsets,
        UBool /*isIncremental*/) const
{
	// If there is no name data (maximum length 0) or the name buffer cannot
	// be allocated, consume the whole range without changing the text.
	const int32_t maxLen = uprv_getMaxCharNameLength();
	if (maxLen == 0)
	{
		offsets.start = offsets.limit;
		return;
	}

	// Scratch buffer sized by the longest name ICU reports.
	char * nameBuf = (char *) uprv_malloc(maxLen);
	if (nameBuf == NULL)
	{
		offsets.start = offsets.limit;
		return;
	}

	int32_t pos = offsets.start;
	int32_t end = offsets.limit;

	// Starts out as just the opening delimiter; truncated back to it before
	// each replacement is assembled.
	UnicodeString replacement(FALSE, OPEN_DELIM, OPEN_DELIM_LEN);

	while (pos < end)
	{
		const UChar32 cp = text.char32At(pos);
		const int32_t cpUnits = UTF_CHAR_LENGTH(cp);

		UErrorCode ec = U_ZERO_ERROR;
		const int32_t nameLen = u_charName(cp, U_EXTENDED_CHAR_NAME, nameBuf, maxLen, &ec);

		if (nameLen <= 0 || U_FAILURE(ec))
		{
			// No usable name: step over this code point unchanged.
			pos += cpUnits;
			continue;
		}

		replacement.truncate(OPEN_DELIM_LEN);
		replacement.append(UnicodeString(nameBuf, nameLen, US_INV)).append(CLOSE_DELIM);
		text.handleReplaceBetween(pos, pos + cpUnits, replacement);

		// Length of the inserted text: the name plus both delimiters.
		const int32_t inserted = nameLen + OPEN_DELIM_LEN + 1;
		pos += inserted;            // continue after the inserted text
		end += inserted - cpUnits;  // the range grew by the net change
	}

	offsets.contextLimit += end - offsets.limit;
	offsets.limit = end;
	offsets.start = pos;

	uprv_free(nameBuf);
}
Exemple #6
0
/*
 * Look up the extended Unicode name of `character`.
 *
 * On success the NUL-terminated name is stored in `buffer` (capacity `size`
 * bytes) and 1 is returned.  0 is returned - and the buffer contents must
 * not be used - when the build lacks HAVE_ICU, the lookup fails, the name
 * is empty, or the name does not fit together with its terminator.
 */
int
getCharacterName (wchar_t character, char *buffer, size_t size) {
#ifdef HAVE_ICU
  UErrorCode error = U_ZERO_ERROR;

  u_charName(character, U_EXTENDED_CHAR_NAME, buffer, size, &error);

  /* U_SUCCESS() is also true for warnings. When the name fills the buffer
   * exactly, ICU reports U_STRING_NOT_TERMINATED_WARNING and writes no NUL;
   * reporting success then would hand the caller an unterminated string.
   */
  if (U_SUCCESS(error) &&
      (error != U_STRING_NOT_TERMINATED_WARNING) &&
      *buffer) {
    return 1;
  }
#endif /* HAVE_ICU */

  return 0;
}
Exemple #7
0
// character_name_from_code {{{
/*
 * Python-callable: (code[, alias]) -> unicode name.
 *
 * Looks up the name of the integer code point `code` with u_charName().
 * When the optional `alias` argument is truthy the U_CHAR_NAME_ALIAS name is
 * requested, otherwise U_UNICODE_CHAR_NAME.
 *
 * Returns a new Python unicode object, or NULL when argument parsing,
 * truth-testing of `alias`, or the ICU lookup fails (ValueError is raised
 * for an ICU failure).
 */
static PyObject *
icu_character_name_from_code(PyObject *self, PyObject *args) {
    char name[512] = {0};
    int32_t sz = 0, alias = 0;
    UErrorCode status = U_ZERO_ERROR;
    PyObject *palias = NULL;
    UChar32 code = 0;

    if (!PyArg_ParseTuple(args, "I|O", &code, &palias)) return NULL;

    if (palias != NULL) {
        /* PyObject_IsTrue() returns -1 with an exception set on failure;
         * that must not be mistaken for "true". */
        alias = PyObject_IsTrue(palias);
        if (alias < 0) return NULL;
    }

    /* Capacity is one less than the zero-filled buffer, so `name` always
     * ends in a NUL byte. */
    sz = u_charName(code, alias ? U_CHAR_NAME_ALIAS : U_UNICODE_CHAR_NAME,
                    name, 511, &status);
    if (U_FAILURE(status)) {
        PyErr_SetString(PyExc_ValueError, "Failed to get name for code");
        return NULL;
    }

    return PyUnicode_DecodeUTF8(name, sz, "strict");
} // }}}
// Returns the ICU name of codePoint as a Java string, or NULL when ICU
// reports a failure or an empty name.
static jstring Character_getNameImpl(JNIEnv* env, jclass, jint codePoint) {
    // Modern names come from U_UNICODE_CHAR_NAME. Control characters are the
    // exception: they need U_EXTENDED_CHAR_NAME to yield "NULL" rather than
    // "BASIC LATIN 0" and so on. U_EXTENDED_CHAR_NAME is not used everywhere
    // because it also produces strings for characters that have no name, and
    // those strings are not in the form Java specifies.
    UCharNameChoice nameType = U_UNICODE_CHAR_NAME;
    if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) {
        nameType = U_EXTENDED_CHAR_NAME;
    }

    char name[BUFSIZ]; // TODO: is there a more sensible upper bound?
    UErrorCode ec = U_ZERO_ERROR;
    const int32_t nameBytes = u_charName(codePoint, nameType, name, sizeof(name), &ec);

    if (U_FAILURE(ec) || nameBytes == 0) {
        return NULL;
    }
    return env->NewStringUTF(name);
}
Exemple #9
0
/*
 * Print the name and a handful of Unicode properties of codePoint to stdout:
 * general category, lowercase flag and uppercase mapping, digit flag and
 * decimal digit value, and BiDi directional category.
 *
 * If the name lookup fails, an empty name is printed.
 */
static void
printProps(UChar32 codePoint) {
    char buffer[100];
    UErrorCode errorCode;

    /* get the character name */
    errorCode=U_ZERO_ERROR;
    u_charName(codePoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);

    /* On failure, or when ICU could not write the terminator, the buffer is
     * not a valid C string; print an empty name instead of reading it. */
    if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) {
        buffer[0]=0;
    }

    /* print the code point and the character name */
    /* UChar32 is a 32-bit int; %lx needs an unsigned long argument, so cast. */
    printf("U+%04lx\t%s\n", (unsigned long)codePoint, buffer);

    /* print some properties */
    printf("  general category (numeric enum value): %u\n", (unsigned int)u_charType(codePoint));

    /* note: these APIs do not provide the data from SpecialCasing.txt */
    printf("  is lowercase: %d  uppercase: U+%04lx\n",
           (int)u_islower(codePoint), (unsigned long)u_toupper(codePoint));

    printf("  is digit: %d  decimal digit value: %d\n",
           (int)u_isdigit(codePoint), (int)u_charDigitValue(codePoint));

    printf("  BiDi directional category (numeric enum value): %u\n",
           (unsigned int)u_charDirection(codePoint));
}
Exemple #10
0
/*
 * Plain-function wrapper around u_charName(): forwards every argument
 * unchanged and returns the call's result.
 */
int32_t __hs_u_charName(UChar32 code, UCharNameChoice nameChoice,
			char *buffer, int32_t bufferLength,
			UErrorCode *pErrorCode)
{
    const int32_t nameLength =
        u_charName(code, nameChoice, buffer, bufferLength, pErrorCode);

    return nameLength;
}
Exemple #11
0
/**
 * Record per-character Unicode details for strings containing unusual
 * characters.
 *
 * Nothing is done when every byte of text is an ASCII alphanumeric or one of
 * "_: .-". Otherwise the code points of us are scanned; if any of them is
 * non-printable, not left-to-right, or outside the letter / digit / space /
 * dash / connector / other-punctuation categories, one row per code point is
 * written through insert with: the text, the code point's index, the
 * character as UTF-8, its "U+xxxx" form, block, category name, direction and
 * Unicode name.
 *
 * @param text   The string as a NUL-terminated byte string.
 * @param us     The same string as an ICU UnicodeString.
 * @param insert Statement that receives the bound values and is executed
 *               once per code point.
 */
void get_unicode_info(const char* text, const icu::UnicodeString& us, Sqlite::Statement& insert) {
    bool allokay = true;
    for (const char* t = text; *t; ++t) {
        // <cctype> functions require a value representable as unsigned char;
        // a plain char may be negative for bytes >= 0x80.
        const unsigned char ch = static_cast<unsigned char>(*t);
        if (!(std::isalnum(ch) || ch == '_' || ch == ':' || ch == ' ' || ch == '.' || ch == '-')) {
            allokay = false;
            break;
        }
    }

    if (allokay) {
        return;
    }

    bool unusual = false;
    // next32() steps by whole code points to match current32(); next() would
    // stop on the second half of a surrogate pair as well.
    for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32()) {
        UChar32 codepoint = it.current32();
        int8_t chartype = u_charType(codepoint);
        if (! u_isprint(codepoint)) {
            unusual = true;
            break;
        }
        if (u_charDirection(codepoint) != U_LEFT_TO_RIGHT) {
            unusual = true;
            break;
        }
        if (chartype != U_UPPERCASE_LETTER &&
            chartype != U_LOWERCASE_LETTER &&
            chartype != U_DECIMAL_DIGIT_NUMBER &&
            chartype != U_SPACE_SEPARATOR &&
            chartype != U_DASH_PUNCTUATION &&
            chartype != U_CONNECTOR_PUNCTUATION &&
            chartype != U_OTHER_PUNCTUATION) {
            unusual = true;
            break;
        }
    }

    if (unusual) {
        int num = 0;
        for (icu::StringCharacterIterator it(us); it.hasNext(); it.next32(), ++num) {
            UChar32 codepoint = it.current32();

            int8_t chartype = u_charType(codepoint);

            char buffer[100];
            UErrorCode errorCode = U_ZERO_ERROR;
            u_charName(codepoint, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
            // On failure, or when no terminator could be written, the buffer
            // is not a valid C string; store an empty name instead.
            if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
                buffer[0] = '\0';
            }

            UCharDirection direction = u_charDirection(codepoint);
            int32_t block = u_getIntPropertyValue(codepoint, UCHAR_BLOCK);

            icu::UnicodeString ustr(codepoint);
            std::string str;
            ustr.toUTF8String(str);

            char uplus[10];
            snprintf(uplus, sizeof(uplus), "U+%04x", static_cast<unsigned int>(codepoint));

            insert.
                bind_text(text).
                bind_int(num).
                bind_text(str.c_str()).
                bind_text(uplus).
                bind_int(block).
                bind_text(category_to_string(chartype)).
                bind_int(direction).
                bind_text(buffer).
                execute();
        }
    }
}