Exemplo n.º 1
0
static void TestAppend() {
    static const UChar32 codePoints[]={
        0x61, 0xdf, 0x901, 0x3040,
        0xac00, 0xd800, 0xdbff, 0xdcde,
        0xdffd, 0xe000, 0xffff, 0x10000,
        0x12345, 0xe0021, 0x10ffff, 0x110000,
        0x234567, 0x7fffffff, -1, -1000,
        0, 0x400
    };
    static const uint8_t expectUnsafe[]={
        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
        0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
        0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
        /* none from this line */
        0,  0xd0, 0x80
    }, expectSafe[]={
        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
        0xea, 0xb0, 0x80,  /* no surrogates */
        /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
        /* none from this line */
        0,  0xd0, 0x80
    };

    uint8_t buffer[100];
    UChar32 c;
    int32_t i, length;
    UBool isError, expectIsError, wrongIsError;

    length=0;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        if(c<0 || 0x10ffff<c) {
            continue; /* skip non-code points for U8_APPEND_UNSAFE */
        }

        U8_APPEND_UNSAFE(buffer, length, c);
    }
    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
        log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
    }

    length=0;
    wrongIsError=FALSE;
    for(i=0; i<LENGTHOF(codePoints); ++i) {
        c=codePoints[i];
        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
        isError=FALSE;

        U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
        wrongIsError|= isError!=expectIsError;
    }
    if(wrongIsError) {
        log_err("U8_APPEND did not set isError correctly\n");
    }
    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
        log_err("U8_APPEND did not generate the expected output\n");
    }
}
Exemplo n.º 2
0
/*!	Writes the UTF-8 encoding of \a c at the position \a *out points to and
	advances \a *out past the bytes that were written.

	The encoding is done with ICU's U8_APPEND_UNSAFE, which is documented as
	assuming a valid code point and sufficient space in the destination; this
	function adds no check of its own.
*/
void
BUnicodeChar::ToUTF8(uint32 c, char **out)
{
	char* destination = *out;
	int written = 0;
	U8_APPEND_UNSAFE(destination, written, c);
	*out = destination + written;
}
Exemplo n.º 3
0
/**
 * IntlChar::foldCase — applies ICU's u_foldCase to the code point obtained
 * from `arg` via GETCP, passing `options` through.
 *
 * The result mirrors the kind of the argument: when `arg` is a string the
 * folded code point is returned as a UTF-8 encoded String, otherwise the
 * folded code point value itself is returned.
 */
Variant HHVM_STATIC_METHOD(IntlChar, foldCase,
                           const Variant& arg, int64_t options) {
  GETCP(arg, cp);
  auto folded = u_foldCase(cp, options);
  if (!arg.isString()) {
    return folded;
  }

  // U8_APPEND_UNSAFE emits at most 4 bytes, so 5 leaves a byte to spare.
  char utf8[5];
  int utf8_len = 0;
  U8_APPEND_UNSAFE(utf8, utf8_len, folded);
  return String(utf8, utf8_len, CopyString);
}
Exemplo n.º 4
0
/**
 * Generic wrapper that maps a code point through `T` (declared outside this
 * block) and returns the result in the same form the argument was given.
 *
 * GETCP extracts the code point `cp` from `arg`. A string argument yields a
 * UTF-8 encoded String holding the mapped code point; any other argument
 * yields the mapped code point value directly.
 */
Variant uchar_method(Class* self_, const Variant& arg) {
  GETCP(arg, cp);
  auto mapped = T(cp);
  if (!arg.isString()) {
    return mapped;
  }

  // Reserve 5 bytes: up to 4 bytes of UTF-8 plus the terminating zero.
  String result(5, ReserveString);
  auto out = result.bufferSlice().ptr;
  int out_len = 0;
  U8_APPEND_UNSAFE(out, out_len, mapped);
  out[out_len] = 0;
  result.setSize(out_len);
  return result;
}
Exemplo n.º 5
0
	/**
	 * Lower-cases a UTF-8 string in place, one code point at a time.
	 *
	 * The buffer is rewritten without changing its length, so a code point is
	 * only replaced when its lower-case form occupies exactly as many UTF-8
	 * bytes as the original sequence. Two kinds of input are left untouched:
	 *  - ill-formed sequences (U8_NEXT reports them as a negative value, which
	 *    must never be handed to U8_APPEND_UNSAFE);
	 *  - code points whose lower-case mapping has a different encoded length
	 *    (e.g. U+212A KELVIN SIGN -> 'k' shrinks, U+023A -> U+2C65 grows).
	 *    Writing those in place would either leave stale trailing bytes or
	 *    overwrite input that has not been read yet / run past the buffer.
	 *
	 * @param _str holder of the 8-bit string to convert; an empty string is a no-op.
	 */
	void ICUUnicodeSupport::_toLowerCase<1>(StringHolder<1> _str)
	{
		if(_str.empty())
		{
			return;
		}

		uint8_t* buf = &_str[0];
		const int32_t len = _str.length();
		int32_t ofs = 0;
		while(ofs < len)
		{
			const int32_t start = ofs;
			UChar32 c;
			U8_NEXT(buf, ofs, len, c);
			if(c < 0)
			{
				// Ill-formed sequence: keep the original bytes.
				continue;
			}

			const UChar32 lower = u_tolower(c);
			if(lower == c || U8_LENGTH(lower) != ofs - start)
			{
				// Nothing to change, or the mapping would not fit in place.
				continue;
			}

			// Same encoded length: overwrite exactly the bytes just read.
			int32_t writeOfs = start;
			U8_APPEND_UNSAFE(buf, writeOfs, lower);
		}
	}
Exemplo n.º 6
0
// Encodes `length` UTF-16 code units starting at `characters` as UTF-8 and
// returns the bytes as a CString. The UnencodableHandling argument is unused:
// every Unicode scalar value is representable in UTF-8.
//
// Unpaired surrogates in the input are replaced with U+FFFD (REPLACEMENT
// CHARACTER) so that the output is always well-formed UTF-8.
CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling)
{
    // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3.
    // BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x).
    // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x).
    if (length > numeric_limits<size_t>::max() / 3)
        CRASH();
    Vector<uint8_t> bytes(length * 3);

    size_t i = 0;
    size_t bytesWritten = 0;
    while (i < length) {
        UChar32 character;
        U16_NEXT(characters, i, length, character);
        // U16_NEXT hands back the surrogate code point itself when it meets an
        // unmatched surrogate, and U8_APPEND_UNSAFE would encode it verbatim,
        // yielding ill-formed UTF-8. Substitute U+FFFD instead; it also takes
        // 3 bytes, so the length * 3 bound above still holds.
        if (0xD800 <= character && character <= 0xDFFF)
            character = 0xFFFD;
        U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character);
    }

    return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten);
}
Exemplo n.º 7
0
    SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
        // Verify that the frozen set is equal to the unfrozen one.
        UnicodeSet set;
        char utf8[4];
        UChar32 c;
        int32_t length;

        for(c=0; c<=0x10ffff; ++c) {
            if(c==0xd800) {
                c=0xe000;
            }
            length=0;
            U8_APPEND_UNSAFE(utf8, length, c);
            if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
                set.add(c);
            }
        }
        if(set!=testcase.set) {
            fprintf(stderr, "error: frozen set != original!\n");
        }
    }
Exemplo n.º 8
0
// Shared UTF-8 encoder: converts `length` code units starting at `characters`
// into UTF-8 and returns the bytes as a CString. Surrogate code points
// produced by U16_NEXT are replaced with replacementCharacter before being
// encoded.
CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length)
{
    // Worst case is 3 UTF-8 bytes per input code unit: a BMP character is one
    // unit and at most 3 bytes, a non-BMP character is two units and 4 bytes.
    // Refuse lengths for which that 3x allocation would overflow size_t.
    if (length > numeric_limits<size_t>::max() / 3)
        CRASH();
    Vector<uint8_t> utf8(length * 3);

    size_t utf8Length = 0;
    for (size_t index = 0; index < length; ) {
        UChar32 codePoint;
        U16_NEXT(characters, index, length, codePoint);
        // U16_NEXT passes an unmatched surrogate through as its own code
        // point; swap it for the replacement character before encoding.
        const bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
        if (isSurrogate)
            codePoint = replacementCharacter;
        U8_APPEND_UNSAFE(utf8.data(), utf8Length, codePoint);
    }

    return CString(reinterpret_cast<char*>(utf8.data()), utf8Length);
}