/* Convert a NUL-terminated C string from one charset to another.
 *
 * This is a convenience front-end for tb_charset_conv_data(): the input
 * length is taken with tb_strlen(), so the terminating NUL is not converted.
 *
 * @param ftype     the charset type of the source string
 * @param ttype     the charset type to convert to
 * @param cstr      the NUL-terminated source string
 * @param data      the output buffer
 * @param size      the size of the output buffer
 *
 * @return          the result of tb_charset_conv_data() for the same input,
 *                  or -1 if any argument is rejected by the check below
 */
tb_long_t tb_charset_conv_cstr(tb_size_t ftype, tb_size_t ttype, tb_char_t const* cstr, tb_byte_t* data, tb_size_t size)
{
    // reject unknown charset types, null pointers and an empty output buffer
    tb_assert_and_check_return_val(TB_CHARSET_TYPE_OK(ftype) && TB_CHARSET_TYPE_OK(ttype) && cstr && data && size, -1);

    // view the string as raw bytes, without its terminator
    tb_byte_t const*    idata = (tb_byte_t const*)cstr;
    tb_size_t           isize = tb_strlen(cstr);

    // hand over to the generic data converter
    return tb_charset_conv_data(ftype, ttype, idata, isize, data, size);
}
/* Write a string object into the bplist stream.
 *
 * The string payload is obtained as utf8 from the object. When the charset
 * module is available, the payload is converted to utf16 first:
 *   - if the utf16 output is exactly twice as long as the utf8 input (every
 *     input byte became one 16-bit unit, i.e. the text is plain ascii), the
 *     original utf8 bytes are written as a STRING record;
 *   - otherwise the utf16 data is written as a UNICODE record whose size is
 *     the number of 16-bit units.
 * Without the charset module the utf8 bytes are always written as STRING.
 *
 * An empty string (null payload or zero size) is written as a zero-length
 * STRING record, so every control path returns a value.
 *
 * @param writer        the bplist writer, must have a stream
 * @param object        the string object
 * @param item_size     forwarded unchanged to tb_oc_bplist_writer_func_rdata()
 *
 * @return              tb_true on success, tb_false otherwise
 */
static tb_bool_t tb_oc_bplist_writer_func_string(tb_oc_bplist_writer_t* writer, tb_object_ref_t object, tb_size_t item_size)
{
    // check
    tb_assert_and_check_return_val(writer && writer->stream && object, tb_false);

    // the utf8 payload of the string object
    tb_char_t const*    utf8 = tb_oc_string_cstr(object);
    tb_size_t           size = tb_oc_string_size(object);

    // empty string? write a zero-length record
    if (!utf8 || !size)
        return tb_oc_bplist_writer_func_rdata(writer, TB_OBJECT_BPLIST_TYPE_STRING, tb_null, 0, item_size);

#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
    // done
    tb_bool_t   ok = tb_false;
    tb_char_t*  utf16 = tb_null;
    tb_long_t   osize = 0;
    tb_size_t   maxn = (size + 1) << 2;
    do
    {
        // init utf16 data, four output bytes are reserved per input byte
        utf16 = tb_malloc_cstr(maxn);
        tb_assert_and_check_break(utf16);

        // utf8 to utf16, the converter reports its result as a signed size
        osize = tb_charset_conv_data(TB_CHARSET_TYPE_UTF8, TB_CHARSET_TYPE_UTF16, (tb_byte_t const*)utf8, size, (tb_byte_t*)utf16, maxn);
        tb_assert_and_check_break(osize > 0 && (tb_size_t)osize < maxn);

        // utf16 output must be a whole number of 16-bit units
        tb_assert_and_check_break(!(osize & 1));

        // ok
        ok = tb_true;

    } while (0);

    // ok?
    if (ok)
    {
        // only ascii? write the original utf8 bytes
        if ((tb_size_t)osize == (size << 1))
            ok = tb_oc_bplist_writer_func_rdata(writer, TB_OBJECT_BPLIST_TYPE_STRING, (tb_byte_t*)utf8, size, item_size);
        // write utf16, the record size is counted in 16-bit units
        else
            ok = tb_oc_bplist_writer_func_rdata(writer, TB_OBJECT_BPLIST_TYPE_UNICODE, (tb_byte_t*)utf16, (tb_size_t)osize >> 1, item_size);
    }

    // exit utf16
    if (utf16) tb_free(utf16);
    utf16 = tb_null;

    // ok?
    return ok;
#else
    // charset module is disabled: write utf8 only
    return tb_oc_bplist_writer_func_rdata(writer, TB_OBJECT_BPLIST_TYPE_STRING, (tb_byte_t*)utf8, size, item_size);
#endif
}
/* Convert a wide-character string to a utf8 multibyte string.
 *
 * The wide string is treated as little-endian ucs4 or ucs2, chosen by
 * sizeof(tb_wchar_t), and converted with tb_charset_conv_data().
 *
 * The output is NUL-terminated only if there is room for the terminator,
 * i.e. when fewer than n bytes were produced. When the conversion fills the
 * whole buffer (or n is 0) no terminator is written, so the function never
 * stores past s1[n - 1].
 *
 * @param s1    the output buffer
 * @param s2    the NUL-terminated wide string
 * @param n     the size of the output buffer in bytes
 *
 * @return      the number of bytes produced if it is greater than zero;
 *              (tb_size_t)-1 if the conversion failed or produced nothing
 *              (this includes an empty input string);
 *              0 if s1 or s2 is null
 */
tb_size_t tb_wcstombs(tb_char_t* s1, tb_wchar_t const* s2, tb_size_t n)
{
    // check
    tb_assert_and_check_return_val(s1 && s2, 0);

    // init
    tb_long_t r = 0;
    tb_size_t l = tb_wcslen(s2);

    // wide to multibyte
    if (l)
    {
        // pick the source charset from the width of tb_wchar_t
        tb_size_t e = (sizeof(tb_wchar_t) == 4)? TB_CHARSET_TYPE_UCS4 : TB_CHARSET_TYPE_UCS2;
        r = tb_charset_conv_data(e | TB_CHARSET_TYPE_LE, TB_CHARSET_TYPE_UTF8, (tb_byte_t const*)s2, l * sizeof(tb_wchar_t), (tb_byte_t*)s1, n);
    }

    // terminate, but only inside the caller's buffer
    if (r >= 0 && (tb_size_t)r < n) s1[r] = '\0';

    // ok?
    return r > 0? (tb_size_t)r : (tb_size_t)-1;
}
/* Read a string object from the bplist stream.
 *
 * Two record types are handled:
 *   - TB_OBJECT_BPLIST_TYPE_STRING:  size bytes are read and used directly
 *     as the string payload;
 *   - TB_OBJECT_BPLIST_TYPE_UNICODE: size 16-bit units (size << 1 bytes) are
 *     read and converted from utf16 to utf8. This needs the charset module;
 *     without it an error is traced and null is returned.
 *
 * In both cases a size of 0x0f is a marker: the real size is then read from
 * the stream with tb_object_bplist_reader_func_size().
 *
 * A zero-length record yields an empty string object for both record types.
 * Sizes that would wrap around in the buffer-size arithmetic are rejected.
 *
 * @param reader        the bplist reader, must have a stream
 * @param type          the record type
 * @param size          the record size, or the 0x0f marker
 * @param item_size     forwarded to tb_object_bplist_reader_func_size()
 *
 * @return              the string object, or tb_null on failure or for an
 *                      unhandled type
 */
static tb_object_ref_t tb_object_bplist_reader_func_string(tb_object_bplist_reader_t* reader, tb_size_t type, tb_size_t size, tb_size_t item_size)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream, tb_null);

    // init
    tb_char_t*          utf8 = tb_null;
    tb_char_t*          utf16 = tb_null;
    tb_object_ref_t     object = tb_null;

    // the largest representable size, used by the wrap-around guards below
    tb_size_t const     maxn = (tb_size_t)~(tb_size_t)0;

    // read
    switch (type)
    {
    case TB_OBJECT_BPLIST_TYPE_STRING:
        {
            // the size does not fit into the marker? read the real size
            if (size == 0x0f)
            {
                tb_long_t val = tb_object_bplist_reader_func_size(reader, item_size);
                tb_assert_and_check_return_val(val >= 0, tb_null);
                size = (tb_size_t)val;
            }

            // read string
            if (size)
            {
                // size + 1 must not wrap around
                if (size == maxn) break;

                // init utf8, with room for the terminator
                utf8 = tb_malloc_cstr(size + 1);
                tb_assert_and_check_break(utf8);

                // read utf8
                if (!tb_stream_bread(reader->stream, (tb_byte_t*)utf8, size)) break;
                utf8[size] = '\0';
            }

            // init object, utf8 is still null here for an empty string
            object = tb_object_string_init_from_cstr(utf8);
        }
        break;
    case TB_OBJECT_BPLIST_TYPE_UNICODE:
        {
#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
            // the size does not fit into the marker? read the real size
            if (size == 0x0f)
            {
                tb_long_t val = tb_object_bplist_reader_func_size(reader, item_size);
                tb_assert_and_check_return_val(val >= 0, tb_null);
                size = (tb_size_t)val;
            }

            // read string
            if (size)
            {
                // (size + 1) << 2 and size << 1 must not wrap around
                if (size >= (maxn >> 2)) break;

                // init utf8 & utf16 data
                tb_size_t utf8_maxn  = (size + 1) << 2;
                tb_size_t utf16_size = size << 1;
                utf8  = tb_malloc_cstr(utf8_maxn);
                utf16 = tb_malloc_cstr(utf16_size);
                tb_assert_and_check_break(utf8 && utf16);

                // read utf16
                if (!tb_stream_bread(reader->stream, (tb_byte_t*)utf16, utf16_size)) break;

                // utf16 to utf8, the result must leave room for the terminator
                tb_long_t osize = tb_charset_conv_data(TB_CHARSET_TYPE_UTF16, TB_CHARSET_TYPE_UTF8, (tb_byte_t*)utf16, utf16_size, (tb_byte_t*)utf8, utf8_maxn);
                tb_assert_and_check_break(osize > 0 && osize < (tb_long_t)utf8_maxn);
                utf8[osize] = '\0';

                // init object
                object = tb_object_string_init_from_cstr(utf8);
            }
            // empty string: build it the same way as the STRING case does
            else object = tb_object_string_init_from_cstr(tb_null);
#else
            // trace
            tb_trace1_e("unicode type is not supported, please enable charset module config if you want to use it!");
#endif
        }
        break;
    default:
        break;
    }

    // exit the temporary buffers
    // NOTE(review): they are freed after the object is created, so the object
    // is presumably expected to keep its own copy of the text - confirm
    if (utf8) tb_free(utf8);
    if (utf16) tb_free(utf16);

    // ok?
    return object;
}