/* read a number object from the binary object stream
 *
 * @param reader    the binary reader; its stream and list must both be set
 * @param type      the object type tag, not referenced by this function
 * @param size      holds the number type (TB_NUMBER_TYPE_*) for number objects
 *
 * @return          the value returned by the matching tb_object_number_init_from_* call,
 *                  or tb_null if the reader is invalid, the number type is not handled,
 *                  or the float/double payload could not be read
 */
static tb_object_ref_t tb_object_bin_reader_func_number(tb_object_bin_reader_t* reader, tb_size_t type, tb_uint64_t size)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream && reader->list, tb_null);

    // the size field carries the number type, every payload is stored big-endian
    switch ((tb_size_t)size)
    {
    case TB_NUMBER_TYPE_UINT64:
        return tb_object_number_init_from_uint64(tb_stream_bread_u64_be(reader->stream));
    case TB_NUMBER_TYPE_SINT64:
        return tb_object_number_init_from_sint64(tb_stream_bread_s64_be(reader->stream));
    case TB_NUMBER_TYPE_UINT32:
        return tb_object_number_init_from_uint32(tb_stream_bread_u32_be(reader->stream));
    case TB_NUMBER_TYPE_SINT32:
        return tb_object_number_init_from_sint32(tb_stream_bread_s32_be(reader->stream));
    case TB_NUMBER_TYPE_UINT16:
        return tb_object_number_init_from_uint16(tb_stream_bread_u16_be(reader->stream));
    case TB_NUMBER_TYPE_SINT16:
        return tb_object_number_init_from_sint16(tb_stream_bread_s16_be(reader->stream));
    case TB_NUMBER_TYPE_UINT8:
        return tb_object_number_init_from_uint8(tb_stream_bread_u8(reader->stream));
    case TB_NUMBER_TYPE_SINT8:
        return tb_object_number_init_from_sint8(tb_stream_bread_s8(reader->stream));
#ifdef TB_CONFIG_TYPE_FLOAT
    case TB_NUMBER_TYPE_FLOAT:
        {
            // fetch the raw 4 bytes first, then decode them
            tb_byte_t raw[4] = {0};
            if (!tb_stream_bread(reader->stream, raw, 4)) return tb_null;
            return tb_object_number_init_from_float(tb_bits_get_float_be(raw));
        }
    case TB_NUMBER_TYPE_DOUBLE:
        {
            // fetch the raw 8 bytes first, then decode them
            tb_byte_t raw[8] = {0};
            if (!tb_stream_bread(reader->stream, raw, 8)) return tb_null;
            return tb_object_number_init_from_double(tb_bits_get_double_bbe(raw));
        }
#endif
    default:
        // unknown number type
        tb_assert_and_check_return_val(0, tb_null);
        break;
    }

    // not reached for the handled types
    return tb_null;
}
/* read one json object from the stream
 *
 * leading whitespace is skipped, the first non-space character selects the
 * value reader and is handed to it as the type.
 *
 * @param stream    the input stream
 *
 * @return          the object produced by the selected reader, or tb_null if the
 *                  stream is null, nothing is left in the stream after the first
 *                  character was consumed, or no reader is registered for it
 */
static tb_object_ref_t tb_object_json_reader_done(tb_stream_ref_t stream)
{
    // check
    tb_assert_and_check_return_val(stream, tb_null);

    // the reader state only needs the stream
    tb_object_json_reader_t reader = {0};
    reader.stream = stream;

    // consume characters until the first non-space one, which is kept in `first`
    tb_char_t first = '\0';
    while (tb_stream_left(stream))
    {
        first = tb_stream_bread_s8(stream);
        if (tb_isspace(first)) continue;
        break;
    }

    // nothing left behind the first character? treat the input as empty
    if (!tb_stream_left(stream)) return tb_null;

    // select the value reader by the first character
    tb_object_json_reader_func_t read_value = tb_object_json_reader_func(first);
    tb_assert_and_check_return_val(read_value, tb_null);

    // read the value
    return read_value(&reader, first);
}
/* read a json array: the '[' has already been consumed and is passed as type
 *
 * whitespace and ',' between items are skipped; every other character selects
 * the reader of the next item. reading stops at ']' or at the end of the stream.
 *
 * @param reader    the json reader
 * @param type      must be '['
 *
 * @return          the array object, or tb_null if the arguments are invalid, the
 *                  array could not be created, or any item failed to read
 */
static tb_object_ref_t tb_object_json_reader_func_array(tb_object_json_reader_t* reader, tb_char_t type)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream && type == '[', tb_null);

    // create the result array
    tb_object_ref_t items = tb_object_array_init(TB_OBJECT_JSON_READER_ARRAY_GROW, tb_false);
    tb_assert_and_check_return_val(items, tb_null);

    // read the items
    tb_bool_t ok = tb_true;
    while (ok && tb_stream_left(reader->stream))
    {
        // next character
        tb_char_t c = tb_stream_bread_s8(reader->stream);

        // end of the array
        if (c == ']') break;

        // separators carry no data
        if (tb_isspace(c) || c == ',') continue;

        // the reader for this item
        tb_object_json_reader_func_t read_item = tb_object_json_reader_func(c);
        tb_assert_and_check_break_state(read_item, ok, tb_false);

        // read the item
        tb_object_ref_t item = read_item(reader, c);
        tb_assert_and_check_break_state(item, ok, tb_false);

        // store it
        tb_object_array_append(items, item);
    }

    // ok
    if (ok) return items;

    // failed: drop the partially filled array
    tb_object_exit(items);
    return tb_null;
}
/* //////////////////////////////////////////////////////////////////////////////////////
 * parser implementation
 */

/* parse the next element: <...>
 *
 * everything before the first '<' is discarded, then all characters up to (not
 * including) the next '>' are collected into reader->element.
 *
 * @param reader    the xml reader implementation
 *
 * @return          the c-string of reader->element, or tb_null (after the abort
 *                  assertion) if a zero character was read before the closing '>'
 */
static tb_char_t const* tb_xml_reader_element_parse(tb_xml_reader_impl_t* reader)
{
    // start from an empty element
    tb_string_clear(&reader->element);

    // phase 1: skip everything up to and including the opening '<'
    tb_char_t c = tb_stream_bread_s8(reader->rstream);
    while (c && c != '<') c = tb_stream_bread_s8(reader->rstream);

    // phase 2: collect the element body until the closing '>'
    if (c)
    {
        while ((c = tb_stream_bread_s8(reader->rstream)))
        {
            // done
            if (c == '>') return tb_string_cstr(&reader->element);

            // part of the element
            tb_string_chrcat(&reader->element, c);
        }
    }

    // failed
    tb_assertf_abort(0, "invalid element: %s from %s", tb_string_cstr(&reader->element), tb_url_get(tb_stream_url(reader->istream)));
    return tb_null;
}
/* read a json dictionary: the '{' has already been consumed and is passed as type
 *
 * whitespace and ',' are skipped everywhere, including inside a quoted key.
 * a key is the run of characters between quotes (either '"' or '\''), it ends
 * at the first ':' outside of quotes; the character after that selects the
 * reader of the value.
 *
 * @param reader    the json reader
 * @param type      must be '{'
 *
 * @return          the dictionary object, or tb_null if the arguments are invalid,
 *                  initialization failed, or any value failed to read
 */
static tb_object_ref_t tb_object_json_reader_func_dictionary(tb_object_json_reader_t* reader, tb_char_t type)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream && type == '{', tb_null);

    // the key buffer lives on the stack
    tb_static_string_t key;
    tb_char_t key_data[8192];
    if (!tb_static_string_init(&key, key_data, 8192)) return tb_null;

    // create the result dictionary
    tb_object_ref_t dict = tb_object_dictionary_init(0, tb_false);
    tb_assert_and_check_return_val(dict, tb_null);

    // walk
    tb_bool_t ok = tb_true;
    tb_bool_t key_done = tb_false;
    tb_size_t in_quotes = 0;
    while (ok && tb_stream_left(reader->stream))
    {
        // next character
        tb_char_t c = tb_stream_bread_s8(reader->stream);

        // end of the dictionary
        if (c == '}') break;

        // separators carry no data
        if (tb_isspace(c) || c == ',') continue;

        // still collecting the key?
        if (!key_done)
        {
            // a quote toggles the in-string state
            if (c == '\"' || c == '\'') in_quotes = !in_quotes;
            // ':' outside of quotes finishes the key
            else if (!in_quotes && c == ':') key_done = tb_true;
            // quoted characters belong to the key
            else if (in_quotes) tb_static_string_chrcat(&key, c);
            continue;
        }

        // trace
        tb_trace_d("key: %s", tb_static_string_cstr(&key));

        // the reader for this value
        tb_object_json_reader_func_t read_value = tb_object_json_reader_func(c);
        tb_assert_and_check_break_state(read_value, ok, tb_false);

        // read the value
        tb_object_ref_t value = read_value(reader, c);
        tb_assert_and_check_break_state(value, ok, tb_false);

        // key => value
        tb_object_dictionary_insert(dict, tb_static_string_cstr(&key), value);

        // get ready for the next pair
        in_quotes = 0;
        key_done = tb_false;
        tb_static_string_clear(&key);
    }

    // failed? drop the partially filled dictionary
    if (!ok)
    {
        if (dict) tb_object_exit(dict);
        dict = tb_null;
    }

    // release the key buffer
    tb_static_string_exit(&key);

    // ok?
    return dict;
}
/* read a json string: the opening quote has already been consumed and is passed as type
 *
 * the string ends at the next unescaped quote of the same kind that opened it,
 * so "it's" and 'say "hi"' are read completely. if the stream runs out first,
 * the characters collected so far are returned.
 *
 * escapes:
 * - \b \f \n \r \t   are decoded to the matching control character
 * - \uXXXX           is converted to utf-8 when the charset module is enabled,
 *                    otherwise an error is traced and 'u' is appended
 * - \<other>         appends <other> as it is (covers \" \' \\ \/)
 *
 * @param reader    the json reader
 * @param type      the opening quote, '"' or '\''
 *
 * @return          the string object, or tb_null if the arguments are invalid or
 *                  the temporary string could not be initialized
 */
static tb_object_ref_t tb_object_json_reader_func_string(tb_object_json_reader_t* reader, tb_char_t type)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream && (type == '\"' || type == '\''), tb_null);

    // init data
    tb_string_t data;
    if (!tb_string_init(&data)) return tb_null;

    // walk
    tb_char_t ch;
    while (tb_stream_left(reader->stream))
    {
        // read one character
        ch = tb_stream_bread_s8(reader->stream);

        // end? only the quote which opened the string may close it
        if (ch == type) break;
        // the escaped character?
        else if (ch == '\\')
        {
            // read one character
            ch = tb_stream_bread_s8(reader->stream);

            // unicode?
            if (ch == 'u')
            {
#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
                // the unicode string: four hex digits
                tb_char_t unicode_str[5];
                unicode_str[0] = tb_stream_bread_s8(reader->stream);
                unicode_str[1] = tb_stream_bread_s8(reader->stream);
                unicode_str[2] = tb_stream_bread_s8(reader->stream);
                unicode_str[3] = tb_stream_bread_s8(reader->stream);
                unicode_str[4] = '\0';

                // the unicode value
                tb_uint16_t unicode_val = tb_s16toi32(unicode_str);

                // the utf8 stream
                tb_char_t utf8_data[16] = {0};
                tb_static_stream_t utf8_stream;
                tb_static_stream_init(&utf8_stream, (tb_byte_t*)utf8_data, sizeof(utf8_data));

                // the unicode stream: the value is kept in native byte order
                tb_static_stream_t unicode_stream = {0};
                tb_static_stream_init(&unicode_stream, (tb_byte_t*)&unicode_val, 2);

                // unicode to utf8
                tb_long_t utf8_size = tb_charset_conv_bst(TB_CHARSET_TYPE_UCS2 | TB_CHARSET_TYPE_NE, TB_CHARSET_TYPE_UTF8, &unicode_stream, &utf8_stream);
                if (utf8_size > 0) tb_string_cstrncat(&data, utf8_data, utf8_size);
#else
                // trace
                tb_trace1_e("unicode type is not supported, please enable charset module config if you want to use it!");

                // only append it
                tb_string_chrcat(&data, ch);
#endif
            }
            // append escaped character
            else
            {
                // decode the control escapes, all other escaped characters stand for themselves
                switch (ch)
                {
                case 'b': ch = '\b'; break;
                case 'f': ch = '\f'; break;
                case 'n': ch = '\n'; break;
                case 'r': ch = '\r'; break;
                case 't': ch = '\t'; break;
                default: break;
                }
                tb_string_chrcat(&data, ch);
            }
        }
        // append character
        else tb_string_chrcat(&data, ch);
    }

    // init string
    tb_object_ref_t string = tb_object_string_init_from_cstr(tb_string_cstr(&data));

    // trace
    tb_trace_d("string: %s", tb_string_cstr(&data));

    // exit data
    tb_string_exit(&data);

    // ok?
    return string;
}
/* advance the xml reader to the next event
 *
 * the reader peeks one character: '<' starts an element which is classified as
 * document, document type, element end, comment, cdata, empty element or element
 * begin; any other non-zero character starts a text run; a zero character is
 * skipped. text consisting only of "\r\n" or "\n" produces no event.
 *
 * side effects visible here: impl->event is updated, impl->level is
 * incremented for an element begin and decremented for an element end, and for
 * a document header impl->version / impl->charset are filled and, for
 * gb2312/gbk input with the charset module enabled, impl->rstream is switched
 * to a filter stream which converts to utf-8.
 *
 * @param reader    the xml reader
 *
 * @return          the event (TB_XML_READER_EVENT_*), TB_XML_READER_EVENT_NONE if
 *                  the reader is invalid, no more data could be peeked, or
 *                  parsing stopped early
 */
tb_size_t tb_xml_reader_next(tb_xml_reader_ref_t reader)
{
    // check
    tb_xml_reader_impl_t* impl = (tb_xml_reader_impl_t*)reader;
    tb_assert_and_check_return_val(impl && impl->rstream, TB_XML_READER_EVENT_NONE);

    // reset event
    impl->event = TB_XML_READER_EVENT_NONE;

    // next
    while (!impl->event)
    {
        // peek character
        tb_char_t* pc = tb_null;
        if (!tb_stream_need(impl->rstream, (tb_byte_t**)&pc, 1) || !pc) break;

        // is element?
        if (*pc == '<')
        {
            // parse element: <...>
            tb_char_t const* element = tb_xml_reader_element_parse(impl);
            tb_assert_and_check_break(element);

            // is document begin: <?xml version="..." charset=".." ?>
            tb_size_t size = tb_string_size(&impl->element);
            if (size > 4 && !tb_strnicmp(element, "?xml", 4))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT;

                // update version & charset
                tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader);
                for (; attr; attr = attr->next)
                {
                    if (!tb_string_cstricmp(&attr->name, "version")) tb_string_strcpy(&impl->version, &attr->data);
                    if (!tb_string_cstricmp(&attr->name, "encoding")) tb_string_strcpy(&impl->charset, &attr->data);
                }

                // transform stream => utf-8
                if (tb_string_cstricmp(&impl->charset, "utf-8") && tb_string_cstricmp(&impl->charset, "utf8"))
                {
                    // charset
                    tb_size_t charset = TB_CHARSET_TYPE_UTF8;
                    if (!tb_string_cstricmp(&impl->charset, "gb2312") || !tb_string_cstricmp(&impl->charset, "gbk"))
                        charset = TB_CHARSET_TYPE_GB2312;
                    else tb_trace_e("the charset: %s is not supported", tb_string_cstr(&impl->charset));

                    // init transform stream
                    if (charset != TB_CHARSET_TYPE_UTF8)
                    {
#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
                        // init the filter stream
                        if (!impl->fstream) impl->fstream = tb_stream_init_filter_from_charset(impl->istream, charset, TB_CHARSET_TYPE_UTF8);
                        else
                        {
                            // the breaks below leave the outer loop with the document event already set

                            // ctrl stream
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_SET_STREAM, impl->istream)) break;

                            // the filter
                            tb_stream_filter_ref_t filter = tb_null;
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_GET_FILTER, &filter)) break;
                            tb_assert_and_check_break(filter);

                            // ctrl filter
                            if (!tb_stream_filter_ctrl(filter, TB_STREAM_FILTER_CTRL_CHARSET_SET_FTYPE, charset)) break;
                        }

                        // open the filter stream, from now on the reader consumes the converted data
                        if (impl->fstream && tb_stream_open(impl->fstream)) impl->rstream = impl->fstream;
                        tb_string_cstrcpy(&impl->charset, "utf-8");
#else
                        // trace
                        tb_trace_e("unicode type is not supported, please enable charset module config if you want to use it!");
#endif
                    }
                }
            }
            // is document type: <!DOCTYPE ... >
            else if (size > 8 && !tb_strnicmp(element, "!DOCTYPE", 8))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT_TYPE;
            }
            // is element end: </name>
            else if (size > 1 && element[0] == '/')
            {
                // check: an end tag without an open element stops the parsing
                tb_check_break(impl->level);

                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_END;

                // leave
                impl->level--;
            }
            // is comment: <!-- text -->
            else if (size >= 3 && !tb_strncmp(element, "!--", 3))
            {
                // no comment end? the element parser stopped at a '>' inside of the comment
                if (element[size - 2] != '-' || element[size - 1] != '-')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to comment end
                    tb_char_t ch = '\0';
                    tb_int_t n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        // -->, n counts the run of '-' directly before ch, a longer run like "--->" ends the comment too
                        if (n >= 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);
                            if (ch == '-') n++;
                            else n = 0;
                        }
                    }

                    // update event, a zero character means the end marker was never found
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_COMMENT;
                }
                else impl->event = TB_XML_READER_EVENT_COMMENT;
            }
            // is cdata: <![CDATA[ text ]]>
            else if (size >= 8 && !tb_strnicmp(element, "![CDATA[", 8))
            {
                // no cdata end? the element parser stopped at a '>' inside of the cdata
                if (element[size - 2] != ']' || element[size - 1] != ']')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to cdata end
                    tb_char_t ch = '\0';
                    tb_int_t n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        // ]]>, n counts the run of ']' directly before ch, so data ending in ']' ("]]]>") is closed too
                        if (n >= 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);
                            if (ch == ']') n++;
                            else n = 0;
                        }
                    }

                    // update event, a zero character means the end marker was never found
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_CDATA;
                }
                else impl->event = TB_XML_READER_EVENT_CDATA;
            }
            // is empty element: <name/>
            else if (size > 1 && element[size - 1] == '/')
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_EMPTY;
            }
            // is element begin: <name>
            else
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_BEG;

                // enter
                impl->level++;
            }

            // trace
            // tb_trace_d("<%s>", element);
        }
        // is text: <> text </>
        else if (*pc)
        {
            // parse text: <> ... <>, a lone line break between elements is not reported
            tb_char_t const* text = tb_xml_reader_text_parse(impl);
            if (text && tb_string_cstrcmp(&impl->text, "\r\n") && tb_string_cstrcmp(&impl->text, "\n"))
                impl->event = TB_XML_READER_EVENT_TEXT;

            // trace
            // tb_trace_d("%s", text);
        }
        else
        {
            // skip the invalid character
            if (!tb_stream_skip(impl->rstream, 1)) break;
        }
    }

    // ok?
    return impl->event;
}