/* //////////////////////////////////////////////////////////////////////////////////////
 * implementation
 */

/* read an alphabetic literal from the stream and build a null object from it
 *
 * The literal is collected into a 256-byte stack buffer: first the given
 * leading character, then every following alphabetic character peeked from
 * reader->stream. Reading stops (without consuming it) at the first
 * non-alphabetic character.
 *
 * @param reader    the json reader, reader->stream must be valid
 * @param type      the leading character of the literal, it is appended to the
 *                  buffer before anything is read from the stream
 *                  (presumably it was already consumed by the caller - confirm)
 *
 * @return          the object from tb_object_null_init() if the collected text
 *                  equals "null" (case-insensitive), otherwise tb_null
 */
static tb_object_ref_t tb_object_json_reader_func_null(tb_object_json_reader_t* reader, tb_char_t type)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream, tb_null);

    // init data
    tb_static_string_t  data;
    tb_char_t           buff[256];
    if (!tb_static_string_init(&data, buff, 256)) return tb_null;

    // done
    tb_object_ref_t null = tb_null;
    do
    {
        // append character
        tb_static_string_chrcat(&data, type);

        // walk
        tb_bool_t failed = tb_false;
        while (!failed && tb_stream_left(reader->stream))
        {
            // need one character, fail if the peek fails or yields no data,
            // so that p is never dereferenced while it is still null
            tb_byte_t* p = tb_null;
            if (!tb_stream_need(reader->stream, &p, 1) || !p)
            {
                failed = tb_true;
                break;
            }

            // the character
            tb_char_t ch = *p;

            // append character, the first non-alphabetic one ends the literal
            if (tb_isalpha(ch)) tb_static_string_chrcat(&data, ch);
            else break;

            // skip it
            tb_stream_skip(reader->stream, 1);
        }

        // failed?
        tb_check_break(!failed);

        // check
        tb_assert_and_check_break(tb_static_string_size(&data));

        // trace
        tb_trace_d("null: %s", tb_static_string_cstr(&data));

        // null?
        if (!tb_stricmp(tb_static_string_cstr(&data), "null")) null = tb_object_null_init();

    } while (0);

    // exit data
    tb_static_string_exit(&data);

    // ok?
    return null;
}
/* collect the text that precedes the next '<' into reader->text
 *
 * The text buffer is cleared first. Characters are peeked one at a time from
 * reader->rstream; each character other than '<' is appended to the text and
 * then skipped. The '<' itself is left in the stream.
 *
 * @param reader    the xml reader
 *
 * @return          the c-string of reader->text once a '<' is seen, or tb_null
 *                  if the stream cannot supply or skip a character before that
 */
static tb_char_t const* tb_xml_reader_text_parse(tb_xml_reader_impl_t* reader)
{
    // start from an empty text buffer
    tb_string_clear(&reader->text);

    // consume characters until the next element begins
    for (;;)
    {
        // peek one character, stop when nothing more is available
        tb_char_t* cursor = tb_null;
        if (!tb_stream_need(reader->rstream, (tb_byte_t**)&cursor, 1) || !cursor) break;

        // the next element begins here, the text is complete
        if (*cursor == '<') return tb_string_cstr(&reader->text);

        // keep the character and move past it
        tb_string_chrcat(&reader->text, *cursor);
        if (!tb_stream_skip(reader->rstream, 1)) break;
    }

    // no element start was found
    return tb_null;
}
/* read a numeric literal from the stream and build a number object from it
 *
 * The literal is collected into a 256-byte stack buffer: first the given
 * leading character, then every following character in [0-9.eE+-] peeked from
 * reader->stream. Reading stops (without consuming it) at the first other
 * character. A second '.' makes the whole parse fail.
 *
 * The object type is chosen as follows:
 * - a '.' was seen: float (only if TB_CONFIG_TYPE_FLOAT is defined, otherwise
 *   tb_trace_noimpl() is called and tb_null is returned)
 * - the leading character is '-': the narrowest signed integer type whose
 *   range contains the value
 * - otherwise: an unsigned integer type sized by tb_object_need_bytes()
 *
 * NOTE(review): an exponent without a '.' (e.g. "1e5") does not mark the
 * literal as float and is handed to the integer converters - confirm whether
 * this is intended.
 *
 * @param reader    the json reader, reader->stream must be valid
 * @param type      the leading character of the literal, it is appended to the
 *                  buffer before anything is read from the stream
 *
 * @return          the number object, or tb_null on failure
 */
static tb_object_ref_t tb_object_json_reader_func_number(tb_object_json_reader_t* reader, tb_char_t type)
{
    // check
    tb_assert_and_check_return_val(reader && reader->stream, tb_null);

    // init data
    tb_static_string_t  data;
    tb_char_t           buff[256];
    if (!tb_static_string_init(&data, buff, 256)) return tb_null;

    // done
    tb_object_ref_t number = tb_null;
    do
    {
        // append character
        tb_static_string_chrcat(&data, type);

        // walk
        tb_bool_t bs = (type == '-')? tb_true : tb_false;
        tb_bool_t bf = (type == '.')? tb_true : tb_false;
        tb_bool_t failed = tb_false;
        while (!failed && tb_stream_left(reader->stream))
        {
            // need one character, fail if the peek fails or yields no data,
            // so that p is never dereferenced while it is still null
            tb_byte_t* p = tb_null;
            if (!tb_stream_need(reader->stream, &p, 1) || !p)
            {
                failed = tb_true;
                break;
            }

            // the character
            tb_char_t ch = *p;

            // is float? a second '.' is an error
            if (!bf && ch == '.') bf = tb_true;
            else if (bf && ch == '.')
            {
                failed = tb_true;
                break;
            }

            // append character, the first non-numeric one ends the literal
            if (tb_isdigit10(ch) || ch == '.' || ch == 'e' || ch == 'E' || ch == '-' || ch == '+')
                tb_static_string_chrcat(&data, ch);
            else break;

            // skip it
            tb_stream_skip(reader->stream, 1);
        }

        // failed?
        tb_check_break(!failed);

        // check
        tb_assert_and_check_break(tb_static_string_size(&data));

        // trace
        tb_trace_d("number: %s", tb_static_string_cstr(&data));

        // init number
#ifdef TB_CONFIG_TYPE_FLOAT
        if (bf) number = tb_object_number_init_from_float(tb_stof(tb_static_string_cstr(&data)));
#else
        if (bf) tb_trace_noimpl();
#endif
        else if (bs)
        {
            /* pick the narrowest signed type whose range really contains the value
             *
             * sizing by the byte count of the magnitude is not enough here:
             * e.g. 200 fits into one byte, but -200 does not fit into a sint8.
             * comparing the ranges directly also avoids negating the value,
             * which would overflow for the minimum 64-bit value.
             */
            tb_sint64_t value = tb_stoi64(tb_static_string_cstr(&data));
            if (value >= -0x80 && value <= 0x7f)
                number = tb_object_number_init_from_sint8((tb_sint8_t)value);
            else if (value >= -0x8000 && value <= 0x7fff)
                number = tb_object_number_init_from_sint16((tb_sint16_t)value);
            else if (value >= -0x80000000LL && value <= 0x7fffffffLL)
                number = tb_object_number_init_from_sint32((tb_sint32_t)value);
            else
                number = tb_object_number_init_from_sint64((tb_sint64_t)value);
        }
        else
        {
            tb_uint64_t value = tb_stou64(tb_static_string_cstr(&data));
            tb_size_t   bytes = tb_object_need_bytes(value);
            switch (bytes)
            {
            case 1: number = tb_object_number_init_from_uint8((tb_uint8_t)value); break;
            case 2: number = tb_object_number_init_from_uint16((tb_uint16_t)value); break;
            case 4: number = tb_object_number_init_from_uint32((tb_uint32_t)value); break;
            case 8: number = tb_object_number_init_from_uint64((tb_uint64_t)value); break;
            default: break;
            }
        }

    } while (0);

    // exit data
    tb_static_string_exit(&data);

    // ok?
    return number;
}
tb_size_t tb_xml_reader_next(tb_xml_reader_ref_t reader) { // check tb_xml_reader_impl_t* impl = (tb_xml_reader_impl_t*)reader; tb_assert_and_check_return_val(impl && impl->rstream, TB_XML_READER_EVENT_NONE); // reset event impl->event = TB_XML_READER_EVENT_NONE; // next while (!impl->event) { // peek character tb_char_t* pc = tb_null; if (!tb_stream_need(impl->rstream, (tb_byte_t**)&pc, 1) || !pc) break; // is element? if (*pc == '<') { // parse element: <...> tb_char_t const* element = tb_xml_reader_element_parse(impl); tb_assert_and_check_break(element); // is document begin: <?xml version="..." charset=".." ?> tb_size_t size = tb_string_size(&impl->element); if (size > 4 && !tb_strnicmp(element, "?xml", 4)) { // update event impl->event = TB_XML_READER_EVENT_DOCUMENT; // update version & charset tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader); for (; attr; attr = attr->next) { if (!tb_string_cstricmp(&attr->name, "version")) tb_string_strcpy(&impl->version, &attr->data); if (!tb_string_cstricmp(&attr->name, "encoding")) tb_string_strcpy(&impl->charset, &attr->data); } // transform stream => utf-8 if (tb_string_cstricmp(&impl->charset, "utf-8") && tb_string_cstricmp(&impl->charset, "utf8")) { // charset tb_size_t charset = TB_CHARSET_TYPE_UTF8; if (!tb_string_cstricmp(&impl->charset, "gb2312") || !tb_string_cstricmp(&impl->charset, "gbk")) charset = TB_CHARSET_TYPE_GB2312; else tb_trace_e("the charset: %s is not supported", tb_string_cstr(&impl->charset)); // init transform stream if (charset != TB_CHARSET_TYPE_UTF8) { #ifdef TB_CONFIG_MODULE_HAVE_CHARSET // init the filter stream if (!impl->fstream) impl->fstream = tb_stream_init_filter_from_charset(impl->istream, charset, TB_CHARSET_TYPE_UTF8); else { // ctrl stream if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_SET_STREAM, impl->istream)) break; // the filter tb_stream_filter_ref_t filter = tb_null; if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_GET_FILTER, 
&filter)) break; tb_assert_and_check_break(filter); // ctrl filter if (!tb_stream_filter_ctrl(filter, TB_STREAM_FILTER_CTRL_CHARSET_SET_FTYPE, charset)) break; } // open the filter stream if (impl->fstream && tb_stream_open(impl->fstream)) impl->rstream = impl->fstream; tb_string_cstrcpy(&impl->charset, "utf-8"); #else // trace tb_trace_e("unicode type is not supported, please enable charset module config if you want to use it!"); #endif } } } // is document type: <!DOCTYPE ... > else if (size > 8 && !tb_strnicmp(element, "!DOCTYPE", 8)) { // update event impl->event = TB_XML_READER_EVENT_DOCUMENT_TYPE; } // is element end: </name> else if (size > 1 && element[0] == '/') { // check tb_check_break(impl->level); // update event impl->event = TB_XML_READER_EVENT_ELEMENT_END; // leave impl->level--; } // is comment: <!-- text --> else if (size >= 3 && !tb_strncmp(element, "!--", 3)) { // no comment end? if (element[size - 2] != '-' || element[size - 1] != '-') { // patch '>' tb_string_chrcat(&impl->element, '>'); // seek to comment end tb_char_t ch = '\0'; tb_int_t n = 0; while ((ch = tb_stream_bread_s8(impl->rstream))) { // --> if (n == 2 && ch == '>') break; else { // append it tb_string_chrcat(&impl->element, ch); if (ch == '-') n++; else n = 0; } } // update event if (ch != '\0') impl->event = TB_XML_READER_EVENT_COMMENT; } else impl->event = TB_XML_READER_EVENT_COMMENT; } // is cdata: <![CDATA[ text ]]> else if (size >= 8 && !tb_strnicmp(element, "![CDATA[", 8)) { if (element[size - 2] != ']' || element[size - 1] != ']') { // patch '>' tb_string_chrcat(&impl->element, '>'); // seek to cdata end tb_char_t ch = '\0'; tb_int_t n = 0; while ((ch = tb_stream_bread_s8(impl->rstream))) { // ]]> if (n == 2 && ch == '>') break; else { // append it tb_string_chrcat(&impl->element, ch); if (ch == ']') n++; else n = 0; } } // update event if (ch != '\0') impl->event = TB_XML_READER_EVENT_CDATA; } else impl->event = TB_XML_READER_EVENT_CDATA; } // is empty element: <name/> 
else if (size > 1 && element[size - 1] == '/') { // update event impl->event = TB_XML_READER_EVENT_ELEMENT_EMPTY; } // is element begin: <name> else { // update event impl->event = TB_XML_READER_EVENT_ELEMENT_BEG; // enter impl->level++; } // trace // tb_trace_d("<%s>", element); } // is text: <> text </> else if (*pc) { // parse text: <> ... <> tb_char_t const* text = tb_xml_reader_text_parse(impl); if (text && tb_string_cstrcmp(&impl->text, "\r\n") && tb_string_cstrcmp(&impl->text, "\n")) impl->event = TB_XML_READER_EVENT_TEXT; // trace // tb_trace_d("%s", text); } else { // skip the invalid character if (!tb_stream_skip(impl->rstream, 1)) break; } } // ok? return impl->event; }