/* read a <string> element as a string object
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the string object, or tb_null if a check failed or the events
 *                  ran out before anything was read
 */
static tb_object_ref_t tb_object_xplist_reader_func_string(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // <string/>: there is no text and no closing tag to wait for
    if (TB_XML_READER_EVENT_ELEMENT_EMPTY == event)
        return tb_object_string_init_from_cstr(tb_null);

    // consume events until the closing tag, a failed check or the last event
    tb_object_ref_t result   = tb_null;
    tb_bool_t       finished = tb_false;
    while (!finished)
    {
        // the next event
        event = tb_xml_reader_next(reader->reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_TEXT:
            {
                // the text
                tb_char_t const* content = tb_xml_reader_text(reader->reader);
                tb_assert_and_check_break_state(content, finished, tb_true);
                tb_trace_d("string: %s", content);

                // make the string object from it
                result = tb_object_string_init_from_cstr(content);
                tb_assert_and_check_break_state(result, finished, tb_true);
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // the name of the closed element
                tb_char_t const* tag = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(tag, finished, tb_true);

                // only </string> ends this element
                if (tb_stricmp(tag, "string")) break;

                // no text was seen? make the object from a null c-string
                if (!result) result = tb_object_string_init_from_cstr(tb_null);

                // done
                finished = tb_true;
            }
            break;
        default:
            break;
        }
    }

    // the string object, may be null
    return result;
}
/* read one object from an xml plist stream
 *
 * every opening or empty element other than <plist> is handed to the reader
 * function registered for its name; the walk stops at the first object made.
 *
 * @param stream    the stream passed to the xml reader
 *
 * @return          the object, or tb_null if the xml reader could not be
 *                  created or opened, a check failed or no object was made
 */
static tb_object_ref_t tb_object_xplist_reader_done(tb_stream_ref_t stream)
{
    // init the xml reader
    tb_object_xplist_reader_t reader = {0};
    reader.reader = tb_xml_reader_init();
    tb_assert_and_check_return_val(reader.reader, tb_null);

    // open it, there is nothing to walk otherwise
    if (!tb_xml_reader_open(reader.reader, stream, tb_false))
    {
        tb_xml_reader_exit(reader.reader);
        return tb_null;
    }

    // walk the events until an object was made, a check failed or the events ran out
    tb_object_ref_t root  = tb_null;
    tb_bool_t       stop  = tb_false;
    tb_size_t       event = TB_XML_READER_EVENT_NONE;
    while (!stop && !root)
    {
        // the next event
        event = tb_xml_reader_next(reader.reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_BEG:
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
            {
                // the element name
                tb_char_t const* tag = tb_xml_reader_element(reader.reader);
                tb_assert_and_check_break_state(tag, stop, tb_true);

                // the <plist> wrapper itself makes no object
                if (!tb_stricmp(tag, "plist")) break;

                // the reader function for this element
                tb_object_xplist_reader_func_t func = tb_object_xplist_reader_func(tag);
                tb_assert_and_check_break_state(func, stop, tb_true);

                // read the object
                root = func(&reader, event);
            }
            break;
        default:
            break;
        }
    }

    // exit the xml reader
    tb_xml_reader_exit(reader.reader);

    // the object, may be null
    return root;
}
/* //////////////////////////////////////////////////////////////////////////////////////
 * implementation
 */

/* read a <date> element as a date object
 *
 * the text is scanned for six runs of digits, taken in order as year, month,
 * day, hour, minute and second; whatever separates the runs is skipped.
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the date object, or tb_null if a check failed or the events
 *                  ran out before anything was read
 */
static tb_object_ref_t tb_object_xplist_reader_func_date(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // <date/>: made from the time 0
    if (TB_XML_READER_EVENT_ELEMENT_EMPTY == event)
        return tb_object_date_init_from_time(0);

    // consume events until the closing tag, a failed check or the last event
    tb_object_ref_t result   = tb_null;
    tb_bool_t       finished = tb_false;
    while (!finished)
    {
        // the next event
        event = tb_xml_reader_next(reader->reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_TEXT:
            {
                // the text
                tb_char_t const* content = tb_xml_reader_text(reader->reader);
                tb_assert_and_check_break_state(content, finished, tb_true);
                tb_trace_d("date: %s", content);

                // the scan range
                tb_tm_t             fields = {0};
                tb_char_t const*    cur = content;
                tb_char_t const*    end = content + tb_strlen(content);

                // year: the first run of digits
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.year = tb_atoi(cur);

                // month: step over the year digits and the separator
                for (; cur < end && *cur && tb_isdigit(*cur); cur++) ;
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.month = tb_atoi(cur);

                // day
                for (; cur < end && *cur && tb_isdigit(*cur); cur++) ;
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.mday = tb_atoi(cur);

                // hour
                for (; cur < end && *cur && tb_isdigit(*cur); cur++) ;
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.hour = tb_atoi(cur);

                // minute
                for (; cur < end && *cur && tb_isdigit(*cur); cur++) ;
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.minute = tb_atoi(cur);

                // second
                for (; cur < end && *cur && tb_isdigit(*cur); cur++) ;
                for (; cur < end && *cur && !tb_isdigit(*cur); cur++) ;
                tb_assert_and_check_break_state(cur < end, finished, tb_true);
                fields.second = tb_atoi(cur);

                // convert the fields, a negative time is a failure
                tb_time_t when = tb_mktime(&fields);
                tb_assert_and_check_break_state(when >= 0, finished, tb_true);

                // make the date object
                result = tb_object_date_init_from_time(when);
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // the name of the closed element
                tb_char_t const* tag = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(tag, finished, tb_true);

                // only </date> ends this element
                if (tb_stricmp(tag, "date")) break;

                // no date was made from text? make it from the time 0
                if (!result) result = tb_object_date_init_from_time(0);

                // done
                finished = tb_true;
            }
            break;
        default:
            break;
        }
    }

    // the date object, may be null
    return result;
}
/* read a <dict> element as a dictionary object
 *
 * the text of each <key> element is remembered as the pending key name; the
 * next value element is read with the reader function registered for its name
 * and inserted under that key, after which the pending key name is cleared.
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the dictionary object, or tb_null if a check failed, an
 *                  element name is unknown or a value could not be read
 */
static tb_object_ref_t tb_object_xplist_reader_func_dictionary(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // empty?
    if (event == TB_XML_READER_EVENT_ELEMENT_EMPTY)
        return tb_object_dictionary_init(TB_OBJECT_DICTIONARY_SIZE_MICRO, tb_false);

    // init key name
    tb_static_string_t  kname;
    tb_char_t           kdata[8192];
    if (!tb_static_string_init(&kname, kdata, 8192)) return tb_null;

    // init dictionary
    tb_object_ref_t dictionary = tb_object_dictionary_init(0, tb_false);
    tb_assert_and_check_return_val(dictionary, tb_null);

    // done: ok is 0 while reading, 1 at </dict> and -1 on failure
    tb_long_t ok = 0;
    tb_bool_t key = tb_false;
    while (!ok && (event = tb_xml_reader_next(reader->reader)))
    {
        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_BEG:
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
            {
                // name
                tb_char_t const* name = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(name, ok, -1);
                tb_trace_d("%s", name);

                // is key
                if (!tb_stricmp(name, "key"))
                {
                    /* only an opening <key> is followed by text and a closing tag.
                     * <key/> has neither, so entering key mode for it would never
                     * be undone and every later value would be skipped; it simply
                     * leaves no key name pending.
                     */
                    if (event == TB_XML_READER_EVENT_ELEMENT_BEG) key = tb_true;
                    else tb_static_string_clear(&kname);
                }
                else if (!key)
                {
                    // func
                    tb_object_xplist_reader_func_t func = tb_object_xplist_reader_func(name);
                    tb_assert_and_check_break_state(func, ok, -1);

                    // read
                    tb_object_ref_t object = func(reader, event);
                    tb_trace_d("%s => %p", tb_static_string_cstr(&kname), object);
                    tb_assert_and_check_break_state(object, ok, -1);

                    // set key & value, or release a value that has no key to live under
                    if (tb_static_string_size(&kname)) tb_object_dictionary_insert(dictionary, tb_static_string_cstr(&kname), object);
                    else tb_object_exit(object);

                    // clear key name
                    tb_static_string_clear(&kname);
                }
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // name
                tb_char_t const* name = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(name, ok, -1);

                // is end?
                if (!tb_stricmp(name, "dict")) ok = 1;
                else if (!tb_stricmp(name, "key")) key = tb_false;
            }
            break;
        case TB_XML_READER_EVENT_TEXT:
            {
                // text outside of <key> is not used here
                if (key)
                {
                    // text
                    tb_char_t const* text = tb_xml_reader_text(reader->reader);
                    tb_assert_and_check_break_state(text, ok, -1);

                    // write key name
                    tb_static_string_cstrcpy(&kname, text);
                }
            }
            break;
        default:
            break;
        }
    }

    // failed?
    if (ok < 0)
    {
        // exit it
        if (dictionary) tb_object_exit(dictionary);
        dictionary = tb_null;
    }

    // exit key name
    tb_static_string_exit(&kname);

    // ok?
    return dictionary;
}
/* read an <integer> or <real> element as a number object
 *
 * the text is scanned for '-' and '.': text with a '.' is read as a double
 * (only if TB_CONFIG_TYPE_HAVE_FLOAT is defined), text with a '-' as a signed
 * 64-bit integer and anything else as an unsigned 64-bit integer.
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the number object, or tb_null if a check failed or the
 *                  events ran out before anything was read
 */
static tb_object_ref_t tb_object_xplist_reader_func_number(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // empty?
    if (event == TB_XML_READER_EVENT_ELEMENT_EMPTY)
        return tb_object_number_init_from_uint32(0);

    // done
    tb_bool_t       leave = tb_false;
    tb_object_ref_t number = tb_null;
    while (!leave && (event = tb_xml_reader_next(reader->reader)))
    {
        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // name
                tb_char_t const* name = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(name, leave, tb_true);

                // is end?
                if (!tb_stricmp(name, "integer") || !tb_stricmp(name, "real"))
                {
                    /* no text between the tags? use the same value as the empty
                     * element form, so that <integer></integer> and <integer/>
                     * read alike instead of the former yielding tb_null
                     */
                    if (!number) number = tb_object_number_init_from_uint32(0);

                    // leave it
                    leave = tb_true;
                }
            }
            break;
        case TB_XML_READER_EVENT_TEXT:
            {
                // text
                tb_char_t const* text = tb_xml_reader_text(reader->reader);
                tb_assert_and_check_break_state(text, leave, tb_true);
                tb_trace_d("number: %s", text);

                // has sign? is float? stop scanning once both were found
                tb_size_t s = 0;
                tb_size_t f = 0;
                tb_char_t const* p = text;
                for (; *p; p++)
                {
                    if (!s && *p == '-') s = 1;
                    if (!f && *p == '.') f = 1;
                    if (s && f) break;
                }

                // number, the else below pairs with whichever if (f) was compiled in
#ifdef TB_CONFIG_TYPE_HAVE_FLOAT
                if (f) number = tb_object_number_init_from_double(tb_atof(text));
#else
                if (f) tb_trace_noimpl();
#endif
                else number = s? tb_object_number_init_from_sint64(tb_stoi64(text)) : tb_object_number_init_from_uint64(tb_stou64(text));
                tb_assert_and_check_break_state(number, leave, tb_true);
            }
            break;
        default:
            break;
        }
    }

    // ok?
    return number;
}
/* read an <array> element as an array object
 *
 * every opening or empty child element is read with the reader function
 * registered for its name; children that yield no object are left out.
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the array object, or tb_null if a check failed or a child
 *                  element has no reader function
 */
static tb_object_ref_t tb_object_xplist_reader_func_array(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // <array/>: an array without items
    if (TB_XML_READER_EVENT_ELEMENT_EMPTY == event)
        return tb_object_array_init(TB_OBJECT_XPLIST_READER_ARRAY_GROW, tb_false);

    // the array that collects the items
    tb_object_ref_t items = tb_object_array_init(TB_OBJECT_XPLIST_READER_ARRAY_GROW, tb_false);
    tb_assert_and_check_return_val(items, tb_null);

    // state: 0 while reading, 1 at </array>, -1 on failure
    tb_long_t state = 0;
    while (!state)
    {
        // the next event
        event = tb_xml_reader_next(reader->reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // the name of the closed element
                tb_char_t const* tag = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(tag, state, -1);

                // </array> ends the walk
                if (!tb_stricmp(tag, "array")) state = 1;
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
        case TB_XML_READER_EVENT_ELEMENT_BEG:
            {
                // the name of the item element
                tb_char_t const* tag = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(tag, state, -1);
                tb_trace_d("item: %s", tag);

                // the reader function for it
                tb_object_xplist_reader_func_t func = tb_object_xplist_reader_func(tag);
                tb_assert_and_check_break_state(func, state, -1);

                // read the item and append it if there is one
                tb_object_ref_t item = func(reader, event);
                if (item) tb_object_array_append(items, item);
            }
            break;
        default:
            break;
        }
    }

    // failed? drop what was collected
    if (state < 0)
    {
        tb_object_exit(items);
        items = tb_null;
    }

    // the array object, may be null
    return items;
}
/* read a <data> element as a data object
 *
 * the text is copied, stripped of white space and decoded as base64.
 *
 * @param reader    the xplist reader, its xml reader must be set
 * @param event     the xml event that announced the element, must be non-zero
 *
 * @return          the data object, or tb_null if a check or an allocation
 *                  failed or the events ran out before anything was read
 */
static tb_object_ref_t tb_object_xplist_reader_func_data(tb_object_xplist_reader_t* reader, tb_size_t event)
{
    // check
    tb_assert_and_check_return_val(reader && reader->reader && event, tb_null);

    // empty?
    if (event == TB_XML_READER_EVENT_ELEMENT_EMPTY)
        return tb_object_data_init_from_data(tb_null, 0);

    // done
    tb_bool_t       leave = tb_false;
    tb_char_t*      base64 = tb_null;
    tb_object_ref_t data = tb_null;
    while (!leave && (event = tb_xml_reader_next(reader->reader)))
    {
        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // name
                tb_char_t const* name = tb_xml_reader_element(reader->reader);
                tb_assert_and_check_break_state(name, leave, tb_true);

                // is end?
                if (!tb_stricmp(name, "data"))
                {
                    // empty?
                    if (!data) data = tb_object_data_init_from_data(tb_null, 0);

                    // leave it
                    leave = tb_true;
                }
            }
            break;
        case TB_XML_READER_EVENT_TEXT:
            {
                // text
                tb_char_t const* text = tb_xml_reader_text(reader->reader);
                tb_assert_and_check_break_state(text, leave, tb_true);
                tb_trace_d("data: %s", text);

                // release what an earlier text event left behind, it is replaced below
                if (base64) tb_free(base64);
                base64 = tb_null;
                if (data) tb_object_exit(data);
                data = tb_null;

                // copy the text, the copy is edited in place
                base64 = tb_strdup(text);
                tb_assert_and_check_break_state(base64, leave, tb_true);

                // remove the white space
                tb_char_t* p = base64;
                tb_char_t* q = p;
                for (; *p; p++) if (!tb_isspace(*p)) *q++ = *p;
                *q = '\0';

                // decode base64 data
                tb_char_t const*    ib = base64;
                tb_size_t           in = tb_strlen(base64);
                if (in)
                {
                    // the output buffer is given the size of the input
                    tb_size_t       on = in;
                    tb_byte_t*      ob = tb_malloc0_bytes(on);
                    tb_assert_and_check_break_state(ob && on, leave, tb_true);
                    on = tb_base64_decode(ib, in, ob, on);
                    tb_trace_d("base64: %u => %u", in, on);

                    // init data
                    data = tb_object_data_init_from_data(ob, on);
                    tb_free(ob);
                }
                else data = tb_object_data_init_from_data(tb_null, 0);
                tb_assert_and_check_break_state(data, leave, tb_true);
            }
            break;
        default:
            break;
        }
    }

    // free
    if (base64) tb_free(base64);

    // ok?
    return data;
}
/* find the next url in the xml events and store it in url
 *
 * the href and src attributes of <a>, <link>, <img>, <frame> and <source>
 * elements are probed; an http value is stored with tb_url_set() and a file
 * value with tb_url_path_set(). all attributes of a matching element are
 * visited, so a later attribute may replace the result of an earlier one.
 *
 * @param reader    the xml reader
 * @param url       the url that receives the result
 *
 * @return          non-zero if an url was stored, zero if the events ran out
 */
static tb_size_t tb_demo_spider_parser_get_url(tb_xml_reader_ref_t reader, tb_url_ref_t url)
{
    // check
    tb_assert_and_check_return_val(reader && url, tb_false);

    // walk the events until an url was stored
    tb_size_t found = 0;
    tb_size_t event = TB_XML_READER_EVENT_NONE;
    while (!found)
    {
        // the next event
        event = tb_xml_reader_next(reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_BEG:
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
            {
                // the element name
                tb_char_t const* tag = tb_xml_reader_element(reader);
                tb_check_break(tag);

                // is it one of the elements that carry a link?
                tb_bool_t linked =  !tb_stricmp(tag, "a")
                                ||  !tb_stricmp(tag, "link")
                                ||  !tb_stricmp(tag, "img")
                                ||  !tb_stricmp(tag, "frame")
                                ||  !tb_stricmp(tag, "source");
                if (!linked) break;

                // walk the attributes
                tb_xml_node_ref_t item = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader);
                for (; item; item = item->next)
                {
                    // attributes without a value are of no use
                    if (!tb_string_size(&item->data)) continue;

                    // neither href nor src?
                    if (tb_string_cstricmp(&item->name, "href") && tb_string_cstricmp(&item->name, "src")) continue;

                    // the protocol of the value
                    tb_size_t kind = tb_url_protocol_probe(tb_string_cstr(&item->data));
                    if (kind == TB_URL_PROTOCOL_HTTP)
                    {
                        // store the whole url, the result tells whether it was accepted
                        found = tb_url_set(url, tb_string_cstr(&item->data));
                    }
                    else if (kind == TB_URL_PROTOCOL_FILE)
                    {
                        // store the value as the path
                        tb_url_path_set(url, tb_string_cstr(&item->data));
                        found = tb_true;
                    }
                }
            }
            break;
        default:
            break;
        }
    }

    // found?
    return found;
}
/* load the remaining xml events into a node tree
 *
 * a document node is created when the first event arrives; elements, texts,
 * cdata sections, comments and the document type are appended as children of
 * the current node. an opening element becomes the current node and a closing
 * element moves the current node back to its parent.
 *
 * @param reader    the xml reader
 *
 * @return          the current node once the events ran out, or tb_null if a
 *                  check failed or no event was read
 */
tb_xml_node_ref_t tb_xml_reader_load(tb_xml_reader_ref_t reader)
{
    // check
    tb_assert_and_check_return_val(reader, tb_null);

    // walk the events
    tb_xml_node_ref_t   current = tb_null;
    tb_bool_t           good = tb_true;
    tb_size_t           event = TB_XML_READER_EVENT_NONE;
    while (good)
    {
        // the next event
        event = tb_xml_reader_next(reader);
        if (!event) break;

        // no current node? start a document, it must not have a parent
        if (!current)
        {
            current = tb_xml_node_init_document(tb_xml_reader_version(reader), tb_xml_reader_charset(reader));
            tb_assert_and_check_break_state(current && !current->parent, good, tb_false);
        }

        switch (event)
        {
        case TB_XML_READER_EVENT_DOCUMENT:
            break;
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
        case TB_XML_READER_EVENT_ELEMENT_BEG:
            {
                // the element node
                tb_xml_node_ref_t element = tb_xml_node_init_element(tb_xml_reader_element(reader));
                tb_assert_and_check_break_state(element, good, tb_false);

                // copy the attributes to it
                tb_xml_node_ref_t attr = tb_xml_reader_attributes(reader);
                for (; attr; attr = attr->next)
                    tb_xml_node_append_atail(element, tb_xml_node_init_attribute(tb_string_cstr(&attr->name), tb_string_cstr(&attr->data)));

                // append it to the current node
                tb_xml_node_append_ctail(current, element);
                tb_assert_and_check_break_state(element->parent, good, tb_false);

                // only an opening element takes children of its own
                if (TB_XML_READER_EVENT_ELEMENT_BEG == event) current = element;
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // check
                tb_assert_and_check_break_state(current, good, tb_false);

                /* back to the parent node
                 *
                 * NOTE(review): if the current node is the document node this makes
                 * it null and the next event starts another document node - confirm
                 * whether callers rely on that
                 */
                current = current->parent;
            }
            break;
        case TB_XML_READER_EVENT_DOCUMENT_TYPE:
        case TB_XML_READER_EVENT_TEXT:
        case TB_XML_READER_EVENT_CDATA:
        case TB_XML_READER_EVENT_COMMENT:
            {
                // the node for this kind of content
                tb_xml_node_ref_t leaf = tb_null;
                if (TB_XML_READER_EVENT_DOCUMENT_TYPE == event) leaf = tb_xml_node_init_document_type(tb_xml_reader_doctype(reader));
                else if (TB_XML_READER_EVENT_TEXT == event) leaf = tb_xml_node_init_text(tb_xml_reader_text(reader));
                else if (TB_XML_READER_EVENT_CDATA == event) leaf = tb_xml_node_init_cdata(tb_xml_reader_cdata(reader));
                else leaf = tb_xml_node_init_comment(tb_xml_reader_comment(reader));
                tb_assert_and_check_break_state(leaf, good, tb_false);

                // append it to the current node
                tb_xml_node_append_ctail(current, leaf);
                tb_assert_and_check_break_state(leaf->parent, good, tb_false);
            }
            break;
        default:
            break;
        }
    }

    // failed? exit the current node
    if (!good)
    {
        if (current) tb_xml_node_exit(current);
        current = tb_null;
    }

    // the current node, may be null
    return current;
}
/* move the reader to the element at the given path
 *
 * the stream is rewound to offset 0 and the events are walked while the names
 * of the open elements are joined with '/' into the current path. when that
 * path equals the given one (ignoring case) the stream is moved back to the
 * offset recorded before the matching event.
 *
 * @param reader    the xml reader, it must have a stream
 * @param path      the path to look for
 *
 * @return          tb_true if the path was found and the stream was moved in
 *                  front of it; otherwise tb_false, and unless one of the
 *                  initial steps failed the stream is rewound to offset 0
 */
tb_bool_t tb_xml_reader_goto(tb_xml_reader_ref_t reader, tb_char_t const* path)
{
    // check
    tb_xml_reader_impl_t* impl = (tb_xml_reader_impl_t*)reader;
    tb_assert_and_check_return_val(impl && impl->rstream && path, tb_false);
    tb_trace_d("goto: %s", path);

    // reset the level
    impl->level = 0;

    // rewind the stream
    if (!tb_stream_seek(impl->rstream, 0)) return tb_false;

    // the current path, kept in a buffer on the stack
    tb_static_string_t  current;
    tb_char_t           buffer[8192];
    if (!tb_static_string_init(&current, buffer, 8192)) return tb_false;

    // the offset in front of the next event
    tb_hize_t before = tb_stream_offset(impl->rstream);

    // walk the events
    tb_bool_t found = tb_false;
    tb_bool_t failed = tb_false;
    tb_size_t event = TB_XML_READER_EVENT_NONE;
    while (!failed && !found)
    {
        // the next event
        event = tb_xml_reader_next(reader);
        if (!event) break;

        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
        case TB_XML_READER_EVENT_ELEMENT_BEG:
            {
                // the element name
                tb_char_t const* name = tb_xml_reader_element(reader);
                tb_assert_and_check_break_state(name, failed, tb_true);

                // an empty element is taken off the path again below
                tb_size_t length = 0;
                if (TB_XML_READER_EVENT_ELEMENT_EMPTY == event) length = tb_static_string_size(&current);

                // append "/name"
                tb_static_string_chrcat(&current, '/');
                tb_static_string_cstrcat(&current, name);

                // is it the path?
                if (!tb_static_string_cstricmp(&current, path)) found = tb_true;
                tb_trace_d("path: %s", tb_static_string_cstr(&current));

                // an empty element has no closing tag that would remove it
                if (TB_XML_READER_EVENT_ELEMENT_EMPTY == event) tb_static_string_strip(&current, length);
            }
            break;
        case TB_XML_READER_EVENT_ELEMENT_END:
            {
                // cut the path at its last '/'
                tb_long_t slash = tb_static_string_strrchr(&current, 0, '/');
                if (slash >= 0) tb_static_string_strip(&current, slash);

                // is it the path?
                if (!tb_static_string_cstricmp(&current, path)) found = tb_true;
                tb_trace_d("path: %s", tb_static_string_cstr(&current));
            }
            break;
        default:
            break;
        }

        // found? move the stream back in front of this event, that may fail
        if (found && !(found = tb_stream_seek(impl->rstream, before))) failed = tb_true;

        // the offset in front of the next event
        before = tb_stream_offset(impl->rstream);
    }

    // exit the current path
    tb_static_string_exit(&current);

    // reset the level
    impl->level = 0;

    // not found? rewind the stream
    if (!found) tb_stream_seek(impl->rstream, 0);

    // found?
    return found;
}