/* read one object from an xml plist stream
 *
 * walks the xml events of the stream and, for the first element (empty or
 * begin) whose name differs from "plist" under tb_stricmp, looks up the
 * matching reader function and lets it build the object.
 *
 * @param stream    the stream handed to tb_xml_reader_open
 *
 * @return          the object produced by the element reader, or tb_null when
 *                  the xml reader cannot be created or opened, the events run
 *                  out, an element has no name, or no reader function is
 *                  registered for the element name
 */
static tb_object_ref_t tb_object_xplist_reader_done(tb_stream_ref_t stream)
{
    // create the xml reader
    tb_object_xplist_reader_t reader = {0};
    reader.reader = tb_xml_reader_init();
    tb_assert_and_check_return_val(reader.reader, tb_null);

    // the result, stays null until an element reader yields an object
    tb_object_ref_t object = tb_null;

    // attach the reader to the stream
    if (tb_xml_reader_open(reader.reader, stream, tb_false))
    {
        // walk the events until we get an object, hit an error or run out of events
        tb_bool_t stop = tb_false;
        while (!stop && !object)
        {
            // the next event, zero ends the walk
            tb_size_t evt = tb_xml_reader_next(reader.reader);
            if (!evt) break;

            // only element events are interesting
            if (    evt != TB_XML_READER_EVENT_ELEMENT_EMPTY
                &&  evt != TB_XML_READER_EVENT_ELEMENT_BEG)
                continue;

            // the element name, a missing name aborts the walk
            tb_char_t const* element = tb_xml_reader_element(reader.reader);
            tb_assert_and_check_break_state(element, stop, tb_true);

            // skip the <plist> wrapper itself
            if (!tb_stricmp(element, "plist")) continue;

            // find the reader function for this element, an unknown element aborts the walk
            tb_object_xplist_reader_func_t handler = tb_object_xplist_reader_func(element);
            tb_assert_and_check_break_state(handler, stop, tb_true);

            // build the object
            object = handler(&reader, evt);
        }
    }

    // release the xml reader
    tb_xml_reader_exit(reader.reader);

    // the object or null
    return object;
}
/* parse one downloaded page and post a task for every url found in it
 *
 * opens task->ourl as html, reads urls from it while the spider state is
 * TB_STATE_OK and hands each url to tb_demo_spider_task_done. a url that is
 * rejected with the "full" flag set is kept in parser->cache when the cache has
 * room. afterwards the cached urls are posted again, front first, until the
 * cache is empty or a post fails.
 *
 * @param worker    the thread pool worker, used to get the parser
 * @param priv      the task, cast to tb_demo_spider_task_t*
 */
static tb_void_t tb_demo_spider_parser_task_done(tb_thread_pool_worker_ref_t worker, tb_cpointer_t priv)
{
    // the task
    tb_demo_spider_task_t* task = (tb_demo_spider_task_t*)priv;
    tb_assert_and_check_return(worker && task && task->spider);

    // the parser of this worker
    tb_demo_spider_parser_t* parser = tb_demo_spider_parser_init(worker);
    tb_assert_and_check_return(parser && parser->stream && parser->reader && parser->cache);

    // open the page
    if (tb_demo_spider_parser_open_html(parser->stream, task->ourl))
    {
        // attach the xml reader to the page
        if (tb_xml_reader_open(parser->reader, parser->stream, tb_false))
        {
            // trace
            tb_trace_d("parser: open: %s", task->ourl);

            // start from the url of the task
            tb_url_set(&parser->iurl, task->iurl);

            // post a task for each url of the page
            for (;;)
            {
                // the spider has been stopped?
                if (TB_STATE_OK != tb_atomic_get(&task->spider->state)) break;

                // no more urls?
                if (!tb_demo_spider_parser_get_url(parser->reader, &parser->iurl)) break;

                // trace
                tb_trace_d("parser: done: %s", tb_url_get(&parser->iurl));

                // post it, nothing more to do when it was accepted
                tb_bool_t is_full = tb_false;
                if (tb_demo_spider_task_done(task->spider, tb_url_get(&parser->iurl), &is_full)) continue;

                // a rejection for any reason other than "full" ends the parsing
                tb_assert_and_check_break(is_full);

                // keep the url for later if the cache has room
                if (!tb_circle_queue_full(parser->cache))
                {
                    tb_circle_queue_put(parser->cache, tb_url_get(&parser->iurl));
                }

                // trace, note: emitted even if the cache was full and the url was not stored
                tb_trace_d("parser: cache: save: %s, size: %lu", tb_url_get(&parser->iurl), tb_circle_queue_size(parser->cache));
            }

            // close the reader
            tb_xml_reader_clos(parser->reader);
        }

        // close the page
        tb_stream_clos(parser->stream);
    }

    // post the cached urls again
    while (!tb_circle_queue_null(parser->cache))
    {
        // the url at the front of the cache
        tb_char_t const* cached = (tb_char_t const*)tb_circle_queue_get(parser->cache);
        tb_assert_and_check_break(cached);

        // post it, keep it cached and give up for now when it is rejected
        if (!tb_demo_spider_task_done(task->spider, cached, tb_null)) break;

        // trace
        tb_trace_d("parser: cache: load: %s, size: %lu", cached, tb_circle_queue_size(parser->cache));

        // accepted, drop it from the cache
        tb_circle_queue_pop(parser->cache);
    }
}