/* Point the input stream of a transfer at the given url.
 *
 * The transfer must currently be in the closed state. If the transfer already
 * holds an input stream whose type equals the type probed from the url, that
 * stream is kept and only its url is changed. Otherwise the held stream is
 * dropped (and exited first when the transfer owns it) and a new stream is
 * created from the url, with the transfer marked as its owner.
 *
 * @param transfer  the transfer
 * @param url       the url of the input stream
 *
 * @return          tb_true on success, tb_false on bad arguments, a transfer
 *                  that is not closed, an unknown url protocol, or a failed
 *                  stream ctrl/init
 */
tb_bool_t tb_async_transfer_init_istream_from_url(tb_async_transfer_ref_t transfer, tb_char_t const* url)
{
    // validate the arguments
    tb_async_transfer_impl_t* impl = (tb_async_transfer_impl_t*)transfer;
    tb_assert_and_check_return_val(impl && impl->aicp && url, tb_false);

    // the transfer has to be closed before its input can be replaced
    tb_assert_and_check_return_val(TB_STATE_CLOSED == tb_atomic_get(&impl->state), tb_false);

    // is there a previous stream which might be reused?
    if (impl->istream)
    {
        // probe the protocol of the url
        tb_size_t probed = tb_url_protocol_probe(url);

        // the probed protocol is used directly as the stream type, so both enums must agree
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_FILE == (tb_size_t)TB_STREAM_TYPE_FILE);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_HTTP == (tb_size_t)TB_STREAM_TYPE_HTTP);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_SOCK == (tb_size_t)TB_STREAM_TYPE_SOCK);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_DATA == (tb_size_t)TB_STREAM_TYPE_DATA);

        // reject protocols which do not map onto a stream type
        tb_size_t wanted = probed;
        if (!wanted || wanted > TB_STREAM_TYPE_DATA)
        {
            tb_trace_e("unknown stream for url: %s", url);
            return tb_false;
        }

        // same stream type: keep the stream and just switch its url
        if (tb_async_stream_type(impl->istream) == wanted)
        {
            if (!tb_async_stream_ctrl(impl->istream, TB_STREAM_CTRL_SET_URL, url)) return tb_false;
            return tb_true;
        }

        // different stream type: exit the stream if we own it, then forget it
        if (impl->iowner) tb_async_stream_exit(impl->istream);
        impl->istream = tb_null;
    }

    // no usable previous stream: create a new one for this url
    impl->istream = tb_async_stream_init_from_url(impl->aicp, url);
    tb_assert_and_check_return_val(impl->istream, tb_false);

    // the transfer created this stream, so it owns it
    impl->iowner = 1;

    // ok
    return tb_true;
}
/* Advance the xml reader until the next link-like element yields a url.
 *
 * Only <a>, <link>, <img>, <frame> and <source> elements are inspected
 * (names are compared case-insensitively). For such an element the first
 * non-empty "href" or "src" attribute whose value probes as an http or file
 * url is stored into url and the scan stops. Once a url has been stored the
 * remaining attributes of that element are not examined, so a later attribute
 * can neither overwrite the stored url nor reset the result.
 *
 * @param reader    the xml reader, it is advanced by this call
 * @param url       the url object which receives the found url
 *
 * @return          non-zero if a url was stored, 0 if the arguments are
 *                  invalid or the reader produced no more events
 */
static tb_size_t tb_demo_spider_parser_get_url(tb_xml_reader_ref_t reader, tb_url_ref_t url)
{
    // check
    tb_assert_and_check_return_val(reader && url, tb_false);

    // walk the reader events until a url was found or the events are exhausted
    tb_size_t ok = 0;
    tb_size_t event = TB_XML_READER_EVENT_NONE;
    while (!ok && (event = tb_xml_reader_next(reader)))
    {
        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_EMPTY:
        case TB_XML_READER_EVENT_ELEMENT_BEG:
            {
                // the element name
                tb_char_t const* name = tb_xml_reader_element(reader);
                tb_check_break(name);

                // <a href="" />
                // <link href="" />
                // <img src="" />
                // <frame src="" />
                // <source src="" />
                //
                // note: <script src="" /> is not matched here
                if (    !tb_stricmp(name, "a")
                    ||  !tb_stricmp(name, "link")
                    ||  !tb_stricmp(name, "img")
                    ||  !tb_stricmp(name, "frame")
                    ||  !tb_stricmp(name, "source"))
                {
                    // walk the attributes, but stop at the first one which gave us a url
                    tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader);
                    for (; attr && !ok; attr = attr->next)
                    {
                        // non-empty href or src?
                        if (    tb_string_size(&attr->data)
                            &&  (   !tb_string_cstricmp(&attr->name, "href")
                                ||  !tb_string_cstricmp(&attr->name, "src")))
                        {
                            // the url protocol
                            tb_size_t protocol = tb_url_protocol_probe(tb_string_cstr(&attr->data));

                            // http?
                            if (protocol == TB_URL_PROTOCOL_HTTP)
                            {
                                // save the whole url, a failure here lets the walk continue
                                ok = tb_url_set(url, tb_string_cstr(&attr->data));
                            }
                            // file?
                            else if (protocol == TB_URL_PROTOCOL_FILE)
                            {
                                // save only the path part of the url
                                tb_url_path_set(url, tb_string_cstr(&attr->data));

                                // ok
                                ok = tb_true;
                            }
                        }
                    }
                }
            }
            break;
        default:
            break;
        }
    }

    // ok?
    return ok;
}