/* probe the head of a stream and score how likely it is an xml plist
 *
 * @param stream    the stream to peek at
 *
 * @return          0:  the argument check fails, the first 5 bytes cannot be
 *                      fetched via tb_stream_need(), or they do not match "<?xml"
 *                  5:  the head matches "<?xml" but the 256-byte window cannot be fetched
 *                  10: the window was fetched but "DOCTYPE plist" was not found in it
 *                  80: "DOCTYPE plist" was found inside the 256-byte window
 */
static tb_size_t tb_object_xplist_reader_probe(tb_stream_ref_t stream)
{
    // check
    tb_assert_and_check_return_val(stream, 0);

    // peek at the xml declaration prefix
    tb_byte_t* head = tb_null;
    if (!tb_stream_need(stream, &head, 5)) return 0;
    tb_assert_and_check_return_val(head, 0);

    // not xml data? give up early
    if (tb_strnicmp((tb_char_t const*)head, "<?xml", 5)) return 0;

    // it is xml, widen the window so that the doctype can be searched
    if (!tb_stream_need(stream, &head, 256)) return 5;
    tb_assert_and_check_return_val(head, 5);

    // is xplist?
    if (tb_strnistr((tb_char_t const*)head, 256, "DOCTYPE plist")) return 80;

    // xml, but no plist doctype was seen in the window
    return 10;
}
/* //////////////////////////////////////////////////////////////////////////////////////
 * implementation
 */

/* point the stream at the url, open it and check whether the content looks like html
 *
 * The url is rejected before any stream call when the text after its last '.'
 * matches one of the listed non-html suffixes. Otherwise the stream is opened
 * and at most 256 bytes are prefetched and searched for "<!DOCTYPE html>" or "<html".
 *
 * @param stream    the stream to configure and open
 * @param url       the url to open
 *
 * @return          tb_true if one of the html markers was found in the prefetched data,
 *                  tb_false otherwise; on tb_false the stream is passed to tb_stream_clos()
 */
static tb_bool_t tb_demo_spider_parser_open_html(tb_stream_ref_t stream, tb_char_t const* url)
{
    // check
    tb_assert_and_check_return_val(stream && url, tb_false);

    // done
    tb_bool_t is_html = tb_false;
    do
    {
        // find the .suffix and skip urls with a known non-html suffix
        tb_char_t const* suffix = tb_strrchr(url, '.');
        if (suffix)
        {
            static tb_char_t const* const skipped[] =
            {
                ".css", ".js",  ".png", ".jpg"
            ,   ".gif", ".ico", ".bmp", ".mp4"
            ,   ".mp3", ".flv", ".avi", ".exe"
            ,   ".msi", ".zip", ".rar", ".7z"
            };

            tb_bool_t   skip = tb_false;
            tb_size_t   i = 0;
            for (i = 0; i < sizeof(skipped) / sizeof(skipped[0]); i++)
            {
                // tb_stricmp() yields zero on a match
                if (!tb_stricmp(suffix, skipped[i]))
                {
                    skip = tb_true;
                    break;
                }
            }

            // not html?
            if (skip) break;
        }

        // set the url and open the stream
        if (!tb_stream_ctrl(stream, TB_STREAM_CTRL_SET_URL, url) || !tb_stream_open(stream)) break;

        // the stream size, a zero size means there is nothing to inspect
        tb_hong_t total = tb_stream_size(stream);
        if (!total) break;

        // prefetch some data, no more than 256 bytes
        tb_byte_t*  head = tb_null;
        tb_size_t   window = tb_min((tb_size_t)total, 256);
        if (!tb_stream_need(stream, &head, window)) break;

        // is html? the doctype is tried first, then the opening html tag
        is_html = (     tb_strnistr((tb_char_t const*)head, window, "<!DOCTYPE html>")
                    ||  tb_strnistr((tb_char_t const*)head, window, "<html"))? tb_true : tb_false;

    } while (0);

    // failed? close the stream
    if (!is_html && stream) tb_stream_clos(stream);

    // ok?
    return is_html;
}