/* //////////////////////////////////////////////////////////////////////////////////////
 * interfaces
 */

/* Score how likely it is that the given url refers to a sqlite3 database.
 *
 * @param url   the database url, must not be null
 *
 * @return      100 when the url arguments contain "type=sqlite3" or the file
 *              starts with the text "SQLite format 3" (compared ignoring case),
 *              20 when only the file protocol matched, otherwise 0
 */
tb_size_t tb_database_sqlite3_probe(tb_url_ref_t url)
{
    // check
    tb_assert_and_check_return_val(url, 0);

    // probe it
    tb_size_t       result = 0;
    tb_stream_ref_t reader = tb_null;
    do
    {
        // an explicit "type=sqlite3" url argument settles it at once
        tb_char_t const* query = tb_url_args(url);
        tb_char_t const* type  = query? tb_stristr(query, "type=") : tb_null;
        if (type && !tb_strnicmp(type + 5, "sqlite3", 7))
        {
            result = 100;
            break;
        }

        // a url with a host or a port is not treated as sqlite3
        if (tb_url_host(url) || tb_url_port(url)) break;

        // the database location
        tb_char_t const* location = tb_url_cstr((tb_url_ref_t)url);
        tb_assert_and_check_break(location);

        // the file protocol earns a partial score
        if (TB_URL_PROTOCOL_FILE == tb_url_protocol(url)) result += 20;

        // make the stream for reading the file head
        reader = tb_stream_init_from_url(location);
        tb_assert_and_check_break(reader);

        // open it
        if (!tb_stream_open(reader)) break;

        // read 15 bytes, the zero-filled 16th byte keeps the text terminated
        tb_char_t magic[16] = {0};
        if (!tb_stream_bread(reader, (tb_byte_t*)magic, 15)) break;

        // does the head carry the sqlite3 signature?
        if (!tb_stricmp(magic, "SQLite format 3")) result = 100;

    } while (0);

    // release the stream on every path
    if (reader) tb_stream_exit(reader);
    reader = tb_null;

    // trace
    tb_trace_d("probe: %s, score: %lu", tb_url_cstr((tb_url_ref_t)url), result);

    // the score
    return result;
}
/* Search the string for the c-string s2 via tb_stristr, starting at offset p.
 *
 * @param string    the string to search
 * @param p         the start offset; 0 searches from the beginning and the
 *                  offset must be less than the string size
 * @param s2        the c-string to look for
 *
 * @return          the offset of the match measured from the start of the
 *                  string, or -1 when the arguments are rejected or nothing
 *                  was found
 */
tb_long_t tb_string_cstristr(tb_string_ref_t string, tb_size_t p, tb_char_t const* s2)
{
    // check: offset 0 is a valid start, only offsets beyond the data are rejected
    tb_char_t const*    s = tb_string_cstr(string);
    tb_size_t           n = tb_string_size(string);
    tb_assert_and_check_return_val(s && p < n, -1);

    // search from the offset, but report the position relative to the string start
    tb_char_t const* q = tb_stristr(s + p, s2);
    return (q? (tb_long_t)(q - s) : -1);
}
/* Score how likely it is that the stream holds an xml property list.
 *
 * @param stream    the stream to peek into, must not be null
 *
 * @return          0 when the data does not start with "<?xml" (ignoring case),
 *                  5 when the larger probe window is not available,
 *                  80 when "DOCTYPE plist" occurs in the window, otherwise 10
 */
static tb_size_t tb_object_xplist_reader_probe(tb_stream_ref_t stream)
{
    // the size of the window searched for the doctype
    enum { probe_maxn = 256 };

    // check
    tb_assert_and_check_return_val(stream, 0);

    // need the xml declaration prefix
    tb_byte_t* p = tb_null;
    if (!tb_stream_need(stream, &p, 5)) return 0;
    tb_assert_and_check_return_val(p, 0);

    // not xml data?
    if (tb_strnicmp((tb_char_t const*)p, "<?xml", 5)) return 0;

    // need more data
    if (!tb_stream_need(stream, &p, probe_maxn)) return 5;
    tb_assert_and_check_return_val(p, 5);

    /* The window is raw stream data and nothing here shows it to be
     * NUL-terminated, so searching it in place could run past the requested
     * bytes. Copy it into a terminated buffer first.
     *
     * NOTE(review): assumes a successful need makes probe_maxn bytes readable at p.
     */
    tb_char_t window[probe_maxn + 1];
    tb_size_t i = 0;
    for (i = 0; i < probe_maxn; i++) window[i] = (tb_char_t)p[i];
    window[probe_maxn] = '\0';

    // is xplist?
    return tb_stristr(window, "DOCTYPE plist")? 80 : 10;
}
/* Post a download task for the given input url to the transfer pool.
 *
 * The task is prepared while holding the spider lock and posted after the
 * lock has been released.
 *
 * @param spider    the spider, must not be null
 * @param iurl      the input url, must not be null
 * @param full      optional; written only when the url was a repeat or the
 *                  post failed: tb_true if the task count had reached
 *                  TB_DEMO_SPIDER_TASK_MAXN, otherwise tb_false
 *
 * @return          tb_true when the url was skipped, was a repeat or has been
 *                  posted, tb_false otherwise
 */
static tb_bool_t tb_demo_spider_task_done(tb_demo_spider_t* spider, tb_char_t const* iurl, tb_bool_t* full)
{
    // check
    tb_assert_and_check_return_val(spider && iurl, tb_false);

    // give up once the spider is no longer in the ok state
    tb_check_return_val(TB_STATE_OK == tb_atomic_get(&spider->state), tb_false);

    // in home-only mode, urls without the home domain are skipped as a success
    if (spider->home_only && !tb_stristr(iurl, spider->home_domain))
    {
        tb_trace_d("task: done: %s: skip", iurl);
        return tb_true;
    }

    // the state shared by both phases
    tb_bool_t               posted    = tb_false;
    tb_bool_t               duplicate = tb_false;
    tb_size_t               pending   = 0;
    tb_demo_spider_task_t*  job       = tb_null;

    // phase 1: prepare the task under the lock
    tb_spinlock_enter(&spider->lock);
    do
    {
        // check
        tb_assert_and_check_break(spider->filter && spider->pool);

        // the current task count
        pending = tb_fixed_pool_size(spider->pool);

        // make the output url in the buffer owned by the spider
        if (!tb_demo_spider_make_ourl(spider, iurl, spider->ourl, sizeof(spider->ourl) - 1)) break;

        // a failed filter set is treated as: this url has been done already
        if (!tb_bloom_filter_set(spider->filter, spider->ourl))
        {
            tb_trace_d("task: size: %lu, done: %s: repeat", pending, iurl);
            duplicate = tb_true;
            posted    = tb_true;
            break;
        }

        // trace
        tb_trace_d("task: size: %lu, done: %s: ..", pending, iurl);

        // no room for another task?
        tb_check_break(pending < TB_DEMO_SPIDER_TASK_MAXN);

        // make the zeroed task
        job = (tb_demo_spider_task_t*)tb_fixed_pool_malloc0(spider->pool);
        tb_assert_and_check_break(job);

        // fill in the task
        job->spider = spider;
        tb_strlcpy(job->iurl, iurl, sizeof(job->iurl) - 1);
        tb_strlcpy(job->ourl, spider->ourl, sizeof(job->ourl) - 1);

        // prepared
        posted = tb_true;

    } while (0);
    tb_spinlock_leave(&spider->lock);

    // phase 2: post the freshly prepared task, outside the lock
    if (posted && !duplicate)
    {
        posted = tb_false;
        do
        {
            // check
            tb_assert_and_check_break(job);

            // the spider state may have changed in the meantime
            tb_check_break(TB_STATE_OK == tb_atomic_get(&spider->state));

            // the output file exists already? count it as a repeat
            if (tb_file_info(job->ourl, tb_null))
            {
                tb_trace_d("task: size: %lu, done: %s: repeat", pending, iurl);
                duplicate = tb_true;
                posted    = tb_true;
                break;
            }

            // hand the task over to the transfer pool
            posted = tb_transfer_pool_done(tb_transfer_pool(), job->iurl, job->ourl, 0, spider->limited_rate, tb_demo_spider_task_save, tb_demo_spider_task_ctrl, job);
            tb_assert_and_check_break(posted);

        } while (0);
    }

    // posted for real: nothing to clean up here
    if (posted && !duplicate) return posted;

    // repeat or failed: the task, if any, is released here
    if (job) tb_demo_spider_task_exit(job);
    job = tb_null;

    // report the failure only when the caller does not ask for the full state
    if (!full && !posted)
    {
        tb_trace_e("task: size: %lu, done: %s: post failed", pending, iurl);
    }

    // save the full state
    if (full) *full = (pending < TB_DEMO_SPIDER_TASK_MAXN)? tb_false : tb_true;

    // ok?
    return posted;
}