/* //////////////////////////////////////////////////////////////////////////////////////
 * implementation
 */

/* set the url of the stream, open it and check whether the content looks like html
 *
 * Urls whose last ".suffix" names a well-known non-html resource are rejected
 * without touching the network. Otherwise the stream is opened and up to 256
 * prefetched bytes are searched (ignoring case) for "<!DOCTYPE html>" or "<html".
 *
 * @param stream    the stream used to open the url
 * @param url       the url
 *
 * @return          tb_true if the stream is open and the content looks like html,
 *                  otherwise tb_false (the stream is closed in this case)
 */
static tb_bool_t tb_demo_spider_parser_open_html(tb_stream_ref_t stream, tb_char_t const* url)
{
    // check
    tb_assert_and_check_return_val(stream && url, tb_false);

    // the suffixes of the resources which are never parsed as html
    static tb_char_t const* const s_suffixes[] =
    {
        ".css", ".js",  ".png", ".jpg", ".gif", ".ico", ".bmp", ".mp4"
    ,   ".mp3", ".flv", ".avi", ".exe", ".msi", ".zip", ".rar", ".7z"
    };

    // done
    tb_bool_t is_html = tb_false;
    do
    {
        // reject the url if its last ".suffix" is one of the known non-html suffixes
        tb_char_t const* suffix = tb_strrchr(url, '.');
        if (suffix)
        {
            tb_size_t i = 0;
            tb_size_t n = sizeof(s_suffixes) / sizeof(s_suffixes[0]);
            while (i < n && tb_stricmp(suffix, s_suffixes[i])) i++;
            if (i < n) break;
        }

        // set the url and open the stream
        if (!tb_stream_ctrl(stream, TB_STREAM_CTRL_SET_URL, url)) break;
        if (!tb_stream_open(stream)) break;

        // nothing to sniff if the stream reports a zero size
        tb_hong_t size = tb_stream_size(stream);
        tb_check_break(size);

        // prefetch at most 256 bytes without consuming them
        tb_byte_t*  data = tb_null;
        tb_size_t   need = tb_min((tb_size_t)size, 256);
        if (!tb_stream_need(stream, &data, need)) break;

        // html if either marker occurs in the prefetched data
        tb_char_t const* head = (tb_char_t const*)data;
        if (tb_strnistr(head, need, "<!DOCTYPE html>") || tb_strnistr(head, need, "<html"))
            is_html = tb_true;

    } while (0);

    // not html or failed? close the stream
    if (!is_html && stream) tb_stream_clos(stream);

    // ok?
    return is_html;
}
/* the open callback of the input stream
 *
 * After a successful open it creates the output file stream and starts an async
 * transfer from the input stream to it. The output path is the "more0" option if
 * given, otherwise the current directory plus the last path component of the "url"
 * option (or "/async_stream.file" if the url has no '/' or '\\').
 *
 * @param stream    the input stream
 * @param state     the open state, TB_STATE_OK if the stream was opened
 * @param priv      the demo context (tb_demo_context_t*)
 *
 * @return          tb_true if the transfer was started, otherwise tb_false
 */
static tb_bool_t tb_demo_istream_open_func(tb_async_stream_ref_t stream, tb_size_t state, tb_cpointer_t priv)
{
    // check
    tb_demo_context_t* context = (tb_demo_context_t*)priv;
    tb_assert_and_check_return_val(stream && context && context->option, tb_false);

    // done
    tb_bool_t ok = tb_false;
    do
    {
        // open failed?
        if (state != TB_STATE_OK)
        {
            // print verbose info
            if (context->verbose)
            {
                tb_char_t const* url = tb_null;
                tb_async_stream_ctrl(stream, TB_STREAM_CTRL_GET_URL, &url);
                tb_printf("open: %s: %s\n", url, tb_state_cstr(state));
            }
            break;
        }

        // trace
        if (context->verbose) tb_printf("open: ok\n");

        // init ostream
        if (tb_option_find(context->option, "more0"))
        {
            // the path
            tb_char_t const* path = tb_option_item_cstr(context->option, "more0");

            // init
            context->ostream = tb_async_stream_init_from_file(tb_async_stream_aicp(stream), path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

            // print verbose info
            if (context->verbose) tb_printf("save: %s: ..\n", path);
        }
        else
        {
            // the name: the last component of the url, including the leading separator
            tb_char_t const* url  = tb_option_item_cstr(context->option, "url");
            tb_char_t const* name = url? tb_strrchr(url, '/') : tb_null;
            if (!name && url) name = tb_strrchr(url, '\\');
            if (!name) name = "/async_stream.file";

            // the path
            tb_char_t path[TB_PATH_MAXN] = {0};
            if (tb_directory_curt(path, TB_PATH_MAXN))
            {
                /* append the name only if it fits: the name comes from the url, so an
                 * unchecked tb_strcat() could write past the end of the path buffer.
                 * if it does not fit, the ostream stays null and the check below fails.
                 */
                if (tb_strlen(path) + tb_strlen(name) < TB_PATH_MAXN)
                {
                    // append name
                    tb_strcat(path, name);

                    // init file
                    context->ostream = tb_async_stream_init_from_file(tb_async_stream_aicp(stream), path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (context->verbose) tb_printf("save: %s: ..\n", path);
                }
            }
        }
        tb_assert_and_check_break(context->ostream);

        // init transfer
        context->transfer = tb_async_transfer_init(tb_null, tb_true);
        tb_assert_and_check_break(context->transfer);

        // init transfer stream
        if (!tb_async_transfer_init_istream(context->transfer, stream)) break;
        if (!tb_async_transfer_init_ostream(context->transfer, context->ostream)) break;

        // the limit rate
        if (tb_option_find(context->option, "limitrate"))
            tb_async_transfer_limitrate(context->transfer, tb_option_item_uint32(context->option, "limitrate"));

        // open and done transfer
        if (!tb_async_transfer_open_done(context->transfer, 0, tb_demo_transfer_done_func, context)) break;

        // ok
        ok = tb_true;

    } while (0);

    /* failed? exit wait
     *
     * post the event for every failure, not only for a failed open: if the open
     * succeeded but the ostream or the transfer could not be set up, no transfer is
     * started from here.
     *
     * NOTE(review): presumably a waiter on context->event would otherwise never be
     * woken up - confirm against the code which waits for this event.
     */
    if (!ok && context->event) tb_event_post(context->event);

    // ok?
    return ok;
}
/* ////////////////////////////////////////////////////////////////////////////////////// * main */ tb_int_t tb_demo_stream_main(tb_int_t argc, tb_char_t** argv) { // done tb_option_ref_t option = tb_null; tb_stream_ref_t istream = tb_null; tb_stream_ref_t ostream = tb_null; tb_stream_ref_t pstream = tb_null; do { // init option option = tb_option_init("stream", "the stream demo", g_options); tb_assert_and_check_break(option); // done option if (tb_option_done(option, argc - 1, &argv[1])) { // debug & verbose tb_bool_t debug = tb_option_find(option, "debug"); tb_bool_t verbose = tb_option_find(option, "no-verbose")? tb_false : tb_true; // done url if (tb_option_find(option, "url")) { // init istream istream = tb_stream_init_from_url(tb_option_item_cstr(option, "url")); tb_assert_and_check_break(istream); // ctrl http if (tb_stream_type(istream) == TB_STREAM_TYPE_HTTP) { // enable gzip? if (tb_option_find(option, "gzip")) { // auto unzip if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_AUTO_UNZIP, 1)) break; // need gzip if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Accept-Encoding", "gzip,deflate")) break; } // enable debug? if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD_FUNC, debug? tb_demo_stream_head_func : tb_null)) break; // custem header? if (tb_option_find(option, "header")) { // init tb_string_t key; tb_string_t val; tb_string_init(&key); tb_string_init(&val); // done tb_bool_t k = tb_true; tb_char_t const* p = tb_option_item_cstr(option, "header"); while (*p) { // is key? if (k) { if (*p != ':' && !tb_isspace(*p)) tb_string_chrcat(&key, *p++); else if (*p == ':') { // skip ':' p++; // skip space while (*p && tb_isspace(*p)) p++; // is val now k = tb_false; } else p++; } // is val? 
else { if (*p != ';') tb_string_chrcat(&val, *p++); else { // skip ';' p++; // skip space while (*p && tb_isspace(*p)) p++; // set header if (tb_string_size(&key) && tb_string_size(&val)) { if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val)); if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) break; } // is key now k = tb_true; // clear key & val tb_string_clear(&key); tb_string_clear(&val); } } } // set header if (tb_string_size(&key) && tb_string_size(&val)) { if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val)); if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) break; } // exit tb_string_exit(&key); tb_string_exit(&val); } // keep alive? if (tb_option_find(option, "keep-alive")) { if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Connection", "keep-alive")) break; } // post-data? if (tb_option_find(option, "post-data")) { tb_char_t const* post_data = tb_option_item_cstr(option, "post-data"); tb_hize_t post_size = tb_strlen(post_data); if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break; if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_DATA, post_data, post_size)) break; if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break; if (debug) tb_printf("post: %llu\n", post_size); } // post-file? 
else if (tb_option_find(option, "post-file")) { tb_char_t const* url = tb_option_item_cstr(option, "post-file"); if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break; if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_URL, url)) break; if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break; if (debug) tb_printf("post: %s\n", url); } } // set range if (tb_option_find(option, "range")) { tb_char_t const* p = tb_option_item_cstr(option, "range"); if (p) { // the bof tb_hize_t eof = 0; tb_hize_t bof = tb_atoll(p); while (*p && tb_isdigit(*p)) p++; if (*p == '-') { p++; eof = tb_atoll(p); } if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_RANGE, bof, eof)) break; } } // set timeout if (tb_option_find(option, "timeout")) { tb_size_t timeout = tb_option_item_uint32(option, "timeout"); if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_SET_TIMEOUT, timeout)) break; } // print verbose info if (verbose) tb_printf("open: %s: ..\n", tb_option_item_cstr(option, "url")); // open istream if (!tb_stream_open(istream)) { // print verbose info if (verbose) tb_printf("open: %s\n", tb_state_cstr(tb_stream_state(istream))); break; } // print verbose info if (verbose) tb_printf("open: ok\n"); // init ostream if (tb_option_find(option, "more0")) { // the path tb_char_t const* path = tb_option_item_cstr(option, "more0"); // init ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC); // print verbose info if (verbose) tb_printf("save: %s\n", path); } else { // the name tb_char_t const* name = tb_strrchr(tb_option_item_cstr(option, "url"), '/'); if (!name) name = tb_strrchr(tb_option_item_cstr(option, "url"), '\\'); if (!name) name = "/stream.file"; // the path tb_char_t path[TB_PATH_MAXN] = {0}; if (tb_directory_current(path, TB_PATH_MAXN)) tb_strcat(path, name); else break; // init file ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW 
| TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC); // print verbose info if (verbose) tb_printf("save: %s\n", path); } tb_assert_and_check_break(ostream); // the limit rate tb_size_t limitrate = 0; if (tb_option_find(option, "limitrate")) limitrate = tb_option_item_uint32(option, "limitrate"); // save it tb_hong_t save = 0; tb_demo_context_t context = {0}; context.verbose = verbose; if ((save = tb_transfer_done(istream, ostream, limitrate, tb_demo_stream_save_func, &context)) < 0) break; } else tb_option_help(option); } else tb_option_help(option); } while (0); // exit pstream if (pstream) tb_stream_exit(pstream); pstream = tb_null; // exit istream if (istream) tb_stream_exit(istream); istream = tb_null; // exit ostream if (ostream) tb_stream_exit(ostream); ostream = tb_null; // exit option if (option) tb_option_exit(option); option = tb_null; return 0; }