예제 #1
0
파일: spider.c 프로젝트: ahnan4arch/tbox
/* //////////////////////////////////////////////////////////////////////////////////////
 * implementation
 */ 
/* open the stream for the given url and check whether it delivers html
 *
 * urls whose last ".suffix" names a known non-html resource are rejected
 * without touching the stream. otherwise the url is set on the stream, the
 * stream is opened and at most its first 256 bytes are searched
 * (case-insensitively) for an html marker.
 *
 * @param stream    the stream used to load the url
 * @param url       the url to probe
 *
 * @return          tb_true if an html marker was found (the stream is not closed here),
 *                  tb_false otherwise; unless the argument check fails,
 *                  tb_stream_clos() is called on the stream before returning
 */
static tb_bool_t tb_demo_spider_parser_open_html(tb_stream_ref_t stream, tb_char_t const* url)
{
    // the suffixes of resources that are never html
    static tb_char_t const* const s_skipped[] =
    {
        ".css", ".js",  ".png", ".jpg"
    ,   ".gif", ".ico", ".bmp", ".mp4"
    ,   ".mp3", ".flv", ".avi", ".exe"
    ,   ".msi", ".zip", ".rar", ".7z"
    };

    // both arguments are required
    tb_assert_and_check_return_val(stream && url, tb_false);

    // probe the url
    tb_bool_t is_html = tb_false;
    do
    {
        // compare the last ".suffix" of the url against the skip table
        tb_char_t const* suffix = tb_strrchr(url, '.');
        if (suffix)
        {
            // stop at the first suffix that compares equal
            tb_size_t const count = sizeof(s_skipped) / sizeof(s_skipped[0]);
            tb_size_t       index = 0;
            while (index < count && tb_stricmp(suffix, s_skipped[index])) index++;

            // a non-html resource? do not even open it
            if (index < count) break;
        }

        // bind the url to the stream and open it
        if (!tb_stream_ctrl(stream, TB_STREAM_CTRL_SET_URL, url) || !tb_stream_open(stream)) break;

        // nothing to read?
        tb_hong_t total = tb_stream_size(stream);
        tb_check_break(total);

        // peek at the head of the stream, 256 bytes at most
        tb_byte_t*  head = tb_null;
        tb_size_t   head_size = tb_min((tb_size_t)total, 256);
        if (!tb_stream_need(stream, &head, head_size)) break;

        // html if either the doctype or the <html tag shows up in the head
        tb_char_t const* text = (tb_char_t const*)head;
        if (    tb_strnistr(text, head_size, "<!DOCTYPE html>")
            ||  tb_strnistr(text, head_size, "<html"))
        {
            is_html = tb_true;
        }

    } while (0);

    // not html or failed? close the stream again
    if (!is_html && stream) tb_stream_clos(stream);

    // html?
    return is_html;
}
예제 #2
0
/* the open callback of the input async stream
 *
 * when the input stream was opened successfully, this creates the output file
 * stream and starts an async transfer from the input stream to it. the output
 * path is the "more0" option if present, otherwise the current directory
 * joined with the tail of the "url" option.
 *
 * @param stream    the input async stream
 * @param state     the open state, TB_STATE_OK on success
 * @param priv      the demo context (tb_demo_context_t*)
 *
 * @return          tb_true if the transfer has been set up and opened, tb_false otherwise
 */
static tb_bool_t tb_demo_istream_open_func(tb_async_stream_ref_t stream, tb_size_t state, tb_cpointer_t priv)
{
    // check
    tb_demo_context_t* context = (tb_demo_context_t*)priv;
    tb_assert_and_check_return_val(stream && context && context->option, tb_false);

    // done
    tb_bool_t ok = tb_false;
    do
    {
        // the open failed? report it and stop
        if (state != TB_STATE_OK)
        {
            // print verbose info
            if (context->verbose) 
            {
                tb_char_t const* url = tb_null;
                tb_async_stream_ctrl(stream, TB_STREAM_CTRL_GET_URL, &url);
                tb_printf("open: %s: %s\n", url, tb_state_cstr(state));
            }
            break;
        }
    
        // trace
        if (context->verbose) tb_printf("open: ok\n");

        // init ostream
        if (tb_option_find(context->option, "more0"))
        {
            // the path given by the user
            tb_char_t const* path = tb_option_item_cstr(context->option, "more0");

            // init
            context->ostream = tb_async_stream_init_from_file(tb_async_stream_aicp((tb_async_stream_ref_t)stream), path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

            // print verbose info
            if (context->verbose) tb_printf("save: %s: ..\n", path);
        }
        else 
        {
            // the name: the tail of the url from its last '/' (or '\\'), else a default name
            tb_char_t const* name = tb_strrchr(tb_option_item_cstr(context->option, "url"), '/');
            if (!name) name = tb_strrchr(tb_option_item_cstr(context->option, "url"), '\\');
            if (!name) name = "/async_stream.file";

            // the path: the current directory + the name
            tb_char_t path[TB_PATH_MAXN] = {0};
            if (tb_directory_curt(path, TB_PATH_MAXN))
            {
                // measure the directory and the name, tb_strcat() below does no bounds check
                tb_size_t path_size = 0;
                tb_size_t name_size = 0;
                while (path_size < TB_PATH_MAXN && path[path_size]) path_size++;
                while (name[name_size]) name_size++;

                // the joined path and its terminator would not fit into the buffer? give up
                if (name_size >= TB_PATH_MAXN - path_size) break;

                // append name
                tb_strcat(path, name);

                // init file
                context->ostream = tb_async_stream_init_from_file(tb_async_stream_aicp((tb_async_stream_ref_t)stream), path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                // print verbose info
                if (context->verbose) tb_printf("save: %s: ..\n", path);
            }
        }
        tb_assert_and_check_break(context->ostream);

        // init transfer
        context->transfer = tb_async_transfer_init(tb_null, tb_true);
        tb_assert_and_check_break(context->transfer);

        // init transfer stream
        if (!tb_async_transfer_init_istream(context->transfer, stream)) break;
        if (!tb_async_transfer_init_ostream(context->transfer, context->ostream)) break;

        // the limit rate
        if (tb_option_find(context->option, "limitrate"))
            tb_async_transfer_limitrate(context->transfer, tb_option_item_uint32(context->option, "limitrate"));

        // open and done transfer
        if (!tb_async_transfer_open_done(context->transfer, 0, tb_demo_transfer_done_func, context)) break;

        // ok
        ok = tb_true;

    } while (0);

    // the open failed? post the event of the context
    // NOTE(review): a failure after a successful open (ok == tb_false with
    // state == TB_STATE_OK) does not post the event here - confirm that
    // whoever waits on it is released through another path.
    if (state != TB_STATE_OK && context->event) tb_event_post(context->event);

    // ok?
    return ok;
}
예제 #3
0
파일: stream.c 프로젝트: AlexShiLucky/tbox
/* //////////////////////////////////////////////////////////////////////////////////////
 * main
 */ 
/* the entry of the stream demo
 *
 * parses the command line with the g_options table, opens the input stream
 * for the "url" option (applying the http, range and timeout options to it),
 * and saves its content with tb_transfer_done() to the "more0" path or, when
 * that option is absent, to the current directory joined with the tail of
 * the url. the help is printed when the options cannot be parsed or no url
 * is given.
 *
 * @param argc      the argument count, argv[0] is skipped
 * @param argv      the arguments
 *
 * @return          0 always, failures are only reported through the verbose output
 */
tb_int_t tb_demo_stream_main(tb_int_t argc, tb_char_t** argv)
{
    // done
    tb_option_ref_t     option = tb_null;
    tb_stream_ref_t     istream = tb_null;
    tb_stream_ref_t     ostream = tb_null;
    do
    {
        // init option
        option = tb_option_init("stream", "the stream demo", g_options);
        tb_assert_and_check_break(option);
    
        // done option
        if (tb_option_done(option, argc - 1, &argv[1]))
        {
            // debug & verbose
            tb_bool_t debug = tb_option_find(option, "debug");
            tb_bool_t verbose = tb_option_find(option, "no-verbose")? tb_false : tb_true;
        
            // done url
            if (tb_option_find(option, "url")) 
            {
                // init istream
                istream = tb_stream_init_from_url(tb_option_item_cstr(option, "url"));
                tb_assert_and_check_break(istream);
    
                // ctrl http
                if (tb_stream_type(istream) == TB_STREAM_TYPE_HTTP) 
                {
                    // enable gzip?
                    if (tb_option_find(option, "gzip"))
                    {
                        // auto unzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_AUTO_UNZIP, 1)) break;

                        // need gzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Accept-Encoding", "gzip,deflate")) break;
                    }

                    // enable debug? install the head callback only in debug mode
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD_FUNC, debug? tb_demo_stream_head_func : tb_null)) break;

                    // custom header? the format is "key: val; key: val"
                    if (tb_option_find(option, "header"))
                    {
                        // init
                        tb_string_t key;
                        tb_string_t val;
                        tb_string_init(&key);
                        tb_string_init(&val);

                        // done
                        tb_bool_t           failed = tb_false;
                        tb_bool_t           k = tb_true;
                        tb_char_t const*    p = tb_option_item_cstr(option, "header");
                        while (p && *p)
                        {
                            // is key?
                            if (k)
                            {
                                // spaces inside the key are dropped, ':' ends it
                                if (*p != ':' && !tb_isspace(*p)) tb_string_chrcat(&key, *p++);
                                else if (*p == ':') 
                                {
                                    // skip ':'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // is val now
                                    k = tb_false;
                                }
                                else p++;
                            }
                            // is val?
                            else
                            {
                                // the val runs up to the next ';'
                                if (*p != ';') tb_string_chrcat(&val, *p++);
                                else
                                {
                                    // skip ';'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // set header
                                    if (tb_string_size(&key) && tb_string_size(&val))
                                    {
                                        if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) 
                                        {
                                            // stop parsing, the failure is handled once the strings are released
                                            failed = tb_true;
                                            break;
                                        }
                                    }

                                    // is key now
                                    k = tb_true;

                                    // clear key & val
                                    tb_string_clear(&key);
                                    tb_string_clear(&val);
                                }
                            }
                        }

                        // set the last header, it has no trailing ';'
                        if (!failed && tb_string_size(&key) && tb_string_size(&val))
                        {
                            if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                            if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) failed = tb_true;
                        }

                        // exit, on the failure path too
                        tb_string_exit(&key);
                        tb_string_exit(&val);

                        // failed to set a header? abort
                        if (failed) break;
                    }

                    // keep alive?
                    if (tb_option_find(option, "keep-alive"))
                    {
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Connection", "keep-alive")) break;
                    }

                    // post-data?
                    if (tb_option_find(option, "post-data"))
                    {
                        tb_char_t const*    post_data = tb_option_item_cstr(option, "post-data");
                        tb_hize_t           post_size = tb_strlen(post_data);
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_DATA, post_data, post_size)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %llu\n", post_size);
                    }
                    // post-file?
                    else if (tb_option_find(option, "post-file"))
                    {
                        tb_char_t const* url = tb_option_item_cstr(option, "post-file");
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_URL, url)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %s\n", url);
                    }
                }

                // set range, the format is "bof" or "bof-eof"
                if (tb_option_find(option, "range"))
                {
                    tb_char_t const* p = tb_option_item_cstr(option, "range");
                    if (p)
                    {
                        // the bof
                        tb_hize_t eof = 0;
                        tb_hize_t bof = tb_atoll(p);

                        // the eof follows the '-' if there is one
                        while (*p && tb_isdigit(*p)) p++;
                        if (*p == '-')
                        {
                            p++;
                            eof = tb_atoll(p);
                        }
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_RANGE, bof, eof)) break;
                    }
                }

                // set timeout
                if (tb_option_find(option, "timeout"))
                {
                    tb_size_t timeout = tb_option_item_uint32(option, "timeout");
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_SET_TIMEOUT, timeout)) break;
                }

                // print verbose info
                if (verbose) tb_printf("open: %s: ..\n", tb_option_item_cstr(option, "url"));

                // open istream
                if (!tb_stream_open(istream)) 
                {
                    // print verbose info
                    if (verbose) tb_printf("open: %s\n", tb_state_cstr(tb_stream_state(istream)));
                    break;
                }

                // print verbose info
                if (verbose) tb_printf("open: ok\n");

                // init ostream
                if (tb_option_find(option, "more0"))
                {
                    // the path given by the user
                    tb_char_t const* path = tb_option_item_cstr(option, "more0");

                    // init
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                else 
                {
                    // the name: the tail of the url from its last '/' (or '\\'), else a default name
                    tb_char_t const* name = tb_strrchr(tb_option_item_cstr(option, "url"), '/');
                    if (!name) name = tb_strrchr(tb_option_item_cstr(option, "url"), '\\');
                    if (!name) name = "/stream.file";

                    // the path: the current directory + the name
                    tb_char_t path[TB_PATH_MAXN] = {0};
                    if (!tb_directory_current(path, TB_PATH_MAXN)) break;

                    // the name comes from the url and tb_strcat() does no bounds check,
                    // so refuse a joined path that does not fit with its terminator
                    if (tb_strlen(path) + tb_strlen(name) >= TB_PATH_MAXN) break;
                    tb_strcat(path, name);

                    // init file
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                tb_assert_and_check_break(ostream);

                // the limit rate, 0 if the option is not given
                tb_size_t limitrate = 0;
                if (tb_option_find(option, "limitrate"))
                    limitrate = tb_option_item_uint32(option, "limitrate");

                // save it, a negative result is treated as failure
                tb_hong_t           save = 0;
                tb_demo_context_t   context = {0}; 
                context.verbose     = verbose;
                if ((save = tb_transfer_done(istream, ostream, limitrate, tb_demo_stream_save_func, &context)) < 0) break;
            }
            else tb_option_help(option);
        }
        else tb_option_help(option);

    } while (0);

    // exit istream
    if (istream) tb_stream_exit(istream);
    istream = tb_null;

    // exit ostream
    if (ostream) tb_stream_exit(ostream);
    ostream = tb_null;

    // exit option
    if (option) tb_option_exit(option);
    option = tb_null;

    return 0;
}