Exemple #1
0
tb_bool_t tb_async_transfer_init_istream_from_url(tb_async_transfer_ref_t transfer, tb_char_t const* url)
{
    // check
    tb_async_transfer_impl_t* impl = (tb_async_transfer_impl_t*)transfer;
    tb_assert_and_check_return_val(impl && impl->aicp && url, tb_false);

    // muse be closed
    tb_assert_and_check_return_val(TB_STATE_CLOSED == tb_atomic_get(&impl->state), tb_false);

    // check stream type
    if (impl->istream)
    {
        // probe protocol
        tb_size_t protocol = tb_url_protocol_probe(url);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_FILE == (tb_size_t)TB_STREAM_TYPE_FILE);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_HTTP == (tb_size_t)TB_STREAM_TYPE_HTTP);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_SOCK == (tb_size_t)TB_STREAM_TYPE_SOCK);
        tb_assert_static((tb_size_t)TB_URL_PROTOCOL_DATA == (tb_size_t)TB_STREAM_TYPE_DATA);

        // protocol => type
        tb_size_t type = protocol;
        if (!type || type > TB_STREAM_TYPE_DATA)
        {
            tb_trace_e("unknown stream for url: %s", url);
            return tb_false;
        }

        // exit the previous stream first if be different stream type
        if (tb_async_stream_type(impl->istream) != type)
        {
            if (impl->iowner) tb_async_stream_exit(impl->istream);
            impl->istream = tb_null;
        }
    }

    // using the previous stream?
    if (impl->istream)
    {
        // ctrl stream
        if (!tb_async_stream_ctrl(impl->istream, TB_STREAM_CTRL_SET_URL, url)) return tb_false;
    }
    else 
    {
        // init stream
        impl->istream = tb_async_stream_init_from_url(impl->aicp, url);
        tb_assert_and_check_return_val(impl->istream, tb_false);

        // init owner
        impl->iowner = 1;
    }

    // ok
    return tb_true;
}
Exemple #2
0
static tb_size_t tb_demo_spider_parser_get_url(tb_xml_reader_ref_t reader, tb_url_ref_t url)
{
    // check
    tb_assert_and_check_return_val(reader && url, tb_false);

    // done
    tb_size_t ok = 0;
    tb_size_t event = TB_XML_READER_EVENT_NONE;
    while (!ok && (event = tb_xml_reader_next(reader)))
    {
        switch (event)
        {
        case TB_XML_READER_EVENT_ELEMENT_EMPTY: 
        case TB_XML_READER_EVENT_ELEMENT_BEG: 
            {
                // the element name
                tb_char_t const* name = tb_xml_reader_element(reader);
                tb_check_break(name);

                // <a href="" />? 
                // <link href="" /> 
                // <img src="" />? 
                // <script src="" />? 
                // <source src="" />? 
                // <frame src="" />? 
                if (    !tb_stricmp(name, "a")
                    ||  !tb_stricmp(name, "link")
                    ||  !tb_stricmp(name, "img")
                    ||  !tb_stricmp(name, "frame")
                    ||  !tb_stricmp(name, "source"))
                {
                    // walk attributes
                    tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader); 
                    for (; attr; attr = attr->next)
                    {
                        // href or src?
                        if (    tb_string_size(&attr->data)
                            &&  (   !tb_string_cstricmp(&attr->name, "href")
                                ||  !tb_string_cstricmp(&attr->name, "src")))
                        {
                            // the url protocol
                            tb_size_t protocol = tb_url_protocol_probe(tb_string_cstr(&attr->data));

                            // http?
                            if(protocol == TB_URL_PROTOCOL_HTTP)
                            {
                                // save url
                                ok = tb_url_set(url, tb_string_cstr(&attr->data));
                            }
                            // file?
                            else if (protocol == TB_URL_PROTOCOL_FILE)
                            {
                                // save path
                                tb_url_path_set(url, tb_string_cstr(&attr->data));

                                // ok
                                ok = tb_true;
                            }
                        }
                    }
                }
            }
            break;
        default:
            break;
        }
    }

    // ok?
    return ok;
}