示例#1
0
文件: reader.c 项目: 1060460048/tbox
tb_size_t tb_xml_reader_next(tb_xml_reader_ref_t reader)
{
    // check
    tb_xml_reader_impl_t* impl = (tb_xml_reader_impl_t*)reader;
    tb_assert_and_check_return_val(impl && impl->rstream, TB_XML_READER_EVENT_NONE);

    // reset event
    impl->event = TB_XML_READER_EVENT_NONE;

    // next
    while (!impl->event)
    {
        // peek character
        tb_char_t* pc = tb_null;
        if (!tb_stream_need(impl->rstream, (tb_byte_t**)&pc, 1) || !pc) break;

        // is element?
        if (*pc == '<') 
        {
            // parse element: <...>
            tb_char_t const* element = tb_xml_reader_element_parse(impl);
            tb_assert_and_check_break(element);

            // is document begin: <?xml version="..." charset=".." ?>
            tb_size_t size = tb_string_size(&impl->element);
            if (size > 4 && !tb_strnicmp(element, "?xml", 4))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT;

                // update version & charset
                tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader); 
                for (; attr; attr = attr->next)
                {
                    if (!tb_string_cstricmp(&attr->name, "version")) tb_string_strcpy(&impl->version, &attr->data);
                    if (!tb_string_cstricmp(&attr->name, "encoding")) tb_string_strcpy(&impl->charset, &attr->data);
                }

                // transform stream => utf-8
                if (tb_string_cstricmp(&impl->charset, "utf-8") && tb_string_cstricmp(&impl->charset, "utf8"))
                {
                    // charset
                    tb_size_t charset = TB_CHARSET_TYPE_UTF8;
                    if (!tb_string_cstricmp(&impl->charset, "gb2312") || !tb_string_cstricmp(&impl->charset, "gbk")) 
                        charset = TB_CHARSET_TYPE_GB2312;
                    else tb_trace_e("the charset: %s is not supported", tb_string_cstr(&impl->charset));

                    // init transform stream
                    if (charset != TB_CHARSET_TYPE_UTF8)
                    {
#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
                        // init the filter stream
                        if (!impl->fstream) impl->fstream = tb_stream_init_filter_from_charset(impl->istream, charset, TB_CHARSET_TYPE_UTF8);
                        else
                        {
                            // ctrl stream
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_SET_STREAM, impl->istream)) break;

                            // the filter
                            tb_stream_filter_ref_t filter = tb_null;
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_GET_FILTER, &filter)) break;
                            tb_assert_and_check_break(filter);

                            // ctrl filter
                            if (!tb_stream_filter_ctrl(filter, TB_STREAM_FILTER_CTRL_CHARSET_SET_FTYPE, charset)) break;
                        }

                        // open the filter stream
                        if (impl->fstream && tb_stream_open(impl->fstream))
                            impl->rstream = impl->fstream;
                        tb_string_cstrcpy(&impl->charset, "utf-8");
#else
                        // trace
                        tb_trace_e("unicode type is not supported, please enable charset module config if you want to use it!");
#endif
                    }
                }
            }
            // is document type: <!DOCTYPE ... >
            else if (size > 8 && !tb_strnicmp(element, "!DOCTYPE", 8))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT_TYPE;
            }
            // is element end: </name>
            else if (size > 1 && element[0] == '/')
            {
                // check
                tb_check_break(impl->level);

                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_END;

                // leave
                impl->level--;
            }
            // is comment: <!-- text -->
            else if (size >= 3 && !tb_strncmp(element, "!--", 3))
            {
                // no comment end?
                if (element[size - 2] != '-' || element[size - 1] != '-')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to comment end
                    tb_char_t ch = '\0';
                    tb_int_t n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        // -->
                        if (n == 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);

                            if (ch == '-') n++;
                            else n = 0;
                        }
                    }

                    // update event
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_COMMENT;
                }
                else impl->event = TB_XML_READER_EVENT_COMMENT;
            }
            // is cdata: <![CDATA[ text ]]>
            else if (size >= 8 && !tb_strnicmp(element, "![CDATA[", 8))
            {
                if (element[size - 2] != ']' || element[size - 1] != ']')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to cdata end
                    tb_char_t ch = '\0';
                    tb_int_t n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        // ]]>
                        if (n == 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);

                            if (ch == ']') n++;
                            else n = 0;
                        }
                    }

                    // update event
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_CDATA;
                }
                else impl->event = TB_XML_READER_EVENT_CDATA;
            }
            // is empty element: <name/>
            else if (size > 1 && element[size - 1] == '/')
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_EMPTY;
            }
            // is element begin: <name>
            else
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_BEG;

                // enter
                impl->level++;
            }

            // trace
//          tb_trace_d("<%s>", element);
        }
        // is text: <> text </>
        else if (*pc)
        {
            // parse text: <> ... <>
            tb_char_t const* text = tb_xml_reader_text_parse(impl);
            if (text && tb_string_cstrcmp(&impl->text, "\r\n") && tb_string_cstrcmp(&impl->text, "\n"))
                impl->event = TB_XML_READER_EVENT_TEXT;

            // trace
//          tb_trace_d("%s", text);
        }
        else 
        {
            // skip the invalid character
            if (!tb_stream_skip(impl->rstream, 1)) break;
        }
    }

    // ok?
    return impl->event;
}
示例#2
0
/* //////////////////////////////////////////////////////////////////////////////////////
 * main
 */ 
tb_int_t tb_demo_stream_main(tb_int_t argc, tb_char_t** argv)
{
    // done
    tb_option_ref_t     option = tb_null;
    tb_stream_ref_t     istream = tb_null;
    tb_stream_ref_t     ostream = tb_null;
    tb_stream_ref_t     pstream = tb_null;
    do
    {
        // init option
        option = tb_option_init("stream", "the stream demo", g_options);
        tb_assert_and_check_break(option);
    
        // done option
        if (tb_option_done(option, argc - 1, &argv[1]))
        {
            // debug & verbose
            tb_bool_t debug = tb_option_find(option, "debug");
            tb_bool_t verbose = tb_option_find(option, "no-verbose")? tb_false : tb_true;
        
            // done url
            if (tb_option_find(option, "url")) 
            {
                // init istream
                istream = tb_stream_init_from_url(tb_option_item_cstr(option, "url"));
                tb_assert_and_check_break(istream);
    
                // ctrl http
                if (tb_stream_type(istream) == TB_STREAM_TYPE_HTTP) 
                {
                    // enable gzip?
                    if (tb_option_find(option, "gzip"))
                    {
                        // auto unzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_AUTO_UNZIP, 1)) break;

                        // need gzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Accept-Encoding", "gzip,deflate")) break;
                    }

                    // enable debug?
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD_FUNC, debug? tb_demo_stream_head_func : tb_null)) break;

                    // custem header?
                    if (tb_option_find(option, "header"))
                    {
                        // init
                        tb_string_t key;
                        tb_string_t val;
                        tb_string_init(&key);
                        tb_string_init(&val);

                        // done
                        tb_bool_t           k = tb_true;
                        tb_char_t const*    p = tb_option_item_cstr(option, "header");
                        while (*p)
                        {
                            // is key?
                            if (k)
                            {
                                if (*p != ':' && !tb_isspace(*p)) tb_string_chrcat(&key, *p++);
                                else if (*p == ':') 
                                {
                                    // skip ':'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // is val now
                                    k = tb_false;
                                }
                                else p++;
                            }
                            // is val?
                            else
                            {
                                if (*p != ';') tb_string_chrcat(&val, *p++);
                                else
                                {
                                    // skip ';'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // set header
                                    if (tb_string_size(&key) && tb_string_size(&val))
                                    {
                                        if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) break;
                                    }

                                    // is key now
                                    k = tb_true;

                                    // clear key & val
                                    tb_string_clear(&key);
                                    tb_string_clear(&val);
                                }
                            }
                        }

                        // set header
                        if (tb_string_size(&key) && tb_string_size(&val))
                        {
                            if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                            if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val))) break;
                        }

                        // exit 
                        tb_string_exit(&key);
                        tb_string_exit(&val);
                    }

                    // keep alive?
                    if (tb_option_find(option, "keep-alive"))
                    {
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Connection", "keep-alive")) break;
                    }

                    // post-data?
                    if (tb_option_find(option, "post-data"))
                    {
                        tb_char_t const*    post_data = tb_option_item_cstr(option, "post-data");
                        tb_hize_t           post_size = tb_strlen(post_data);
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_DATA, post_data, post_size)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %llu\n", post_size);
                    }
                    // post-file?
                    else if (tb_option_find(option, "post-file"))
                    {
                        tb_char_t const* url = tb_option_item_cstr(option, "post-file");
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_URL, url)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %s\n", url);
                    }
                }

                // set range
                if (tb_option_find(option, "range"))
                {
                    tb_char_t const* p = tb_option_item_cstr(option, "range");
                    if (p)
                    {
                        // the bof
                        tb_hize_t eof = 0;
                        tb_hize_t bof = tb_atoll(p);
                        while (*p && tb_isdigit(*p)) p++;
                        if (*p == '-')
                        {
                            p++;
                            eof = tb_atoll(p);
                        }
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_RANGE, bof, eof)) break;
                    }
                }

                // set timeout
                if (tb_option_find(option, "timeout"))
                {
                    tb_size_t timeout = tb_option_item_uint32(option, "timeout");
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_SET_TIMEOUT, timeout)) break;
                }

                // print verbose info
                if (verbose) tb_printf("open: %s: ..\n", tb_option_item_cstr(option, "url"));

                // open istream
                if (!tb_stream_open(istream)) 
                {
                    // print verbose info
                    if (verbose) tb_printf("open: %s\n", tb_state_cstr(tb_stream_state(istream)));
                    break;
                }

                // print verbose info
                if (verbose) tb_printf("open: ok\n");

                // init ostream
                if (tb_option_find(option, "more0"))
                {
                    // the path
                    tb_char_t const* path = tb_option_item_cstr(option, "more0");

                    // init
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                else 
                {
                    // the name
                    tb_char_t const* name = tb_strrchr(tb_option_item_cstr(option, "url"), '/');
                    if (!name) name = tb_strrchr(tb_option_item_cstr(option, "url"), '\\');
                    if (!name) name = "/stream.file";

                    // the path
                    tb_char_t path[TB_PATH_MAXN] = {0};
                    if (tb_directory_current(path, TB_PATH_MAXN))
                        tb_strcat(path, name);
                    else break;

                    // init file
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                tb_assert_and_check_break(ostream);

                // the limit rate
                tb_size_t limitrate = 0;
                if (tb_option_find(option, "limitrate"))
                    limitrate = tb_option_item_uint32(option, "limitrate");

                // save it
                tb_hong_t           save = 0;
                tb_demo_context_t   context = {0}; 
                context.verbose     = verbose;
                if ((save = tb_transfer_done(istream, ostream, limitrate, tb_demo_stream_save_func, &context)) < 0) break;
            }
            else tb_option_help(option);
        }
        else tb_option_help(option);

    } while (0);

    // exit pstream
    if (pstream) tb_stream_exit(pstream);
    pstream = tb_null;

    // exit istream
    if (istream) tb_stream_exit(istream);
    istream = tb_null;

    // exit ostream
    if (ostream) tb_stream_exit(ostream);
    ostream = tb_null;

    // exit option
    if (option) tb_option_exit(option);
    option = tb_null;

    return 0;
}
示例#3
0
/* chunked_data
 *
 *   head     data   tail
 * ea5\r\n ..........\r\n e65\r\n..............\r\n 0\r\n\r\n
 * ---------------------- ------------------------- ---------
 *        chunk0                  chunk1               end
 */
static tb_long_t tb_filter_chunked_spak(tb_filter_t* filter, tb_static_stream_ref_t istream, tb_static_stream_ref_t ostream, tb_long_t sync)
{
    // check
    tb_filter_chunked_t* cfilter = tb_filter_chunked_cast(filter);
    tb_assert_and_check_return_val(cfilter && istream && ostream, -1);
    tb_assert_and_check_return_val(tb_static_stream_valid(istream) && tb_static_stream_valid(ostream), -1);

    // the idata
    tb_byte_t const*    ip = tb_static_stream_pos(istream);
    tb_byte_t const*    ie = tb_static_stream_end(istream);

    // trace
    tb_trace_d("[%p]: isize: %lu, beof: %d", cfilter, tb_static_stream_size(istream), filter->beof);

    // find the eof: '\r\n 0\r\n\r\n'
    if (    !filter->beof
        &&  ip + 6 < ie
        &&  ie[-7] == '\r'
        &&  ie[-6] == '\n'
        &&  ie[-5] == '0'
        &&  ie[-4] == '\r'
        &&  ie[-3] == '\n'
        &&  ie[-2] == '\r'
        &&  ie[-1] == '\n')
    {
        // is eof
        filter->beof = tb_true;
    }

    // the odata
    tb_byte_t*          op = (tb_byte_t*)tb_static_stream_pos(ostream);
    tb_byte_t*          oe = (tb_byte_t*)tb_static_stream_end(ostream);
    tb_byte_t*          ob = op;

    // parse chunked head and chunked tail
    if (!cfilter->size || cfilter->read >= cfilter->size)
    {
        // walk
        while (ip < ie)
        {
            // the charactor
            tb_char_t ch = *ip++;

            // trace
            tb_trace_d("[%p]: character: %x", cfilter, ch);

            // check
            tb_assert_and_check_return_val(ch, -1);
        
            // append char to line
            if (ch != '\n') tb_string_chrcat(&cfilter->line, ch);
            // is line end?
            else
            {
                // check
                tb_char_t const*    pb = tb_string_cstr(&cfilter->line);
                tb_size_t           pn = tb_string_size(&cfilter->line);
                tb_assert_and_check_return_val(pb, -1);

                // trace
                tb_trace_d("[%p]: line: %s", cfilter, tb_string_cstr(&cfilter->line));

                // strip '\r' if exists
                if (pb[pn - 1] == '\r') tb_string_strip(&cfilter->line, pn - 1);

                // is chunked tail? only "\r\n"
                if (!tb_string_size(&cfilter->line)) 
                {
                    // reset size
                    cfilter->read = 0;
                    cfilter->size = 0;

                    // trace
                    tb_trace_d("[%p]: tail", cfilter);

                    // continue
                    continue ;
                }
                // is chunked head? parse size
                else
                {
                    // parse size
                    cfilter->size = tb_s16tou32(pb);

                    // trace
                    tb_trace_d("[%p]: size: %lu", cfilter, cfilter->size);

                    // clear data
                    tb_string_clear(&cfilter->line);

                    // is eof? "0\r\n\r\n"
                    if (!cfilter->size)
                    {
                        // trace
                        tb_trace_d("[%p]: eof", cfilter);

                        // is eof
                        filter->beof = tb_true;

                        // continue to spak the end data 
                        continue ;
                    }

                    // ok
                    break;
                }
            }
        }
    }

    // check
    tb_assert_and_check_return_val(cfilter->read <= cfilter->size, -1);

    // read chunked data
    tb_size_t size = tb_min3(ie - ip, oe - op, cfilter->size - cfilter->read);
    if (size) 
    {
        // copy data
        tb_memcpy((tb_byte_t*)op, ip, size);
        ip += size;
        op += size;

        // update read
        cfilter->read += size;
    }

    // update stream
    tb_static_stream_goto(istream, (tb_byte_t*)ip);
    tb_static_stream_goto(ostream, (tb_byte_t*)op);

    // trace
    tb_trace_d("[%p]: read: %lu, size: %lu, beof: %u, ileft: %lu", cfilter, cfilter->read, cfilter->size, filter->beof, tb_static_stream_left(istream));

    // ok
    return (op - ob);
}