/* advance the reader to the next xml event
 *
 * Peeks one character at a time from the read stream and dispatches on it:
 * - '<' starts an element, which is classified as document header, doctype,
 *   element end, comment, cdata, empty element or element begin
 * - any other non-zero character starts a text run
 * - a zero character is skipped
 *
 * For the document header (<?xml ... ?>) the version and encoding attributes
 * are copied into the reader, and for the gb2312/gbk encodings the read stream
 * is switched to a charset filter stream that outputs utf-8.
 *
 * @param reader    the xml reader
 *
 * @return          the event that was found, or TB_XML_READER_EVENT_NONE if the
 *                  stream has no more data or the parsing was aborted
 */
tb_size_t tb_xml_reader_next(tb_xml_reader_ref_t reader)
{
    // check
    tb_xml_reader_impl_t* impl = (tb_xml_reader_impl_t*)reader;
    tb_assert_and_check_return_val(impl && impl->rstream, TB_XML_READER_EVENT_NONE);

    // reset event
    impl->event = TB_XML_READER_EVENT_NONE;

    // next: loop until one branch below has set an event
    while (!impl->event)
    {
        // peek character
        tb_char_t* pc = tb_null;
        if (!tb_stream_need(impl->rstream, (tb_byte_t**)&pc, 1) || !pc) break;

        // is element?
        if (*pc == '<')
        {
            // parse element: <...>
            tb_char_t const* element = tb_xml_reader_element_parse(impl);
            tb_assert_and_check_break(element);

            // is document begin: <?xml version="..." charset=".." ?>
            tb_size_t size = tb_string_size(&impl->element);
            if (size > 4 && !tb_strnicmp(element, "?xml", 4))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT;

                // update version & charset
                tb_xml_node_ref_t attr = (tb_xml_node_ref_t)tb_xml_reader_attributes(reader);
                for (; attr; attr = attr->next)
                {
                    if (!tb_string_cstricmp(&attr->name, "version")) tb_string_strcpy(&impl->version, &attr->data);
                    if (!tb_string_cstricmp(&attr->name, "encoding")) tb_string_strcpy(&impl->charset, &attr->data);
                }

                // transform stream => utf-8
                if (tb_string_cstricmp(&impl->charset, "utf-8") && tb_string_cstricmp(&impl->charset, "utf8"))
                {
                    // charset
                    tb_size_t charset = TB_CHARSET_TYPE_UTF8;
                    if (!tb_string_cstricmp(&impl->charset, "gb2312") || !tb_string_cstricmp(&impl->charset, "gbk"))
                        charset = TB_CHARSET_TYPE_GB2312;
                    else tb_trace_e("the charset: %s is not supported", tb_string_cstr(&impl->charset));

                    // init transform stream
                    if (charset != TB_CHARSET_TYPE_UTF8)
                    {
#ifdef TB_CONFIG_MODULE_HAVE_CHARSET
                        // init the filter stream
                        if (!impl->fstream) impl->fstream = tb_stream_init_filter_from_charset(impl->istream, charset, TB_CHARSET_TYPE_UTF8);
                        else
                        {
                            // ctrl stream: reuse the existing filter stream for this input
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_SET_STREAM, impl->istream)) break;

                            // the filter
                            tb_stream_filter_ref_t filter = tb_null;
                            if (!tb_stream_ctrl(impl->fstream, TB_STREAM_CTRL_FLTR_GET_FILTER, &filter)) break;
                            tb_assert_and_check_break(filter);

                            // ctrl filter
                            if (!tb_stream_filter_ctrl(filter, TB_STREAM_FILTER_CTRL_CHARSET_SET_FTYPE, charset)) break;
                        }

                        // open the filter stream and read from it from now on
                        if (impl->fstream && tb_stream_open(impl->fstream)) impl->rstream = impl->fstream;

                        // the reader reports utf-8 from here on
                        tb_string_cstrcpy(&impl->charset, "utf-8");
#else
                        // trace
                        tb_trace_e("unicode type is not supported, please enable charset module config if you want to use it!");
#endif
                    }
                }
            }
            // is document type: <!DOCTYPE ... >
            else if (size > 8 && !tb_strnicmp(element, "!DOCTYPE", 8))
            {
                // update event
                impl->event = TB_XML_READER_EVENT_DOCUMENT_TYPE;
            }
            // is element end: </name>
            else if (size > 1 && element[0] == '/')
            {
                // check: an end tag without a matching begin tag aborts the parsing
                tb_check_break(impl->level);

                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_END;

                // leave
                impl->level--;
            }
            // is comment: <!-- text -->
            else if (size >= 3 && !tb_strncmp(element, "!--", 3))
            {
                // no comment end? the element was cut at a '>' inside the comment text
                if (element[size - 2] != '-' || element[size - 1] != '-')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to comment end
                    tb_char_t ch = '\0';
                    tb_int_t  n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        /* -->
                         *
                         * n counts the consecutive '-' before this character,
                         * so a longer run such as "--->" also ends the comment
                         */
                        if (n >= 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);

                            if (ch == '-') n++;
                            else n = 0;
                        }
                    }

                    // update event, only if the end was found before the stream ended
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_COMMENT;
                }
                else impl->event = TB_XML_READER_EVENT_COMMENT;
            }
            // is cdata: <![CDATA[ text ]]>
            else if (size >= 8 && !tb_strnicmp(element, "![CDATA[", 8))
            {
                // no cdata end? the element was cut at a '>' inside the cdata text
                if (element[size - 2] != ']' || element[size - 1] != ']')
                {
                    // patch '>'
                    tb_string_chrcat(&impl->element, '>');

                    // seek to cdata end
                    tb_char_t ch = '\0';
                    tb_int_t  n = 0;
                    while ((ch = tb_stream_bread_s8(impl->rstream)))
                    {
                        /* ]]>
                         *
                         * n counts the consecutive ']' before this character,
                         * so data ending with ']' (i.e. "]]]>") is terminated too
                         */
                        if (n >= 2 && ch == '>') break;
                        else
                        {
                            // append it
                            tb_string_chrcat(&impl->element, ch);

                            if (ch == ']') n++;
                            else n = 0;
                        }
                    }

                    // update event, only if the end was found before the stream ended
                    if (ch != '\0') impl->event = TB_XML_READER_EVENT_CDATA;
                }
                else impl->event = TB_XML_READER_EVENT_CDATA;
            }
            // is empty element: <name/>
            else if (size > 1 && element[size - 1] == '/')
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_EMPTY;
            }
            // is element begin: <name>
            else
            {
                // update event
                impl->event = TB_XML_READER_EVENT_ELEMENT_BEG;

                // enter
                impl->level++;
            }

            // trace
            // tb_trace_d("<%s>", element);
        }
        // is text: <> text </>
        else if (*pc)
        {
            // parse text: <> ... <>, a text that is only a line break is not reported
            tb_char_t const* text = tb_xml_reader_text_parse(impl);
            if (text && tb_string_cstrcmp(&impl->text, "\r\n") && tb_string_cstrcmp(&impl->text, "\n"))
                impl->event = TB_XML_READER_EVENT_TEXT;

            // trace
            // tb_trace_d("%s", text);
        }
        else
        {
            // skip the invalid character
            if (!tb_stream_skip(impl->rstream, 1)) break;
        }
    }

    // ok?
    return impl->event;
}
/* //////////////////////////////////////////////////////////////////////////////////////
 * main
 */

/* the stream demo: read the stream given by the "url" option and save it to a file
 *
 * Options read here: debug, no-verbose, url, gzip, header, keep-alive, post-data,
 * post-file, range, timeout, more0 (the output path) and limitrate.
 * Without an output path the data is saved into the current directory, using
 * the last path component of the url as file name.
 *
 * @param argc      the argument count
 * @param argv      the arguments, argv[0] is skipped
 *
 * @return          always 0
 */
tb_int_t tb_demo_stream_main(tb_int_t argc, tb_char_t** argv)
{
    // done
    tb_option_ref_t option  = tb_null;
    tb_stream_ref_t istream = tb_null;
    tb_stream_ref_t ostream = tb_null;
    do
    {
        // init option
        option = tb_option_init("stream", "the stream demo", g_options);
        tb_assert_and_check_break(option);

        // done option
        if (tb_option_done(option, argc - 1, &argv[1]))
        {
            // debug & verbose
            tb_bool_t debug = tb_option_find(option, "debug");
            tb_bool_t verbose = tb_option_find(option, "no-verbose")? tb_false : tb_true;

            // done url
            if (tb_option_find(option, "url"))
            {
                // init istream
                istream = tb_stream_init_from_url(tb_option_item_cstr(option, "url"));
                tb_assert_and_check_break(istream);

                // ctrl http
                if (tb_stream_type(istream) == TB_STREAM_TYPE_HTTP)
                {
                    // enable gzip?
                    if (tb_option_find(option, "gzip"))
                    {
                        // auto unzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_AUTO_UNZIP, 1)) break;

                        // need gzip
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Accept-Encoding", "gzip,deflate")) break;
                    }

                    // enable debug?
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD_FUNC, debug? tb_demo_stream_head_func : tb_null)) break;

                    // custom header? the format is "key: val; key: val"
                    if (tb_option_find(option, "header"))
                    {
                        // init
                        tb_string_t key;
                        tb_string_t val;
                        tb_string_init(&key);
                        tb_string_init(&val);

                        /* done
                         *
                         * ok records a failed ctrl, so that key & val are
                         * always released before we leave the outer block
                         */
                        tb_bool_t           ok = tb_true;
                        tb_bool_t           k = tb_true;
                        tb_char_t const*    p = tb_option_item_cstr(option, "header");
                        while (p && *p)
                        {
                            // is key?
                            if (k)
                            {
                                if (*p != ':' && !tb_isspace(*p)) tb_string_chrcat(&key, *p++);
                                else if (*p == ':')
                                {
                                    // skip ':'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // is val now
                                    k = tb_false;
                                }
                                else p++;
                            }
                            // is val?
                            else
                            {
                                if (*p != ';') tb_string_chrcat(&val, *p++);
                                else
                                {
                                    // skip ';'
                                    p++;

                                    // skip space
                                    while (*p && tb_isspace(*p)) p++;

                                    // set header
                                    if (tb_string_size(&key) && tb_string_size(&val))
                                    {
                                        if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val)))
                                        {
                                            // this break only leaves the parsing loop
                                            ok = tb_false;
                                            break;
                                        }
                                    }

                                    // is key now
                                    k = tb_true;

                                    // clear key & val
                                    tb_string_clear(&key);
                                    tb_string_clear(&val);
                                }
                            }
                        }

                        // set the last header, which has no trailing ';'
                        if (ok && tb_string_size(&key) && tb_string_size(&val))
                        {
                            if (debug) tb_printf("header: %s: %s\n", tb_string_cstr(&key), tb_string_cstr(&val));
                            if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, tb_string_cstr(&key), tb_string_cstr(&val)))
                                ok = tb_false;
                        }

                        // exit
                        tb_string_exit(&key);
                        tb_string_exit(&val);

                        // failed?
                        if (!ok) break;
                    }

                    // keep alive?
                    if (tb_option_find(option, "keep-alive"))
                    {
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_HEAD, "Connection", "keep-alive")) break;
                    }

                    // post-data?
                    if (tb_option_find(option, "post-data"))
                    {
                        tb_char_t const*    post_data = tb_option_item_cstr(option, "post-data");
                        tb_hize_t           post_size = tb_strlen(post_data);
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_DATA, post_data, post_size)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %llu\n", post_size);
                    }
                    // post-file?
                    else if (tb_option_find(option, "post-file"))
                    {
                        tb_char_t const* url = tb_option_item_cstr(option, "post-file");
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_METHOD, TB_HTTP_METHOD_POST)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_URL, url)) break;
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_POST_FUNC, tb_demo_http_post_func)) break;
                        if (debug) tb_printf("post: %s\n", url);
                    }
                }

                // set range: "bof-eof", eof stays 0 if it is not given
                if (tb_option_find(option, "range"))
                {
                    tb_char_t const* p = tb_option_item_cstr(option, "range");
                    if (p)
                    {
                        // the bof
                        tb_hize_t eof = 0;
                        tb_hize_t bof = tb_atoll(p);
                        while (*p && tb_isdigit(*p)) p++;
                        if (*p == '-')
                        {
                            p++;
                            eof = tb_atoll(p);
                        }
                        if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_HTTP_SET_RANGE, bof, eof)) break;
                    }
                }

                // set timeout
                if (tb_option_find(option, "timeout"))
                {
                    tb_size_t timeout = tb_option_item_uint32(option, "timeout");
                    if (!tb_stream_ctrl(istream, TB_STREAM_CTRL_SET_TIMEOUT, timeout)) break;
                }

                // print verbose info
                if (verbose) tb_printf("open: %s: ..\n", tb_option_item_cstr(option, "url"));

                // open istream
                if (!tb_stream_open(istream))
                {
                    // print verbose info
                    if (verbose) tb_printf("open: %s\n", tb_state_cstr(tb_stream_state(istream)));
                    break;
                }

                // print verbose info
                if (verbose) tb_printf("open: ok\n");

                // init ostream
                if (tb_option_find(option, "more0"))
                {
                    // the path
                    tb_char_t const* path = tb_option_item_cstr(option, "more0");

                    // init
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                else
                {
                    // the name: the last path component of the url, including its separator
                    tb_char_t const* name = tb_strrchr(tb_option_item_cstr(option, "url"), '/');
                    if (!name) name = tb_strrchr(tb_option_item_cstr(option, "url"), '\\');
                    if (!name) name = "/stream.file";

                    // the path: the current directory + the name
                    tb_char_t path[TB_PATH_MAXN] = {0};
                    if (!tb_directory_current(path, TB_PATH_MAXN)) break;

                    // the name comes from the url, so check that it fits before appending it
                    if (tb_strlen(path) + tb_strlen(name) >= TB_PATH_MAXN)
                    {
                        // print verbose info
                        if (verbose) tb_printf("save: the path is too long\n");
                        break;
                    }
                    tb_strcat(path, name);

                    // init file
                    ostream = tb_stream_init_from_file(path, TB_FILE_MODE_RW | TB_FILE_MODE_CREAT | TB_FILE_MODE_BINARY | TB_FILE_MODE_TRUNC);

                    // print verbose info
                    if (verbose) tb_printf("save: %s\n", path);
                }
                tb_assert_and_check_break(ostream);

                // the limit rate
                tb_size_t limitrate = 0;
                if (tb_option_find(option, "limitrate"))
                    limitrate = tb_option_item_uint32(option, "limitrate");

                // save it
                tb_hong_t           save = 0;
                tb_demo_context_t   context = {0};
                context.verbose     = verbose;
                if ((save = tb_transfer_done(istream, ostream, limitrate, tb_demo_stream_save_func, &context)) < 0) break;
            }
            else tb_option_help(option);
        }
        else tb_option_help(option);

    } while (0);

    // exit istream
    if (istream) tb_stream_exit(istream);
    istream = tb_null;

    // exit ostream
    if (ostream) tb_stream_exit(ostream);
    ostream = tb_null;

    // exit option
    if (option) tb_option_exit(option);
    option = tb_null;

    return 0;
}
/* chunked_data
 *
 *   head          data          tail
 *   ea5\r\n  ..........\r\n   e65\r\n..............\r\n   0\r\n\r\n
 *   ----------------------   -------------------------   ---------
 *           chunk0                     chunk1               end
 *
 * Decode one piece of chunked data from istream into ostream.
 *
 * While no chunk is pending (no size yet, or the current chunk was read
 * completely) the input is consumed line by line: an empty line is a chunk
 * tail and resets the state, any other line is a chunk head whose leading hex
 * number is the chunk size. A size of zero marks the end of the data.
 * Then as much chunk data as fits is copied to ostream.
 *
 * @param filter    the filter
 * @param istream   the input static stream
 * @param ostream   the output static stream
 * @param sync      the sync flag, not used here
 *
 * @return          the number of bytes written to ostream, or -1 on failure
 */
static tb_long_t tb_filter_chunked_spak(tb_filter_t* filter, tb_static_stream_ref_t istream, tb_static_stream_ref_t ostream, tb_long_t sync)
{
    // check
    tb_filter_chunked_t* cfilter = tb_filter_chunked_cast(filter);
    tb_assert_and_check_return_val(cfilter && istream && ostream, -1);
    tb_assert_and_check_return_val(tb_static_stream_valid(istream) && tb_static_stream_valid(ostream), -1);

    // the idata
    tb_byte_t const* ip = tb_static_stream_pos(istream);
    tb_byte_t const* ie = tb_static_stream_end(istream);

    // trace
    tb_trace_d("[%p]: isize: %lu, beof: %d", cfilter, tb_static_stream_size(istream), filter->beof);

    // find the eof: '\r\n 0\r\n\r\n', at least 7 bytes are needed for ie[-7]
    if (    !filter->beof
        &&  ip + 6 < ie
        &&  ie[-7] == '\r'
        &&  ie[-6] == '\n'
        &&  ie[-5] == '0'
        &&  ie[-4] == '\r'
        &&  ie[-3] == '\n'
        &&  ie[-2] == '\r'
        &&  ie[-1] == '\n')
    {
        // is eof
        filter->beof = tb_true;
    }

    // the odata
    tb_byte_t* op = (tb_byte_t*)tb_static_stream_pos(ostream);
    tb_byte_t* oe = (tb_byte_t*)tb_static_stream_end(ostream);
    tb_byte_t* ob = op;

    // parse chunked head and chunked tail
    if (!cfilter->size || cfilter->read >= cfilter->size)
    {
        // walk
        while (ip < ie)
        {
            // the character
            tb_char_t ch = *ip++;

            // trace
            tb_trace_d("[%p]: character: %x", cfilter, ch);

            // check
            tb_assert_and_check_return_val(ch, -1);

            // append char to line
            if (ch != '\n') tb_string_chrcat(&cfilter->line, ch);
            // is line end?
            else
            {
                // check
                tb_char_t const*    pb = tb_string_cstr(&cfilter->line);
                tb_size_t           pn = tb_string_size(&cfilter->line);
                tb_assert_and_check_return_val(pb, -1);

                // trace
                tb_trace_d("[%p]: line: %s", cfilter, tb_string_cstr(&cfilter->line));

                // strip '\r' if exists, the line is empty (pn == 0) for a bare '\n'
                if (pn && pb[pn - 1] == '\r') tb_string_strip(&cfilter->line, pn - 1);

                // is chunked tail? only "\r\n"
                if (!tb_string_size(&cfilter->line))
                {
                    // reset size
                    cfilter->read = 0;
                    cfilter->size = 0;

                    // trace
                    tb_trace_d("[%p]: tail", cfilter);

                    // continue
                    continue ;
                }
                // is chunked head? parse size
                else
                {
                    // parse size
                    cfilter->size = tb_s16tou32(pb);

                    // trace
                    tb_trace_d("[%p]: size: %lu", cfilter, cfilter->size);

                    // clear data
                    tb_string_clear(&cfilter->line);

                    // is eof? "0\r\n\r\n"
                    if (!cfilter->size)
                    {
                        // trace
                        tb_trace_d("[%p]: eof", cfilter);

                        // is eof
                        filter->beof = tb_true;

                        // continue to spak the end data
                        continue ;
                    }

                    // ok, the chunk data starts at ip
                    break;
                }
            }
        }
    }

    // check
    tb_assert_and_check_return_val(cfilter->read <= cfilter->size, -1);

    // read chunked data: limited by the input left, the output space and the chunk data left
    tb_size_t size = tb_min3(ie - ip, oe - op, cfilter->size - cfilter->read);
    if (size)
    {
        // copy data
        tb_memcpy((tb_byte_t*)op, ip, size);
        ip += size;
        op += size;

        // update read
        cfilter->read += size;
    }

    // update stream
    tb_static_stream_goto(istream, (tb_byte_t*)ip);
    tb_static_stream_goto(ostream, (tb_byte_t*)op);

    // trace
    tb_trace_d("[%p]: read: %lu, size: %lu, beof: %u, ileft: %lu", cfilter, cfilter->read, cfilter->size, filter->beof, tb_static_stream_left(istream));

    // ok
    return (op - ob);
}