// Converts the given tree node into an HtmlTag (tag id, optional text and the
// list of attributes) and appends it to _tags.
// NOTE(review): presumably registered with MyHTML as the "node inserted" tree
// callback - confirm at the registration site. The tree argument is unused.
void CMyHtmlParser::callbackNodeInserted(myhtml_tree_t* /*tree*/, myhtml_tree_node_t* node)
{
	HtmlTag insertedTag;
	insertedTag.type = myhtml_node_tag_id(node);

	// The text member is only assigned when the library hands back a non-null pointer.
	if (const char* nodeText = myhtml_node_text(node, nullptr))
		insertedTag.text = QString::fromUtf8(nodeText);

	// Walk the node's attribute list from first to last.
	myhtml_tree_attr_t* current = myhtml_node_attribute_first(node);
	while (current != nullptr)
	{
		HtmlTagAttribute parsedAttribute;

		const char* const key = myhtml_attribute_key(current, nullptr);
		if (key)
			parsedAttribute.name = QString::fromUtf8(key);

		const char* const val = myhtml_attribute_value(current, nullptr);
		if (val)
			parsedAttribute.value = QString::fromUtf8(val);

		// An attribute with neither a key nor a value is not expected.
		assert(key || val);

		insertedTag.attributes.push_back(parsedAttribute);
		current = myhtml_attribute_next(current);
	}

	_tags.push_back(insertedTag);
}
int main(int argc, const char * argv[]) { const char* path; if (argc == 2) { path = argv[1]; } else { printf("Bad ARGV!\nUse: get_title_high_level <path_to_html_file>\n"); exit(EXIT_FAILURE); } struct res_html res = load_html_file(path); // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); // parse html myhtml_parse(tree, MyHTML_ENCODING_UTF_8, res.html, res.size); // parse html myhtml_collection_t *collection = myhtml_get_nodes_by_tag_id(tree, NULL, MyHTML_TAG_TITLE, NULL); if(collection && collection->list && collection->length) { myhtml_tree_node_t *text_node = myhtml_node_child(collection->list[0]); if(text_node) { const char* text = myhtml_node_text(text_node, NULL); if(text) printf("Title: %s\n", text); } } // release resources myhtml_collection_destroy(collection); myhtml_tree_destroy(tree); myhtml_destroy(myhtml); free(res.html); return 0; }