int main(int argc, const char * argv[]) { const char* path; if (argc == 2) { path = argv[1]; } else { printf("Bad ARGV!\nUse: get_title_high_level <path_to_html_file>\n"); exit(EXIT_FAILURE); } struct res_html res = load_html_file(path); // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); // parse html myhtml_parse(tree, MyHTML_ENCODING_UTF_8, res.html, res.size); // parse html myhtml_collection_t *collection = myhtml_get_nodes_by_tag_id(tree, NULL, MyHTML_TAG_TITLE, NULL); if(collection && collection->list && collection->length) { myhtml_tree_node_t *text_node = myhtml_node_child(collection->list[0]); if(text_node) { const char* text = myhtml_node_text(text_node, NULL); if(text) printf("Title: %s\n", text); } } // release resources myhtml_collection_destroy(collection); myhtml_tree_destroy(tree); myhtml_destroy(myhtml); free(res.html); return 0; }
int main(int argc, const char * argv[]) { const char* path; if (argc == 2) { path = argv[1]; } else { printf("Bad ARGV!\nUse: get_title_low_level <path_to_html_file>\n"); exit(EXIT_FAILURE); } struct res_html res = load_html_file(path); // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); // parse html myhtml_parse(tree, MyENCODING_UTF_8, res.html, res.size); // get title from index myhtml_collection_t *titles_list = myhtml_get_nodes_by_tag_id(tree, NULL, MyHTML_TAG_TITLE, NULL); if(titles_list && titles_list->length != 0 && titles_list->list[0]->child) { mycore_string_raw_t str = {0}; myhtml_serialization_node(titles_list->list[0]->child, &str); printf("%s\n", str.data); mycore_string_raw_destroy(&str, false); } // release resources myhtml_collection_destroy(titles_list); myhtml_tree_destroy(tree); myhtml_destroy(myhtml); free(res.html); return 0; }
int main(int argc, const char * argv[]) { const char* path; const char* attr_key; if (argc == 3) { attr_key = argv[1]; path = argv[2]; } else { printf("Bad ARGV!\nUse: nodes_by_attr_key_high_level <attribute key> <path to html file>\n"); exit(EXIT_FAILURE); } struct res_html res = load_html_file(path); // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); // parse html myhtml_parse(tree, MyHTML_ENCODING_UTF_8, res.html, res.size); // get and print myhtml_collection_t *collection = myhtml_get_nodes_by_attribute_key(tree, NULL, NULL, attr_key, strlen(attr_key), NULL); for(size_t i = 0; i < collection->length; i++) myhtml_tree_print_node(tree, collection->list[i], stdout); printf("Total found: %zu\n", collection->length); myhtml_collection_destroy(collection); // release resources myhtml_tree_destroy(tree); myhtml_destroy(myhtml); return 0; }
int main(int argc, const char * argv[]) { const char* path; const char* attr_value; struct res_argv rargv; if(argc > 2) { path = argv[1]; attr_value = argv[2]; rargv = get_argv(3, argc, argv); } else { print_usage(); exit(EXIT_FAILURE); } struct res_html res = load_html_file(path); // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); // parse html myhtml_parse(tree, MyHTML_ENCODING_UTF_8, res.html, res.size); // get and print myhtml_collection_t* collection = NULL; switch (rargv.search_type) { case 0: collection = myhtml_get_nodes_by_attribute_value(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; case 1: collection = myhtml_get_nodes_by_attribute_value_whitespace_separated(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; case 2: collection = myhtml_get_nodes_by_attribute_value_begin(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; case 3: collection = myhtml_get_nodes_by_attribute_value_end(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; case 4: collection = myhtml_get_nodes_by_attribute_value_contain(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; case 5: collection = myhtml_get_nodes_by_attribute_value_hyphen_separated(tree, NULL, NULL, rargv.is_insensitive, rargv.key, rargv.key_length, attr_value, strlen(attr_value), NULL); break; default: print_usage(); exit(EXIT_FAILURE); } if(collection) { for(size_t i = 0; i < collection->length; i++) myhtml_tree_print_node(tree, collection->list[i], stdout); printf("Total found: %zu\n", collection->length); } myhtml_collection_destroy(collection); // release resources myhtml_tree_destroy(tree); myhtml_destroy(myhtml); return 0; }