/*
 * Scan an HTML document for one tag/attribute pair and collect the values.
 *
 * Examples of the markup this is aimed at (from the original author's notes):
 *   <BASE HREF="http://msdn.microsoft.com/workshop/author/dhtml/reference/"/>
 *   <a href=""> text </a>
 *   <img src="" >
 *
 * html_content, content_len    document handed to map_html_tags()
 * interesting_tag              tag name, looked up through a nocase table
 * interesting_tag_attr         attribute name, looked up through a nocase table
 * interesting_tag_attr_buf_p   output buffer passed on to the callback; per
 *                              the original note it ends up holding
 *                              "Url\nUrl\...\n"
 * count                        passed on to the callback; 0 is rejected here
 *
 * Returns 0 when the callback flagged success (is_ok), -1 otherwise,
 * including the count == 0 case.
 */
int extract_interesting_tag_attr(char *html_content, int content_len,
                                 char *interesting_tag,
                                 char *interesting_tag_attr,
                                 buffer_t *interesting_tag_attr_buf_p,
                                 int count)
{
    struct interesting_tag_attr ctx;
    struct hash_table *tag_table;
    struct hash_table *attr_table;

    /* Nothing was requested, so there is nothing to extract. */
    if (count == 0) {
        return -1;
    }

    /* One-entry filter tables: tag -> attribute and attribute -> tag. */
    tag_table = make_nocase_string_hash_table(1);
    attr_table = make_nocase_string_hash_table(1);
    hash_table_put(tag_table, interesting_tag, interesting_tag_attr);
    hash_table_put(attr_table, interesting_tag_attr, interesting_tag);

    /* State shared with the per-tag callback. */
    ctx.count = count;
    ctx.is_ok = 0;
    ctx.interesting_tag_attr_buf_p = interesting_tag_attr_buf_p;
    ctx.interesting_tag_attr = interesting_tag_attr;

    map_html_tags(html_content, content_len,
                  extract_interesting_tag_attr_mapfun, &ctx,
                  MHT_TRIM_VALUES, tag_table, attr_table);

    if (tag_table) {
        hash_table_destroy(tag_table);
    }
    if (attr_table) {
        hash_table_destroy(attr_table);
    }

    return ctx.is_ok ? 0 : -1;
}
// tag_attr_list不能为空,为空表示所有属性解析过程中都会返回,可以杜撰一个不存在的属性。 void extract_text_init(spec_tag_t *spec_tag_list, int tag_list_size, struct hash_table ** spec_tag_ht_pp, tag_attr_t *tag_attr_list, int attr_list_size, struct hash_table ** tag_attr_ht_pp) { int i; struct hash_table * ht; tag_attr_t * tagattr; ht = make_nocase_string_hash_table(tag_list_size); for (i = 0; i < tag_list_size; i++) hash_table_put(ht, spec_tag_list[i].tag_name, spec_tag_list + i); *spec_tag_ht_pp = ht; ht = make_nocase_string_hash_table(attr_list_size); for (i = 0; i < attr_list_size; i++) { // 先检查属性是否存在... tagattr = hash_table_get(ht,tag_attr_list[i].tag_attr); if(!tagattr) { // 不存在,插入为首节点 tag_attr_list[i].tail = tag_attr_list + i; hash_table_put(ht, tag_attr_list[i].tag_attr, tag_attr_list + i); } else { // 存在,插入到链表的末尾 tagattr->tail->next = tag_attr_list + i; tagattr->tail = tag_attr_list + i; } } *tag_attr_ht_pp = ht; }
static void init_interesting (void) { /* Init the variables interesting_tags and interesting_attributes that are used by the HTML parser to know which tags and attributes we're interested in. We initialize this only once, for performance reasons. Here we also make sure that what we put in interesting_tags matches the user's preferences as specified through --ignore-tags and --follow-tags. */ int i; interesting_tags = make_nocase_string_hash_table (countof (known_tags)); /* First, add all the tags we know hot to handle, mapped to their respective entries in known_tags. */ for (i = 0; i < countof (known_tags); i++) hash_table_put (interesting_tags, known_tags[i].name, known_tags + i); /* Then remove the tags ignored through --ignore-tags. */ if (opt.ignore_tags) { char **ignored; for (ignored = opt.ignore_tags; *ignored; ignored++) hash_table_remove (interesting_tags, *ignored); } /* If --follow-tags is specified, use only those tags. */ if (opt.follow_tags) { /* Create a new table intersecting --follow-tags and known_tags, and use it as interesting_tags. */ struct hash_table *intersect = make_nocase_string_hash_table (0); char **followed; for (followed = opt.follow_tags; *followed; followed++) { struct known_tag *t = hash_table_get (interesting_tags, *followed); if (!t) continue; /* ignore unknown --follow-tags entries. */ hash_table_put (intersect, *followed, t); } hash_table_destroy (interesting_tags); interesting_tags = intersect; } /* Add the attributes we care about. */ interesting_attributes = make_nocase_string_hash_table (10); for (i = 0; i < countof (additional_attributes); i++) hash_table_put (interesting_attributes, additional_attributes[i], "1"); for (i = 0; i < countof (tag_url_attributes); i++) hash_table_put (interesting_attributes, tag_url_attributes[i].attr_name, "1"); }
/* Remember the address list AL for HOST in host_name_addresses_map,
   creating the map on first use.  The map takes a reference on AL and
   is keyed by a freshly allocated copy of HOST made by xstrdup_lower.

   NOTE(review): if HOST is already present, what happens to the
   previous key and address list depends on hash_table_put -- confirm
   callers never store the same host twice.  */
static void
cache_store (const char *host, struct address_list *al)
{
  char *key;

  if (!host_name_addresses_map)
    host_name_addresses_map = make_nocase_string_hash_table (0);

  /* The cache now holds its own reference to the list.  */
  al->refcount++;

  key = xstrdup_lower (host);
  hash_table_put (host_name_addresses_map, key, al);

  IF_DEBUG
    {
      int n;

      debug_logprintf ("Caching %s =>", host);
      for (n = 0; n < al->count; n++)
        debug_logprintf (" %s", print_address (al->addresses + n));
      debug_logprintf ("\n");
    }
}
/* Record the result of a host lookup: store the address list AL for
   HOST in host_name_addresses_map, creating the map on first use.  The
   map takes a reference on AL and is keyed by a freshly allocated copy
   of HOST made by xstrdup_lower.  When built with ENABLE_DEBUG and
   opt.debug is set, the cached addresses are logged.  */
static void
cache_host_lookup (const char *host, struct address_list *al)
{
  char *key;

  if (!host_name_addresses_map)
    host_name_addresses_map = make_nocase_string_hash_table (0);

  /* The cache now holds its own reference to the list.  */
  al->refcount++;

  key = xstrdup_lower (host);
  hash_table_put (host_name_addresses_map, key, al);

#ifdef ENABLE_DEBUG
  if (opt.debug)
    {
      int n;

      debug_logprintf ("Caching %s =>", host);
      for (n = 0; n < al->count; n++)
        debug_logprintf (" %s", pretty_print_address (al->addresses + n));
      debug_logprintf ("\n");
    }
#endif
}
/* Register SPECS as the robot specs for HOST and PORT in
   registered_specs, creating the table on first use.

   If an entry for the same key already exists, its specs are freed
   (when non-NULL) and the existing key string is reused; otherwise a
   copy of the key is allocated for the table.  */
void
res_register_specs (const char *host, int port, struct robot_specs *specs)
{
  struct robot_specs *previous;
  char *lookup_key;
  char *stored_key;

  /* Build the lookup key for this host/port pair.  */
  SET_HOSTPORT (host, port, lookup_key);

  if (!registered_specs)
    registered_specs = make_nocase_string_hash_table (0);

  if (!hash_table_get_pair (registered_specs, lookup_key,
                            &stored_key, &previous))
    {
      /* First registration: the table needs its own copy of the key.  */
      hash_table_put (registered_specs, xstrdup (lookup_key), specs);
      return;
    }

  /* Replacing an entry: release the old specs, keep the old key.  */
  if (previous)
    free_specs (previous);
  hash_table_put (registered_specs, stored_key, specs);
}
static void store_cookie (struct cookie *cookie) { struct cookie *chain_head; char *hostport; char *chain_key; if (!cookies_hash_table) /* If the hash table is not initialized, do so now, because we'll need to store things. */ cookies_hash_table = make_nocase_string_hash_table (0); /* Initialize hash table key. */ SET_HOSTPORT (cookie->domain, cookie->port, hostport); if (hash_table_get_pair (cookies_hash_table, hostport, &chain_key, &chain_head)) { /* There already exists a chain of cookies with this exact domain. We need to check for duplicates -- if an existing cookie exactly matches our domain, path and name, we replace it. */ struct cookie *prev; struct cookie *victim = find_matching_cookie (cookie, &prev); if (victim) { /* Remove VICTIM from the chain. COOKIE will be placed at the head. */ if (prev) { prev->next = victim->next; cookie->next = chain_head; } else { /* prev is NULL; apparently VICTIM was at the head of the chain. This place will be taken by COOKIE, so all we need to do is: */ cookie->next = victim->next; } delete_cookie (victim); DEBUGP (("Deleted old cookie (to be replaced.)\n")); } else cookie->next = chain_head; } else { /* We are now creating the chain. Allocate the string that will be used as a key. It is unsafe to use cookie->domain for that, because it might get deallocated by the above code at some point later. */ cookie->next = NULL; chain_key = xstrdup (hostport); } hash_table_put (cookies_hash_table, chain_key, cookie); DEBUGP (("\nStored cookie %s %d %s %s %d %s %s %s\n", cookie->domain, cookie->port, cookie->path, cookie->permanent ? "permanent" : "nonpermanent", cookie->secure, asctime (localtime ((time_t *)&cookie->expiry_time)), cookie->attr, cookie->value)); }