/* Parse the <object> tag that describes a single index list item.
 *
 * Tags are read from the stream until the closing </object> tag (or the end
 * of the stream) is reached.  Each "param" tag is passed on to
 * parse_index_obj_node_param() to fill in the item; any other tag is only
 * reported.
 *
 * Returns the newly allocated item, which starts out with no sub-items.
 */
static IndexItem *parse_index_sitemap_object(HHInfo *info, stream_t *stream)
{
    strbuf_t tag, tag_name;
    IndexItem *entry;

    strbuf_init(&tag);
    strbuf_init(&tag_name);

    entry = heap_alloc_zero(sizeof(IndexItem));
    entry->nItems = 0;
    entry->items = heap_alloc_zero(0);
    entry->itemFlags = 0x11;

    while(next_node(stream, &tag)) {
        get_node_name(&tag, &tag_name);

        TRACE("%s\n", tag.buf);

        /* The end tag finishes this list item. */
        if(strcasecmp(tag_name.buf, "/object") == 0)
            break;

        if(strcasecmp(tag_name.buf, "param") == 0) {
            parse_index_obj_node_param(entry, tag.buf, info->pCHMInfo->codePage);
        }else {
            WARN("Unhandled tag! %s\n", tag_name.buf);
        }

        strbuf_zero(&tag);
    }

    strbuf_free(&tag);
    strbuf_free(&tag_name);

    return entry;
}
/* Parse a table-of-contents stream.
 *
 * Every <ul> tag found at this level is parsed with parse_ul() and chained
 * onto the previous result as a child.  All other tags are ignored.
 *
 * insert_type is set to INSERT_CHILD once at least one <ul> has been seen,
 * and to INSERT_NEXT otherwise.
 *
 * Returns the first non-NULL result of the insertions, or NULL if there was
 * none.
 */
static ContentItem *parse_hhc(HHInfo *info, IStream *str, ContentItem *hhc_root,
        insert_type_t *insert_type)
{
    stream_t stream;
    strbuf_t tag, tag_name;
    ContentItem *first = NULL, *last = NULL;

    *insert_type = INSERT_NEXT;

    strbuf_init(&tag);
    strbuf_init(&tag_name);

    stream_init(&stream, str);

    /* The tag buffer is reset after every iteration, matched or not. */
    for(; next_node(&stream, &tag); strbuf_zero(&tag)) {
        ContentItem *list;

        get_node_name(&tag, &tag_name);

        TRACE("%s\n", tag.buf);

        if(strcasecmp(tag_name.buf, "ul") != 0)
            continue;

        list = parse_ul(info, &stream, hhc_root);
        last = insert_item(last, list, INSERT_CHILD);
        if(first == NULL)
            first = last;
        *insert_type = INSERT_CHILD;
    }

    strbuf_free(&tag);
    strbuf_free(&tag_name);

    return first;
}
/* Extract the tag name from a node.
 *
 * node: buffer holding the tag text as a NUL-terminated string.  The first
 *       character is skipped (presumably the opening '<' - confirm against
 *       next_node()).
 * name: cleared, then filled with the NUL-terminated tag name, i.e. the
 *       characters up to the first whitespace, '>' or end of string.
 */
void get_node_name(strbuf_t *node, strbuf_t *name)
{
    const char *start = node->buf+1;
    const char *ptr = start;

    strbuf_zero(name);

    /* Also stop at the terminator so that a node lacking both '>' and
     * whitespace cannot run the scan past the end of the buffer.  The cast
     * keeps isspace() well defined for bytes >= 0x80 where char is signed. */
    while(*ptr && *ptr != '>' && !isspace((unsigned char)*ptr))
        ptr++;

    strbuf_append(name, start, ptr-start);
    strbuf_append(name, "", 1);
}
/*--------------------------------------------------------------------*/
void
strbuf_free (strbuf_t *buf)

/* Release the storage held by the string buffer <buf>, if any, and then
 * reset the buffer with strbuf_zero().
 * TODO: Not necessary once all strings are counted and with length?
 */

{
    if (buf->buf != NULL)
    {
        xfree(buf->buf);
    }

    strbuf_zero(buf);
}
/* Parse a sitemap <object> tag of the table of contents into a content item.
 *
 * Tags are read until the closing </object> tag (or the end of the stream);
 * each "param" tag is passed to parse_obj_node_param() to fill in the item.
 *
 * If the parsed item refers to a merged file (merge.chm_index is set), the
 * referenced stream is opened and parsed with parse_hhc(), and the result
 * becomes the item's child.  If that stream cannot be opened and the item
 * has no name, the item is freed.
 *
 * insert_type is set to INSERT_NEXT, unless a merged stream was parsed, in
 * which case parse_hhc() decides the value.
 *
 * Returns the new item, or NULL if it was discarded.
 */
static ContentItem *parse_sitemap_object(HHInfo *info, stream_t *stream, ContentItem *hhc_root,
        insert_type_t *insert_type)
{
    strbuf_t node, node_name;
    ContentItem *item;

    *insert_type = INSERT_NEXT;

    strbuf_init(&node);
    strbuf_init(&node_name);

    item = heap_alloc_zero(sizeof(ContentItem));

    while(next_node(stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "/object"))
            break;
        if(!strcasecmp(node_name.buf, "param"))
            parse_obj_node_param(item, hhc_root, node.buf, info->pCHMInfo->codePage);

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);

    if(item->merge.chm_index) {
        IStream *merge_stream;

        merge_stream = GetChmStream(info->pCHMInfo, item->merge.chm_file, &item->merge);
        if(merge_stream) {
            item->child = parse_hhc(info, merge_stream, hhc_root, insert_type);
            IStream_Release(merge_stream);
        }else {
            /* Report both halves of the merge path: the file and the index
             * inside it (the file used to be printed twice). */
            WARN("Could not get %s::%s stream\n", debugstr_w(item->merge.chm_file),
                 debugstr_w(item->merge.chm_index));

            /* An item without a name and without a child has nothing to show. */
            if(!item->name) {
                free_content_item(item);
                item = NULL;
            }
        }
    }

    return item;
}
/* Parse the HTML Help page corresponding to all of the Index items.
 *
 * At this high-level stage we locate each HTML list item tag.
 * Since there is no end-tag for the <LI> item, we must hope that
 * the <LI> entry is parsed correctly or tags might get lost.
 *
 * Within each entry it is also possible to encounter an additional
 * <UL> tag.  When this occurs the tag indicates that the topics
 * contained within it are related to the parent <LI> topic and
 * should be inset by an indent.
 *
 * item is the list tail to extend: a parsed entry whose keyword equals the
 * keyword of the current tail contributes its first sub-item to that tail,
 * any other entry is linked in as the new tail and inherits the merge
 * information.
 */
static void parse_hhindex(HHInfo *info, IStream *str, IndexItem *item)
{
    stream_t stream;
    strbuf_t node, node_name;
    int indent_level = -1;

    strbuf_init(&node);
    strbuf_init(&node_name);

    stream_init(&stream, str);

    while(next_node(&stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "li")) {
            IndexItem *new_item;

            new_item = parse_li(info, &stream);
            /* Both keywords must exist before they can be compared; an entry
             * without a keyword is simply linked in as a new item below. */
            if(new_item && item->keyword && new_item->keyword &&
               strcmpW(new_item->keyword, item->keyword) == 0) {
                /* A freshly parsed entry may have no sub-items at all, in
                 * which case there is nothing to copy from items[0]. */
                if(new_item->nItems > 0) {
                    int num_items = item->nItems;

                    item_realloc(item, num_items+1);
                    /* Only the first sub-item is transferred to the tail. */
                    memcpy(&item->items[num_items], &new_item->items[0], sizeof(IndexSubItem));
                }
                heap_free(new_item->keyword);
                heap_free(new_item->items);
                heap_free(new_item);
            } else if(new_item) {
                item->next = new_item;
                item->next->merge = item->merge;
                item = item->next;
                item->indentLevel = indent_level;
            }
        }else if(!strcasecmp(node_name.buf, "ul")) {
            indent_level++;
        }else if(!strcasecmp(node_name.buf, "/ul")) {
            indent_level--;
        }else {
            WARN("Unhandled tag! %s\n", node_name.buf);
        }

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);
}
/* Parse the contents of a <ul> list of the table of contents.
 *
 * Tags are read until the matching </ul> tag (or the end of the stream):
 *  - an <object> tag whose "type" attribute is "text/sitemap" is parsed with
 *    parse_sitemap_object() and inserted after the previous item in the way
 *    that call requests;
 *  - a nested <ul> is parsed recursively and inserted as a child of the
 *    previous item;
 *  - everything else is ignored.
 *
 * Returns the first non-NULL result of the object insertions, or NULL.
 */
static ContentItem *parse_ul(HHInfo *info, stream_t *stream, ContentItem *hhc_root)
{
    static const char sitemap_type[] = "text/sitemap";
    strbuf_t tag, tag_name;
    ContentItem *first = NULL, *last = NULL, *parsed = NULL;
    insert_type_t how;

    strbuf_init(&tag);
    strbuf_init(&tag_name);

    while(next_node(stream, &tag)) {
        get_node_name(&tag, &tag_name);

        TRACE("%s\n", tag.buf);

        /* End of this list. */
        if(strcasecmp(tag_name.buf, "/ul") == 0)
            break;

        if(strcasecmp(tag_name.buf, "object") == 0) {
            const char *type;
            int type_len;

            type = get_attr(tag.buf, "type", &type_len);

            if(type != NULL && type_len == sizeof(sitemap_type)-1
               && memcmp(type, sitemap_type, type_len) == 0) {
                parsed = parse_sitemap_object(info, stream, hhc_root, &how);
                last = insert_item(last, parsed, how);
                if(first == NULL)
                    first = last;
            }
        }else if(strcasecmp(tag_name.buf, "ul") == 0) {
            parsed = parse_ul(info, stream, hhc_root);
            insert_item(last, parsed, INSERT_CHILD);
        }

        strbuf_zero(&tag);
    }

    strbuf_free(&tag);
    strbuf_free(&tag_name);

    return first;
}
/* Parse the list item node of a single index entry.
 *
 * The only tag expected below a list item is an <object> tag whose "type"
 * attribute is "text/sitemap".  The first such tag is parsed with
 * parse_index_sitemap_object() and ends the search.  Tags other than
 * <object> are reported; <object> tags of another type are skipped silently.
 *
 * Returns the parsed item, or NULL if the stream ended before a sitemap
 * object was found.
 */
static IndexItem *parse_li(HHInfo *info, stream_t *stream)
{
    static const char sitemap_type[] = "text/sitemap";
    strbuf_t tag, tag_name;
    IndexItem *result = NULL;

    strbuf_init(&tag);
    strbuf_init(&tag_name);

    while(next_node(stream, &tag)) {
        const char *type;
        int type_len;

        get_node_name(&tag, &tag_name);

        TRACE("%s\n", tag.buf);

        if(strcasecmp(tag_name.buf, "object") != 0) {
            WARN("Unhandled tag! %s\n", tag_name.buf);
            strbuf_zero(&tag);
            continue;
        }

        type = get_attr(tag.buf, "type", &type_len);

        if(type != NULL && type_len == sizeof(sitemap_type)-1
           && memcmp(type, sitemap_type, type_len) == 0) {
            result = parse_index_sitemap_object(info, stream);
            break;
        }

        strbuf_zero(&tag);
    }

    if(result == NULL)
        FIXME("Failed to parse <li> tag!\n");

    strbuf_free(&tag);
    strbuf_free(&tag_name);

    return result;
}
/* Search the CHM storage stream (an HTML file) for the requested text.
 *
 * Before searching the HTML file all HTML tags are removed so that only
 * the content of the document is scanned.  The collected text is converted
 * to lower case, so needle is expected to be in lower case already.
 *
 * pStorage: storage that contains the file.
 * file:     name of the stream to open.
 * needle:   text to look for.
 *
 * Returns the heap-allocated title of the document if the text was found
 * (the caller frees it), or NULL if the stream could not be opened, the
 * text was not found, or the document has no title.
 */
static WCHAR *SearchCHM_File(IStorage *pStorage, const WCHAR *file, const char *needle)
{
    char *buffer = heap_alloc(BLOCK_SIZE);
    strbuf_t content, node, node_name;
    IStream *temp_stream = NULL;
    DWORD i, buffer_size = 0;
    WCHAR *title = NULL;
    BOOL found = FALSE;
    stream_t stream;
    HRESULT hres;

    if(!buffer)
        return NULL;

    /* Keep the text buffer a valid (empty) string even when the document
     * yields no content, otherwise strstr() below would scan uninitialized
     * memory. */
    buffer[0] = '\0';

    hres = IStorage_OpenStream(pStorage, file, NULL, STGM_READ, 0, &temp_stream);
    if(FAILED(hres)) {
        FIXME("Could not open '%s' stream: %08x\n", debugstr_w(file), hres);
        goto cleanup;
    }

    strbuf_init(&node);
    strbuf_init(&content);
    strbuf_init(&node_name);

    stream_init(&stream, temp_stream);

    /* Remove all HTML formatting and record the title */
    while(next_node(&stream, &node)) {
        get_node_name(&node, &node_name);

        if(next_content(&stream, &content) && content.len > 1)
        {
            /* The first character of the content is skipped. */
            char *text = &content.buf[1];
            int textlen = content.len-1;

            if(!strcasecmp(node_name.buf, "title"))
            {
                int wlen = MultiByteToWideChar(CP_ACP, 0, text, textlen, NULL, 0);

                /* A document with several <title> tags keeps the last one;
                 * release the previous title instead of leaking it. */
                heap_free(title);
                title = heap_alloc((wlen+1)*sizeof(WCHAR));
                MultiByteToWideChar(CP_ACP, 0, text, textlen, title, wlen);
                title[wlen] = 0;
            }

            /* NOTE(review): a failing heap_realloc() is not handled here. */
            buffer = heap_realloc(buffer, buffer_size + textlen + 1);
            memcpy(&buffer[buffer_size], text, textlen);
            buffer[buffer_size + textlen] = '\0';
            buffer_size += textlen;
        }

        strbuf_zero(&node);
        strbuf_zero(&content);
    }

    /* Convert the buffer to lower case for comparison against the
     * requested text (already in lower case).  The cast keeps tolower()
     * well defined for bytes >= 0x80 where char is signed.
     */
    for(i=0;i<buffer_size;i++)
        buffer[i] = tolower((unsigned char)buffer[i]);

    /* Search the decoded buffer for the requested text */
    if(strstr(buffer, needle))
        found = TRUE;

    strbuf_free(&node);
    strbuf_free(&content);
    strbuf_free(&node_name);

cleanup:
    heap_free(buffer);
    if(temp_stream)
        IStream_Release(temp_stream);
    if(!found)
    {
        heap_free(title);
        return NULL;
    }
    return title;
}