Exemple #1
0
/* Parse the object tag corresponding to a list item.
 *
 * At this step we look for all of the "param" child tags, using this information
 * to build up the information about the list item.  When we reach the </object>
 * tag we know that we've finished parsing this list item.
 */
static IndexItem *parse_index_sitemap_object(HHInfo *info, stream_t *stream)
{
    strbuf_t node, node_name;
    IndexItem *item;

    strbuf_init(&node);
    strbuf_init(&node_name);

    item = heap_alloc_zero(sizeof(IndexItem));
    item->nItems = 0;
    item->items = heap_alloc_zero(0);
    item->itemFlags = 0x11;

    while(next_node(stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "param")) {
            parse_index_obj_node_param(item, node.buf, info->pCHMInfo->codePage);
        }else if(!strcasecmp(node_name.buf, "/object")) {
            break;
        }else {
            WARN("Unhandled tag! %s\n", node_name.buf);
        }

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);

    return item;
}
Exemple #2
0
static ContentItem *parse_hhc(HHInfo *info, IStream *str, ContentItem *hhc_root,
        insert_type_t *insert_type)
{
    stream_t stream;
    strbuf_t node, node_name;
    ContentItem *ret = NULL, *prev = NULL;

    *insert_type = INSERT_NEXT;

    strbuf_init(&node);
    strbuf_init(&node_name);

    stream_init(&stream, str);

    while(next_node(&stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "ul")) {
            ContentItem *item = parse_ul(info, &stream, hhc_root);
            prev = insert_item(prev, item, INSERT_CHILD);
            if(!ret)
                ret = prev;
            *insert_type = INSERT_CHILD;
        }

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);

    return ret;
}
Exemple #3
0
void get_node_name(strbuf_t *node, strbuf_t *name)
{
    const char *ptr = node->buf+1;

    strbuf_zero(name);

    while(*ptr != '>' && !isspace(*ptr))
        ptr++;

    strbuf_append(name, node->buf+1, ptr-node->buf-1);
    strbuf_append(name, "", 1);
}
Exemple #4
0
/*--------------------------------------------------------------------*/
void
strbuf_free (strbuf_t *buf)

/* Free the given string buffer <buf>.
 * TODO: Not necessary once all strings are counted and with length?
 */

{
    if (buf->buf)
        xfree(buf->buf);
    strbuf_zero(buf);
}
Exemple #5
0
static ContentItem *parse_sitemap_object(HHInfo *info, stream_t *stream, ContentItem *hhc_root,
        insert_type_t *insert_type)
{
    strbuf_t node, node_name;
    ContentItem *item;

    *insert_type = INSERT_NEXT;

    strbuf_init(&node);
    strbuf_init(&node_name);

    item = heap_alloc_zero(sizeof(ContentItem));

    while(next_node(stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "/object"))
            break;
        if(!strcasecmp(node_name.buf, "param"))
            parse_obj_node_param(item, hhc_root, node.buf, info->pCHMInfo->codePage);

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);

    if(item->merge.chm_index) {
        IStream *merge_stream;

        merge_stream = GetChmStream(info->pCHMInfo, item->merge.chm_file, &item->merge);
        if(merge_stream) {
            item->child = parse_hhc(info, merge_stream, hhc_root, insert_type);
            IStream_Release(merge_stream);
        }else {
            WARN("Could not get %s::%s stream\n", debugstr_w(item->merge.chm_file),
                 debugstr_w(item->merge.chm_file));

            if(!item->name) {
                free_content_item(item);
                item = NULL;
            }
        }

    }

    return item;
}
Exemple #6
0
/* Parse the HTML Help page corresponding to all of the Index items.
 *
 * At this high-level stage we locate out each HTML list item tag.
 * Since there is no end-tag for the <LI> item, we must hope that
 * the <LI> entry is parsed correctly or tags might get lost.
 *
 * Within each entry it is also possible to encounter an additional
 * <UL> tag.  When this occurs the tag indicates that the topics
 * contained within it are related to the parent <LI> topic and
 * should be inset by an indent.
 */
static void parse_hhindex(HHInfo *info, IStream *str, IndexItem *item)
{
    stream_t stream;
    strbuf_t node, node_name;
    int indent_level = -1;

    strbuf_init(&node);
    strbuf_init(&node_name);

    stream_init(&stream, str);

    while(next_node(&stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "li")) {
            IndexItem *new_item;

            new_item = parse_li(info, &stream);
            if(new_item && item->keyword && strcmpW(new_item->keyword, item->keyword) == 0) {
                int num_items = item->nItems;

                item_realloc(item, num_items+1);
                memcpy(&item->items[num_items], &new_item->items[0], sizeof(IndexSubItem));
                heap_free(new_item->keyword);
                heap_free(new_item->items);
                heap_free(new_item);
            } else if(new_item) {
                item->next = new_item;
                item->next->merge = item->merge;
                item = item->next;
                item->indentLevel = indent_level;
            }
        }else if(!strcasecmp(node_name.buf, "ul")) {
            indent_level++;
        }else if(!strcasecmp(node_name.buf, "/ul")) {
            indent_level--;
        }else {
            WARN("Unhandled tag! %s\n", node_name.buf);
        }

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);
}
Exemple #7
0
static ContentItem *parse_ul(HHInfo *info, stream_t *stream, ContentItem *hhc_root)
{
    strbuf_t node, node_name;
    ContentItem *ret = NULL, *prev = NULL, *new_item = NULL;
    insert_type_t it;

    strbuf_init(&node);
    strbuf_init(&node_name);

    while(next_node(stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "object")) {
            const char *ptr;
            int len;

            static const char sz_text_sitemap[] = "text/sitemap";

            ptr = get_attr(node.buf, "type", &len);

            if(ptr && len == sizeof(sz_text_sitemap)-1
               && !memcmp(ptr, sz_text_sitemap, len)) {
                new_item = parse_sitemap_object(info, stream, hhc_root, &it);
                prev = insert_item(prev, new_item, it);
                if(!ret)
                    ret = prev;
            }
        }else if(!strcasecmp(node_name.buf, "ul")) {
            new_item = parse_ul(info, stream, hhc_root);
            insert_item(prev, new_item, INSERT_CHILD);
        }else if(!strcasecmp(node_name.buf, "/ul")) {
            break;
        }

        strbuf_zero(&node);
    }

    strbuf_free(&node);
    strbuf_free(&node_name);

    return ret;
}
Exemple #8
0
/* Parse the HTML list item node corresponding to a specific help entry.
 *
 * At this stage we look for the only child tag we expect to find under
 * the list item: the <OBJECT> tag.  We also only expect to find object
 * tags with the "type" attribute set to "text/sitemap".
 */
static IndexItem *parse_li(HHInfo *info, stream_t *stream)
{
    strbuf_t node, node_name;
    IndexItem *ret = NULL;

    strbuf_init(&node);
    strbuf_init(&node_name);

    while(next_node(stream, &node)) {
        get_node_name(&node, &node_name);

        TRACE("%s\n", node.buf);

        if(!strcasecmp(node_name.buf, "object")) {
            const char *ptr;
            int len;

            static const char sz_text_sitemap[] = "text/sitemap";

            ptr = get_attr(node.buf, "type", &len);

            if(ptr && len == sizeof(sz_text_sitemap)-1
               && !memcmp(ptr, sz_text_sitemap, len)) {
                ret = parse_index_sitemap_object(info, stream);
                break;
            }
        }else {
            WARN("Unhandled tag! %s\n", node_name.buf);
        }

        strbuf_zero(&node);
    }
    if(!ret)
        FIXME("Failed to parse <li> tag!\n");

    strbuf_free(&node);
    strbuf_free(&node_name);

    return ret;
}
Exemple #9
0
/* Search the CHM storage stream (an HTML file) for the requested text.
 *
 * Before searching the HTML file all HTML tags are removed so that only
 * the content of the document is scanned.  If the search string is found
 * then the title of the document is returned.
 */
static WCHAR *SearchCHM_File(IStorage *pStorage, const WCHAR *file, const char *needle)
{
    char *buffer = heap_alloc(BLOCK_SIZE);
    strbuf_t content, node, node_name;
    IStream *temp_stream = NULL;
    DWORD i, buffer_size = 0;
    WCHAR *title = NULL;
    BOOL found = FALSE;
    stream_t stream;
    HRESULT hres;

    hres = IStorage_OpenStream(pStorage, file, NULL, STGM_READ, 0, &temp_stream);
    if(FAILED(hres)) {
        FIXME("Could not open '%s' stream: %08x\n", debugstr_w(file), hres);
        goto cleanup;
    }

    strbuf_init(&node);
    strbuf_init(&content);
    strbuf_init(&node_name);

    stream_init(&stream, temp_stream);

    /* Remove all HTML formatting and record the title */
    while(next_node(&stream, &node)) {
        get_node_name(&node, &node_name);

        if(next_content(&stream, &content) && content.len > 1)
        {
            char *text = &content.buf[1];
            int textlen = content.len-1;

            if(!strcasecmp(node_name.buf, "title"))
            {
                int wlen = MultiByteToWideChar(CP_ACP, 0, text, textlen, NULL, 0);
                title = heap_alloc((wlen+1)*sizeof(WCHAR));
                MultiByteToWideChar(CP_ACP, 0, text, textlen, title, wlen);
                title[wlen] = 0;
            }

            buffer = heap_realloc(buffer, buffer_size + textlen + 1);
            memcpy(&buffer[buffer_size], text, textlen);
            buffer[buffer_size + textlen] = '\0';
            buffer_size += textlen;
        }

        strbuf_zero(&node);
        strbuf_zero(&content);
    }

    /* Convert the buffer to lower case for comparison against the
     * requested text (already in lower case).
     */
    for(i=0;i<buffer_size;i++)
        buffer[i] = tolower(buffer[i]);

    /* Search the decoded buffer for the requested text */
    if(strstr(buffer, needle))
        found = TRUE;

    strbuf_free(&node);
    strbuf_free(&content);
    strbuf_free(&node_name);

cleanup:
    heap_free(buffer);
    if(temp_stream)
        IStream_Release(temp_stream);
    if(!found)
    {
        heap_free(title);
        return NULL;
    }
    return title;
}