예제 #1
0
// Walks a <ul> list of a CHM table of contents or index and hands every
// usable <li> entry to VisitChmIndexItem (when isIndex is set) or
// VisitChmTocItem (otherwise), recursing into nested lists.
//
//   visitor - passed through unchanged to the Visit* helpers
//   list    - the <ul> element to start at; must not be NULL
//   cp      - codepage value passed through to the Visit* helpers
//   isIndex - selects VisitChmIndexItem over VisitChmTocItem
//   level   - nesting depth of the current list; incremented per recursion
static void WalkChmTocOrIndex(EbookTocVisitor *visitor, HtmlElement *list, UINT cp, bool isIndex, int level=1)
{
    CrashIf(Tag_Ul != list->tag);

    // some broken ToCs wrap every <li> into its own <ul>, which is why
    // all directly following <ul> siblings are walked at the same level
    while (list && Tag_Ul == list->tag) {
        for (HtmlElement *item = list->down; item; item = item->next) {
            // anything that isn't an <li> is unexpected and gets ignored
            if (Tag_Li != item->tag)
                continue;

            // an entry without an <object> child is incomplete: skip it
            // and all of its children
            HtmlElement *itemObj = item->GetChildByTag(Tag_Object);
            if (!itemObj)
                continue;

            // the same applies to entries the Visit* helper reports as invalid
            bool accepted;
            if (isIndex)
                accepted = VisitChmIndexItem(visitor, itemObj, cp, level);
            else
                accepted = VisitChmTocItem(visitor, itemObj, cp, level);
            if (!accepted)
                continue;

            HtmlElement *sublist = item->GetChildByTag(Tag_Ul);
            if (!sublist) {
                // some broken ToCs have the <ul> follow right *after* a <li>
                HtmlElement *following = item->next;
                if (following && Tag_Ul == following->tag)
                    sublist = following;
            }
            if (sublist)
                WalkChmTocOrIndex(visitor, sublist, cp, isIndex, level + 1);
        }
        list = list->next;
    }
}
예제 #2
0
// Reads the data stored under `path`, parses it as HTML and walks the
// first <ul> found in it via WalkChmTocOrIndex. If no <ul> exists, the
// work is delegated to WalkBrokenChmTocOrIndex instead.
//
//   visitor - passed through to the walker functions
//   path    - name of the entry to load; NULL yields false
//   isIndex - passed through to the walker functions
//
// Returns false if path is NULL, if no data could be loaded or if the
// parser yields no element; returns true after walking a regular list,
// else whatever WalkBrokenChmTocOrIndex returns.
bool ChmDoc::ParseTocOrIndex(EbookTocVisitor *visitor, const char *path, bool isIndex)
{
    if (!path)
        return false;

    // TODO: is path already UTF-8 encoded - or do we need str::conv::ToUtf8(ToStr(path)) ?
    ScopedMem<unsigned char> rawData(GetData(path, NULL));
    const char *markup = (char *)rawData.Get();
    if (!markup)
        return false;

    // content starting with a UTF-8 BOM overrides the document's codepage;
    // the three BOM bytes themselves are skipped before parsing
    UINT cp = codepage;
    if (str::StartsWith(markup, UTF8_BOM)) {
        cp = CP_UTF8;
        markup += 3;
    }

    // parsing always happens with the default codepage, so that
    // pre-encoded text and entities are in the same codepage and
    // VisitChmTocItem yields consistent results
    HtmlParser parser;
    if (!parser.Parse(markup, CP_CHM_DEFAULT))
        return false;

    // since <body> is optional, the <ul> lookup also continues without one
    HtmlElement *body = parser.FindElementByName("body");
    HtmlElement *firstList = parser.FindElementByName("ul", body);
    if (firstList) {
        WalkChmTocOrIndex(visitor, firstList, cp, isIndex);
        return true;
    }
    return WalkBrokenChmTocOrIndex(visitor, parser, cp, isIndex);
}
예제 #3
0
// Walks a <ul> list of a CHM table of contents or index and hands every
// <li> entry to VisitChmIndexItem (when isIndex is set) or
// VisitChmTocItem (otherwise), recursing into nested lists.
//
//   visitor - passed through unchanged to the Visit* helpers
//   list    - the <ul> element to start at; must not be NULL
//   cp      - codepage value passed through to the Visit* helpers
//   isIndex - selects VisitChmIndexItem over VisitChmTocItem
//   level   - nesting depth of the current list; incremented per recursion
static void WalkChmTocOrIndex(EbookTocVisitor *visitor, HtmlElement *list, UINT cp, bool isIndex, int level=1)
{
    CrashIf(!list->NameIs("ul"));

    // some broken ToCs wrap every <li> into its own <ul>, which is why
    // all directly following <ul> siblings are walked at the same level
    while (list && list->NameIs("ul")) {
        for (HtmlElement *item = list->down; item; item = item->next) {
            // anything that isn't an <li> is unexpected and gets ignored
            if (!item->NameIs("li"))
                continue;

            // entries the Visit* helper reports as invalid are skipped
            // together with all their children
            bool accepted;
            if (isIndex)
                accepted = VisitChmIndexItem(visitor, item, cp, level);
            else
                accepted = VisitChmTocItem(visitor, item, cp, level);
            if (!accepted)
                continue;

            HtmlElement *sublist = item->GetChildByName("ul");
            if (!sublist) {
                // some broken ToCs have the <ul> follow right *after* a <li>
                HtmlElement *following = item->next;
                if (following && following->NameIs("ul"))
                    sublist = following;
            }
            if (sublist)
                WalkChmTocOrIndex(visitor, sublist, cp, isIndex, level + 1);
        }
        list = list->next;
    }
}