// Recursively walks a <ul> element of a CHM table of contents or index and
// reports every usable <li> entry to the visitor.
//
//   visitor - forwarded unchanged to VisitChmIndexItem / VisitChmTocItem
//   list    - the <ul> element to start from (asserted via CrashIf)
//   cp      - forwarded unchanged to the item visitors
//             (NOTE(review): presumably a code page - confirm)
//   isIndex - true: items go through VisitChmIndexItem,
//             false: items go through VisitChmTocItem
//   level   - nesting depth handed to the item visitors; each nested <ul>
//             is walked with level + 1
static void WalkChmTocOrIndex(EbookTocVisitor *visitor, HtmlElement *list, UINT cp, bool isIndex, int level=1)
{
    CrashIf(Tag_Ul != list->tag);

    // some broken ToCs wrap every <li> into its own <ul>, so keep going
    // through all directly following <ul> siblings as well
    while (list && Tag_Ul == list->tag) {
        for (HtmlElement *item = list->down; item; item = item->next) {
            // anything that isn't a <li> is unexpected and ignored
            if (Tag_Li != item->tag)
                continue;

            // an item is described by its <object> child; without one
            // (or if the visitor rejects it) the item is incomplete
            HtmlElement *itemObj = item->GetChildByTag(Tag_Object);
            bool accepted = false;
            if (itemObj) {
                if (isIndex)
                    accepted = VisitChmIndexItem(visitor, itemObj, cp, level);
                else
                    accepted = VisitChmTocItem(visitor, itemObj, cp, level);
            }
            // incomplete items are skipped together with all their children
            if (!accepted)
                continue;

            HtmlElement *subList = item->GetChildByTag(Tag_Ul);
            if (!subList) {
                // some broken ToCs have the <ul> follow right *after* a <li>
                HtmlElement *following = item->next;
                if (following && Tag_Ul == following->tag)
                    subList = following;
            }
            if (subList)
                WalkChmTocOrIndex(visitor, subList, cp, isIndex, level + 1);
        }
        list = list->next;
    }
}
static void HtmlParser06() { HtmlParser p; HtmlElement *root = p.Parse("<ul><p>ignore<li><br><meta><li><ol><li></ul><dropme>"); utassert(9 == p.ElementsCount()); utassert(0 == p.TotalAttrCount()); utassert(root->NameIs("ul")); utassert(!root->next); HtmlElement *el = root->GetChildByTag(Tag_Li); utassert(el); utassert(el->down->NameIs("br")); utassert(el->down->next->NameIs("meta")); utassert(!el->down->next->next); el = root->GetChildByTag(Tag_Li, 1); utassert(el); utassert(!el->next); el = el->GetChildByTag(Tag_Ol); utassert(!el->next); utassert(el->down->NameIs("li")); utassert(!el->down->down); }