// Parser test: a <ul> that contains stray, unclosed and nested elements.
// Checks the element/attribute counts reported by the parser and the shape
// of the tree reachable from the returned root element.
static void HtmlParser06()
{
    HtmlParser p;
    HtmlElement *root = p.Parse("<ul><p>ignore<li><br><meta><li><ol><li></ul><dropme>");
    assert(9 == p.ElementsCount());
    assert(0 == p.TotalAttrCount());

    // the returned root is the <ul> and it has no following sibling
    assert(str::Eq("ul", root->name));
    assert(!root->next);

    // first <li>: its children are exactly <br> followed by <meta>
    HtmlElement *el = root->GetChildByName("li");
    assert(el);
    assert(str::Eq(el->down->name, "br"));
    assert(str::Eq(el->down->next->name, "meta"));
    assert(!el->down->next->next);

    // second <li> (index 1): nothing follows it inside the <ul>
    el = root->GetChildByName("li", 1);
    assert(el);
    assert(!el->next);

    // the nested <ol> must exist before it is dereferenced; verify it the
    // same way as the two lookups above so that a missing element shows up
    // as an assertion failure instead of a null pointer dereference
    el = el->GetChildByName("ol");
    assert(el);
    assert(!el->next);

    // the <ol> holds a single, childless <li>
    assert(str::Eq(el->down->name, "li"));
    assert(!el->down->down);
}
// Recursively walks a list made of <ul>/<li> elements and reports every <li>
// through VisitChmIndexItem (when isIndex is true) or VisitChmTocItem.
//
//   visitor - forwarded unchanged to the item visiting function
//   list    - first <ul> element to walk; must not be NULL
//   cp      - forwarded unchanged to the item visiting function
//             (presumably the code page of the document's text - confirm)
//   isIndex - selects VisitChmIndexItem over VisitChmTocItem
//   level   - nesting depth passed along with each item; 1 for the outermost
//             list, incremented by one for every nested list
static void WalkChmTocOrIndex(EbookTocVisitor *visitor, HtmlElement *list, UINT cp, bool isIndex, int level=1)
{
    CrashIf(!list->NameIs("ul"));

    // some broken ToCs wrap every <li> into its own <ul>, which is why all
    // directly following <ul> siblings are walked at the same level
    HtmlElement *ul = list;
    while (ul && ul->NameIs("ul")) {
        for (HtmlElement *item = ul->down; item; item = item->next) {
            // ignore unexpected elements
            if (!item->NameIs("li"))
                continue;

            bool accepted = isIndex ? VisitChmIndexItem(visitor, item, cp, level)
                                    : VisitChmTocItem(visitor, item, cp, level);
            // incomplete elements are skipped together with all their children
            if (accepted) {
                HtmlElement *sublist = item->GetChildByName("ul");
                if (!sublist) {
                    // some broken ToCs have the <ul> follow right *after* a <li>
                    HtmlElement *follower = item->next;
                    if (follower && follower->NameIs("ul"))
                        sublist = follower;
                }
                if (sublist)
                    WalkChmTocOrIndex(visitor, sublist, cp, isIndex, level + 1);
            }
        }
        ul = ul->next;
    }
}