// Walks a <ul> element (plus any <ul> siblings directly following it) and
// hands the <object> child of every <li> to either VisitChmIndexItem or
// VisitChmTocItem, depending on isIndex. Nested lists are visited
// recursively with level increased by one.
//   visitor - receiver passed through to the Visit* helpers
//   list    - the <ul> to start at (asserted via CrashIf)
//   cp      - passed through unchanged to the Visit* helpers
//   isIndex - true: use VisitChmIndexItem; false: use VisitChmTocItem
//   level   - nesting depth reported for the items of this list
static void WalkChmTocOrIndex(EbookTocVisitor *visitor, HtmlElement *list, UINT cp, bool isIndex, int level=1)
{
    CrashIf(Tag_Ul != list->tag);

    // some broken ToCs wrap every <li> into its own <ul>, so a run of
    // consecutive <ul> siblings is processed as if it were a single list
    while (list && Tag_Ul == list->tag) {
        for (HtmlElement *item = list->down; item; item = item->next) {
            // ignore unexpected elements
            if (Tag_Li != item->tag)
                continue;

            // an item without an <object> child counts as incomplete
            HtmlElement *itemObj = item->GetChildByTag(Tag_Object);
            bool accepted = false;
            if (itemObj) {
                accepted = isIndex ? VisitChmIndexItem(visitor, itemObj, cp, level)
                                   : VisitChmTocItem(visitor, itemObj, cp, level);
            }
            // skip incomplete elements and all their children
            if (!accepted)
                continue;

            HtmlElement *subList = item->GetChildByTag(Tag_Ul);
            if (!subList) {
                // some broken ToCs have the <ul> follow right *after* a <li>
                HtmlElement *following = item->next;
                if (following && Tag_Ul == following->tag)
                    subList = following;
            }
            if (subList)
                WalkChmTocOrIndex(visitor, subList, cp, isIndex, level + 1);
        }
        list = list->next;
    }
}
// ignores any <ul><li> list structure and just extracts a linear list of <object type="text/sitemap">...</object> static bool WalkBrokenChmTocOrIndex(EbookTocVisitor* visitor, HtmlParser& p, UINT cp, bool isIndex) { bool hadOne = false; HtmlElement* el = p.FindElementByName("body"); while ((el = p.FindElementByName("object", el)) != nullptr) { AutoFreeW type(el->GetAttribute("type")); if (!str::EqI(type, L"text/sitemap")) continue; if (isIndex) hadOne |= VisitChmIndexItem(visitor, el, cp, 1); else hadOne |= VisitChmTocItem(visitor, el, cp, 1); } return hadOne; }