void EbookController::ExtractPageAnchors() { if (pageAnchorIds || pageAnchorIdxs) { CrashIf(!pageAnchorIds || !pageAnchorIdxs); return; } pageAnchorIds = new WStrVec(); pageAnchorIdxs = new Vec<int>(); ScopedMem<WCHAR> epubPagePath; int fb2TitleCount = 0; size_t len; const char *data = doc.GetHtmlData(len); HtmlPullParser parser(data, len); HtmlToken *tok; while ((tok = parser.Next()) != nullptr && !tok->IsError()) { if (!tok->IsStartTag() && !tok->IsEmptyElementEndTag()) continue; AttrInfo *attr = tok->GetAttrByName("id"); if (!attr && Tag_A == tok->tag && doc.Type() != Doc_Fb2) attr = tok->GetAttrByName("name"); if (attr) { ScopedMem<WCHAR> id(str::conv::FromUtf8(attr->val, attr->valLen)); pageAnchorIds->Append(str::Format(L"%s#%s", epubPagePath ? epubPagePath : L"", id.Get())); pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start())); } // update EPUB page paths and create an anchor per chapter if (Tag_Pagebreak == tok->tag && (attr = tok->GetAttrByName("page_path")) != nullptr && str::StartsWith(attr->val + attr->valLen, "\" page_marker />")) { CrashIf(doc.Type() != Doc_Epub); epubPagePath.Set(str::conv::FromUtf8(attr->val, attr->valLen)); pageAnchorIds->Append(str::Dup(epubPagePath)); pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start())); } // create FB2 title anchors (cf. Fb2Doc::ParseToc) if (Tag_Title == tok->tag && tok->IsStartTag() && Doc_Fb2 == doc.Type()) { ScopedMem<WCHAR> id(str::Format(TEXT(FB2_TOC_ENTRY_MARK) L"%d", ++fb2TitleCount)); pageAnchorIds->Append(id.StealData()); pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start())); } } }
// Return the next parsed page. Returns NULL if finished parsing. // For simplicity of implementation, we parse xml text node or // xml element at a time. This might cause a creation of one // or more pages, which we remeber and send to the caller // if we detect accumulated pages. HtmlPage *HtmlFormatter::Next(bool skipEmptyPages) { for (;;) { // send out all pages accumulated so far while (pagesToSend.Count() > 0) { HtmlPage *ret = pagesToSend.At(0); pagesToSend.RemoveAt(0); pageCount++; if (skipEmptyPages && IsEmptyPage(ret)) delete ret; else return ret; } // we can call ourselves recursively to send outstanding // pages after parsing has finished so this is to detect // that case and really end parsing if (finishedParsing) return NULL; HtmlToken *t = htmlParser->Next(); if (!t || t->IsError()) break; currReparseIdx = t->GetReparsePoint() - htmlParser->Start(); CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser)); if (t->IsTag()) HandleHtmlTag(t); else if (!IgnoreText()) HandleText(t); } // force layout of the last line AutoCloseTags(tagNesting.Count()); FlushCurrLine(true); UpdateLinkBboxes(currPage); pagesToSend.Append(currPage); currPage = NULL; // call ourselves recursively to return accumulated pages finishedParsing = true; return Next(); }