Esempio n. 1
0
void EbookController::ExtractPageAnchors()
{
    if (pageAnchorIds || pageAnchorIdxs) {
        CrashIf(!pageAnchorIds || !pageAnchorIdxs);
        return;
    }

    pageAnchorIds = new WStrVec();
    pageAnchorIdxs = new Vec<int>();

    ScopedMem<WCHAR> epubPagePath;
    int fb2TitleCount = 0;
    size_t len;
    const char *data = doc.GetHtmlData(len);
    HtmlPullParser parser(data, len);
    HtmlToken *tok;
    while ((tok = parser.Next()) != nullptr && !tok->IsError()) {
        if (!tok->IsStartTag() && !tok->IsEmptyElementEndTag())
            continue;
        AttrInfo *attr = tok->GetAttrByName("id");
        if (!attr && Tag_A == tok->tag && doc.Type() != Doc_Fb2)
            attr = tok->GetAttrByName("name");
        if (attr) {
            ScopedMem<WCHAR> id(str::conv::FromUtf8(attr->val, attr->valLen));
            pageAnchorIds->Append(str::Format(L"%s#%s", epubPagePath ? epubPagePath : L"", id.Get()));
            pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
        }
        // update EPUB page paths and create an anchor per chapter
        if (Tag_Pagebreak == tok->tag &&
            (attr = tok->GetAttrByName("page_path")) != nullptr &&
            str::StartsWith(attr->val + attr->valLen, "\" page_marker />")) {
            CrashIf(doc.Type() != Doc_Epub);
            epubPagePath.Set(str::conv::FromUtf8(attr->val, attr->valLen));
            pageAnchorIds->Append(str::Dup(epubPagePath));
            pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
        }
        // create FB2 title anchors (cf. Fb2Doc::ParseToc)
        if (Tag_Title == tok->tag && tok->IsStartTag() && Doc_Fb2 == doc.Type()) {
            ScopedMem<WCHAR> id(str::Format(TEXT(FB2_TOC_ENTRY_MARK) L"%d", ++fb2TitleCount));
            pageAnchorIds->Append(id.StealData());
            pageAnchorIdxs->Append((int)(tok->GetReparsePoint() - parser.Start()));
        }
    }
}
// Return the next parsed page. Returns nullptr if finished parsing.
// For simplicity of implementation, we parse one xml text node or
// xml element at a time. This might cause the creation of one
// or more pages, which we remember and send to the caller
// if we detect accumulated pages.
// If skipEmptyPages is true, pages for which IsEmptyPage() holds are
// deleted instead of being returned.
HtmlPage *HtmlFormatter::Next(bool skipEmptyPages)
{
    for (;;)
    {
        // send out all pages accumulated so far
        while (pagesToSend.Count() > 0) {
            HtmlPage *ret = pagesToSend.At(0);
            pagesToSend.RemoveAt(0);
            // note: skipped empty pages are counted as well
            pageCount++;
            if (skipEmptyPages && IsEmptyPage(ret))
                delete ret;
            else
                return ret;
        }
        // we can call ourselves recursively to send outstanding
        // pages after parsing has finished so this is to detect
        // that case and really end parsing
        if (finishedParsing)
            return nullptr;
        HtmlToken *t = htmlParser->Next();
        if (!t || t->IsError())
            break;

        currReparseIdx = t->GetReparsePoint() - htmlParser->Start();
        CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser));
        if (t->IsTag())
            HandleHtmlTag(t);
        else if (!IgnoreText())
            HandleText(t);
    }
    // force layout of the last line
    AutoCloseTags(tagNesting.Count());
    FlushCurrLine(true);

    UpdateLinkBboxes(currPage);
    pagesToSend.Append(currPage);
    currPage = nullptr;
    // call ourselves recursively to return accumulated pages; forward
    // skipEmptyPages so that the trailing pages are filtered the same way
    // as all the earlier ones instead of with the declaration's default
    finishedParsing = true;
    return Next(skipEmptyPages);
}