Example #1
0
// Decides whether the already open tag prev has to be closed implicitly
// before the tag curr is opened, i.e. returns true if prev can't contain curr.
// curr must not be an inline tag (enforced with CrashIf).
static bool AutoCloseOnOpen(HtmlTag curr, HtmlTag prev) {
    CrashIf(IsInlineTag(curr));

    // two tags are decided independently of what is currently open
    switch (curr) {
        case Tag_Body:
            // a new <body> always starts afresh
            return true;
        case Tag_Div:
            // <div>s are allowed inside inline tags
            // (e.g. <i><div>...</div></i> from pg12.mobi)
            return false;
        default:
            break;
    }

    // definition list items close each other
    if (Tag_Dd == prev || Tag_Dt == prev)
        return Tag_Dd == curr || Tag_Dt == curr;

    // a heading is closed by the next heading
    if (Tag_H1 == prev || Tag_H2 == prev || Tag_H3 == prev ||
        Tag_H4 == prev || Tag_H5 == prev || Tag_H6 == prev)
        return IsTagH(curr);

    // list headers/items close each other
    if (Tag_Lh == prev || Tag_Li == prev)
        return Tag_Lh == curr || Tag_Li == curr;

    // <p> can't contain any block-level elements
    if (Tag_P == prev)
        return true;

    // table cells and rows are only closed by a new row
    if (Tag_Td == prev || Tag_Tr == prev)
        return Tag_Tr == curr;

    // any other open tag is closed only if it is an inline tag
    return IsInlineTag(prev);
}
Example #2
0
// Keeps the tagNesting stack in sync with the tag token t.
// Start tags are pushed (after auto-closing open tags that can't contain
// them), end tags pop their matching entry together with everything that
// was opened after it. t must be a tag token (enforced with CrashIf).
void HtmlFormatter::UpdateTagNesting(HtmlToken* t) {
    CrashIf(!t->IsTag());

    // nothing to track when nesting is frozen ...
    if (keepTagNesting)
        return;
    // ... or for tags that never stay open
    if (Tag_NotFound == t->tag || t->IsEmptyElementEndTag() || IsTagSelfClosing(t->tag))
        return;

    const bool inlineTag = IsInlineTag(t->tag);
    const bool opening = t->IsStartTag();

    // number of stack entries (counted from the bottom) that remain open
    size_t keep = tagNesting.size();

    if (opening) {
        // inline tags never force anything closed
        if (inlineTag) {
            tagNesting.Push(t->tag);
            return;
        }
        // walk down past every open tag that can't contain this new tag
        while (keep > 0 && AutoCloseOnOpen(t->tag, tagNesting.at(keep - 1)))
            keep--;
    } else {
        // walk down to the matching open tag; for an inline end tag the
        // search stops at the first non-inline tag
        while (keep > 0) {
            HtmlTag open = tagNesting.at(keep - 1);
            if (inlineTag && !IsInlineTag(open))
                break;
            if (open == t->tag)
                break;
            keep--;
        }
        // ignore end tags without a reachable matching start tag
        if (0 == keep || tagNesting.at(keep - 1) != t->tag)
            return;
    }

    AutoCloseTags(tagNesting.size() - keep);

    if (opening) {
        tagNesting.Push(t->tag);
        return;
    }

    // the matching start tag is expected to be on top of the stack now
    CrashIf(!t->IsEndTag() || t->tag != tagNesting.Last());
    tagNesting.Pop();
}
// Sanity checks for the tag classification helpers IsInlineTag and
// IsTagSelfClosing, each with a few positive cases and one negative case.
static void Test01() {
    // <a>, <u> and <span> must be classified as inline, <p> must not
    utassert(IsInlineTag(Tag_A));
    utassert(IsInlineTag(Tag_U));
    utassert(IsInlineTag(Tag_Span));
    utassert(!IsInlineTag(Tag_P));
    // <area>, <link> and <param> must be classified as self-closing, <p> must not
    utassert(IsTagSelfClosing(Tag_Area));
    utassert(IsTagSelfClosing(Tag_Link));
    utassert(IsTagSelfClosing(Tag_Param));
    utassert(!IsTagSelfClosing(Tag_P));
}
Example #4
0
// Updates the reading direction from the "dir" attribute of the tag token t.
// Both the formatter's dirRtl and the current style's dirRtl are set to
// whether the attribute's value is "RTL" (as decided by AttrInfo::ValIs).
void HtmlFormatter::HandleDirAttr(HtmlToken* t) {
    // reading direction changes are only applied to the start tags
    // of non-inline (block) elements (for now)
    if (!t->IsStartTag() || IsInlineTag(t->tag))
        return;

    AttrInfo* dirAttr = t->GetAttrByName("dir");
    if (!dirAttr)
        return;

    dirRtl = CurrStyle()->dirRtl = dirAttr->ValIs("RTL");
}
Example #5
0
// Extracts the text content from the HTML data returned by doc->GetTextData().
// Text inside <head>, <script> and <style> is skipped, every text run is
// whitespace-trimmed, entity-resolved and followed by a single space, and the
// end tag of a non-inline element turns a trailing space into "\r\n".
// Returns the result of converting the collected UTF-8 text with
// str::conv::FromUtf8.
static WCHAR *ExtractHtmlText(EpubDoc *doc)
{
    size_t dataLen;
    const char *html = doc->GetTextData(&dataLen);

    str::Str<char> plain(dataLen / 2);
    HtmlPullParser parser(html, dataLen);
    // tags that are currently open, innermost last
    Vec<HtmlTag> openTags;

    for (HtmlToken *tok = parser.Next(); tok != NULL && !tok->IsError(); tok = parser.Next()) {
        const bool isVisibleText = tok->IsText() &&
                                   !openTags.Contains(Tag_Head) &&
                                   !openTags.Contains(Tag_Script) &&
                                   !openTags.Contains(Tag_Style);
        if (isVisibleText) {
            // strip leading whitespace (TODO: also normalize within text?)
            while (tok->sLen > 0 && str::IsWs(tok->s[0])) {
                tok->sLen--;
                tok->s++;
            }
            // strip trailing whitespace
            while (tok->sLen > 0 && str::IsWs(tok->s[tok->sLen - 1])) {
                tok->sLen--;
            }
            if (tok->sLen > 0) {
                plain.AppendAndFree(ResolveHtmlEntities(tok->s, tok->sLen));
                plain.Append(' ');
            }
            continue;
        }

        if (tok->IsStartTag()) {
            // TODO: force-close tags similar to HtmlFormatter.cpp's AutoCloseOnOpen?
            if (!IsTagSelfClosing(tok->tag))
                openTags.Append(tok->tag);
            continue;
        }

        if (!tok->IsEndTag())
            continue;

        // the end of a non-inline element ends the current line
        if (!IsInlineTag(tok->tag) && plain.Size() > 0 && plain.Last() == ' ') {
            plain.Pop();
            plain.Append("\r\n");
        }

        // if the closed tag is open anywhere on the stack, drop every tag
        // opened after it and then the tag itself; end tags without a
        // matching open tag leave the stack untouched
        const HtmlTag closing = tok->tag;
        if (openTags.Contains(closing)) {
            while (openTags.Last() != closing)
                openTags.Pop();
            openTags.Pop();
        }
    }

    return str::conv::FromUtf8(plain.Get());
}