Ejemplo n.º 1
0
// Handles a single tag token for EPUB content. While inside an element
// carrying a "hidden" attribute, tags only update the nesting stack and
// are not forwarded; otherwise image tags go to HandleTagSvgImage and
// everything else goes to the base HtmlFormatter implementation.
void EpubFormatter::HandleHtmlTag(HtmlToken *t)
{
    CrashIf(!t->IsTag());

    // the end tag that closes the hidden element: leave hidden mode
    if (hiddenDepth && t->IsEndTag()) {
        // Last() is only consulted once the depth matches (same
        // short-circuit order as a single && chain)
        if (tagNesting.Count() == hiddenDepth && t->tag == tagNesting.Last()) {
            hiddenDepth = 0;
            UpdateTagNesting(t);
            return;
        }
    }

    // a start tag with a "hidden" attribute: remember the nesting depth
    // this element will occupy
    if (0 == hiddenDepth && t->IsStartTag()) {
        if (t->GetAttrByName("hidden")) {
            hiddenDepth = tagNesting.Count() + 1;
        }
    }

    // inside hidden content: track nesting only
    if (hiddenDepth > 0) {
        UpdateTagNesting(t);
        return;
    }

    if (Tag_Image == t->tag || Tag_Svg_Image == t->tag) {
        HandleTagSvgImage(t);
        return;
    }

    HtmlFormatter::HandleHtmlTag(t);
}
Ejemplo n.º 2
0
// Handles a <style> start tag: pulls tokens from htmlParser until the
// matching </style> end tag and passes the text in between to
// ParseStyleSheet. Does nothing for non-start tags, for a "type"
// attribute other than "text/css", or when no closing tag is found.
void HtmlFormatter::HandleTagStyle(HtmlToken* t) {
    if (!t->IsStartTag()) {
        return;
    }

    // a missing type attribute is accepted; a present one must be text/css
    AttrInfo* typeAttr = t->GetAttrByName("type");
    if (typeAttr && !typeAttr->ValIs("text/css")) {
        return;
    }

    // one past the end of the tag's own text plus one more character
    // (presumably the closing '>' - TODO confirm against HtmlToken)
    const char* cssBegin = t->s + t->sLen + 1;

    // advance until end of input, an error token or </style>
    for (;;) {
        if (!t || t->IsError()) {
            break;
        }
        if (t->IsEndTag() && t->tag == Tag_Style) {
            break;
        }
        t = htmlParser->Next();
    }

    const bool foundClosingTag = t && t->IsEndTag() && Tag_Style == t->tag;
    if (!foundClosingTag) {
        return;
    }

    // step back two characters from the end tag's text
    // (presumably over the leading "</" - TODO confirm)
    const char* cssEnd = t->s - 2;
    CrashIf(cssBegin > cssEnd);
    ParseStyleSheet(cssBegin, cssEnd - cssBegin);
    UpdateTagNesting(t);
}
Ejemplo n.º 3
0
// Returns the next part of the html (a text run, a tag or an error token)
// or NULL if finished. The result points at currToken, which is reused by
// every call, so it is only valid until Next() is called again.
// Comments, processing instructions and empty tags (<> and </>) are
// skipped silently.
HtmlToken *HtmlPullParser::Next()
{
    // A loop (rather than jumping back past the end-of-input check) makes
    // sure currPos is never dereferenced once it has reached end, e.g.
    // when the document ends with a comment or an empty tag.
    while (currPos < end) {
        const char *start = currPos;
        if (*currPos != '<') {
            // this must be text between tags
            if (!SkipUntil(currPos, end, '<') && IsSpaceOnly(start, currPos)) {
                // ignore whitespace after the last tag
                return NULL;
            }
            currToken.SetText(start, currPos);
            return &currToken;
        }

        // '<' - tag begins
        ++start;

        // skip <? and <! (processing instructions and comments);
        // start == end when '<' is the very last character, so guard
        // the dereference
        if (start < end && (('?' == *start) || ('!' == *start))) {
            if ('!' == *start && start + 2 < end && str::StartsWith(start, "!--")) {
                // comment: resume the search behind the opening "<!--"
                currPos = start + 2;
                if (!SkipUntil(currPos, end, "-->")) {
                    currToken.SetError(HtmlToken::UnclosedTag, start);
                    return &currToken;
                }
                // move from the first '-' of "-->" to its '>'
                currPos += 2;
            }
            else if (!SkipUntil(currPos, end, '>')) {
                currToken.SetError(HtmlToken::UnclosedTag, start);
                return &currToken;
            }
            // step over '>' and look for the next token
            ++currPos;
            continue;
        }

        if (!SkipUntilTagEnd(currPos, end)) {
            currToken.SetError(HtmlToken::UnclosedTag, start);
            return &currToken;
        }

        CrashIf('>' != *currPos);
        if (currPos == start || (currPos == start + 1 && *start == '/')) {
            // skip empty tags (<> and </>), because we're lenient
            ++currPos;
            continue;
        }

        if (('/' == *start) && ('/' == currPos[-1])) { // </foo/>
            currToken.SetError(HtmlToken::InvalidTag, start);
        } else if ('/' == *start) { // </foo>
            currToken.SetTag(HtmlToken::EndTag, start + 1, currPos);
        } else if ('/' == currPos[-1]) { // <foo/>
            currToken.SetTag(HtmlToken::EmptyElementTag, start, currPos - 1);
        } else {
            currToken.SetTag(HtmlToken::StartTag, start, currPos);
        }
        ++currPos;
        UpdateTagNesting(&tagNesting, &currToken);
        return &currToken;
    }

    // input exhausted (possibly after skipping trailing comments etc.)
    return NULL;
}
Ejemplo n.º 4
0
// Dispatches a tag token to the handler for its tag type. The nesting
// stack is updated first; after the tag-specific handling, the anchor
// and reading-direction attributes are processed for every tag.
void HtmlFormatter::HandleHtmlTag(HtmlToken* t) {
    CrashIf(!t->IsTag());

    UpdateTagNesting(t);

    HtmlTag which = t->tag;
    if (which == Tag_P) {
        HandleTagP(t);
    } else if (which == Tag_Hr) {
        EmitHr();
    } else if (which == Tag_B || which == Tag_Strong) {
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
    } else if (which == Tag_I || which == Tag_Em) {
        ChangeFontStyle(FontStyleItalic, t->IsStartTag());
    } else if (which == Tag_U) {
        // underline is left alone while currLinkIdx is set
        if (!currLinkIdx) {
            ChangeFontStyle(FontStyleUnderline, t->IsStartTag());
        }
    } else if (which == Tag_Strike) {
        ChangeFontStyle(FontStyleStrikeout, t->IsStartTag());
    } else if (which == Tag_Br) {
        HandleTagBr();
    } else if (which == Tag_Font) {
        HandleTagFont(t);
    } else if (which == Tag_A) {
        HandleTagA(t);
    } else if (which == Tag_Blockquote) {
        // TODO: implement me (handled like a list for now)
        HandleTagList(t);
    } else if (which == Tag_Div) {
        // TODO: implement me (handled like a paragraph for now)
        HandleTagP(t, true);
    } else if (IsTagH(which)) {
        HandleTagHx(t);
    } else if (which == Tag_Sup || which == Tag_Sub || which == Tag_Span) {
        // TODO: implement me
    } else if (which == Tag_Center) {
        HandleTagP(t, true);
        if (!t->IsEndTag()) {
            CurrStyle()->align = Align_Center;
        }
    } else if (which == Tag_Ul || which == Tag_Ol || which == Tag_Dd || which == Tag_Table) {
        // TODO (dd): separate indentation from list depth
        // TODO (table): implement me
        HandleTagList(t);
    } else if (which == Tag_Li) {
        // TODO: display bullet/number
        FlushCurrLine(true);
    } else if (which == Tag_Dt) {
        FlushCurrLine(true);
        ChangeFontStyle(FontStyleBold, t->IsStartTag());
        if (t->IsStartTag()) {
            CurrStyle()->align = Align_Left;
        }
    } else if (which == Tag_Tr) {
        // display tables row-by-row for now
        FlushCurrLine(true);
        if (t->IsStartTag()) {
            SetAlignment(Align_Left);
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (which == Tag_Code || which == Tag_Tt) {
        // switch to Courier New on open, undo on close
        if (t->IsStartTag()) {
            SetFont(L"Courier New", (FontStyle)CurrFont()->GetStyle());
        } else if (t->IsEndTag()) {
            RevertStyleChange();
        }
    } else if (which == Tag_Pre) {
        HandleTagPre(t);
    } else if (which == Tag_Img) {
        HandleTagImg(t);
    } else if (which == Tag_Pagebreak) {
        // not really a HTML tag, but many ebook
        // formats use it
        HandleTagPagebreak(t);
    } else if (which == Tag_Link) {
        HandleTagLink(t);
    } else if (which == Tag_Style) {
        HandleTagStyle(t);
    }
    // all remaining tags are ignored
    // TODO: temporary debugging
    // lf("unhandled tag: %d", tag);

    // any tag could contain anchor information
    HandleAnchorAttr(t);
    // any tag could contain a reading direction change
    HandleDirAttr(t);
}