Beispiel #1
0
// Emits draw instructions for a text run, i.e. a stretch of consecutive text
// that shares a single style.
//
// s / end delimit the UTF-8 text (end is exclusive). If resolving html
// entities produces a different string, that string is laid out instead of
// the original range. Text that fits into the rest of the current line is
// emitted as one instruction; otherwise it is emitted piece by piece,
// preferably split at a non-alphanumeric character.
void HtmlFormatter::EmitTextRun(const char* s, const char* end) {
    currReparseIdx = s - htmlParser->Start();
    CrashIf(!ValidReparseIdx(currReparseIdx, htmlParser));
    CrashIf(IsSpaceOnly(s, end) && !preFormatted);

    const char* decoded = ResolveHtmlEntities(s, end, textAllocator);
    // a different pointer means that we no longer look at the parser's source
    const bool usesDecodedCopy = (decoded != s);
    if (usesDecodedCopy) {
        s = decoded;
        end = decoded + str::Len(decoded);
    }

    while (s < end) {
        // the reparse index is only meaningful while s still points into
        // the original source
        if (!usesDecodedCopy) {
            currReparseIdx = s - htmlParser->Start();
        }

        // convert the remaining text into the wide-char scratch buffer and
        // drop soft hyphens, which must not be displayed
        size_t wideLen = str::Utf8ToWcharBuf(s, end - s, buf, dimof(buf));
        wideLen -= str::RemoveChars(buf, L"\xad");
        if (0 == wideLen) {
            return;
        }

        textMeasure->SetFont(CurrFont());
        RectF bbox = textMeasure->Measure(buf, wideLen);
        // presumably starts a new line when the width doesn't fit the current one
        EnsureDx(bbox.Width);
        const bool fitsRestOfLine = bbox.Width <= pageDx - currX;
        if (fitsRestOfLine) {
            // everything that is left can be emitted in one go
            AppendInstr(DrawInstr::Str(s, end - s, bbox, dirRtl));
            currX += bbox.Width;
            return;
        }

        // the text is too wide: determine how many wide chars fit the width
        // that is available on a fresh line
        size_t wideFit = StringLenForWidth(textMeasure, buf, wideLen, pageDx - NewLineX());
        if (iswalnum(buf[wideFit])) {
            // the split would land inside a word: back up to just after the
            // closest preceding non-alphanumeric character, if there is one
            size_t cut = wideFit;
            while (cut > 0 && iswalnum(buf[cut - 1])) {
                --cut;
            }
            if (cut > 0) {
                wideFit = cut;
            }
        }

        textMeasure->SetFont(CurrFont());
        bbox = textMeasure->Measure(buf, wideFit);
        CrashIf(bbox.Width > pageDx);

        // s is UTF-8 while buf holds wide chars, so the number of bytes to
        // consume has to be derived from the values of the wide chars
        // TODO: this usually fails for non-BMP characters (i.e. hardly ever)
        // NOTE(review): soft hyphens were removed from buf above but are still
        // present in s, so this count looks too small whenever the emitted
        // part contained one - confirm
        size_t byteFit = wideFit;
        for (size_t i = 0; i < wideFit; i++) {
            if (buf[i] < 0x80) {
                continue; // encoded as a single byte
            }
            byteFit += (buf[i] < 0x800) ? 1 : 2;
        }

        AppendInstr(DrawInstr::Str(s, byteFit, bbox, dirRtl));
        currX += bbox.Width;
        s += byteFit;
    }
}
Beispiel #2
0
// Returns the next part of the html source, or NULL when finished
// (whitespace-only text after the last tag is not reported).
//
// The result always points at the parser's own currToken, which is
// overwritten by every call. Depending on the input it is set up as
//  - text found between tags (SetText),
//  - a StartTag, EndTag or EmptyElementTag (SetTag), or
//  - an error (SetError) with UnclosedTag or InvalidTag.
// Processing instructions (<?...>), <!...> declarations, comments
// (<!--...-->) and the empty tags <> and </> are skipped silently.
HtmlToken *HtmlPullParser::Next()
{
    // The end-of-input check is repeated on every pass: after skipping a
    // comment, a processing instruction or an empty tag, currPos may have
    // reached end, which is an exclusive bound and must not be dereferenced.
    for (;;) {
        if (currPos >= end)
            return NULL;

        const char *start = currPos;
        // note: the second operand advances currPos past the '<' as a side
        // effect whenever at least one more character follows it
        if (*currPos != '<' || (currPos + 1 < end && !IsValidTagStart(*++currPos))) {
            // this must be text between tags
            if (!SkipUntil(currPos, end, '<') && IsSpaceOnly(start, currPos)) {
                // ignore whitespace after the last tag
                return NULL;
            }
            currToken.SetText(start, currPos);
            return &currToken;
        }

        // '<' - tag begins
        ++start;

        // skip <? and <! (processing instructions and comments)
        if (start < end && (('?' == *start) || ('!' == *start))) {
            if ('!' == *start && start + 2 < end && str::StartsWith(start, "!--")) {
                // a comment only ends at "-->", so search from behind "!--"
                currPos = start + 3;
                if (!SkipUntil(currPos, end, "-->")) {
                    currToken.SetError(HtmlToken::UnclosedTag, start);
                    return &currToken;
                }
                // step over "--" (the '>' is skipped below)
                currPos += 2;
            }
            else if (!SkipUntil(currPos, end, '>')) {
                currToken.SetError(HtmlToken::UnclosedTag, start);
                return &currToken;
            }
            // step over the closing '>' and look for the next token
            ++currPos;
            continue;
        }

        if (!SkipUntilTagEnd(currPos, end)) {
            currToken.SetError(HtmlToken::UnclosedTag, start);
            return &currToken;
        }

        CrashIf('>' != *currPos);
        if (currPos == start || (currPos == start + 1 && *start == '/')) {
            // skip empty tags (</>), because we're lenient
            ++currPos;
            continue;
        }

        // classify the tag by a leading and/or trailing '/'
        if (('/' == *start) && ('/' == currPos[-1])) { // </foo/>
            currToken.SetError(HtmlToken::InvalidTag, start);
        } else if ('/' == *start) { // </foo>
            currToken.SetTag(HtmlToken::EndTag, start + 1, currPos);
        } else if ('/' == currPos[-1]) { // <foo/>
            currToken.SetTag(HtmlToken::EmptyElementTag, start, currPos - 1);
        } else {
            currToken.SetTag(HtmlToken::StartTag, start, currPos);
        }
        // continue behind the closing '>' on the next call
        ++currPos;
        return &currToken;
    }
}