Esempio n. 1
0
// Returns the next attribute of this token, or NULL when there is none left.
// Every attribute is expected to look like:
//   whitespace | attribute name | = | attribute value
// where the attribute value may be wrapped in single or double quotes.
// The result points at the attrInfo member, which is overwritten by each call.
AttrInfo *HtmlToken::NextAttr()
{
    // resume right after the attribute found by the previous call; when no
    // position has been saved, start at offset nLen into the token text
    const char *pos = nextAttr ? nextAttr : s + nLen;
    const char *stop = s + sLen;

    // --- attribute name ---
    SkipWs(pos, stop);
    if (pos == stop) {
        // nothing left to parse: forget the saved position
        nextAttr = NULL;
        return NULL;
    }
    attrInfo.name = pos;
    SkipName(pos, stop);
    attrInfo.nameLen = pos - attrInfo.name;
    if (attrInfo.nameLen == 0) {
        // SkipName did not advance, so there is no name at this position
        nextAttr = NULL;
        return NULL;
    }

    SkipWs(pos, stop);
    const bool hasValue = (pos != stop) && ('=' == *pos);
    if (!hasValue) {
        // attributes without values get their names as value in HTML
        attrInfo.val = attrInfo.name;
        attrInfo.valLen = attrInfo.nameLen;
        nextAttr = pos;
        return &attrInfo;
    }

    // --- attribute value ---
    ++pos; // step over '='
    SkipWs(pos, stop);
    if (pos == stop) {
        // '=' was the last thing in the token: implicit empty value
        attrInfo.val = pos;
        attrInfo.valLen = 0;
        nextAttr = pos;
        return &attrInfo;
    }

    const char first = *pos;
    if (('\'' == first) || ('\"' == first)) {
        // quoted value: it starts after the opening quote and runs up to
        // the next occurrence of the same quote character
        ++pos;
        attrInfo.val = pos;
        if (!SkipUntil(pos, stop, first)) {
            // SkipUntil reported failure (presumably no closing quote
            // before the end of the token -- confirm against SkipUntil)
            nextAttr = NULL;
            return NULL;
        }
        attrInfo.valLen = pos - attrInfo.val;
        ++pos; // step over the closing quote
    } else {
        // unquoted value: SkipNonWs determines where it ends
        attrInfo.val = pos;
        SkipNonWs(pos, stop);
        attrInfo.valLen = pos - attrInfo.val;
    }
    nextAttr = pos;
    return &attrInfo;
}
Esempio n. 2
0
// Breaks the text in [s, s + sLen) into runs and emits them.
//
// When preFormatted is set, whitespace is kept as-is: the text is split at
// '\n' (a directly preceding '\r' is treated as part of the line break),
// each line is emitted via EmitTextRun() and every line break is handled
// via HandleTagBr().
//
// Otherwise the text is split into alternating whitespace / non-whitespace
// chunks: each whitespace chunk is emitted as a single elastic space and
// each non-whitespace chunk as a text run.
//
// currReparseIdx is updated to the offset (relative to htmlParser->Start())
// of the chunk about to be emitted.
void HtmlFormatter::HandleText(const char* s, size_t sLen) {
    const char* curr = s;
    const char* end = s + sLen;

    if (preFormatted) {
        // don't collapse whitespace and respect text newlines
        while (curr < end) {
            const char* text = curr;
            currReparseIdx = curr - htmlParser->Start();
            // skip to the next newline
            for (; curr < end && *curr != '\n'; curr++)
                ;
            // for "\r\n" step back so that the '\r' isn't part of the run
            if (curr < end && curr > text && *(curr - 1) == '\r')
                curr--;
            EmitTextRun(text, curr);
            // curr == end when the text doesn't finish with a newline; in
            // that case *curr lies outside of the given range and must not
            // be read (it could otherwise cause a bogus line break and
            // move curr beyond end)
            if (curr < end && ('\n' == *curr || '\r' == *curr)) {
                // skip "\r\n" (2 chars) or a lone '\n' (1 char)
                curr += '\r' == *curr ? 2 : 1;
                HandleTagBr();
            }
        }
        return;
    }

    // break text into runs i.e. chunks that are either all
    // whitespace or all non-whitespace
    while (curr < end) {
        // collapse multiple, consecutive white-spaces into a single space
        currReparseIdx = curr - htmlParser->Start();
        bool skipped = SkipWs(curr, end);
        if (skipped)
            EmitElasticSpace();

        const char* text = curr;
        currReparseIdx = curr - htmlParser->Start();
        skipped = SkipNonWs(curr, end);
        if (skipped)
            EmitTextRun(text, curr);
    }
}