// Attempts to consume the longest named character reference (e.g. "amp;")
// from |source|. On success, appends the decoded code unit(s) to
// |decodedEntity| and returns true. On failure, pushes every consumed
// character back onto |source| and returns false. |notEnoughCharacters| is
// set when the source ran dry while a longer entity could still have
// matched (caller should retry with more data). |cc| is left holding the
// character at the current source position.
static bool consumeNamedEntity(SegmentedString& source, StringBuilder& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter, UChar& cc)
{
    StringBuilder consumedCharacters;
    HTMLEntitySearch entitySearch;
    // Greedily consume while the text so far is still a prefix of some
    // known entity name.
    while (!source.isEmpty()) {
        cc = source.currentChar();
        entitySearch.advance(cc);
        if (!entitySearch.isEntityPrefix())
            break;
        consumedCharacters.append(cc);
        source.advance();
    }
    notEnoughCharacters = source.isEmpty();
    if (notEnoughCharacters) {
        // We can't match an entity because there might be a longer entity
        // that we could match if we had more data.
        unconsumeCharacters(source, consumedCharacters);
        return false;
    }
    if (!entitySearch.mostRecentMatch()) {
        // Nothing we consumed was ever a complete entity.
        unconsumeCharacters(source, consumedCharacters);
        return false;
    }
    if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
        // We've consumed too many characters. We need to walk the
        // source back to the point at which we had consumed an
        // actual entity.
        unconsumeCharacters(source, consumedCharacters);
        consumedCharacters.clear();
        const int length = entitySearch.mostRecentMatch()->length;
        const LChar* reference = entitySearch.mostRecentMatch()->entity;
        // Re-consume exactly the characters of the longest complete match,
        // asserting they agree with the entity table.
        for (int i = 0; i < length; ++i) {
            cc = source.currentChar();
            ASSERT_UNUSED(reference, cc == *reference++);
            consumedCharacters.append(cc);
            source.advance();
            ASSERT(!source.isEmpty());
        }
        cc = source.currentChar();
    }
    // A match without a trailing ';' is rejected when we are inside an
    // attribute value (additionalAllowedCharacter is set) and the next
    // character is alphanumeric or '=' — per the HTML character-reference
    // rules for attributes.
    if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
        || !additionalAllowedCharacter
        || !(isASCIIAlphanumeric(cc) || cc == '=')) {
        decodedEntity.append(entitySearch.mostRecentMatch()->firstValue);
        if (entitySearch.mostRecentMatch()->secondValue)
            decodedEntity.append(entitySearch.mostRecentMatch()->secondValue);
        return true;
    }
    unconsumeCharacters(source, consumedCharacters);
    return false;
}
// Normalizes line endings (CR and CRLF -> LF) from |s| into m_buffer,
// lazily creates the <html>/<body>/<pre> scaffolding on first call (unless
// in view-source mode), then appends the text to the document. The unnamed
// bool parameter is ignored.
void TextTokenizer::write(const SegmentedString& s, bool)
{
    ExceptionCode ec;
    m_dest = m_buffer;
    SegmentedString str = s;
    while (!str.isEmpty()) {
        UChar c = *str;
        if (c == '\r') {
            *m_dest++ = '\n';
            // possibly skip an LF in the case of an CRLF sequence
            m_skipLF = true;
        } else if (c == '\n') {
            if (!m_skipLF)
                *m_dest++ = c;
            else
                m_skipLF = false; // this LF completed a CRLF; swallow it
        } else {
            *m_dest++ = c;
            m_skipLF = false;
        }
        str.advance();
        // Maybe enlarge the buffer
        checkBuffer();
    }
    if (!m_preElement && !inViewSourceMode()) {
        // First non-view-source chunk: build the minimal document skeleton.
        RefPtr<Element> rootElement = m_doc->createElement(htmlTag, false);
        m_doc->appendChild(rootElement, ec);
        RefPtr<Element> body = m_doc->createElement(bodyTag, false);
        rootElement->appendChild(body, ec);
        RefPtr<Element> preElement = m_doc->createElement(preTag, false);
        preElement->setAttribute("style", "word-wrap: break-word; white-space: pre-wrap;", ec);
        body->appendChild(preElement, ec);
        // NOTE(review): m_preElement holds a raw pointer while |preElement|
        // is a RefPtr — presumably the document keeps the node alive; verify.
        m_preElement = preElement.get();
    }
    String string = String(m_buffer, m_dest - m_buffer);
    if (inViewSourceMode()) {
        static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceText(string);
        return;
    }
    unsigned charsLeft = string.length();
    while (charsLeft) {
        // split large text to nodes of manageable size
        // NOTE(review): loop termination assumes createWithLengthLimit
        // decrements |charsLeft| (taken by reference) as it consumes the
        // string — confirm against the Text API, otherwise this spins.
        RefPtr<Text> text = Text::createWithLengthLimit(m_doc, string, charsLeft);
        m_preElement->appendChild(text, ec);
    }
}
bool TextTokenizer::write(const SegmentedString& s, bool appendData) { ExceptionCode ec; m_dest = m_buffer; SegmentedString str = s; while (!str.isEmpty()) { UChar c = *str; if (c == '\r') { *m_dest++ = '\n'; // possibly skip an LF in the case of an CRLF sequence m_skipLF = true; } else if (c == '\n') { if (!m_skipLF) *m_dest++ = c; else m_skipLF = false; } else { *m_dest++ = c; m_skipLF = false; } ++str; // Maybe enlarge the buffer checkBuffer(); } if (!m_preElement) { RefPtr<Element> rootElement = m_doc->createElementNS(xhtmlNamespaceURI, "html", ec); m_doc->appendChild(rootElement, ec); RefPtr<Element> body = m_doc->createElementNS(xhtmlNamespaceURI, "body", ec); rootElement->appendChild(body, ec); RefPtr<Element> preElement = m_doc->createElementNS(xhtmlNamespaceURI, "pre", ec); body->appendChild(preElement, ec); m_preElement = preElement.get(); } String string = String(m_buffer, m_dest - m_buffer); RefPtr<Text> text = m_doc->createTextNode(string); m_preElement->appendChild(text, ec); return false; }
// Appends FTP directory-listing data: normalizes CR/CRLF to LF into
// m_buffer, then parses each complete line (terminated by '\n'), carrying
// any trailing partial line over to the next call in m_carryOver.
// |appendData| is unused. Always returns false.
bool FTPDirectoryTokenizer::write(const SegmentedString& s, bool appendData)
{
    // Make sure we have the table element to append to by loading the template set in the pref, or
    // creating a very basic document with the appropriate table
    if (!m_tableElement) {
        if (!loadDocumentTemplate())
            createBasicDocument();
        ASSERT(m_tableElement);
    }

    bool foundNewLine = false;

    m_dest = m_buffer;
    SegmentedString str = s;
    while (!str.isEmpty()) {
        UChar c = *str;
        if (c == '\r') {
            *m_dest++ = '\n';
            // possibly skip an LF in the case of an CRLF sequence
            m_skipLF = true;
        } else if (c == '\n') {
            if (!m_skipLF)
                *m_dest++ = c;
            else
                m_skipLF = false; // this LF completed a CRLF; swallow it
            foundNewLine = true;
        } else {
            *m_dest++ = c;
            m_skipLF = false;
        }

        // NOTE(review): passing 0 here — presumably the "no line-number
        // tracking" form of SegmentedString::advance; confirm the overload.
        str.advance(0);

        // Maybe enlarge the buffer
        checkBuffer();
    }

    if (!foundNewLine) {
        // No complete line yet; keep the buffer for the next chunk.
        m_dest = m_buffer;
        return false;
    }

    UChar* start = m_buffer;
    UChar* cursor = start;

    // Parse every complete line in the buffer; m_carryOver prepends any
    // partial line left over from the previous write().
    while (cursor < m_dest) {
        if (*cursor == '\n') {
            m_carryOver.append(String(start, cursor - start));
            LOG(FTP, "%s", m_carryOver.ascii().data());
            parseAndAppendOneLine(m_carryOver);
            m_carryOver = String();
            start = ++cursor;
        } else
            cursor++;
    }

    // Copy the partial line we have left to the carryover buffer
    // NOTE(review): the length `cursor - start - 1` drops the final buffered
    // character, and a single-character remainder is discarded entirely —
    // looks like an off-by-one; confirm against upstream before changing.
    if (cursor - start > 1)
        m_carryOver.append(String(start, cursor - start - 1));

    return false;
}
// Consumes an HTML character reference (numeric or named) from |source|
// and appends the decoded UTF-16 code unit(s) to |decodedEntity|. Returns
// true on success; on failure, pushes consumed characters back onto
// |source| and returns false. |notEnoughCharacters| is set when more input
// is needed before a decision can be made. |additionalAllowedCharacter| is
// set when parsing inside an attribute value ('"', '\'' or '>').
bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
{
    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
    ASSERT(!notEnoughCharacters);
    ASSERT(decodedEntity.isEmpty());

    // State machine mirroring the character-reference tokenization states.
    enum EntityState {
        Initial,
        Number,
        MaybeHexLowerCaseX,
        MaybeHexUpperCaseX,
        Hex,
        Decimal,
        Named
    };
    EntityState entityState = Initial;
    UChar32 result = 0; // accumulated numeric code point
    Vector<UChar, 10> consumedCharacters; // for push-back on failure

    while (!source.isEmpty()) {
        UChar cc = *source;
        switch (entityState) {
        case Initial: {
            // Whitespace, '<' and '&' immediately after '&' mean "not a
            // character reference at all".
            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
                return false;
            if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
                return false;
            if (cc == '#') {
                entityState = Number;
                break;
            }
            if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
                entityState = Named;
                continue; // reprocess cc in the Named state
            }
            return false;
        }
        case Number: {
            if (cc == 'x') {
                entityState = MaybeHexLowerCaseX;
                break;
            }
            if (cc == 'X') {
                entityState = MaybeHexUpperCaseX;
                break;
            }
            if (cc >= '0' && cc <= '9') {
                entityState = Decimal;
                continue; // reprocess cc as a decimal digit
            }
            // "&#" followed by a non-digit: put the '#' back and bail.
            source.push('#');
            return false;
        }
        case MaybeHexLowerCaseX: {
            if (isHexDigit(cc)) {
                entityState = Hex;
                continue; // reprocess cc as a hex digit
            }
            // "&#x" with no digit: restore both consumed characters.
            source.push('#');
            source.push('x');
            return false;
        }
        case MaybeHexUpperCaseX: {
            if (isHexDigit(cc)) {
                entityState = Hex;
                continue; // reprocess cc as a hex digit
            }
            // "&#X" with no digit: restore both consumed characters.
            source.push('#');
            source.push('X');
            return false;
        }
        case Hex: {
            if (cc >= '0' && cc <= '9')
                result = result * 16 + cc - '0';
            else if (cc >= 'a' && cc <= 'f')
                result = result * 16 + 10 + cc - 'a';
            else if (cc >= 'A' && cc <= 'F')
                result = result * 16 + 10 + cc - 'A';
            else {
                // End of digits: an optional ';' is consumed, then the
                // (sanitized) code point is emitted.
                if (cc == ';')
                    source.advanceAndASSERT(cc);
                return convertToUTF16(legalEntityFor(result), decodedEntity);
            }
            break;
        }
        case Decimal: {
            if (cc >= '0' && cc <= '9')
                result = result * 10 + cc - '0';
            else {
                // End of digits: consume optional ';' and emit.
                if (cc == ';')
                    source.advanceAndASSERT(cc);
                return convertToUTF16(legalEntityFor(result), decodedEntity);
            }
            break;
        }
        case Named: {
            // Greedily consume while still a prefix of some entity name.
            HTMLEntitySearch entitySearch;
            while (!source.isEmpty()) {
                cc = *source;
                entitySearch.advance(cc);
                if (!entitySearch.isEntityPrefix())
                    break;
                consumedCharacters.append(cc);
                source.advanceAndASSERT(cc);
            }
            notEnoughCharacters = source.isEmpty();
            if (notEnoughCharacters) {
                // We can't match an entity because there might be a longer
                // entity that we could match if we had more data.
                unconsumeCharacters(source, consumedCharacters);
                return false;
            }
            if (!entitySearch.mostRecentMatch()) {
                ASSERT(!entitySearch.currentValue());
                unconsumeCharacters(source, consumedCharacters);
                return false;
            }
            if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
                // We've consumed too many characters. We need to walk the
                // source back to the point at which we had consumed an
                // actual entity.
                unconsumeCharacters(source, consumedCharacters);
                consumedCharacters.clear();
                const int length = entitySearch.mostRecentMatch()->length;
                const UChar* reference = entitySearch.mostRecentMatch()->entity;
                // Re-consume exactly the characters of the longest complete
                // match, asserting they agree with the entity table.
                for (int i = 0; i < length; ++i) {
                    cc = *source;
                    ASSERT_UNUSED(reference, cc == *reference++);
                    consumedCharacters.append(cc);
                    source.advanceAndASSERT(cc);
                    ASSERT(!source.isEmpty());
                }
                cc = *source;
            }
            // A match without a trailing ';' is rejected inside an
            // attribute value when followed by an alphanumeric or '='.
            if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
                || !additionalAllowedCharacter
                || !(isAlphaNumeric(cc) || cc == '=')) {
                return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
            }
            unconsumeCharacters(source, consumedCharacters);
            return false;
        }
        }
        consumedCharacters.append(cc);
        source.advanceAndASSERT(cc);
    }
    // Input ended mid-reference; hand everything back and ask for more.
    ASSERT(source.isEmpty());
    notEnoughCharacters = true;
    unconsumeCharacters(source, consumedCharacters);
    return false;
}