void DOMTokenList::setValue(const String& value) { // Clear tokens but not capacity. m_tokens.shrink(0); HashSet<AtomicString> addedTokens; // https://dom.spec.whatwg.org/#ordered%20sets for (unsigned start = 0; ; ) { while (start < value.length() && isHTMLSpace(value[start])) ++start; if (start >= value.length()) break; unsigned end = start + 1; while (end < value.length() && !isHTMLSpace(value[end])) ++end; AtomicString token = value.substring(start, end - start); if (!addedTokens.contains(token)) { m_tokens.append(token); addedTokens.add(token); } start = end + 1; } m_tokens.shrinkToFit(); m_cachedValue = nullAtom; }
// Parses a sandbox attribute value into a set of sandbox flags.
//
// Starts from SandboxAll and clears the flag(s) relaxed by every recognised
// "allow-*" token (matched case-insensitively). Unrecognised tokens are
// collected, single-quoted and comma-separated, into
// |invalidTokensErrorMessage|; that out-parameter is only written when at
// least one unrecognised token was seen.
SandboxFlags SecurityContext::parseSandboxPolicy(const String& policy, String& invalidTokensErrorMessage)
{
    // http://www.w3.org/TR/html5/the-iframe-element.html#attr-iframe-sandbox
    // Parse the unordered set of unique space-separated tokens.
    SandboxFlags flags = SandboxAll;
    const UChar* chars = policy.characters();
    const unsigned policyLength = policy.length();

    StringBuilder invalidTokens;
    unsigned invalidTokenCount = 0;

    unsigned cursor = 0;
    for (;;) {
        // Skip the whitespace run in front of the next token.
        while (cursor < policyLength && isHTMLSpace(chars[cursor]))
            ++cursor;
        if (cursor >= policyLength)
            break;

        // chars[cursor] is a non-space character, so the token is at least
        // one character long.
        const unsigned tokenStart = cursor;
        unsigned tokenEnd = tokenStart + 1;
        while (tokenEnd < policyLength && !isHTMLSpace(chars[tokenEnd]))
            ++tokenEnd;

        // Resume just past |tokenEnd|, which is either a space or the end.
        cursor = tokenEnd + 1;

        // Turn off the corresponding sandbox flag if it's set as "allowed".
        const String token = policy.substring(tokenStart, tokenEnd - tokenStart);
        if (equalIgnoringCase(token, "allow-same-origin")) {
            flags &= ~SandboxOrigin;
        } else if (equalIgnoringCase(token, "allow-forms")) {
            flags &= ~SandboxForms;
        } else if (equalIgnoringCase(token, "allow-scripts")) {
            flags &= ~SandboxScripts;
            flags &= ~SandboxAutomaticFeatures;
        } else if (equalIgnoringCase(token, "allow-top-navigation")) {
            flags &= ~SandboxTopNavigation;
        } else if (equalIgnoringCase(token, "allow-popups")) {
            flags &= ~SandboxPopups;
        } else if (equalIgnoringCase(token, "allow-pointer-lock")) {
            flags &= ~SandboxPointerLock;
        } else {
            // Unknown token: append it, quoted, to the list of offenders.
            if (invalidTokenCount)
                invalidTokens.appendLiteral(", '");
            else
                invalidTokens.append('\'');
            invalidTokens.append(token);
            invalidTokens.append('\'');
            ++invalidTokenCount;
        }
    }

    if (!invalidTokenCount)
        return flags;

    // Finish the sentence with singular or plural wording.
    if (invalidTokenCount > 1)
        invalidTokens.appendLiteral(" are invalid sandbox flags.");
    else
        invalidTokens.appendLiteral(" is an invalid sandbox flag.");
    invalidTokensErrorMessage = invalidTokens.toString();

    return flags;
}
static void tokenizeDescriptors(const CharType* attributeStart, const CharType*& position, const CharType* attributeEnd, Vector<DescriptorToken>& descriptors) { DescriptorTokenizerState state = Start; const CharType* descriptorsStart = position; const CharType* currentDescriptorStart = descriptorsStart; while (true) { switch (state) { case Start: if (isEOF(position, attributeEnd)) { appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors); return; } if (isComma(*position)) { appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors); ++position; return; } if (isHTMLSpace(*position)) { appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors); currentDescriptorStart = position + 1; state = AfterToken; } else if (*position == '(') { appendCharacter(currentDescriptorStart, position); state = InParenthesis; } else { appendCharacter(currentDescriptorStart, position); } break; case InParenthesis: if (isEOF(position, attributeEnd)) { appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors); return; } if (*position == ')') { appendCharacter(currentDescriptorStart, position); state = Start; } else { appendCharacter(currentDescriptorStart, position); } break; case AfterToken: if (isEOF(position, attributeEnd)) return; if (!isHTMLSpace(*position)) { state = Start; currentDescriptorStart = position; --position; } break; } ++position; } }
void CSSTokenizerInputStream::advanceUntilNonWhitespace() { // Using HTML space here rather than CSS space since we don't do preprocessing if (m_string->is8Bit()) { const LChar* characters = m_string->characters8(); while (m_offset < m_stringLength && isHTMLSpace(characters[m_offset])) ++m_offset; } else { const UChar* characters = m_string->characters16(); while (m_offset < m_stringLength && isHTMLSpace(characters[m_offset])) ++m_offset; } }
void SpaceSplitStringData::createVector() { ASSERT(!m_createdVector); ASSERT(m_vector.isEmpty()); if (m_shouldFoldCase && hasNonASCIIOrUpper(m_string)) m_string = m_string.foldCase(); const UChar* characters = m_string.characters(); unsigned length = m_string.length(); unsigned start = 0; while (true) { while (start < length && isHTMLSpace(characters[start])) ++start; if (start >= length) break; unsigned end = start + 1; while (end < length && isNotHTMLSpace(characters[end])) ++end; m_vector.append(AtomicString(characters + start, end - start)); start = end + 1; } m_string = String(); m_createdVector = true; }
// Returns a copy of |input| with every whitespace-delimited token that occurs
// in |tokens| removed. Whitespace surrounding a removed token is collapsed to
// a single space, or dropped when the token was first or last in the output.
String DOMTokenList::removeTokens(const AtomicString& input, const Vector<String>& tokens)
{
    // Algorithm defined at http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#remove-a-token-from-a-string
    // New spec is at http://dom.spec.whatwg.org/#remove-a-token-from-a-string
    const unsigned inputLength = input.length();

    StringBuilder result; // Step 3
    result.reserveCapacity(inputLength);

    unsigned index = 0; // Step 4

    // Step 5
    while (index < inputLength) {
        // Step 6: whitespace is copied through unchanged.
        if (isHTMLSpace(input[index])) {
            result.append(input[index]); // 6.1
            ++index; // 6.2
            continue; // 6.3
        }

        // Step 7: collect the run of non-space characters.
        StringBuilder current;
        for (; index < inputLength && isNotHTMLSpace(input[index]); ++index)
            current.append(input[index]);

        // Step 8
        String token = current.toString();
        if (!tokens.contains(token)) {
            result.append(token); // Step 9
            continue;
        }

        // Step 8.1: swallow the whitespace that followed the removed token.
        while (index < inputLength && isHTMLSpace(input[index]))
            ++index;

        // Step 8.2: strip whitespace already emitted in front of it.
        size_t keptLength = result.length();
        while (keptLength > 0 && isHTMLSpace(result[keptLength - 1]))
            --keptLength;
        result.resize(keptLength);

        // Step 8.3: rejoin the neighbours with one space when both exist.
        if (index < inputLength && !result.isEmpty())
            result.append(' ');
    }

    return result.toString();
}
void CSSTokenizer::consumeSingleWhitespaceIfNext() { // We check for \r\n and HTML spaces since we don't do preprocessing UChar c = m_input.nextInputChar(); if (c == '\r' && m_input.peek(1) == '\n') consume(2); else if (isHTMLSpace(c)) consume(); }
// Parses a sandbox attribute value into a set of sandbox flags.
//
// Starts from SandboxAll and clears the flag(s) relaxed by every recognised
// "allow-*" token (matched case-insensitively). Unrecognised tokens are
// ignored.
SandboxFlags SecurityContext::parseSandboxPolicy(const String& policy)
{
    // http://www.w3.org/TR/html5/the-iframe-element.html#attr-iframe-sandbox
    // Parse the unordered set of unique space-separated tokens.
    SandboxFlags flags = SandboxAll;
    const UChar* chars = policy.characters();
    const unsigned policyLength = policy.length();

    unsigned cursor = 0;
    for (;;) {
        // Skip the whitespace run in front of the next token.
        while (cursor < policyLength && isHTMLSpace(chars[cursor]))
            ++cursor;
        if (cursor >= policyLength)
            break;

        // chars[cursor] is a non-space character, so the token is at least
        // one character long.
        const unsigned tokenStart = cursor;
        unsigned tokenEnd = tokenStart + 1;
        while (tokenEnd < policyLength && !isHTMLSpace(chars[tokenEnd]))
            ++tokenEnd;

        // Resume just past |tokenEnd|, which is either a space or the end.
        cursor = tokenEnd + 1;

        // Turn off the corresponding sandbox flag if it's set as "allowed".
        const String token = policy.substring(tokenStart, tokenEnd - tokenStart);
        if (equalIgnoringCase(token, "allow-same-origin")) {
            flags &= ~SandboxOrigin;
        } else if (equalIgnoringCase(token, "allow-forms")) {
            flags &= ~SandboxForms;
        } else if (equalIgnoringCase(token, "allow-scripts")) {
            flags &= ~SandboxScripts;
            flags &= ~SandboxAutomaticFeatures;
        } else if (equalIgnoringCase(token, "allow-top-navigation")) {
            flags &= ~SandboxTopNavigation;
        } else if (equalIgnoringCase(token, "allow-popups")) {
            flags &= ~SandboxPopups;
        } else if (equalIgnoringCase(token, "allow-pointer-lock")) {
            flags &= ~SandboxPointerLock;
        }
    }

    return flags;
}
// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers bool parseHTMLInteger(const String& input, int& value) { // Step 1 // Step 2 const UChar* position = input.characters(); const UChar* end = position + input.length(); // Step 3 int sign = 1; // Step 4 while (position < end) { if (!isHTMLSpace(*position)) break; ++position; } // Step 5 if (position == end) return false; ASSERT(position < end); // Step 6 if (*position == '-') { sign = -1; ++position; } else if (*position == '+') ++position; if (position == end) return false; ASSERT(position < end); // Step 7 if (!isASCIIDigit(*position)) return false; // Step 8 StringBuilder digits; while (position < end) { if (!isASCIIDigit(*position)) break; digits.append(*position++); } // Step 9 bool ok; value = sign * charactersToIntStrict(digits.characters(), digits.length(), &ok); return ok; }
static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value) { // Step 3 int sign = 1; // Step 4 while (position < end) { if (!isHTMLSpace(*position)) break; ++position; } // Step 5 if (position == end) return false; ASSERT(position < end); // Step 6 if (*position == '-') { sign = -1; ++position; } else if (*position == '+') ++position; if (position == end) return false; ASSERT(position < end); // Step 7 if (!isASCIIDigit(*position)) return false; // Step 8 StringBuilder digits; while (position < end) { if (!isASCIIDigit(*position)) break; digits.append(*position++); } // Step 9 bool ok; if (digits.is8Bit()) value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok); else value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok); return ok; }
// Walks |characters| and hands each whitespace-delimited token (pointer plus
// length) to |tokenProcessor|. Stops at the end of input or as soon as
// processToken() returns false.
// NOTE(review): TokenProcessor and CharacterType are presumably template
// parameters declared outside this span -- confirm.
static inline void tokenizeSpaceSplitString(TokenProcessor& tokenProcessor, const CharacterType* characters, unsigned length)
{
    unsigned cursor = 0;
    while (true) {
        // Skip the whitespace run in front of the next token.
        while (cursor < length && isHTMLSpace(characters[cursor]))
            ++cursor;
        if (cursor >= length)
            return;

        // The first character is known to be a non-space; scan the rest.
        const unsigned tokenStart = cursor;
        ++cursor;
        while (cursor < length && isNotHTMLSpace(characters[cursor]))
            ++cursor;

        if (!tokenProcessor.processToken(characters + tokenStart, cursor - tokenStart))
            return;

        // Step over the delimiter (or past the end) that stopped the scan.
        ++cursor;
    }
}
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); // URL tokens without escapes get handled without allocations for (unsigned size = 0; ; size++) { UChar cc = m_input.peekWithoutReplacement(size); if (cc == ')') { unsigned startOffset = m_input.offset(); m_input.advance(size + 1); return CSSParserToken(UrlToken, m_input.rangeAsCSSParserString(startOffset, size)); } if (cc <= ' ' || cc == '\\' || cc == '"' || cc == '\'' || cc == '(' || cc == '\x7f') break; } StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, registerString(result.toString())); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
// Checks that |token| is usable as a single list token.
// Sets |ec| to SYNTAX_ERR and returns false for an empty token; sets |ec| to
// INVALID_CHARACTER_ERR and returns false when the token contains an HTML
// space character. Returns true (leaving |ec| untouched) otherwise.
bool DOMTokenList::validateToken(const AtomicString& token, ExceptionCode& ec)
{
    if (token.isEmpty()) {
        ec = SYNTAX_ERR;
        return false;
    }

    for (unsigned index = 0, tokenLength = token.length(); index < tokenLength; ++index) {
        if (!isHTMLSpace(token[index]))
            continue;
        ec = INVALID_CHARACTER_ERR;
        return false;
    }

    return true;
}
// Checks that |token| is usable as a single list token.
// Throws SyntaxError through |es| and returns false for an empty token;
// throws InvalidCharacterError and returns false when the token contains an
// HTML space character. Returns true otherwise.
bool DOMTokenList::validateToken(const AtomicString& token, ExceptionState& es)
{
    if (token.isEmpty()) {
        es.throwDOMException(SyntaxError);
        return false;
    }

    for (unsigned index = 0, tokenLength = token.length(); index < tokenLength; ++index) {
        if (!isHTMLSpace(token[index]))
            continue;
        es.throwDOMException(InvalidCharacterError);
        return false;
    }

    return true;
}
// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers bool parseHTMLNonNegativeInteger(const String& input, unsigned int& value) { // Step 1 // Step 2 const UChar* position = input.characters(); const UChar* end = position + input.length(); // Step 3 while (position < end) { if (!isHTMLSpace(*position)) break; ++position; } // Step 4 if (position == end) return false; ASSERT(position < end); // Step 5 if (*position == '+') ++position; // Step 6 if (position == end) return false; ASSERT(position < end); // Step 7 if (!isASCIIDigit(*position)) return false; // Step 8 StringBuilder digits; while (position < end) { if (!isASCIIDigit(*position)) break; digits.append(*position++); } // Step 9 bool ok; value = charactersToUIntStrict(digits.characters(), digits.length(), &ok); return ok; }
static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value) { // Step 3 while (position < end) { if (!isHTMLSpace(*position)) break; ++position; } // Step 4 if (position == end) return false; ASSERT_WITH_SECURITY_IMPLICATION(position < end); // Step 5 if (*position == '+') ++position; // Step 6 if (position == end) return false; ASSERT_WITH_SECURITY_IMPLICATION(position < end); // Step 7 if (!isASCIIDigit(*position)) return false; // Step 8 StringBuilder digits; while (position < end) { if (!isASCIIDigit(*position)) break; digits.append(*position++); } // Step 9 bool ok; if (digits.is8Bit()) value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok); else value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok); return ok; }
// Returns |input| followed by every entry of |tokens|, separated by single
// spaces. No separator is inserted before the first appended token when
// |input| is empty or already ends in an HTML space character.
String DOMTokenList::addTokens(const AtomicString& input, const Vector<String>& tokens)
{
    StringBuilder result;
    const bool hasInput = !input.isEmpty();
    if (hasInput)
        result.append(input);

    // A separator is pending only when the text so far ends in a non-space.
    bool separatorPending = hasInput && !isHTMLSpace(input[input.length() - 1]);

    const size_t tokenCount = tokens.size();
    for (size_t index = 0; index < tokenCount; ++index) {
        if (separatorPending)
            result.append(' ');
        result.append(tokens[index]);
        separatorPending = true;
    }

    return result.toString();
}
// http://dev.w3.org/csswg/css-syntax/#consume-url-token CSSParserToken CSSTokenizer::consumeUrlToken() { consumeUntilNonWhitespace(); StringBuilder result; while (true) { UChar cc = consume(); if (cc == ')' || cc == kEndOfFileMarker) { // The "reconsume" here deviates from the spec, but is required to avoid consuming past the EOF if (cc == kEndOfFileMarker) reconsume(cc); return CSSParserToken(UrlToken, result.toString()); } if (isHTMLSpace(cc)) { consumeUntilNonWhitespace(); if (consumeIfNext(')') || m_input.nextInputChar() == kEndOfFileMarker) return CSSParserToken(UrlToken, result.toString()); break; } if (cc == '"' || cc == '\'' || cc == '(' || isNonPrintableCodePoint(cc)) break; if (cc == '\\') { if (twoCharsAreValidEscape(cc, m_input.nextInputChar())) { result.append(consumeEscape()); continue; } break; } result.append(cc); } consumeBadUrlRemnants(); return CSSParserToken(BadUrlToken); }
static String parseCSSStringOrURL(const UChar* characters, size_t length) { size_t offset = 0; size_t reducedLength = length; while (reducedLength && isHTMLSpace(characters[offset])) { ++offset; --reducedLength; } while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1])) --reducedLength; if (reducedLength >= 5 && (characters[offset] == 'u' || characters[offset] == 'U') && (characters[offset + 1] == 'r' || characters[offset + 1] == 'R') && (characters[offset + 2] == 'l' || characters[offset + 2] == 'L') && characters[offset + 3] == '(' && characters[offset + reducedLength - 1] == ')') { offset += 4; reducedLength -= 5; } while (reducedLength && isHTMLSpace(characters[offset])) { ++offset; --reducedLength; } while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1])) --reducedLength; if (reducedLength < 2 || characters[offset] != characters[offset + reducedLength - 1] || !(characters[offset] == '\'' || characters[offset] == '"')) return String(); offset++; reducedLength -= 2; while (reducedLength && isHTMLSpace(characters[offset])) { ++offset; --reducedLength; } while (reducedLength && isHTMLSpace(characters[offset + reducedLength - 1])) --reducedLength; return String(characters + offset, reducedLength); }
// Feeds one character into the scanner's state machine. The machine skips
// comments and whitespace, accumulates an at-rule name in m_rule and its
// value in m_ruleValue, and calls emitRule() when a ';' ends the value.
// Content that is neither whitespace, a comment nor an at-rule -- or a '{'
// after a rule -- moves it to DoneParsingImportRules.
inline void CSSPreloadScanner::tokenize(UChar c)
{
    // We are just interested in @import rules, no need for real tokenization here
    // Searching for other types of resources is probably low payoff.
    switch (m_state) {
    case Initial:
        if (isHTMLSpace(c))
            return;
        if (c == '@')
            m_state = RuleStart;
        else if (c == '/')
            m_state = MaybeComment;
        else
            m_state = DoneParsingImportRules;
        return;

    case MaybeComment:
        m_state = (c == '*') ? Comment : Initial;
        return;

    case Comment:
        if (c == '*')
            m_state = MaybeCommentEnd;
        return;

    case MaybeCommentEnd:
        // A run of '*' keeps us here; only '/' actually closes the comment.
        if (c == '*')
            return;
        m_state = (c == '/') ? Initial : Comment;
        return;

    case RuleStart: {
        const bool isASCIILetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        if (!isASCIILetter) {
            m_state = Initial;
            return;
        }
        // First letter of the rule name: start both buffers afresh.
        m_rule.clear();
        m_ruleValue.clear();
        m_rule.append(c);
        m_state = Rule;
        return;
    }

    case Rule:
        if (isHTMLSpace(c))
            m_state = AfterRule;
        else if (c == ';')
            m_state = Initial;
        else
            m_rule.append(c);
        return;

    case AfterRule:
        if (isHTMLSpace(c))
            return;
        if (c == ';') {
            m_state = Initial;
        } else if (c == '{') {
            m_state = DoneParsingImportRules;
        } else {
            // First character of the rule's value.
            m_state = RuleValue;
            m_ruleValue.append(c);
        }
        return;

    case RuleValue:
        if (isHTMLSpace(c))
            m_state = AfterRuleValue;
        else if (c == ';')
            emitRule();
        else
            m_ruleValue.append(c);
        return;

    case AfterRuleValue:
        if (isHTMLSpace(c))
            return;
        if (c == ';') {
            emitRule();
        } else if (c == '{') {
            m_state = DoneParsingImportRules;
        } else {
            // FIXME: media rules
            m_state = Initial;
        }
        return;

    case DoneParsingImportRules:
        ASSERT_NOT_REACHED();
        return;
    }
}
// See the specifications for more details about the algorithm to follow.
// http://www.w3.org/TR/2013/WD-html-srcset-20130228/#processing-the-image-candidates.
//
// Splits |srcsetAttribute| into comma-separated candidates and appends one
// ImageWithScale (URL plus scale factor) to |imageCandidates| for every
// candidate that is accepted. A candidate without a descriptor gets a scale
// factor of 1. A candidate whose descriptor does not end in 'x', or whose
// numeric part charactersToFloat rejects, is skipped. |imageCandidates| must
// be empty on entry.
static void parseImagesWithScaleFromSrcsetAttribute(const String& srcsetAttribute, ImageCandidates& imageCandidates)
{
    ASSERT(imageCandidates.isEmpty());
    size_t imageCandidateStart = 0;
    unsigned srcsetAttributeLength = srcsetAttribute.length();
    while (imageCandidateStart < srcsetAttributeLength) {
        float imageScaleFactor = 1;
        // Index of the comma (or the string length) that ends this candidate;
        // the next candidate starts at separator + 1.
        size_t separator;
        // 4. Splitting loop: Skip whitespace.
        size_t imageURLStart = srcsetAttribute.find(isNotHTMLSpace, imageCandidateStart);
        if (imageURLStart == notFound)
            break;
        // If the current candidate is either totally empty or only contains space, skip it.
        if (srcsetAttribute[imageURLStart] == ',') {
            imageCandidateStart = imageURLStart + 1;
            continue;
        }
        // 5. Collect a sequence of characters that are not space characters, and let that be url.
        size_t imageURLEnd = srcsetAttribute.find(isHTMLSpace, imageURLStart + 1);
        if (imageURLEnd == notFound) {
            // The URL runs to the end of the attribute: last candidate, no descriptor.
            imageURLEnd = srcsetAttributeLength;
            separator = srcsetAttributeLength;
        } else if (srcsetAttribute[imageURLEnd - 1] == ',') {
            // The URL is immediately followed by a comma: drop the comma from
            // the URL and treat it as the separator.
            --imageURLEnd;
            separator = imageURLEnd;
        } else {
            // 7. Collect a sequence of characters that are not "," (U+002C) characters, and let that be descriptors.
            size_t imageScaleStart = srcsetAttribute.find(isNotHTMLSpace, imageURLEnd + 1);
            if (imageScaleStart == notFound)
                separator = srcsetAttributeLength;
            else if (srcsetAttribute[imageScaleStart] == ',')
                separator = imageScaleStart;
            else {
                // This part differs from the spec as the current implementation only supports pixel density descriptors for now.
                size_t imageScaleEnd = srcsetAttribute.find(isHTMLSpaceOrComma, imageScaleStart + 1);
                imageScaleEnd = (imageScaleEnd == notFound) ? srcsetAttributeLength : imageScaleEnd;
                size_t commaPosition = imageScaleEnd;
                // Make sure there are no other descriptors.
                // NOTE(review): both bounds below use srcsetAttributeLength - 1, so the
                // final character of the attribute is never examined by this scan or by
                // the check that follows it -- confirm this is intended.
                while ((commaPosition < srcsetAttributeLength - 1) && isHTMLSpace(srcsetAttribute[commaPosition]))
                    ++commaPosition;
                // If the first not html space character after the scale modifier is not a comma,
                // the current candidate is an invalid input.
                if ((commaPosition < srcsetAttributeLength - 1) && srcsetAttribute[commaPosition] != ',') {
                    // Find the nearest comma and skip the input.
                    commaPosition = srcsetAttribute.find(',', commaPosition + 1);
                    if (commaPosition == notFound)
                        break;
                    imageCandidateStart = commaPosition + 1;
                    continue;
                }
                separator = commaPosition;
                // Only descriptors that end in 'x' are accepted.
                if (srcsetAttribute[imageScaleEnd - 1] != 'x') {
                    imageCandidateStart = separator + 1;
                    continue;
                }
                bool validScaleFactor = false;
                // Length of the descriptor without its trailing 'x'.
                size_t scaleFactorLengthWithoutUnit = imageScaleEnd - imageScaleStart - 1;
                imageScaleFactor = charactersToFloat(srcsetAttribute.characters() + imageScaleStart, scaleFactorLengthWithoutUnit, &validScaleFactor);
                if (!validScaleFactor) {
                    imageCandidateStart = separator + 1;
                    continue;
                }
            }
        }
        ImageWithScale image;
        image.imageURL = String(srcsetAttribute.characters() + imageURLStart, imageURLEnd - imageURLStart);
        image.scaleFactor = imageScaleFactor;
        imageCandidates.append(image);
        // 11. Return to the step labeled splitting loop.
        imageCandidateStart = separator + 1;
    }
}
// Character predicate: true for an HTML space character or a comma.
static inline bool isHTMLSpaceOrComma(UChar character)
{
    if (isHTMLSpace(character))
        return true;
    return character == ',';
}
// Inserts |propertyText| into m_styleText so that it becomes the property at
// ordinal |index|, adding the configured line feed / property prefix
// (m_format) and a separating ';' where needed, then shifts the recorded
// ranges of disabled properties by the length of the inserted text.
// |styleBodyLength| is used as the insertion offset when the property is
// appended after existing source properties.
void InspectorStyleTextEditor::insertProperty(unsigned index, const String& propertyText, unsigned styleBodyLength)
{
    // Insertion offset into m_styleText. Defaults to 0 and is replaced by the
    // start of the property currently at |index| when that property has source.
    long propertyStart = 0;
    bool insertLast = true;
    if (index < m_allProperties->size()) {
        const InspectorStyleProperty& property = m_allProperties->at(index);
        if (property.hasSource) {
            propertyStart = property.sourceData.range.start;
            // If inserting before a disabled property, it should be shifted, too.
            insertLast = false;
        }
    }
    // True when no enabled property with source precedes |index|.
    bool insertFirstInSource = true;
    for (unsigned i = 0, size = m_allProperties->size(); i < index && i < size; ++i) {
        const InspectorStyleProperty& property = m_allProperties->at(i);
        if (property.hasSource && !property.disabled) {
            insertFirstInSource = false;
            break;
        }
    }
    // True when no enabled property with source sits at or after |index|.
    bool insertLastInSource = true;
    for (unsigned i = index, size = m_allProperties->size(); i < size; ++i) {
        const InspectorStyleProperty& property = m_allProperties->at(i);
        if (property.hasSource && !property.disabled) {
            insertLastInSource = false;
            break;
        }
    }
    String textToSet = propertyText;
    // Number of characters (0 or 1) prepended to textToSet as a ';' separator;
    // formatting strings are inserted after that offset.
    int formattingPrependOffset = 0;
    if (insertLast && !insertFirstInSource) {
        propertyStart = styleBodyLength;
        if (propertyStart && textToSet.length()) {
            const UChar* characters = m_styleText.characters();
            long curPos = propertyStart - 1; // The last position of style declaration, since propertyStart points past one.
            // Walk backwards over trailing whitespace.
            // NOTE(review): the loop stops at curPos == 0 without testing that
            // character, and the check below is skipped when curPos is 0 --
            // confirm this is intended.
            while (curPos && isHTMLSpace(characters[curPos]))
                --curPos;
            if (curPos && characters[curPos] != ';') {
                // Prepend a ";" to the property text if appending to a style declaration where
                // the last property has no trailing ";".
                textToSet.insert(";", 0);
                formattingPrependOffset = 1;
            }
        }
    }
    const String& formatLineFeed = m_format.first;
    const String& formatPropertyPrefix = m_format.second;
    if (insertLastInSource) {
        long formatPropertyPrefixLength = formatPropertyPrefix.length();
        // Add the property prefix (and a line feed in front of it when the
        // preceding character is not a line break) unless a ';' was prepended
        // or the text just before the insertion point already is the prefix.
        if (!formattingPrependOffset && (propertyStart < formatPropertyPrefixLength || m_styleText.substring(propertyStart - formatPropertyPrefixLength, formatPropertyPrefixLength) != formatPropertyPrefix)) {
            textToSet.insert(formatPropertyPrefix, formattingPrependOffset);
            if (!propertyStart || !isHTMLLineBreak(m_styleText[propertyStart - 1]))
                textToSet.insert(formatLineFeed, formattingPrependOffset);
        }
        // NOTE(review): propertyStart may equal the length of m_styleText here;
        // this presumably relies on String's index operator tolerating that -- confirm.
        if (!isHTMLLineBreak(m_styleText[propertyStart]))
            textToSet.append(formatLineFeed);
    } else {
        // An enabled source property follows: terminate the new text with
        // line feed + prefix so that the following property keeps its formatting.
        String fullPrefix = formatLineFeed + formatPropertyPrefix;
        long fullPrefixLength = fullPrefix.length();
        textToSet.append(fullPrefix);
        if (insertFirstInSource && (propertyStart < fullPrefixLength || m_styleText.substring(propertyStart - fullPrefixLength, fullPrefixLength) != fullPrefix))
            textToSet.insert(fullPrefix, formattingPrependOffset);
    }
    m_styleText.insert(textToSet, propertyStart);
    // Recompute disabled property ranges after an inserted property.
    long propertyLengthDelta = textToSet.length();
    shiftDisabledProperties(disabledIndexByOrdinal(index, true), propertyLengthDelta);
}