// Returns this string's characters encoded with the UTF-8 encoding.
// The QuestionMarksForUnencodables mode is forwarded to the encoder.
CString String::utf8() const
{
    return UTF8Encoding().encode(
        characters(),
        length(),
        QuestionMarksForUnencodables);
}
// Creates a String by handing |size| bytes starting at |string| to the
// UTF-8 encoding's decoder.
String String::fromUTF8(const char* string, size_t size)
{
    return UTF8Encoding().decode(
        string,
        size);
}
// Returns UTF-8 when this encoding reports itself as non-byte-based;
// otherwise returns this encoding unchanged.
const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
{
    return isNonByteBasedEncoding() ? UTF8Encoding() : *this;
}
// HTML5 specifies that UTF-8 be used in form submission when a form is // is a part of a document in UTF-16 probably because UTF-16 is not a // byte-based encoding and can contain 0x00. By extension, the same // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, // but it's fraught with problems and we'd rather steer clear of it. const TextEncoding& TextEncoding::encodingForFormSubmission() const { if (isNonByteBasedEncoding() || isUTF7Encoding()) return UTF8Encoding(); return *this; }
// Inspects the start of the stream (bytes retained in m_bufferedBytes from
// earlier calls followed by |data|) for a byte order mark, selects the codec
// accordingly, and decodes whatever input is available.
//
// While no BOM has been recognised, everything seen so far still fits in
// m_bufferedBytes, and |flush| is false, the input is stashed and "" is
// returned so that a later call can look again. A null String is returned if
// no codec could be created.
String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
{
    ASSERT(!m_checkedForBOM);

    size_t numBufferedBytes = m_numBufferedBytes;
    const size_t totalBytesSeen = numBufferedBytes + length;

    // Collect up to four leading bytes. The first three are drawn from the
    // stashed bytes and then from |data|; the fourth is only ever taken from
    // |data|. Positions with no byte available stay zero.
    unsigned char lead[4] = { 0, 0, 0, 0 };
    {
        const unsigned char* stashed = m_bufferedBytes;
        const unsigned char* incoming = reinterpret_cast<const unsigned char*>(data);
        size_t stashedLeft = numBufferedBytes;
        size_t incomingLeft = length;
        for (int i = 0; i < 3; ++i) {
            if (stashedLeft) {
                --stashedLeft;
                lead[i] = *stashed++;
            } else if (incomingLeft) {
                --incomingLeft;
                lead[i] = *incoming++;
            }
        }
        if (incomingLeft)
            lead[3] = *incoming;
    }

    // Classify the leading bytes. lengthOfBOM stays zero when nothing matched.
    const TextEncoding* encodingConsideringBOM = &m_encoding;
    size_t lengthOfBOM = 0;
    if (lead[0] == 0xFF && lead[1] == 0xFE) {
        if (lead[2] != 0 || lead[3] != 0) {
            encodingConsideringBOM = &UTF16LittleEndianEncoding();
            lengthOfBOM = 2;
        } else if (totalBytesSeen > sizeof(m_bufferedBytes)) {
            // FF FE 00 00 with enough input to have really seen all four bytes.
            encodingConsideringBOM = &UTF32LittleEndianEncoding();
            lengthOfBOM = 4;
        }
    } else if (lead[0] == 0xEF && lead[1] == 0xBB && lead[2] == 0xBF) {
        encodingConsideringBOM = &UTF8Encoding();
        lengthOfBOM = 3;
    } else if (lead[0] == 0xFE && lead[1] == 0xFF) {
        encodingConsideringBOM = &UTF16BigEndianEncoding();
        lengthOfBOM = 2;
    } else if (lead[0] == 0 && lead[1] == 0 && lead[2] == 0xFE && lead[3] == 0xFF) {
        encodingConsideringBOM = &UTF32BigEndianEncoding();
        lengthOfBOM = 4;
    }
    const bool foundBOM = lengthOfBOM != 0;

    if (!foundBOM && totalBytesSeen <= sizeof(m_bufferedBytes) && !flush) {
        // Not decided yet: keep the bytes and continue looking on the next call.
        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
        m_numBufferedBytes += length;
        return "";
    }

    // The BOM check is finished; create the codec for the chosen encoding.
    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
    if (!m_codec)
        return String();
    m_checkedForBOM = true;

    if (foundBOM) {
        // Skip the BOM: discard the stashed bytes and advance |data| past the
        // part of the BOM that arrived in this call.
        ASSERT(numBufferedBytes < lengthOfBOM);
        const size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
        ASSERT(numUnbufferedBOMBytes <= length);

        data += numUnbufferedBOMBytes;
        length -= numUnbufferedBOMBytes;
        numBufferedBytes = 0;
        m_numBufferedBytes = 0;
    }

    if (!numBufferedBytes)
        return m_codec->decode(data, length, flush, stopOnError, sawError);

    // Stashed bytes that were not part of a BOM are decoded first, from a
    // local copy, and then the new input is decoded and appended.
    char bufferedBytes[sizeof(m_bufferedBytes)];
    memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
    m_numBufferedBytes = 0;

    String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
    if (stopOnError && sawError)
        return bufferedResult;
    return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
}
// Builds this KURL from a Qt QUrl: the QUrl's encoded byte form is handed to
// the (base, string, encoding) KURL constructor with an empty base URL and
// the UTF-8 encoding, and the result is assigned to *this.
KURL::KURL(const QUrl& url)
{
    const QByteArray encodedUrl = url.toEncoded();
    *this = KURL(KURL(), encodedUrl.constData(), UTF8Encoding());
}
void FileReaderLoader::convertToText() { m_isRawDataConverted = true; if (!m_bytesLoaded) { m_stringResult = ""; return; } // Decode the data. // The File API spec says that we should use the supplied encoding if it is valid. However, we choose to ignore this // requirement in order to be consistent with how WebKit decodes the web content: always has the BOM override the // provided encoding. // FIXME: consider supporting incremental decoding to improve the perf. StringBuilder builder; if (!m_decoder) m_decoder = TextResourceDecoder::create("text/plain", m_encoding.isValid() ? m_encoding : UTF8Encoding()); builder.append(m_decoder->decode(static_cast<const char*>(m_rawData->data()), m_rawData->byteLength())); if (m_finishedLoading) builder.append(m_decoder->flush()); m_stringResult = builder.toString(); }
bool WebFrameSerializerImpl::serialize() { bool didSerialization = false; Document* document = m_specifiedWebLocalFrameImpl->frame()->document(); const KURL& url = document->url(); if (url.isValid()) { didSerialization = true; const WTF::TextEncoding& textEncoding = document->encoding().isValid() ? document->encoding() : UTF8Encoding(); if (textEncoding.isNonByteBasedEncoding()) { const UChar byteOrderMark = 0xFEFF; m_dataBuffer.append(byteOrderMark); } SerializeDomParam param(url, textEncoding, document); Element* documentElement = document->documentElement(); if (documentElement) buildContentForNode(documentElement, ¶m); encodeAndFlushBuffer(WebFrameSerializerClient::CurrentFrameIsFinished, ¶m, ForceFlush); } else { // Report empty contents for invalid URLs. m_client->didSerializeDataForFrame( WebCString(), WebFrameSerializerClient::CurrentFrameIsFinished); } ASSERT(m_dataBuffer.isEmpty()); return didSerialization; }
void XMLHttpRequest::send(const String& body, ExceptionCode& ec) { if (!m_doc) return; if (m_state != Open) { ec = INVALID_STATE_ERR; return; } // FIXME: Should this abort or raise an exception instead if we already have a m_loader going? if (m_loader) return; m_aborted = false; ResourceRequest request(m_url); request.setHTTPMethod(m_method); if (!body.isNull() && m_method != "GET" && m_method != "HEAD" && (m_url.protocol().lower() == "http" || m_url.protocol().lower() == "https")) { String contentType = getRequestHeader("Content-Type"); if (contentType.isEmpty()) { ExceptionCode ec = 0; setRequestHeader("Content-Type", "application/xml", ec); ASSERT(ec == 0); } // FIXME: must use xmlEncoding for documents. String charset = "UTF-8"; TextEncoding m_encoding(charset); if (!m_encoding.isValid()) // FIXME: report an error? m_encoding = UTF8Encoding(); request.setHTTPBody(PassRefPtr<FormData>(new FormData(m_encoding.encode(body.characters(), body.length())))); } if (m_requestHeaders.size() > 0) request.addHTTPHeaderFields(m_requestHeaders); if (!m_async) { Vector<char> data; ResourceError error; ResourceResponse response; { // avoid deadlock in case the loader wants to use JS on a background thread #ifdef __OWB_JS__ KJS::JSLock::DropAllLocks dropLocks; if (m_doc->frame()) m_doc->frame()->loader()->loadResourceSynchronously(request, error, response, data); #endif } m_loader = 0; if (error.isNull()) processSyncLoadResults(data, response); else ec = NETWORK_ERR; return; } // Neither this object nor the JavaScript wrapper should be deleted while // a request is in progress because we need to keep the listeners alive, // and they are referenced by the JavaScript wrapper. ref(); { #ifdef __OWB_JS__ KJS::JSLock lock; gcProtectNullTolerant(KJS::ScriptInterpreter::getDOMObject(this)); #endif } // create can return null here, for example if we're no longer attached to a page. 
// this is true while running onunload handlers // FIXME: Maybe create can return false for other reasons too? m_loader = SubresourceLoader::create(m_doc->frame(), this, request); }
void FileReader::convertToText() { if (!m_rawData.size()) { m_builder.clear(); return; } // Decode the data. // The File API spec says that we should use the supplied encoding if it is valid. However, we choose to ignore this // requirement in order to be consistent with how WebKit decodes the web content: always has the BOM override the // provided encoding. // FIXME: consider supporting incremental decoding to improve the perf. if (!m_decoder) m_decoder = TextResourceDecoder::create("text/plain", m_encoding.isValid() ? m_encoding : UTF8Encoding()); m_builder.clear(); m_builder.append(m_decoder->decode(&m_rawData.at(0), m_rawData.size())); if (m_state == Completed && !m_error) m_builder.append(m_decoder->flush()); }
// Produces the markup to emit before the open tag of |element| and tells the
// caller, through |needSkip|, whether the element should be skipped.
//
// For non-HTML documents this emits the XML declaration (once) and the
// doctype (once). For HTML documents it handles META charset declarations,
// the HTML element and the BASE element.
String WebPageSerializerImpl::preActionBeforeSerializeOpenTag(
    const Element* element, SerializeDomParam* param, bool* needSkip)
{
    StringBuilder markup;
    *needSkip = false;

    if (!param->isHTMLDocument) {
        // Write the XML declaration the first time through.
        if (!param->haveAddedXMLProcessingDirective) {
            param->haveAddedXMLProcessingDirective = true;

            // Encoding name: the document's XML encoding, then the loader's
            // writer encoding, then the UTF-8 encoding's name.
            String xmlEncoding = param->document->xmlEncoding();
            if (xmlEncoding.isEmpty())
                xmlEncoding = param->document->loader()->writer()->encoding();
            if (xmlEncoding.isEmpty())
                xmlEncoding = UTF8Encoding().name();

            markup.append("<?xml version=\"");
            markup.append(param->document->xmlVersion());
            markup.append("\" encoding=\"");
            markup.append(xmlEncoding);
            if (param->document->xmlStandalone())
                markup.append("\" standalone=\"yes");
            markup.append("\"?>\n");
        }

        // Add the doctype declaration if it has not been emitted yet.
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            markup.append(createMarkup(param->document->doctype()));
        }

        return markup.toString();
    }

    if (element->hasTagName(HTMLNames::metaTag)) {
        // The original META that declares a charset is skipped; per the
        // original author's note, a replacement META with the correct charset
        // is written after the open tag of HEAD.
        const HTMLMetaElement* meta = static_cast<const HTMLMetaElement*>(element);
        if (equalIgnoringCase(meta->httpEquiv(), "content-type")) {
            const String content = meta->content();
            if (content.length() && content.contains("charset", false)) {
                // Remember the element so it can be skipped while serializing.
                param->skipMetaElement = element;
                *needSkip = true;
            }
        }
    } else if (element->hasTagName(HTMLNames::htmlTag)) {
        // Emit the doctype first if it has not been emitted yet.
        if (!param->haveSeenDocType) {
            param->haveSeenDocType = true;
            markup.append(createMarkup(param->document->doctype()));
        }

        // Add MOTW declaration before html tag.
        // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
        markup.append(WebPageSerializer::generateMarkOfTheWebDeclaration(param->url));
    } else if (element->hasTagName(HTMLNames::baseTag)) {
        // Open a comment in front of the BASE tag.
        markup.append("<!--");
    }

    return markup.toString();
}
// Returns true when the form-submission encoding derived from |encoding|
// compares equal to UTF-8.
static bool isUnicodeEncoding(const WTF::TextEncoding* encoding)
{
    const WTF::TextEncoding& submissionEncoding = encoding->encodingForFormSubmission();
    return submissionEncoding == UTF8Encoding();
}
// Convenience overload: forwards to the two-argument
// decodeURLEscapeSequences with the UTF-8 encoding.
String decodeURLEscapeSequences(const String& string)
{
    return decodeURLEscapeSequences(
        string,
        UTF8Encoding());
}
// Creates a String by decoding the NUL-terminated byte sequence |string| as
// UTF-8. A null |string| yields a null String instead of being passed to
// strlen(), which has undefined behaviour for a null pointer.
String String::fromUTF8(const char* string)
{
    if (!string)
        return String();

    return UTF8Encoding().decode(string, strlen(string));
}
// Returns this string's characters encoded with the UTF-8 encoding.
CString String::utf8() const
{
    return UTF8Encoding().encode(
        characters(),
        length());
}