Esempio n. 1
0
void TxtReaderCoreUtf16::readDocument(ZLInputStream &stream) {
	const size_t BUFSIZE = 2048;
	char *buffer = new char[BUFSIZE];
	std::string str;
	size_t length;
	do {
		length = stream.read(buffer, BUFSIZE);
		char *start = buffer;
		const char *end = buffer + length;
		for (char *ptr = start; ptr < end; ptr += 2) {
			const char chr = getAscii(ptr);
			if (chr == '\n' || chr == '\r') {
				bool skipNewLine = false;
				if (chr == '\r' && ptr + 2 != end && getAscii(ptr + 2) == '\n') {
					skipNewLine = true;
					setAscii(ptr, '\n');
				}
				if (start != ptr) {
					str.erase();
					myReader.myConverter->convert(str, start, ptr + 2);
					myReader.characterDataHandler(str);
				}
				if (skipNewLine) {
					ptr += 2;
				}
				start = ptr + 2;
				myReader.newLineHandler();
			} else if (chr != 0 && isspace(chr)) {
				if (chr != '\t') {
					setAscii(ptr, ' ');
				}
			}
		}
		if (start != end) {
			str.erase();
			myReader.myConverter->convert(str, start, end);
			myReader.characterDataHandler(str);
		}
	} while (length == BUFSIZE);
	delete[] buffer;
}
Esempio n. 2
0
void KURLGooglePrivate::replaceComponents(const Replacements& replacements)
{
    url_canon::RawCanonOutputT<char> output;
    url_parse::Parsed newParsed;

    m_isValid = url_util::ReplaceComponents(utf8String().data(),
                                            utf8String().length(), m_parsed, replacements, 0, &output, &newParsed);

    m_parsed = newParsed;
    if (m_parsed.ref.is_nonempty())
        setUtf8(CString(output.data(), output.length()));
    else
        setAscii(CString(output.data(), output.length()));
}
Esempio n. 3
0
// Note: code mostly duplicated below.
void KURLGooglePrivate::init(const KURL& base, const char* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    // As a performance optimization, we do not use the charset converter if
    // encoding is UTF-8 or other Unicode encodings. Note that this is
    // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be
    // more efficient with no charset converter object because it
    // can do UTF-8 internally with no extra copies.

    // We feel free to make the charset converter object every time since it's
    // just a wrapper around a reference.
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);

    // See FIXME in KURLGooglePrivate in the header. If canonicalization has not
    // changed the string, we can avoid an extra allocation by using assignment.
    //
    // When KURL encounters an error such that the URL is invalid and empty
    // (for example, resolving a relative URL on a non-hierarchical base), it
    // will produce an isNull URL, and calling setUtf8 will produce an empty
    // non-null URL. This is unlikely to affect anything, but we preserve this
    // just in case.
    if (m_isValid || output.length()) {
        // Without ref, the whole url is guaranteed to be ASCII-only.
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else {
        // WebCore expects resolved URLs to be empty rather than NULL.
        setUtf8(CString("", 0));
    }
}
Esempio n. 4
0
// Note: code mostly duplicated above. See FIXMEs and comments there.
void KURLGooglePrivate::init(const KURL& base, const UChar* rel, int relLength,
                             const TextEncoding* queryEncoding)
{
    KURLCharsetConverter charsetConverterObject(queryEncoding);
    KURLCharsetConverter* charsetConverter =
        (!queryEncoding || isUnicodeEncoding(queryEncoding)) ? 0 :
        &charsetConverterObject;

    url_canon::RawCanonOutputT<char> output;
    const CString& baseStr = base.m_url.utf8String();
    m_isValid = url_util::ResolveRelative(baseStr.data(), baseStr.length(),
                                          base.m_url.m_parsed, rel, relLength,
                                          charsetConverter,
                                          &output, &m_parsed);


    if (m_isValid || output.length()) {
        if (m_parsed.ref.is_nonempty())
            setUtf8(CString(output.data(), output.length()));
        else
            setAscii(CString(output.data(), output.length()));
    } else
        setUtf8(CString("", 0));
}