QString KEncodingDetector::decodeWithBuffering(const char *data, int len) { #ifdef DECODE_DEBUG kWarning() << "KEncodingDetector: decoding "<<len<<" bytes"; #endif if (d->m_writtingHappened) { #ifdef DECODE_DEBUG kWarning() << "KEncodingDetector: d->m_writtingHappened "<< d->m_codec->name(); #endif processNull(const_cast<char *>(data),len); return d->m_decoder->toUnicode(data, len); } else { if (d->m_bufferForDefferedEncDetection.isEmpty()) { // If encoding detection produced something, and we either got to the body or // actually saw the encoding explicitly, we're done. if (analyze(data,len) && (d->m_seenBody || d->isExplicitlySpecifiedEncoding())) { #ifdef DECODE_DEBUG kWarning() << "KEncodingDetector: m_writtingHappened first time "<< d->m_codec->name(); #endif processNull(const_cast<char *>(data),len); d->m_writtingHappened=true; return d->m_decoder->toUnicode(data, len); } else { #ifdef DECODE_DEBUG kWarning() << "KEncodingDetector: begin deffer"; #endif d->m_bufferForDefferedEncDetection=data; } } else { d->m_bufferForDefferedEncDetection+=data; // As above, but also limit the buffer size. We must use the entire buffer here, // since the boundaries might split the meta tag, etc. bool detected = analyze(d->m_bufferForDefferedEncDetection.constData(), d->m_bufferForDefferedEncDetection.length()); if ((detected && (d->m_seenBody || d->isExplicitlySpecifiedEncoding())) || d->m_bufferForDefferedEncDetection.length() > MAX_BUFFER) { d->m_writtingHappened=true; d->m_bufferForDefferedEncDetection.replace('\0',' '); QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection)); d->m_bufferForDefferedEncDetection.clear(); #ifdef DECODE_DEBUG kWarning() << "KEncodingDetector: m_writtingHappened in the middle " << d->m_codec->name(); #endif return result; } } } return QString(); }
/**
 * Decodes a complete byte array to Unicode.
 *
 * The bytes are first passed through processNull(); on the first call
 * (d->m_analyzeCalled not yet set) analyze() is run on them before decoding.
 *
 * @param data the raw bytes; the caller's array is left untouched
 * @return the text produced by the current decoder
 */
QString KEncodingDetector::decode(const QByteArray &data)
{
    // Work on a private copy. Writing through const_cast<char *>(data.data())
    // bypasses QByteArray's detach, so it could modify every implicitly shared
    // copy of the caller's array (or memory wrapped by fromRawData()).
    // The non-const data() call below detaches the copy before it is written to.
    QByteArray sanitized(data);
    processNull(sanitized.data(), sanitized.size());

    if (!d->m_analyzeCalled) {
        analyze(sanitized.constData(), sanitized.size());
        d->m_analyzeCalled = true;
    }

    return d->m_decoder->toUnicode(sanitized);
}
/**
 * Decodes a raw buffer of @p len bytes to Unicode.
 *
 * The buffer is handed to processNull() first; analyze() is then run once,
 * on the first call only (tracked by d->m_analyzeCalled), before the bytes
 * are decoded with the current decoder.
 *
 * NOTE(review): processNull() receives the caller's buffer with const cast
 * away, so the caller's bytes may be rewritten in place -- confirm against
 * processNull() and the callers.
 *
 * @param data pointer to the bytes to decode
 * @param len  number of bytes at @p data
 * @return the decoded text
 */
QString KEncodingDetector::decode(const char *data, int len)
{
    char *const writableBytes = const_cast<char *>(data);
    processNull(writableBytes, len);

    const bool analysisPending = !d->m_analyzeCalled;
    if (analysisPending) {
        analyze(data, len);
        d->m_analyzeCalled = true;
    }

    QString decoded = d->m_decoder->toUnicode(data, len);
    return decoded;
}
/** * open file, read first chunk of data, detect eol */ bool open () { if (m_file.open (IO_ReadOnly)) { int c = m_file.readBlock (m_buffer.data(), m_buffer.size()); if (c > 0) { // fix utf16 LE, stolen from tdehtml ;) if ((c >= 2) && (m_codec->mibEnum() == 1000) && (m_buffer[1] == 0x00)) { // utf16LE, we need to put the decoder in LE mode char reverseUtf16[3] = {0xFF, 0xFE, 0x00}; m_decoder->toUnicode(reverseUtf16, 2); } processNull (c); m_text = m_decoder->toUnicode (m_buffer, c); } m_eof = (c == -1) || (c == 0) || (m_text.length() == 0) || m_file.atEnd(); for (uint i=0; i < m_text.length(); i++) { if (m_text[i] == '\n') { m_eol = KateDocumentConfig::eolUnix; break; } else if ((m_text[i] == '\r')) { if (((i+1) < m_text.length()) && (m_text[i+1] == '\n')) { m_eol = KateDocumentConfig::eolDos; break; } else { m_eol = KateDocumentConfig::eolMac; break; } } } return true; } return false; }
// read a line, return length + offset in unicode data void readLine (uint &offset, uint &length) { length = 0; offset = 0; while (m_position <= m_text.length()) { if (m_position == m_text.length()) { // try to load more text if something is around if (!m_eof) { int c = m_file.readBlock (m_buffer.data(), m_buffer.size()); uint readString = 0; if (c > 0) { processNull (c); TQString str (m_decoder->toUnicode (m_buffer, c)); readString = str.length(); m_text = m_text.mid (m_lastLineStart, m_position-m_lastLineStart) + str; } else m_text = m_text.mid (m_lastLineStart, m_position-m_lastLineStart); // is file completly read ? m_eof = (c == -1) || (c == 0) || (readString == 0) || m_file.atEnd(); // recalc current pos and last pos m_position -= m_lastLineStart; m_lastLineStart = 0; } // oh oh, end of file, escape ! if (m_eof && (m_position == m_text.length())) { lastWasEndOfLine = false; // line data offset = m_lastLineStart; length = m_position-m_lastLineStart; m_lastLineStart = m_position; return; } } if (m_text[m_position] == '\n') { lastWasEndOfLine = true; if (lastWasR) { m_lastLineStart++; lastWasR = false; } else { // line data offset = m_lastLineStart; length = m_position-m_lastLineStart; m_lastLineStart = m_position+1; m_position++; return; } } else if (m_text[m_position] == '\r') { lastWasEndOfLine = true; lastWasR = true; // line data offset = m_lastLineStart; length = m_position-m_lastLineStart; m_lastLineStart = m_position+1; m_position++; return; } else { lastWasEndOfLine = false; lastWasR = false; } m_position++; } }