Ejemplo n.º 1
0
/**
 * Decodes a chunk of input, holding output back until the encoding is settled.
 *
 * Once decoding has started (d->m_writtingHappened), every chunk is decoded
 * and returned immediately. Before that, a chunk is analyzed; if analyze()
 * reports a result and either the body has been seen or the encoding was
 * given explicitly, decoding starts. Otherwise the raw bytes are accumulated
 * in d->m_bufferForDefferedEncDetection and re-analyzed as a whole on each
 * call, until detection succeeds or the buffer grows beyond MAX_BUFFER.
 *
 * @param data raw input bytes; not required to be NUL-terminated.
 *             NOTE(review): processNull() takes a writable pointer, so the
 *             caller's buffer may be modified through the const_cast below.
 * @param len  number of bytes available at @p data
 * @return the decoded text, or a null QString while input is still being
 *         buffered.
 */
QString KEncodingDetector::decodeWithBuffering(const char *data, int len)
{
#ifdef DECODE_DEBUG
    kWarning() << "KEncodingDetector: decoding "<<len<<" bytes";
#endif
    if (d->m_writtingHappened)
    {
#ifdef DECODE_DEBUG
        kWarning() << "KEncodingDetector: d->m_writtingHappened "<< d->m_codec->name();
#endif
        processNull(const_cast<char *>(data),len);
        return d->m_decoder->toUnicode(data, len);
    }
    else
    {
        if (d->m_bufferForDefferedEncDetection.isEmpty())
        {
            // If encoding detection produced something, and we either got to the body or
            // actually saw the encoding explicitly, we're done.
            if (analyze(data,len) && (d->m_seenBody || d->isExplicitlySpecifiedEncoding()))
            {
#ifdef DECODE_DEBUG
                kWarning() << "KEncodingDetector: m_writtingHappened first time "<< d->m_codec->name();
#endif
                processNull(const_cast<char *>(data),len);
                d->m_writtingHappened=true;
                return d->m_decoder->toUnicode(data, len);
            }
            else
            {
#ifdef DECODE_DEBUG
                kWarning() << "KEncodingDetector: begin deffer";
#endif
                // Keep exactly len bytes. Assigning the bare pointer would use
                // strlen(), reading past the chunk when it is not
                // NUL-terminated and dropping everything after an embedded NUL.
                d->m_bufferForDefferedEncDetection=QByteArray(data, len);
            }
        }
        else
        {
            // Same reasoning as above: append by explicit length, not by strlen().
            d->m_bufferForDefferedEncDetection+=QByteArray(data, len);
            // As above, but also limit the buffer size. We must use the entire buffer here,
            // since the boundaries might split the meta tag, etc.
            bool detected = analyze(d->m_bufferForDefferedEncDetection.constData(), d->m_bufferForDefferedEncDetection.length());
            if ((detected && (d->m_seenBody || d->isExplicitlySpecifiedEncoding())) ||
                 d->m_bufferForDefferedEncDetection.length() > MAX_BUFFER)
            {
                d->m_writtingHappened=true;
                // The buffer is owned by us, so NULs can be blanked in place.
                d->m_bufferForDefferedEncDetection.replace('\0',' ');
                QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection));
                d->m_bufferForDefferedEncDetection.clear();
#ifdef DECODE_DEBUG
                kWarning() << "KEncodingDetector: m_writtingHappened in the middle " << d->m_codec->name();
#endif
                return result;
            }
        }
    }

    // Still buffering: nothing to hand out yet.
    return QString();
}
Ejemplo n.º 2
0
/**
 * Decodes a byte array to Unicode, running encoding analysis on the first call.
 *
 * analyze() is invoked only while d->m_analyzeCalled is still false; the flag
 * is set afterwards so later calls go straight to the decoder.
 *
 * @param data raw input bytes; left untouched by this function
 * @return the text produced by d->m_decoder for @p data
 */
QString KEncodingDetector::decode(const QByteArray &data)
{
    // processNull() takes a writable pointer. QByteArray is implicitly shared,
    // so writing through a const_cast of data.data() would also alter every
    // other QByteArray sharing the same storage. The non-const data() call on
    // a local copy detaches first, keeping any rewrite private to this call.
    QByteArray buffer(data);
    processNull(buffer.data(), buffer.size());
    if (!d->m_analyzeCalled) {
        analyze(buffer.constData(), buffer.size());
        d->m_analyzeCalled = true;
    }

    return d->m_decoder->toUnicode(buffer);
}
Ejemplo n.º 3
0
/**
 * Decodes @p len bytes starting at @p data to Unicode.
 *
 * The first call (while d->m_analyzeCalled is false) also runs analyze() on
 * the chunk and then sets the flag, so analysis happens only once.
 *
 * @param data raw input bytes.
 *             NOTE(review): processNull() receives a writable pointer to this
 *             buffer, so the caller's memory may be modified - confirm that
 *             callers never pass read-only storage.
 * @param len  number of bytes available at @p data
 * @return the text produced by d->m_decoder for the chunk
 */
QString KEncodingDetector::decode(const char *data, int len)
{
    // processNull() wants a non-const pointer; the cast is only for that call.
    char *const writableBytes = const_cast<char *>(data);
    processNull(writableBytes, len);

    const bool needsAnalysis = !d->m_analyzeCalled;
    if (needsAnalysis) {
        analyze(data, len);
        d->m_analyzeCalled = true;
    }

    return d->m_decoder->toUnicode(data, len);
}
Ejemplo n.º 4
0
    /**
     * Opens m_file read-only, reads and decodes the first block into m_text,
     * updates m_eof, and sets m_eol from the first line break found in that
     * block ("\n" -> eolUnix, "\r\n" -> eolDos, lone "\r" -> eolMac).
     * m_eol is left unchanged when the block contains no line break.
     *
     * @return true if the file could be opened (even if nothing was read),
     *         false otherwise.
     */
    bool open ()
    {
      if (m_file.open (IO_ReadOnly))
      {
        int c = m_file.readBlock (m_buffer.data(), m_buffer.size());

        if (c > 0)
        {
          // fix utf16 LE, stolen from tdehtml ;)
          if ((c >= 2) && (m_codec->mibEnum() == 1000) && (m_buffer[1] == 0x00))
          {
            // utf16LE, we need to put the decoder in LE mode
            // Character literals instead of 0xFF/0xFE: brace-initializing a
            // char from those int constants is a narrowing conversion and is
            // rejected by C++11 and later where char is signed.
            const char reverseUtf16[3] = {'\xFF', '\xFE', '\0'};
            m_decoder->toUnicode(reverseUtf16, 2);
          }

          processNull (c);
          m_text = m_decoder->toUnicode (m_buffer, c);
        }

        // Nothing read, nothing decoded, or the file is exhausted.
        m_eof = (c == -1) || (c == 0) || (m_text.length() == 0) || m_file.atEnd();

        // Only the first line break in the block decides the EOL mode.
        for (uint i=0; i < m_text.length(); i++)
        {
          if (m_text[i] == '\n')
          {
            m_eol = KateDocumentConfig::eolUnix;
            break;
          }
          else if (m_text[i] == '\r')
          {
            // "\r\n" is DOS; a "\r" not followed by "\n" within this block
            // is treated as Mac.
            const bool followedByLf = ((i+1) < m_text.length()) && (m_text[i+1] == '\n');
            m_eol = followedByLf ? KateDocumentConfig::eolDos : KateDocumentConfig::eolMac;
            break;
          }
        }

        return true;
      }

      return false;
    }
Ejemplo n.º 5
0
    // Read the next line and report where it sits inside m_text.
    //
    // On return, offset is the index in m_text of the line's first character
    // and length is the number of characters before the line break. The break
    // itself is not included. Both are reset to 0 first, and stay 0 if
    // m_position is already past the end of m_text on entry.
    //
    // A line ends at '\n', at '\r', or at end of file. A '\n' that directly
    // follows a '\r' is skipped, so "\r\n" counts as a single break.
    //
    // When the scan reaches the end of m_text and the file is not exhausted,
    // another block is read and decoded. m_text is then rebuilt from the
    // not-yet-returned tail plus the new text, so earlier offsets no longer
    // refer to the same characters.
    //
    // lastWasEndOfLine and lastWasR are state declared outside this block;
    // lastWasR carries the "previous char was '\r'" fact across calls.
    void readLine (uint &offset, uint &length)
    {
      length = 0;
      offset = 0;

      while (m_position <= m_text.length())
      {
        if (m_position == m_text.length())
        {
          // try to load more text if something is around
          if (!m_eof)
          {
            int c = m_file.readBlock (m_buffer.data(), m_buffer.size());

            uint readString = 0;
            if (c > 0)
            {
              processNull (c);

              TQString str (m_decoder->toUnicode (m_buffer, c));
              readString = str.length();

              // keep only the pending (unreturned) part of the current line
              // and append the freshly decoded text
              m_text = m_text.mid (m_lastLineStart, m_position-m_lastLineStart)
                       + str;
            }
            else
              m_text = m_text.mid (m_lastLineStart, m_position-m_lastLineStart);

            // is file completely read ?
            m_eof = (c == -1) || (c == 0) || (readString == 0) || m_file.atEnd();

            // recalc current pos and last pos
            // (m_text now starts at the old m_lastLineStart)
            m_position -= m_lastLineStart;
            m_lastLineStart = 0;
          }

          // oh oh, end of file, escape !
          // whatever is left is returned as the final line
          if (m_eof && (m_position == m_text.length()))
          {
            lastWasEndOfLine = false;

            // line data
            offset = m_lastLineStart;
            length = m_position-m_lastLineStart;

            m_lastLineStart = m_position;

            return;
          }
        }

        if (m_text[m_position] == '\n')
        {
          lastWasEndOfLine = true;

          if (lastWasR)
          {
            // second half of "\r\n": the line was already returned at the
            // '\r', so just move the next line's start past this '\n'
            m_lastLineStart++;
            lastWasR = false;
          }
          else
          {
            // line data
            offset = m_lastLineStart;
            length = m_position-m_lastLineStart;

            m_lastLineStart = m_position+1;
            m_position++;

            return;
          }
        }
        else if (m_text[m_position] == '\r')
        {
          lastWasEndOfLine = true;
          lastWasR = true;

          // line data
          offset = m_lastLineStart;
          length = m_position-m_lastLineStart;

          m_lastLineStart = m_position+1;
          m_position++;

          return;
        }
        else
        {
          // ordinary character: clear the line-break state and keep scanning
          lastWasEndOfLine = false;
          lastWasR = false;
        }

        m_position++;
      }
    }