C++ (Cpp) Codec::valid Beispiele

Programmiersprache: C++ (Cpp)

Klasse / Typ: Codec

Methode / Funktion: valid

Beispiele auf hotexamples.com: 3

C++ (Cpp) Codec::valid - 3 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die Methode Codec::valid, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

decode(8)

serialize(8)

raw(8)

deserialize(8)

encode(7)

isNull(5)

name(4)

wellformed(3)

fromUnicode(3)

valid(3)

toUnicode(3)

type(2)

canEncode(2)

getName(2)

canDecode(2)

codeToFile(2)

error(1)

reset(1)

code(1)

compress(1)

supportedSamplerates(1)

supportedSampleFormats(1)

supportedChannelLayouts(1)

state(1)

setStreamSamplerate(1)

setSourceSamplerate(1)

setQuality(1)

setCompleteFrames(1)

setChannels(1)

setBitrate(1)

dealMsg(1)

decodeFrame(1)

generateHeader(1)

decompress(1)

messageCallback_(1)

matchedParens(1)

longName(1)

isValid(1)

dfs(1)

dumpStream(1)

getType(1)

getPixelFormat(1)

encodeToFile(1)

getMaxCompressedSize(1)

getFrameSize(1)

getFrameCount(1)

getCodecName(1)

init(1)

Beispiel #1

Datei anzeigen

// Tries a sequence of codecs on body and returns the first one that
// fits, or 0 if none of the candidates is acceptable. The candidates
// are tried from most to least restrictive.
static Codec * guessTextCodec( const EString & body )
{
    // ISO-2022-JP comes first: it is very restrictive, and text in
    // that encoding would also pass the ASCII and UTF-8 checks
    // below. It is only attempted if the body starts with one of the
    // escape sequences ESC ( or ESC $ followed by B, J or @.
    bool startsWithEscape = false;
    if ( body[0] == 0x1B && ( body[1] == '(' || body[1] == '$' ) )
        startsWithEscape =
            body[2] == 'B' || body[2] == 'J' || body[2] == '@';
    if ( startsWithEscape ) {
        Codec * jp = new Iso2022JpCodec;
        (void)jp->toUnicode( body );
        if ( jp->wellformed() )
            return jp;
    }

    // Next candidate: plain ASCII.
    Codec * plain = new AsciiCodec;
    (void)plain->toUnicode( body );
    if ( plain->wellformed() )
        return plain;

    // Multibyte encodings that only use octets 0x01-0x7f (such as
    // ISO-2022-JP above) must be tested before UTF-8, since UTF-8
    // would accept them too.

    // Next candidate: UTF-8. If the ASCII attempt was at least valid,
    // ASCII is preferred over UTF-8.
    Codec * utf = new Utf8Codec;
    (void)utf->toUnicode( body );
    if ( utf->wellformed() )
        return plain->valid() ? plain : utf;

    // Next candidate: whatever Codec::byString() suggests for this
    // content. The conversion is rerun here to be sure about the
    // codec's state before wellformed() is consulted.
    Codec * hinted = Codec::byString( body );
    if ( hinted ) {
        (void)hinted->toUnicode( body );
        if ( hinted->wellformed() )
            return hinted;
    }

    // Last resort: accept UTF-8 if the conversion was merely valid.
    // (Open question from the original author: should a valid hinted
    // codec be used here as well?)
    if ( utf->valid() )
        return utf;

    return 0;
}

Beispiel #2

Datei anzeigen

// Parses the part of rfc2822 from offset start up to (not including)
// end as a single bodypart whose header is h and whose parent is
// parent, and returns the newly created Bodypart.
//
// The function
//  - skips one leading CR and one leading LF,
//  - decodes the body according to the Content-Transfer-Encoding in h,
//  - adds a default Content-Type to h if none is present,
//  - for text/* parts, selects (or guesses) a codec, converts the
//    body to Unicode, records a conversion error on the bodypart if
//    necessary, and updates the charset parameter and the
//    Content-Transfer-Encoding field to match the result,
//  - for other leaf parts, stores the raw data and selects base64 (or
//    no encoding at all for some PGP content),
//  - recurses into multipart/* and message/rfc822 parts,
//  - records the sizes (bytes, encoded bytes, encoded lines) on the
//    bodypart.
//
// Note that h is modified in the process (fields may be added,
// changed or removed, and h->simplify() is called at the end).
Bodypart * Bodypart::parseBodypart( uint start, uint end,
                                    const EString & rfc2822,
                                    Header * h, Multipart * parent )
{
    // skip a single CR and a single LF in front of the body.
    if ( rfc2822[start] == 13 )
        start++;
    if ( rfc2822[start] == 10 )
        start++;

    Bodypart * bp = new Bodypart;
    bp->setParent( parent );
    bp->setHeader( h );

    // body stays empty if the range is empty (or inverted, which can
    // happen if the skipping above moved start past end).
    EString body;
    if ( end > start )
        body = rfc2822.mid( start, end-start );
    if ( !body.contains( '=' ) ) {
        // sometimes people send c-t-e: q-p _and_ c-t-e: 7bit or 8bit.
        // if they are equivalent we can accept it.
        // (without a '=' in the body, q-p decoding would not change
        // anything, so the conflicting fields are simply dropped.)
        uint i = 0;
        bool any = false;
        HeaderField * f = 0;
        while ( (f=h->field(HeaderField::ContentTransferEncoding,i)) != 0 ) {
            if ( ((ContentTransferEncoding*)f)->encoding() == EString::QP )
                any = true;
            i++;
        }
        if ( any && i > 1 )
            h->removeField( HeaderField::ContentTransferEncoding );
    }

    // undo the content-transfer-encoding. Binary is assumed if the
    // header doesn't specify one.
    EString::Encoding e = EString::Binary;
    ContentTransferEncoding * cte = h->contentTransferEncoding();
    if ( cte )
        e = cte->encoding();
    if ( !body.isEmpty() ) {
        if ( e == EString::Base64 || e == EString::Uuencode )
            body = body.decoded( e );
        else
            body = body.crlf().decoded( e );
    }

    // make sure there is a content-type, falling back to the
    // header's default type.
    ContentType * ct = h->contentType();
    if ( !ct ) {
        switch ( h->defaultType() ) {
        case Header::TextPlain:
            h->add( "Content-Type", "text/plain" );
            break;
        case Header::MessageRfc822:
            h->add( "Content-Type", "message/rfc822" );
            break;
        }
        ct = h->contentType();
    }
    // NOTE(review): ct is dereferenced from here on without a null
    // check, although some later statements do test it. this relies
    // on defaultType() only returning one of the two values handled
    // above -- confirm.
    if ( ct->type() == "text" ) {
        // specified: the sender named a charset we should trust.
        // unknown: the named charset has no codec.
        bool specified = false;
        bool unknown = false;
        Codec * c = 0;

        if ( ct ) {
            EString csn = ct->parameter( "charset" );
            if ( csn.lower() == "default" )
                csn = "";
            if ( !csn.isEmpty() )
                specified = true;
            c = Codec::byName( csn );
            if ( !c )
                unknown = true;
            if ( c && c->name().lower() == "us-ascii" ) {
                // Some MTAs appear to say this in case there is no
                // Content-Type field - without checking whether the
                // body actually is ASCII. If it isn't, we'd better
                // call our charset guesser.
                (void)c->toUnicode( body );
                if ( !c->valid() )
                    specified = false;
                // Not pretty.
            }
        }

        if ( !c )
            c = new AsciiCodec;

        bp->d->hasText = true;
        bp->d->text = c->toUnicode( body.crlf() );

        if ( c->name() == "GB2312" || c->name() == "ISO-2022-JP" ||
             c->name() == "KS_C_5601-1987" ) {
            // undefined code point usage in GB2312 spam is much too
            // common. (GB2312 spam is much too common, but that's
            // another matter.) Gb2312Codec turns all undefined code
            // points into U+FFFD, so here, we can take the unicode
            // form and say it's the canonical form. when a client
            // later reads the message, it gets the text in unicode,
            // including U+FFFD.

            // remember the body's result before the codec is reset
            // and reused for the header below.
            bool bad = !c->valid();

            // the header may contain some unencoded gb2312. we bang
            // it by hand, ignoring errors.
            List<HeaderField>::Iterator hf( h->fields() );
            while ( hf ) {
                if ( !hf->valid() &&
                     hf->type() == HeaderField::Subject ) {
                    // is it right to bang only Subject?
                    c->reset();
                    hf->setValue( c->toUnicode( hf->unparsedValue() ) );
                }
                ++hf;
            }

            // if the body was bad, we prefer the (unicode) in
            // bp->d->text and pretend it arrived as UTF-8:
            if ( bad ) {
                c = new Utf8Codec;
                body = c->fromUnicode( bp->d->text );
            }
        }

        // call the guesser if no charset was (credibly) specified and
        // the result isn't wellformed or the part is html, or if the
        // specified charset gave an invalid result.
        if ( ( !specified && ( !c->wellformed() ||
                               ct->subtype() == "html" ) ) ||
             ( specified &&  ( !c->valid() ) ) ) {
            Codec * g = 0;
            if ( ct->subtype() == "html" )
                g = guessHtmlCodec( body );
            else
                g = guessTextCodec( body );
            UString guessed;
            if ( g )
                guessed = g->toUnicode( body.crlf() );
            if ( !g ) {
                // if we couldn't guess anything, keep what we had if
                // it's valid or explicitly specified, else use
                // unknown-8bit.
                if ( !specified && !c->valid() ) {
                    c = new Unknown8BitCodec;
                    bp->d->text = c->toUnicode( body.crlf() );
                }
            }
            else {
                // if we could guess something, is our guess better
                // than what we had?
                if ( g->wellformed() && !c->wellformed() ) {
                    c = g;
                    bp->d->text = guessed;
                }
            }
        }

        if ( specified && c->state() == Codec::Invalid ) {
            // the codec was specified, and the specified codec
            // resulted in an error, but did not abort conversion. we
            // respond by forgetting the error, using the conversion
            // result (probably including one or more U+FFFD) and
            // labelling the message as UTF-8.
            c = new Utf8Codec;
            body = c->fromUnicode( bp->d->text );
        }
        else if ( !specified && c->state() == Codec::Invalid ) {
            // the codec was not specified, and we couldn't find
            // anything. we call it unknown-8bit.
            c = new Unknown8BitCodec;
            bp->d->text = c->toUnicode( body );
        }

        // if we ended up using a 16-bit codec and were using q-p, we
        // need to reevaluate without any trailing CRLF
        if ( e == EString::QP && c->name().startsWith( "UTF-16" ) )
            bp->d->text = c->toUnicode( body.stripCRLF() );

        // record a conversion error on the bodypart, unless one has
        // been recorded already.
        if ( !c->valid() && bp->d->error.isEmpty() ) {
            bp->d->error = "Could not convert body to Unicode";
            if ( specified ) {
                EString cs;
                if ( ct )
                    cs = ct->parameter( "charset" );
                if ( cs.isEmpty() )
                    cs = c->name();
                bp->d->error.append( " from " + cs );
            }
            if ( specified && unknown )
                bp->d->error.append( ": Character set not implemented" );
            else if ( !c->error().isEmpty() )
                bp->d->error.append( ": " + c->error() );
        }

        // make the charset parameter describe the codec we ended up
        // with. us-ascii is expressed by having no charset parameter.
        if ( c->name().lower() != "us-ascii" )
            ct->addParameter( "charset", c->name().lower() );
        else if ( ct )
            ct->removeParameter( "charset" );

        // reencode the text using the final codec, and use q-p
        // exactly when the result needs it.
        body = c->fromUnicode( bp->d->text );
        bool qp = body.needsQP();

        if ( cte ) {
            if ( !qp ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte->encoding() != EString::QP ) {
                cte->setEncoding( EString::QP );
            }
        }
        else if ( qp ) {
            h->add( "Content-Transfer-Encoding", "quoted-printable" );
            cte = h->contentTransferEncoding();
        }
    }
    else {
        bp->d->data = body;
        if ( ct->type() != "multipart" && ct->type() != "message" ) {
            // non-text leaf parts are base64-encoded by default.
            e = EString::Base64;
            // there may be exceptions. cases where some format really
            // needs another content-transfer-encoding:
            if ( ct->type() == "application" &&
                 ct->subtype().startsWith( "pgp-" ) &&
                 !body.needsQP() ) {
                // seems some PGP things need "Version: 1" unencoded
                e = EString::Binary;
            }
            else if ( ct->type() == "application" &&
                      ct->subtype() == "octet-stream" &&
                      body.contains( "BEGIN PGP MESSAGE" ) ) {
                // mutt cannot handle PGP in base64 (what a crock)
                e = EString::Binary;
            }
            // change c-t-e to match the encoding decided above
            if ( e == EString::Binary ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte ) {
                cte->setEncoding( e );
            }
            else {
                h->add( "Content-Transfer-Encoding", "base64" );
                cte = h->contentTransferEncoding();
            }
        }
    }

    if ( ct->type() == "multipart" ) {
        parseMultipart( start, end, rfc2822,
                        ct->parameter( "boundary" ),
                        ct->subtype() == "digest",
                        bp->children(), bp, false );
    }
    else if ( ct->type() == "message" && ct->subtype() == "rfc822" ) {
        // There are sometimes blank lines before the message.
        // Skip them, but never beyond the end of this bodypart.
        while ( start < end &&
                ( rfc2822[start] == 13 || rfc2822[start] == 10 ) )
            start++;
        // start may lie at or beyond end here, in which case the
        // unsigned end-start would wrap around. an empty message is
        // parsed in that case, the same way the body above is left
        // empty.
        EString embedded;
        if ( end > start )
            embedded = rfc2822.mid( start, end-start );
        Message * m = new Message;
        m->setParent( bp );
        m->parse( embedded );
        // the embedded message's bodyparts become children of bp.
        List<Bodypart>::Iterator it( m->children() );
        while ( it ) {
            bp->children()->append( it );
            it->setParent( bp );
            ++it;
        }
        bp->setMessage( m );
        body = m->rfc822( false );
    }

    // record the size before and after content-transfer-encoding.
    bp->d->numBytes = body.length();
    if ( cte )
        body = body.encoded( cte->encoding(), 72 );
    bp->d->numEncodedBytes = body.length();
    if ( bp->d->hasText ||
         ( ct->type() == "message" && ct->subtype() == "rfc822" ) ) {
        // count the lines of the encoded body. a final line without
        // a trailing LF counts as a line too.
        uint n = 0;
        uint i = 0;
        uint l = body.length();
        while ( i < l ) {
            if ( body[i] == '\n' )
                n++;
            i++;
        }
        if ( l && body[l-1] != '\n' )
            n++;
        bp->setNumEncodedLines( n );
    }

    h->simplify();

    return bp;
}

Beispiel #3

Datei anzeigen

// Guesses a codec for an HTML body. Starts from the generic text
// guess, falls back to ISO-8859-1 and windows-1252, and finally
// considers any charset declared in a <meta http-equiv> tag inside
// the body. Returns 0 if nothing fits.
static Codec * guessHtmlCodec( const EString & body )
{
    // Start from whatever the generic text guesser comes up with.
    Codec * best = guessTextCodec( body );

    // Without a generic guess, try ISO-8859-1 (the default HTML
    // prescribes), and keep it only if the conversion is valid.
    if ( !best ) {
        Codec * latin1 = new Iso88591Codec;
        (void)latin1->toUnicode( body );
        if ( latin1->valid() )
            best = latin1;
    }

    // Windows codepage 1252 is often mislabelled as ISO-8859-1. Try
    // it if there is still no guess, or if the guess is one of the
    // 8859-1/-15 codecs and its result isn't wellformed.
    bool tryCp1252 = !best;
    if ( best && !best->wellformed() )
        tryCp1252 = best->name() == "ISO-8859-1" ||
                    best->name() == "ISO-8859-15";
    if ( tryCp1252 ) {
        Codec * cp1252 = new Cp1252Codec;
        (void)cp1252->toUnicode( body );
        if ( cp1252->wellformed() )
            best = cp1252;
    }

    // Some user-agents put the charset in a
    // <meta http-equiv="content-type"> tag instead of in the
    // Content-Type field. Look at each such tag and decide whether
    // its charset is more credible than the guess made above.

    EString b = body.lower().simplified();
    EString tag( "<meta http-equiv=\"content-type\" content=\"" );
    int at = 0;
    for ( ;; ) {
        at = b.find( tag, at );
        if ( at < 0 )
            break;

        // The content attribute runs from just after the tag prefix
        // up to the next double quote (or the end of the text).
        at = at + tag.length();
        int quote = at;
        while ( quote < (int)b.length() && b[quote] != '"' )
            quote++;

        // Parse the attribute value as a Content-Type field to get at
        // its charset parameter.
        HeaderField * field
            = HeaderField::create( "Content-Type",
                                   b.mid( at, quote-at ) );
        EString charset = ((MimeField*)field)->parameter( "charset" );
        Codec * meta = 0;
        if ( !charset.isEmpty() )
            meta = Codec::byName( charset );

        // Convert the body with both candidates.
        UString viaMeta;
        if ( meta )
            viaMeta = meta->toUnicode( body );
        UString viaBest;
        if ( best )
            viaBest = best->toUnicode( body );

        if ( !meta )
            continue;

        // The meta codec is preferable if it gives the same nonempty
        // result as the guess, if it is wellformed and the guess
        // isn't, or if it is valid and the guess is missing, is
        // ISO-8859-1, or isn't valid.
        bool preferMeta =
            ( !viaMeta.isEmpty() && viaMeta == viaBest ) ||
            ( meta->wellformed() &&
              ( !best || !best->wellformed() ) ) ||
            ( meta->valid() && !best ) ||
            ( meta->valid() && best &&
              best->name() == "ISO-8859-1" ) ||
            ( meta->valid() && best && !best->valid() );

        // Even then, the tag itself has to survive a conversion of
        // the simplified text with the meta codec.
        if ( preferMeta &&
             meta->toUnicode( b ).ascii().contains( tag ) )
            best = meta;
    }

    return best;
}