C++ (Cpp) Codec::name 예제들

프로그래밍 언어: C++ (Cpp)

클래스/타입: Codec

메소드/함수: name

hotexamples.com에서의 예제들: 4

C++ (Cpp) Codec::name - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C++ (Cpp)의 Codec::name에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

decode(8)

serialize(8)

raw(8)

deserialize(8)

encode(7)

isNull(5)

name(4)

wellformed(3)

fromUnicode(3)

valid(3)

toUnicode(3)

type(2)

canEncode(2)

getName(2)

canDecode(2)

codeToFile(2)

error(1)

reset(1)

code(1)

compress(1)

supportedSamplerates(1)

supportedSampleFormats(1)

supportedChannelLayouts(1)

state(1)

setStreamSamplerate(1)

setSourceSamplerate(1)

setQuality(1)

setCompleteFrames(1)

setChannels(1)

setBitrate(1)

dealMsg(1)

decodeFrame(1)

generateHeader(1)

decompress(1)

messageCallback_(1)

matchedParens(1)

longName(1)

isValid(1)

dfs(1)

dumpStream(1)

getType(1)

getPixelFormat(1)

encodeToFile(1)

getMaxCompressedSize(1)

getFrameSize(1)

getFrameCount(1)

getCodecName(1)

init(1)

예제 #1

파일 보기

파일: mimefields.cpp 프로젝트: copernicus/aox

void ContentType::parse( const EString &s )
{
    EmailParser p( s );
    p.whitespace();
    while ( p.present( ":" ) )
        p.whitespace();

    bool mustGuess = false;

    if ( p.atEnd() ) {
        t = "text";
        st = "plain";
    }
    else {
        uint x = p.mark();
        if ( p.nextChar() == '/' )
            mustGuess = true;
        else
            t = p.mimeToken().lower();
        if ( p.atEnd() ) {
            if ( s == "text" ) {
                t = "text"; // elm? mailtool? someone does this, anyway.
                st = "plain";
            }
            // the remainder is from RFC 1049
            else if ( s == "postscript" ) {
                t = "application";
                st = "postscript";
            }
            else if ( s == "postscript" ) {
                t = "application";
                st = "postscript";
            }
            else if ( s == "sgml" ) {
                t = "text";
                st = "sgml";
            }
            else if ( s == "tex" ) {
                t = "application";
                st = "x-tex";
            }
            else if ( s == "troff" ) {
                t = "application";
                st = "x-troff";
            }
            else if ( s == "dvi" ) {
                t = "application";
                st = "x-dvi";
            }
            else if ( s.startsWith( "x-" ) ) {
                st = "x-rfc1049-" + s;
                t = "application";
            }
            else {
                // scribe and undefined types
                setError( "Invalid Content-Type: " + s.quoted() );
            }
        }
        else {
            if ( p.nextChar() == '/' ) {
                p.step();
                if ( !p.atEnd() || p.nextChar() != ';' )
                    st = p.mimeToken().lower();
                if ( st.isEmpty() )
                    mustGuess = true;
            }
            else if ( p.nextChar() == '=' ) {
                // oh no. someone skipped the content-type and
                // supplied only some parameters. we'll assume it's
                // text/plain and parse the parameters.
                t = "text";
                st = "plain";
                p.restore( x );
                mustGuess = true;
            }
            else {
                addParameter( "original-type", t + "/" + st );
                t = "application";
                st = "octet-stream";
                mustGuess = true;
            }
            parseParameters( &p );
        }
    }

    if ( mustGuess ) {
        EString fn = parameter( "name" );
        if ( fn.isEmpty() )
            fn = parameter( "filename" );
        while ( fn.endsWith( "." ) )
            fn.truncate( fn.length() - 1 );
        fn = fn.lower();
        if ( fn.endsWith( "jpg" ) || fn.endsWith( "jpeg" ) ) {
            t = "image";
            st = "jpeg";
        }
        else if ( fn.endsWith( "htm" ) || fn.endsWith( "html" ) ) {
            t = "text";
            st = "html";
        }
        else if ( fn.isEmpty() && st.isEmpty() && t == "text" ) {
            st = "plain";
        }
        else if ( t == "text" ) {
            addParameter( "original-type", t + "/" + st );
            st = "plain";
        }
        else {
            addParameter( "original-type", t + "/" + st );
            t = "application";
            st = "octet-stream";
        }
    }

    if ( t.isEmpty() || st.isEmpty() )
        setError( "Both type and subtype must be nonempty: " + s.quoted() );

    if ( valid() && t == "multipart" && st == "appledouble" &&
         parameter( "boundary" ).isEmpty() ) {
        // some people send appledouble without the header. what can
        // we do? let's just call it application/octet-stream. whoever
        // wants to decode can try, or reply.
        t = "application";
        st = "octet-steam";
    }

    if ( valid() && !p.atEnd() &&
         t == "multipart" && parameter( "boundary" ).isEmpty() &&
         s.lower().containsWord( "boundary" ) ) {
        EmailParser csp( s.mid( s.lower().find( "boundary" ) ) );
        csp.require( "boundary" );
        csp.whitespace();
        if ( csp.present( "=" ) )
            csp.whitespace();
        uint m = csp.mark();
        EString b = csp.string();
        if ( b.isEmpty() || !csp.ok() ) {
            csp.restore( m );
            b = csp.input().mid( csp.pos() ).section( ";", 1 ).simplified();
            if ( !b.isQuoted() )
                b.replace( "\\", "" );
            if ( b.isQuoted() )
                b = b.unquoted();
            else if ( b.isQuoted( '\'' ) )
                b = b.unquoted( '\'' );
        }
        if ( !b.isEmpty() )
            addParameter( "boundary", b );
    }

    if ( valid() && t == "multipart" && parameter( "boundary" ).isEmpty() )
        setError( "Multipart entities must have a boundary parameter." );

    if ( !parameter( "charset" ).isEmpty() ) {
        Codec * c = Codec::byName( parameter( "charset" ) );
        if ( c ) {
            EString cs = c->name().lower();
            if ( t == "text" && cs == "us-ascii" )
                removeParameter( "charset" );
            else if ( cs != parameter( "charset" ).lower() )
                addParameter( "charset", cs );
        }
    }

    if ( valid() && !p.atEnd() &&
         t == "text" && parameter( "charset" ).isEmpty() &&
         s.mid( p.pos() ).lower().containsWord( "charset" ) ) {
        EmailParser csp( s.mid( s.lower().find( "charset" ) ) );
        csp.require( "charset" );
        csp.whitespace();
        if ( csp.present( "=" ) )
            csp.whitespace();
        Codec * c = Codec::byName( csp.dotAtom() );
        if ( c )
            addParameter( "charset", c->name().lower() );
    }

    if ( !valid() )
        setUnparsedValue( s );
}

예제 #2

파일 보기

Bodypart * Bodypart::parseBodypart( uint start, uint end,
                                    const EString & rfc2822,
                                    Header * h, Multipart * parent )
{
    if ( rfc2822[start] == 13 )
        start++;
    if ( rfc2822[start] == 10 )
        start++;

    Bodypart * bp = new Bodypart;
    bp->setParent( parent );
    bp->setHeader( h );

    EString body;
    if ( end > start )
        body = rfc2822.mid( start, end-start );
    if ( !body.contains( '=' ) ) {
        // sometimes people send c-t-e: q-p _and_ c-t-e: 7bit or 8bit.
        // if they are equivalent we can accept it.
        uint i = 0;
        bool any = false;
        HeaderField * f = 0;
        while ( (f=h->field(HeaderField::ContentTransferEncoding,i)) != 0 ) {
            if ( ((ContentTransferEncoding*)f)->encoding() == EString::QP )
                any = true;
            i++;
        }
        if ( any && i > 1 )
            h->removeField( HeaderField::ContentTransferEncoding );
    }

    EString::Encoding e = EString::Binary;
    ContentTransferEncoding * cte = h->contentTransferEncoding();
    if ( cte )
        e = cte->encoding();
    if ( !body.isEmpty() ) {
        if ( e == EString::Base64 || e == EString::Uuencode )
            body = body.decoded( e );
        else
            body = body.crlf().decoded( e );
    }

    ContentType * ct = h->contentType();
    if ( !ct ) {
        switch ( h->defaultType() ) {
        case Header::TextPlain:
            h->add( "Content-Type", "text/plain" );
            break;
        case Header::MessageRfc822:
            h->add( "Content-Type", "message/rfc822" );
            break;
        }
        ct = h->contentType();
    }
    if ( ct->type() == "text" ) {
        bool specified = false;
        bool unknown = false;
        Codec * c = 0;

        if ( ct ) {
            EString csn = ct->parameter( "charset" );
            if ( csn.lower() == "default" )
                csn = "";
            if ( !csn.isEmpty() )
                specified = true;
            c = Codec::byName( csn );
            if ( !c )
                unknown = true;
            if ( c && c->name().lower() == "us-ascii" ) {
                // Some MTAs appear to say this in case there is no
                // Content-Type field - without checking whether the
                // body actually is ASCII. If it isn't, we'd better
                // call our charset guesser.
                (void)c->toUnicode( body );
                if ( !c->valid() )
                    specified = false;
                // Not pretty.
            }
        }

        if ( !c )
            c = new AsciiCodec;

        bp->d->hasText = true;
        bp->d->text = c->toUnicode( body.crlf() );

        if ( c->name() == "GB2312" || c->name() == "ISO-2022-JP" ||
             c->name() == "KS_C_5601-1987" ) {
            // undefined code point usage in GB2312 spam is much too
            // common. (GB2312 spam is much too common, but that's
            // another matter.) Gb2312Codec turns all undefined code
            // points into U+FFFD, so here, we can take the unicode
            // form and say it's the canonical form. when a client
            // later reads the message, it gets the text in unicode,
            // including U+FFFD.

            bool bad = !c->valid();

            // the header may contain some unencoded gb2312. we bang
            // it by hand, ignoring errors.
            List<HeaderField>::Iterator hf( h->fields() );
            while ( hf ) {
                if ( !hf->valid() &&
                     hf->type() == HeaderField::Subject ) {
                    // is it right to bang only Subject?
                    c->reset();
                    hf->setValue( c->toUnicode( hf->unparsedValue() ) );
                }
                ++hf;
            }

            // if the body was bad, we prefer the (unicode) in
            // bp->d->text and pretend it arrived as UTF-8:
            if ( bad ) {
                c = new Utf8Codec;
                body = c->fromUnicode( bp->d->text );
            }
        }

        if ( ( !specified && ( !c->wellformed() ||
                               ct->subtype() == "html" ) ) ||
             ( specified &&  ( !c->valid() ) ) ) {
            Codec * g = 0;
            if ( ct->subtype() == "html" )
                g = guessHtmlCodec( body );
            else
                g = guessTextCodec( body );
            UString guessed;
            if ( g )
                guessed = g->toUnicode( body.crlf() );
            if ( !g ) {
                // if we couldn't guess anything, keep what we had if
                // it's valid or explicitly specified, else use
                // unknown-8bit.
                if ( !specified && !c->valid() ) {
                    c = new Unknown8BitCodec;
                    bp->d->text = c->toUnicode( body.crlf() );
                }
            }
            else {
                // if we could guess something, is our guess better
                // than what we had?
                if ( g->wellformed() && !c->wellformed() ) {
                    c = g;
                    bp->d->text = guessed;
                }
            }
        }

        if ( specified && c->state() == Codec::Invalid ) {
            // the codec was specified, and the specified codec
            // resulted in an error, but did not abort conversion. we
            // respond by forgetting the error, using the conversion
            // result (probably including one or more U+FFFD) and
            // labelling the message as UTF-8.
            c = new Utf8Codec;
            body = c->fromUnicode( bp->d->text );
        }
        else if ( !specified && c->state() == Codec::Invalid ) {
            // the codec was not specified, and we couldn't find
            // anything. we call it unknown-8bit.
            c = new Unknown8BitCodec;
            bp->d->text = c->toUnicode( body );
        }

        // if we ended up using a 16-bit codec and were using q-p, we
        // need to reevaluate without any trailing CRLF
        if ( e == EString::QP && c->name().startsWith( "UTF-16" ) )
            bp->d->text = c->toUnicode( body.stripCRLF() );

        if ( !c->valid() && bp->d->error.isEmpty() ) {
            bp->d->error = "Could not convert body to Unicode";
            if ( specified ) {
                EString cs;
                if ( ct )
                    cs = ct->parameter( "charset" );
                if ( cs.isEmpty() )
                    cs = c->name();
                bp->d->error.append( " from " + cs );
            }
            if ( specified && unknown )
                bp->d->error.append( ": Character set not implemented" );
            else if ( !c->error().isEmpty() )
                bp->d->error.append( ": " + c->error() );
        }

        if ( c->name().lower() != "us-ascii" )
            ct->addParameter( "charset", c->name().lower() );
        else if ( ct )
            ct->removeParameter( "charset" );

        body = c->fromUnicode( bp->d->text );
        bool qp = body.needsQP();

        if ( cte ) {
            if ( !qp ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte->encoding() != EString::QP ) {
                cte->setEncoding( EString::QP );
            }
        }
        else if ( qp ) {
            h->add( "Content-Transfer-Encoding", "quoted-printable" );
            cte = h->contentTransferEncoding();
        }
    }
    else {
        bp->d->data = body;
        if ( ct->type() != "multipart" && ct->type() != "message" ) {
            e = EString::Base64;
            // there may be exceptions. cases where some format really
            // needs another content-transfer-encoding:
            if ( ct->type() == "application" &&
                 ct->subtype().startsWith( "pgp-" ) &&
                 !body.needsQP() ) {
                // seems some PGP things need "Version: 1" unencoded
                e = EString::Binary;
            }
            else if ( ct->type() == "application" &&
                      ct->subtype() == "octet-stream" &&
                      body.contains( "BEGIN PGP MESSAGE" ) ) {
                // mutt cannot handle PGP in base64 (what a crock)
                e = EString::Binary;
            }
            // change c-t-e to match the encoding decided above
            if ( e == EString::Binary ) {
                h->removeField( HeaderField::ContentTransferEncoding );
                cte = 0;
            }
            else if ( cte ) {
                cte->setEncoding( e );
            }
            else {
                h->add( "Content-Transfer-Encoding", "base64" );
                cte = h->contentTransferEncoding();
            }
        }
    }

    if ( ct->type() == "multipart" ) {
        parseMultipart( start, end, rfc2822,
                        ct->parameter( "boundary" ),
                        ct->subtype() == "digest",
                        bp->children(), bp, false );
    }
    else if ( ct->type() == "message" && ct->subtype() == "rfc822" ) {
        // There are sometimes blank lines before the message.
        while ( rfc2822[start] == 13 || rfc2822[start] == 10 )
            start++;
        Message * m = new Message;
        m->setParent( bp );
        m->parse( rfc2822.mid( start, end-start ) );
        List<Bodypart>::Iterator it( m->children() );
        while ( it ) {
            bp->children()->append( it );
            it->setParent( bp );
            ++it;
        }
        bp->setMessage( m );
        body = m->rfc822( false );
    }

    bp->d->numBytes = body.length();
    if ( cte )
        body = body.encoded( cte->encoding(), 72 );
    bp->d->numEncodedBytes = body.length();
    if ( bp->d->hasText ||
         ( ct->type() == "message" && ct->subtype() == "rfc822" ) ) {
        uint n = 0;
        uint i = 0;
        uint l = body.length();
        while ( i < l ) {
            if ( body[i] == '\n' )
                n++;
            i++;
        }
        if ( l && body[l-1] != '\n' )
            n++;
        bp->setNumEncodedLines( n );
    }

    h->simplify();

    return bp;
}

예제 #3

파일 보기

파일: api2-decode-encode-audio.cpp 프로젝트: adarovsky/avcpp

int main(int argc, char **argv)
{
    if (argc < 3)
        return 1;

    av::init();
    av::setFFmpegLoggingLevel(AV_LOG_TRACE);

    string uri (argv[1]);
    string out (argv[2]);

    ssize_t      audioStream = -1;
    AudioDecoderContext adec;
    Stream       ast;
    error_code   ec;

    int count = 0;

    {

        //
        // INPUT
        //
        FormatContext ictx;

        ictx.openInput(uri, ec);
        if (ec) {
            cerr << "Can't open input\n";
            return 1;
        }

        ictx.findStreamInfo();

        for (size_t i = 0; i < ictx.streamsCount(); ++i) {
            auto st = ictx.stream(i);
            if (st.isAudio()) {
                audioStream = i;
                ast = st;
                break;
            }
        }

        cerr << audioStream << endl;

        if (ast.isNull()) {
            cerr << "Audio stream not found\n";
            return 1;
        }

        if (ast.isValid()) {
            adec = AudioDecoderContext(ast);

            //Codec codec = findDecodingCodec(adec.raw()->codec_id);

            //adec.setCodec(codec);
            //adec.setRefCountedFrames(true);

            adec.open(ec);

            if (ec) {
                cerr << "Can't open codec\n";
                return 1;
            }
        }

        //
        // OUTPUT
        //
        OutputFormat  ofmt;
        FormatContext octx;

        ofmt = av::guessOutputFormat(out, out);
        clog << "Output format: " << ofmt.name() << " / " << ofmt.longName() << '\n';
        octx.setFormat(ofmt);

        Codec        ocodec = av::findEncodingCodec(ofmt, false);
        Stream      ost    = octx.addStream(ocodec);
        AudioEncoderContext enc (ost);

        clog << ocodec.name() << " / " << ocodec.longName() << ", audio: " << (ocodec.type()==AVMEDIA_TYPE_AUDIO) << '\n';

        auto sampleFmts  = ocodec.supportedSampleFormats();
        auto sampleRates = ocodec.supportedSamplerates();
        auto layouts     = ocodec.supportedChannelLayouts();

        clog << "Supported sample formats:\n";
        for (const auto &fmt : sampleFmts) {
            clog << "  " << av_get_sample_fmt_name(fmt) << '\n';
        }

        clog << "Supported sample rates:\n";
        for (const auto &rate : sampleRates) {
            clog << "  " << rate << '\n';
        }

        clog << "Supported sample layouts:\n";
        for (const auto &lay : layouts) {
            char buf[128] = {0};
            av_get_channel_layout_string(buf,
                                         sizeof(buf),
                                         av_get_channel_layout_nb_channels(lay),
                                         lay);

            clog << "  " << buf << '\n';
        }

        //return 0;


        // Settings
#if 1
        enc.setSampleRate(48000);
        enc.setSampleFormat(sampleFmts[0]);
        // Layout
        //enc.setChannelLayout(adec.channelLayout());
        enc.setChannelLayout(AV_CH_LAYOUT_STEREO);
        //enc.setChannelLayout(AV_CH_LAYOUT_MONO);
        enc.setTimeBase(Rational(1, enc.sampleRate()));
        enc.setBitRate(adec.bitRate());
#else
        enc.setSampleRate(adec.sampleRate());
        enc.setSampleFormat(adec.sampleFormat());
        enc.setChannelLayout(adec.channelLayout());
        enc.setTimeBase(adec.timeBase());
        enc.setBitRate(adec.bitRate());
#endif

        octx.openOutput(out, ec);
        if (ec) {
            cerr << "Can't open output\n";
            return 1;
        }

        enc.open(ec);
        if (ec) {
            cerr << "Can't open encoder\n";
            return 1;
        }

        clog << "Encoder frame size: " << enc.frameSize() << '\n';

        octx.dump();
        octx.writeHeader();
        octx.flush();

        //
        // RESAMPLER
        //
        AudioResampler resampler(enc.channelLayout(),  enc.sampleRate(),  enc.sampleFormat(),
                                 adec.channelLayout(), adec.sampleRate(), adec.sampleFormat());

        //
        // PROCESS
        //
        while (true) {
            Packet pkt = ictx.readPacket(ec);
            if (ec)
            {
                clog << "Packet reading error: " << ec << ", " << ec.message() << endl;
                break;
            }

            if (pkt.streamIndex() != audioStream) {
                continue;
            }

            clog << "Read packet: isNull=" << (bool)!pkt << ", " << pkt.pts() << "(nopts:" << pkt.pts().isNoPts() << ")" << " / " << pkt.pts().seconds() << " / " << pkt.timeBase() << " / st: " << pkt.streamIndex() << endl;
#if 0
            if (pkt.pts() == av::NoPts && pkt.timeBase() == Rational())
            {
                clog << "Skip invalid timestamp packet: data=" << (void*)pkt.data()
                     << ", size=" << pkt.size()
                     << ", flags=" << pkt.flags() << " (corrupt:" << (pkt.flags() & AV_PKT_FLAG_CORRUPT) << ";key:" << (pkt.flags() & AV_PKT_FLAG_KEY) << ")"
                     << ", side_data=" << (void*)pkt.raw()->side_data
                     << ", side_data_count=" << pkt.raw()->side_data_elems
                     << endl;
                //continue;
            }
#endif

            auto samples = adec.decode(pkt, ec);
            count++;
            //if (count > 200)
            //    break;

            if (ec) {
                cerr << "Decode error: " << ec << ", " << ec.message() << endl;
                return 1;
            } else if (!samples) {
                cerr << "Empty samples set\n";

                //if (!pkt) // decoder flushed here
                //   break;
                //continue;
            }

            clog << "  Samples [in]: " << samples.samplesCount()
                 << ", ch: " << samples.channelsCount()
                 << ", freq: " << samples.sampleRate()
                 << ", name: " << samples.channelsLayoutString()
                 << ", pts: " << samples.pts().seconds()
                 << ", ref=" << samples.isReferenced() << ":" << samples.refCount()
                 << endl;

            // Empty samples set should not be pushed to the resampler, but it is valid case for the
            // end of reading: during samples empty, some cached data can be stored at the resampler
            // internal buffer, so we should consume it.
            if (samples)
            {
                resampler.push(samples, ec);
                if (ec) {
                    clog << "Resampler push error: " << ec << ", text: " << ec.message() << endl;
                    continue;
                }
            }

            // Pop resampler data
            bool getAll = !samples;
            while (true) {
                AudioSamples ouSamples(enc.sampleFormat(),
                                       enc.frameSize(),
                                       enc.channelLayout(),
                                       enc.sampleRate());

                // Resample:
                bool hasFrame = resampler.pop(ouSamples, getAll, ec);
                if (ec) {
                    clog << "Resampling status: " << ec << ", text: " << ec.message() << endl;
                    break;
                } else if (!hasFrame) {
                    break;
                } else
                    clog << "  Samples [ou]: " << ouSamples.samplesCount()
                         << ", ch: " << ouSamples.channelsCount()
                         << ", freq: " << ouSamples.sampleRate()
                         << ", name: " << ouSamples.channelsLayoutString()
                         << ", pts: " << ouSamples.pts().seconds()
                         << ", ref=" << ouSamples.isReferenced() << ":" << ouSamples.refCount()
                         << endl;

                // ENCODE
                ouSamples.setStreamIndex(0);
                ouSamples.setTimeBase(enc.timeBase());

                Packet opkt = enc.encode(ouSamples, ec);
                if (ec) {
                    cerr << "Encoding error: " << ec << ", " << ec.message() << endl;
                    return 1;
                } else if (!opkt) {
                    //cerr << "Empty packet\n";
                    continue;
                }

                opkt.setStreamIndex(0);

                clog << "Write packet: pts=" << opkt.pts() << ", dts=" << opkt.dts() << " / " << opkt.pts().seconds() << " / " << opkt.timeBase() << " / st: " << opkt.streamIndex() << endl;

                octx.writePacket(opkt, ec);
                if (ec) {
                    cerr << "Error write packet: " << ec << ", " << ec.message() << endl;
                    return 1;
                }
            }

            // For the first packets samples can be empty: decoder caching
            if (!pkt && !samples)
                break;
        }

        //
        // Is resampler flushed?
        //
        cerr << "Delay: " << resampler.delay() << endl;

        //
        // Flush encoder queue
        //
        clog << "Flush encoder:\n";
        while (true) {
            AudioSamples null(nullptr);
            Packet        opkt = enc.encode(null, ec);
            if (ec || !opkt)
                break;

            opkt.setStreamIndex(0);

            clog << "Write packet: pts=" << opkt.pts() << ", dts=" << opkt.dts() << " / " << opkt.pts().seconds() << " / " << opkt.timeBase() << " / st: " << opkt.streamIndex() << endl;

            octx.writePacket(opkt, ec);
            if (ec) {
                cerr << "Error write packet: " << ec << ", " << ec.message() << endl;
                return 1;
            }

        }

        octx.flush();
        octx.writeTrailer();
    }
}

예제 #4

파일 보기

static Codec * guessHtmlCodec( const EString & body )
{
    // Let's see if the general function has something for us.
    Codec * guess = guessTextCodec( body );

    // HTML prescribes that 8859-1 is the default. Let's see if 8859-1
    // works.
    if ( !guess ) {
        guess = new Iso88591Codec;
        (void)guess->toUnicode( body );
        if ( !guess->valid() )
            guess = 0;
    }

    if ( !guess ||
         ( !guess->wellformed() &&
           ( guess->name() == "ISO-8859-1" ||
             guess->name() == "ISO-8859-15" ) ) ) {
        // Some people believe that Windows codepage 1252 is
        // ISO-8859-1. Let's see if that works.
        Codec * windoze = new Cp1252Codec;
        (void)windoze->toUnicode( body );
        if ( windoze->wellformed() )
            guess = windoze;
    }


    // Some user-agents add a <meta http-equiv="content-type"> instead
    // of the Content-Type field. Maybe that exists? And if it exists,
    // is it more likely to be correct than our guess above?

    EString b = body.lower().simplified();
    int i = 0;
    while ( i >= 0 ) {
        EString tag( "<meta http-equiv=\"content-type\" content=\"" );
        i = b.find( tag, i );
        if ( i >= 0 ) {
            i = i + tag.length();
            int j = i;
            while ( j < (int)b.length() && b[j] != '"' )
                j++;
            HeaderField * hf
                = HeaderField::create( "Content-Type",
                                       b.mid( i, j-i ) );
            EString cs = ((MimeField*)hf)->parameter( "charset" );
            Codec * meta = 0;
            if ( !cs.isEmpty() )
                meta = Codec::byName( cs );
            UString m;
            if ( meta )
                m = meta->toUnicode( body );
            UString g;
            if ( guess )
                g = guess->toUnicode( body );
            if ( meta &&
                 ( ( !m.isEmpty() && m == g ) ||
                   ( meta->wellformed() &&
                     ( !guess || !guess->wellformed() ) ) ||
                   ( meta->valid() && !guess ) ||
                   ( meta->valid() && guess &&
                     guess->name() == "ISO-8859-1" ) ||
                   ( meta->valid() && guess && !guess->valid() ) ) &&
                 meta->toUnicode( b ).ascii().contains( tag ) ) {
                guess = meta;
            }
        }
    }

    return guess;
}