int main(int argc, char **argv) { int nb_samples, total_samples=0, nb_encoded; int c; int option_index = 0; char *inFile, *outFile; FILE *fin, *fout; short input[MAX_FRAME_SIZE]; celt_int32 frame_size = 960; int quiet=0; int nbBytes; CELTMode *mode; void *st; unsigned char bits[MAX_FRAME_BYTES]; int with_cbr = 0; int with_cvbr = 0; int with_skeleton = 0; int total_bytes = 0; int peak_bytes = 0; struct option long_options[] = { {"bitrate", required_argument, NULL, 0}, {"cbr",no_argument,NULL, 0}, {"cvbr",no_argument,NULL, 0}, {"comp", required_argument, NULL, 0}, {"nopf", no_argument, NULL, 0}, {"independent", no_argument, NULL, 0}, {"framesize", required_argument, NULL, 0}, {"skeleton",no_argument,NULL, 0}, {"help", no_argument, NULL, 0}, {"quiet", no_argument, NULL, 0}, {"le", no_argument, NULL, 0}, {"be", no_argument, NULL, 0}, {"8bit", no_argument, NULL, 0}, {"16bit", no_argument, NULL, 0}, {"mono", no_argument, NULL, 0}, {"stereo", no_argument, NULL, 0}, {"rate", required_argument, NULL, 0}, {"version", no_argument, NULL, 0}, {"version-short", no_argument, NULL, 0}, {"comment", required_argument, NULL, 0}, {"author", required_argument, NULL, 0}, {"title", required_argument, NULL, 0}, {0, 0, 0, 0} }; int print_bitrate=0; celt_int32 rate=48000; celt_int32 size; int chan=1; int fmt=16; int lsb=1; ogg_stream_state os; ogg_stream_state so; /* ogg stream for skeleton bitstream */ ogg_page og; ogg_packet op; int bytes_written=0, ret, result; int id=-1; CELTHeader header; char vendor_string[64]; char *comments; int comments_length; int close_in=0, close_out=0; int eos=0; float bitrate=-1; char first_bytes[12]; int wave_input=0; celt_int32 lookahead = 0; int bytes_per_packet=-1; int complexity=-127; int prediction=2; /*Process command-line options*/ while(1) { c = getopt_long (argc, argv, "hvV", long_options, &option_index); if (c==-1) break; switch(c) { case 0: if (strcmp(long_options[option_index].name,"bitrate")==0) { bitrate = atof (optarg); } else if (strcmp(long_options[option_index].name,"cbr")==0) { with_cbr=1; } else if (strcmp(long_options[option_index].name,"cvbr")==0) { with_cvbr=1; } else if (strcmp(long_options[option_index].name,"skeleton")==0) { with_skeleton=1; } else if (strcmp(long_options[option_index].name,"help")==0) { usage(); exit(0); } else if (strcmp(long_options[option_index].name,"quiet")==0) { quiet = 1; } else if (strcmp(long_options[option_index].name,"version")==0) { version(); exit(0); } else if (strcmp(long_options[option_index].name,"version-short")==0) { version_short(); exit(0); } else if (strcmp(long_options[option_index].name,"le")==0) { lsb=1; } else if (strcmp(long_options[option_index].name,"be")==0) { lsb=0; } else if (strcmp(long_options[option_index].name,"8bit")==0) { fmt=8; } else if (strcmp(long_options[option_index].name,"16bit")==0) { fmt=16; } else if (strcmp(long_options[option_index].name,"stereo")==0) { chan=2; } else if (strcmp(long_options[option_index].name,"mono")==0) { chan=1; } else if (strcmp(long_options[option_index].name,"rate")==0) { rate=atoi (optarg); } else if (strcmp(long_options[option_index].name,"comp")==0) { complexity=atoi (optarg); } else if (strcmp(long_options[option_index].name,"framesize")==0) { frame_size=atoi (optarg); } else if (strcmp(long_options[option_index].name,"nopf")==0) { if (prediction>1) prediction=1; } else if (strcmp(long_options[option_index].name,"independent")==0) { prediction=0; } else if (strcmp(long_options[option_index].name,"comment")==0) { if (!strchr(optarg, '=')) { fprintf (stderr, "Invalid comment: %s\n", optarg); fprintf (stderr, "Comments must be of the form name=value\n"); exit(1); } comment_add(&comments, &comments_length, NULL, optarg); } else if (strcmp(long_options[option_index].name,"author")==0) { comment_add(&comments, &comments_length, "author=", optarg); } else if (strcmp(long_options[option_index].name,"title")==0) { comment_add(&comments, &comments_length, "title=", optarg); } break; case 'h': usage(); exit(0); break; case 'v': version(); exit(0); break; case 'V': print_bitrate=1; break; case '?': usage(); exit(1); break; } } if (argc-optind!=2) { usage(); exit(1); } inFile=argv[optind]; outFile=argv[optind+1]; /*Initialize Ogg stream struct*/ srand(time(NULL)); if (ogg_stream_init(&os, rand())==-1) { fprintf(stderr,"Error: stream init failed\n"); exit(1); } if (with_skeleton && ogg_stream_init(&so, rand())==-1) { fprintf(stderr,"Error: stream init failed\n"); exit(1); } if (strcmp(inFile, "-")==0) { #if defined WIN32 || defined _WIN32 _setmode(_fileno(stdin), _O_BINARY); #elif defined OS2 _fsetmode(stdin,"b"); #endif fin=stdin; } else { fin = fopen(inFile, "rb"); if (!fin) { perror(inFile); exit(1); } close_in=1; } { fread(first_bytes, 1, 12, fin); if (strncmp(first_bytes,"RIFF",4)==0 && strncmp(first_bytes,"RIFF",4)==0) { if (read_wav_header(fin, &rate, &chan, &fmt, &size)==-1) exit(1); wave_input=1; lsb=1; /* CHECK: exists big-endian .wav ?? */ } } if (bitrate<=0.005) if (chan==1) bitrate=64.0; else bitrate=128.0; bytes_per_packet = MAX_FRAME_BYTES; mode = celt_mode_create(rate, frame_size, NULL); if (!mode) return 1; snprintf(vendor_string, sizeof(vendor_string), "Encoded with CELT %s\n",CELT_VERSION); comment_init(&comments, &comments_length, vendor_string); /*celt_mode_info(mode, CELT_GET_FRAME_SIZE, &frame_size);*/ celt_header_init(&header, mode, frame_size, chan); header.nb_channels = chan; { char *st_string="mono"; if (chan==2) st_string="stereo"; if (!quiet) if (with_cbr) fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet, CBR)\n", header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet); else fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet maximum)\n", header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet); } /*Initialize CELT encoder*/ st = celt_encoder_create_custom(mode, chan, NULL); { int tmp = (bitrate*1000); if (celt_encoder_ctl(st, CELT_SET_BITRATE(tmp)) != CELT_OK) { fprintf (stderr, "bitrate request failed\n"); return 1; } } if (!with_cbr) { if (celt_encoder_ctl(st, CELT_SET_VBR(1)) != CELT_OK) { fprintf (stderr, "VBR request failed\n"); return 1; } if (!with_cvbr) { if (celt_encoder_ctl(st, CELT_SET_VBR_CONSTRAINT(0)) != CELT_OK) { fprintf (stderr, "VBR constraint failed\n"); return 1; } } } if (celt_encoder_ctl(st, CELT_SET_PREDICTION(prediction)) != CELT_OK) { fprintf (stderr, "Prediction request failed\n"); return 1; } if (complexity!=-127) { if (celt_encoder_ctl(st, CELT_SET_COMPLEXITY(complexity)) != CELT_OK) { fprintf (stderr, "Only complexity 0 through 10 is supported\n"); return 1; } } if (strcmp(outFile,"-")==0) { #if defined WIN32 || defined _WIN32 _setmode(_fileno(stdout), _O_BINARY); #endif fout=stdout; } else { fout = fopen(outFile, "wb"); if (!fout) { perror(outFile); exit(1); } close_out=1; } if (with_skeleton) { fprintf (stderr, "Warning: Enabling skeleton output may cause some decoders to fail.\n"); } /* first packet should be the skeleton header. */ if (with_skeleton) { add_fishead_packet(&so); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed skeleton (fishead) header to output stream\n"); exit(1); } else bytes_written += ret; } /*Write header*/ { unsigned char header_data[100]; int packet_size = celt_header_to_packet(&header, header_data, 100); op.packet = header_data; op.bytes = packet_size; op.b_o_s = 1; op.e_o_s = 0; op.granulepos = 0; op.packetno = 0; ogg_stream_packetin(&os, &op); while((result = ogg_stream_flush(&os, &og))) { if(!result) break; ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } op.packet = (unsigned char *)comments; op.bytes = comments_length; op.b_o_s = 0; op.e_o_s = 0; op.granulepos = 0; op.packetno = 1; ogg_stream_packetin(&os, &op); } /* fisbone packet should be write after all bos pages */ if (with_skeleton) { add_fisbone_packet(&so, os.serialno, &header); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed writing skeleton (fisbone )header to output stream\n"); exit(1); } else bytes_written += ret; } /* writing the rest of the celt header packets */ while((result = ogg_stream_flush(&os, &og))) { if(!result) break; ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } free(comments); /* write the skeleton eos packet */ if (with_skeleton) { add_eos_packet_to_stream(&so); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed writing skeleton header to output stream\n"); exit(1); } else bytes_written += ret; } if (!wave_input) { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, first_bytes, NULL); } else { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size); } if (nb_samples==0) eos=1; total_samples += nb_samples; nb_encoded = -lookahead; /*Main encoding loop (one frame per iteration)*/ while (!eos || total_samples>nb_encoded) { id++; /*Encode current frame*/ nbBytes = celt_encode(st, input, frame_size, bits, bytes_per_packet); if (nbBytes<0) { fprintf(stderr, "Got error %d while encoding. Aborting.\n", nbBytes); break; } nb_encoded += frame_size; total_bytes += nbBytes; peak_bytes=IMAX(nbBytes,peak_bytes); if (wave_input) { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size); } else { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, NULL); } if (nb_samples==0) { eos=1; } if (eos && total_samples<=nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; total_samples += nb_samples; op.packet = (unsigned char *)bits; op.bytes = nbBytes; op.b_o_s = 0; /*Is this redundent?*/ if (eos && total_samples<=nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; op.granulepos = (id+1)*frame_size-lookahead; if (op.granulepos>total_samples) op.granulepos = total_samples; /*printf ("granulepos: %d %d %d %d %d %d\n", (int)op.granulepos, id, nframes, lookahead, 5, 6);*/ op.packetno = 2+id; ogg_stream_packetin(&os, &op); /*Write all new pages (most likely 0 or 1)*/ while (ogg_stream_pageout(&os,&og)) { ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } } /*Flush all pages left to be written*/ while (ogg_stream_flush(&os, &og)) { ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } if (!with_cbr && !quiet) fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes); celt_encoder_destroy(st); celt_mode_destroy(mode); ogg_stream_clear(&os); if (close_in) fclose(fin); if (close_out) fclose(fout); return 0; }
int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, unsigned char *data, int max_data_bytes) { void *silk_enc; CELTEncoder *celt_enc; int i; int ret=0; SKP_int32 nBytes; ec_enc enc; int framerate, period; int silk_internal_bandwidth=-1; int bytes_target; int prefill=0; int start_band = 0; int redundancy = 0; int redundancy_bytes = 0; int celt_to_silk = 0; /* TODO: This is 60 only so we can handle 60ms speech/audio switching it shouldn't bee too hard to reduce to 20 ms if needed */ short pcm_buf[60*48*2]; int nb_compr_bytes; int to_celt = 0; celt_int32 mono_rate; silk_enc = (char*)st+st->silk_enc_offset; celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); if (st->user_bitrate_bps==OPUS_BITRATE_AUTO) st->bitrate_bps = 60*st->Fs/frame_size + st->Fs*st->channels; else st->bitrate_bps = st->user_bitrate_bps; /* Rate-dependent mono-stereo decision */ if (st->mode == MODE_CELT_ONLY && st->channels == 2) { celt_int32 decision_rate; decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio; /* Add some hysteresis */ if (st->stream_channels == 2) decision_rate += 4000; else decision_rate -= 4000; if (decision_rate>48000) st->stream_channels = 2; else st->stream_channels = 1; } /* Equivalent bit-rate for mono */ mono_rate = st->bitrate_bps; if (st->stream_channels==2) mono_rate = (mono_rate+10000)/2; /* Compensate for smaller frame sizes assuming an equivalent overhead of 60 bits/frame */ mono_rate -= 60*(st->Fs/frame_size - 50); /* Mode selection */ if (st->user_mode==OPUS_MODE_AUTO) { celt_int32 decision_rate; /* SILK/CELT threshold is higher for voice than for music */ decision_rate = mono_rate - 3*st->voice_ratio*st->voice_ratio; /* Hysteresis */ if (st->prev_mode == MODE_CELT_ONLY) decision_rate += 4000; else if (st->prev_mode>0) decision_rate -= 4000; if (decision_rate>24000) st->mode = MODE_CELT_ONLY; else st->mode = MODE_SILK_ONLY; } else if (st->user_mode==OPUS_MODE_VOICE) { st->mode = MODE_SILK_ONLY; } else {/* OPUS_AUDIO_MODE */ st->mode = MODE_CELT_ONLY; } /* Automatic (rate-dependent) bandwidth selection */ if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) { const int *bandwidth_thresholds; int bandwidth = BANDWIDTH_FULLBAND; bandwidth_thresholds = st->mode == MODE_CELT_ONLY ? audio_bandwidth_thresholds : voice_bandwidth_thresholds; do { int threshold, hysteresis; threshold = bandwidth_thresholds[2*(bandwidth-BANDWIDTH_MEDIUMBAND)]; hysteresis = bandwidth_thresholds[2*(bandwidth-BANDWIDTH_MEDIUMBAND)+1]; if (!st->first) { if (st->bandwidth >= bandwidth) threshold -= hysteresis; else threshold += hysteresis; } if (mono_rate >= threshold) break; } while (--bandwidth>BANDWIDTH_NARROWBAND); st->bandwidth = bandwidth; /* Prevents any transition to SWB/FB until the SILK layer has fully switched to WB mode and turned the variable LP filter off */ if (st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > BANDWIDTH_WIDEBAND) st->bandwidth = BANDWIDTH_WIDEBAND; } /* Prevents Opus from wasting bits on frequencies that are above the Nyquist rate of the input signal */ if (st->Fs <= 24000 && st->bandwidth > BANDWIDTH_SUPERWIDEBAND) st->bandwidth = BANDWIDTH_SUPERWIDEBAND; if (st->Fs <= 16000 && st->bandwidth > BANDWIDTH_WIDEBAND) st->bandwidth = BANDWIDTH_WIDEBAND; if (st->Fs <= 12000 && st->bandwidth > BANDWIDTH_MEDIUMBAND) st->bandwidth = BANDWIDTH_MEDIUMBAND; if (st->Fs <= 8000 && st->bandwidth > BANDWIDTH_NARROWBAND) st->bandwidth = BANDWIDTH_NARROWBAND; if (st->user_bandwidth != BANDWIDTH_AUTO) st->bandwidth = st->user_bandwidth; /* Prevents nonsensical configurations, i.e. modes that don't exist */ if (frame_size < st->Fs/100 && st->mode != MODE_CELT_ONLY) st->mode = MODE_CELT_ONLY; if (frame_size > st->Fs/50 && st->mode != MODE_SILK_ONLY) st->mode = MODE_SILK_ONLY; if (st->mode == MODE_CELT_ONLY && st->bandwidth == BANDWIDTH_MEDIUMBAND) st->bandwidth = BANDWIDTH_WIDEBAND; if (st->mode == MODE_SILK_ONLY && st->bandwidth > BANDWIDTH_WIDEBAND) st->mode = MODE_HYBRID; if (st->mode == MODE_HYBRID && st->bandwidth <= BANDWIDTH_WIDEBAND) st->mode = MODE_SILK_ONLY; bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1; data += 1; if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) { silk_EncControlStruct dummy; silk_InitEncoder( st->silk_enc, &dummy); prefill=1; } if (st->prev_mode > 0 && ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) { redundancy = 1; celt_to_silk = (st->mode != MODE_CELT_ONLY); if (!celt_to_silk) { /* Switch to SILK/hybrid if frame size is 10 ms or more*/ if (frame_size >= st->Fs/100) { st->mode = st->prev_mode; to_celt = 1; } else { redundancy=0; } } } ec_enc_init(&enc, data, max_data_bytes-1); /* SILK processing */ if (st->mode != MODE_CELT_ONLY) { st->silk_mode.bitRate = st->bitrate_bps - 8*st->Fs/frame_size; if( st->mode == MODE_HYBRID ) { st->silk_mode.bitRate /= st->stream_channels; if( st->bandwidth == BANDWIDTH_SUPERWIDEBAND ) { if( st->Fs == 100 * frame_size ) { /* 24 kHz, 10 ms */ st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 2000 + st->use_vbr * 1000 ) * 2 ) / 3; } else { /* 24 kHz, 20 ms */ st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 1000 + st->use_vbr * 1000 ) * 2 ) / 3; } } else { if( st->Fs == 100 * frame_size ) { /* 48 kHz, 10 ms */ st->silk_mode.bitRate = ( st->silk_mode.bitRate + 8000 + st->use_vbr * 3000 ) / 2; } else { /* 48 kHz, 20 ms */ st->silk_mode.bitRate = ( st->silk_mode.bitRate + 9000 + st->use_vbr * 1000 ) / 2; } } st->silk_mode.bitRate *= st->stream_channels; /* don't let SILK use more than 80% */ if( st->silk_mode.bitRate > ( st->bitrate_bps - 8*st->Fs/frame_size ) * 4/5 ) { st->silk_mode.bitRate = ( st->bitrate_bps - 8*st->Fs/frame_size ) * 4/5; } } st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; st->silk_mode.nChannelsAPI = st->channels; st->silk_mode.nChannelsInternal = st->stream_channels; if (st->bandwidth == BANDWIDTH_NARROWBAND) { st->silk_mode.desiredInternalSampleRate = 8000; } else if (st->bandwidth == BANDWIDTH_MEDIUMBAND) { st->silk_mode.desiredInternalSampleRate = 12000; } else { SKP_assert( st->mode == MODE_HYBRID || st->bandwidth == BANDWIDTH_WIDEBAND ); st->silk_mode.desiredInternalSampleRate = 16000; } if( st->mode == MODE_HYBRID ) { /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ st->silk_mode.minInternalSampleRate = 16000; } else { st->silk_mode.minInternalSampleRate = 8000; } st->silk_mode.maxInternalSampleRate = 16000; /* Call SILK encoder for the low band */ nBytes = max_data_bytes-1; if (prefill) { int zero=0; silk_Encode( silk_enc, &st->silk_mode, st->delay_buffer, st->encoder_buffer, NULL, &zero, 1 ); } ret = silk_Encode( silk_enc, &st->silk_mode, pcm, frame_size, &enc, &nBytes, 0 ); if( ret ) { fprintf (stderr, "SILK encode error: %d\n", ret); /* Handle error */ } if (nBytes==0) return 0; /* Extract SILK internal bandwidth for signaling in first byte */ if( st->mode == MODE_SILK_ONLY ) { if( st->silk_mode.internalSampleRate == 8000 ) { silk_internal_bandwidth = BANDWIDTH_NARROWBAND; } else if( st->silk_mode.internalSampleRate == 12000 ) { silk_internal_bandwidth = BANDWIDTH_MEDIUMBAND; } else if( st->silk_mode.internalSampleRate == 16000 ) { silk_internal_bandwidth = BANDWIDTH_WIDEBAND; } } else { SKP_assert( st->silk_mode.internalSampleRate == 16000 ); } } /* CELT processing */ { int endband=21; switch(st->bandwidth) { case BANDWIDTH_NARROWBAND: endband = 13; break; case BANDWIDTH_WIDEBAND: endband = 17; break; case BANDWIDTH_SUPERWIDEBAND: endband = 19; break; case BANDWIDTH_FULLBAND: endband = 21; break; } celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); } if (st->mode != MODE_SILK_ONLY) { celt_encoder_ctl(celt_enc, CELT_SET_VBR(0)); celt_encoder_ctl(celt_enc, CELT_SET_BITRATE(510000)); if (st->prev_mode == MODE_SILK_ONLY) { unsigned char dummy[10]; celt_encoder_ctl(celt_enc, CELT_RESET_STATE); celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); /* TODO: This wastes CPU a bit compared to just prefilling the buffer */ celt_encode(celt_enc, &st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels], st->Fs/400, dummy, 10); } else { celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(2)); } if (st->mode == MODE_HYBRID) { int len; len = (ec_tell(&enc)+7)>>3; if( st->use_vbr ) { nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); } else { /* check if SILK used up too much */ nb_compr_bytes = len > bytes_target ? len : bytes_target; } } else {
void AudioInput::encodeAudioFrame() { int iArg; //ClientUser *p=ClientUser::get(g.uiSession); int i; float sum; short max; short *psSource; iFrameCounter++; if (! bRunning) { return; } /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psMic[i] * psMic[i]); iLevel = sqrtf(sum / static_cast<float>(iFrameSize)) * 9/32768.0f; dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakMic < -96.0f) dPeakMic = -96.0f; max = 1; for (i=0;i<iFrameSize;i++) max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max); dMaxMic = max; if (psSpeaker && (iEchoChannels > 0)) { sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]); dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakSpeaker < -96.0f) dPeakSpeaker = -96.0f; } else { dPeakSpeaker = 0.0; }*/ MutexLocker l(&qmSpeex); bResetProcessor = false; if (bResetProcessor) { if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); //speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness); iArg = (floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = g_struct.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1); iArg = iSampleRate; speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); Trace("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } bResetProcessor = false; } int iIsSpeech=1; psSource = psMic; /* //回音消除和音质处理 if (bEcho && sesEcho && psSpeaker) { speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean); iIsSpeech=speex_preprocess_run(sppPreprocess, psClean); psSource = psClean; } else { iIsSpeech=speex_preprocess_run(sppPreprocess, psMic); psSource = psMic; }*/ /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSource[i] * psSource[i]); float micLevel = sqrtf(sum / static_cast<float>(iFrameSize)); dPeakSignal=20.0f*log10f(micLevel / 32768.0f); if (dPeakSignal < -96.0f) dPeakSignal = -96.0f; spx_int32_t prob = 0; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob); fSpeechProb = static_cast<float>(prob) / 100.0f; float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f); if (level > g_struct.s.fVADmax) iIsSpeech = 1; else if (level > g_struct.s.fVADmin && bPreviousVoice) iIsSpeech = 1; else iIsSpeech = 0; if (! iIsSpeech) { iHoldFrames++; if (iHoldFrames < g_struct.s.iVoiceHold) iIsSpeech=1; } else { iHoldFrames = 0; }*/ //tIdle.restart(); /* int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL)); qWarning() << "Set Callback" << r; */ //编码 speex或者CELT unsigned char buffer[512]; int len; if (umtType != MessageHandler::UDPVoiceSpeex) { if (cCodec == NULL) { cCodec = new CELTCodec; umtType = MessageHandler::UDPVoiceCELT; ceEncoder = cCodec->encoderCreate(); } else if (cCodec && ! bPreviousVoice) { cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE); } cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0)); cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality)); len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 100, buffer, 512); iBitrate = len * 100 * 8; } else { int vbr = 0; speex_encoder_ctl(esSpeex, SPEEX_GET_VBR_MAX_BITRATE, &vbr); if (vbr != iAudioQuality) { vbr = iAudioQuality; speex_encoder_ctl(esSpeex, SPEEX_SET_VBR_MAX_BITRATE, &vbr); } if (! bPreviousVoice) speex_encoder_ctl(esSpeex, SPEEX_RESET_STATE, NULL); speex_encode_int(esSpeex, psSource, &sbBits); len = speex_bits_write(&sbBits, reinterpret_cast<char *>(buffer), 127); iBitrate = len * 50 * 8; speex_bits_reset(&sbBits); } QByteArray qba; for(int i=0; i<len; i++) { qba.push_back(buffer[i]); } flushCheck(qba, false); if (! iIsSpeech) iBitrate = 0; bPreviousVoice = iIsSpeech; }
void AudioInput::encodeAudioFrame() { int iArg; //ClientUser *p=ClientUser::get(g.uiSession); int i; float sum; short max; short *psSource; iFrameCounter++; if (! bRunning) { return; } MutexLocker l(&qmSpeex); bResetProcessor = false; if (bResetProcessor) { if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); //speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness); iArg = (floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = g_struct.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1); iArg = iSampleRate; speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); Trace("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } bResetProcessor = false; } int iIsSpeech=1; psSource = psMic; /* //回音消除和音质处理 if (bEcho && sesEcho && psSpeaker) { speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean); iIsSpeech=speex_preprocess_run(sppPreprocess, psClean); psSource = psClean; } else { iIsSpeech=speex_preprocess_run(sppPreprocess, psMic); psSource = psMic; }*/ /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSource[i] * psSource[i]); float micLevel = sqrtf(sum / static_cast<float>(iFrameSize)); dPeakSignal=20.0f*log10f(micLevel / 32768.0f); if (dPeakSignal < -96.0f) dPeakSignal = -96.0f; spx_int32_t prob = 0; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob); fSpeechProb = static_cast<float>(prob) / 100.0f; float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f); if (level > g_struct.s.fVADmax) iIsSpeech = 1; else if (level > g_struct.s.fVADmin && bPreviousVoice) iIsSpeech = 1; else iIsSpeech = 0; if (! iIsSpeech) { iHoldFrames++; if (iHoldFrames < g_struct.s.iVoiceHold) iIsSpeech=1; } else { iHoldFrames = 0; }*/ //tIdle.restart(); /* int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL)); qWarning() << "Set Callback" << r; */ //编码 speex或者CELT unsigned char buffer[512]; int len; if (umtType == MessageHandler::UDPVoiceCELT) { if (cCodec == NULL) { cCodec = CELTCodec::instance(); ceEncoder = cCodec->encoderCreate(); } else if (cCodec && ! bPreviousVoice) { cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE); } cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0)); cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality)); len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 50, buffer, 512); iBitrate = len * 50 * 8; /*//////////////////////////////////////////////////////////////////////// if (m_de_cdDecoder == NULL) { m_de_cdDecoder = cCodec->decoderCreate(); } celt_int16 fout2[2560]={0}; if (cCodec) { int len3 = cCodec->decode(m_de_cdDecoder, buffer, len, fout2, SAMPLE_RATE / 50); len3++; UINT dwDataWrote; if( FAILED(g_pWaveFile.Write( SAMPLE_RATE / 50*2*2, (BYTE*)fout2, &dwDataWrote ) )) { int a=0; a++; } else { OutputDebugString(L"plushuwav g_pWaveFile.Write 3"); } } ///////////////////////////////////////////////////////////////////////*/ } else { assert(0); } QByteArray qba; for(int i=0; i<len; i++) { qba.push_back(buffer[i]); } flushCheck(qba, false); if (! iIsSpeech) iBitrate = 0; bPreviousVoice = iIsSpeech; }