Пример #1
0
/*
 * Entry point of the command-line CELT encoder.
 *
 * Parses the command line, opens the input (raw PCM or RIFF/WAV, "-" for
 * stdin) and the output ("-" for stdout), creates a CELT mode and encoder,
 * writes the CELT header and comment packets into an Ogg stream (optionally
 * surrounded by skeleton packets), then encodes the input one frame at a
 * time and writes the resulting Ogg pages.
 *
 * Returns 0 when the encoding loop finishes, 1 when the mode/encoder cannot
 * be created or configured. Usage errors, file-open errors and page-write
 * errors terminate the process through exit(1).
 */
int main(int argc, char **argv)
{
   int nb_samples, total_samples=0, nb_encoded;
   int c;
   int option_index = 0;
   char *inFile, *outFile;
   FILE *fin, *fout;
   short input[MAX_FRAME_SIZE];
   celt_int32 frame_size = 960;
   int quiet=0;
   int nbBytes;
   CELTMode *mode;
   void *st;
   unsigned char bits[MAX_FRAME_BYTES];
   int with_cbr = 0;
   int with_cvbr = 0;
   int with_skeleton = 0;
   int total_bytes = 0;
   int peak_bytes = 0;
   struct option long_options[] =
   {
      {"bitrate", required_argument, NULL, 0},
      {"cbr",no_argument,NULL, 0},
      {"cvbr",no_argument,NULL, 0},
      {"comp", required_argument, NULL, 0},
      {"nopf", no_argument, NULL, 0},
      {"independent", no_argument, NULL, 0},
      {"framesize", required_argument, NULL, 0},
      {"skeleton",no_argument,NULL, 0},
      {"help", no_argument, NULL, 0},
      {"quiet", no_argument, NULL, 0},
      {"le", no_argument, NULL, 0},
      {"be", no_argument, NULL, 0},
      {"8bit", no_argument, NULL, 0},
      {"16bit", no_argument, NULL, 0},
      {"mono", no_argument, NULL, 0},
      {"stereo", no_argument, NULL, 0},
      {"rate", required_argument, NULL, 0},
      {"version", no_argument, NULL, 0},
      {"version-short", no_argument, NULL, 0},
      {"comment", required_argument, NULL, 0},
      {"author", required_argument, NULL, 0},
      {"title", required_argument, NULL, 0},
      {0, 0, 0, 0}
   };
   int print_bitrate=0;
   celt_int32 rate=48000;
   celt_int32 size;
   int chan=1;
   int fmt=16;
   int lsb=1;
   ogg_stream_state os;
   ogg_stream_state so; /* ogg stream for skeleton bitstream */
   ogg_page 		 og;
   ogg_packet 		 op;
   int bytes_written=0, ret;
   int id=-1;
   CELTHeader header;
   char vendor_string[64];
   char *comments;
   int comments_length;
   int close_in=0, close_out=0;
   int eos=0;
   float bitrate=-1;
   /* Zero-filled so that a short read never leaves indeterminate bytes in
      the buffer that is later handed to read_samples(). */
   char first_bytes[12] = {0};
   int wave_input=0;
   celt_int32 lookahead = 0;
   int bytes_per_packet=-1;
   int complexity=-127;
   int prediction=2;

   /* Set up the comment packet BEFORE parsing options: --comment, --author
      and --title call comment_add() on comments/comments_length, which
      would otherwise still be uninitialized at that point (and anything
      added would be overwritten by a later comment_init()). */
   snprintf(vendor_string, sizeof(vendor_string), "Encoded with CELT %s\n",CELT_VERSION);
   comment_init(&comments, &comments_length, vendor_string);

   /*Process command-line options*/
   while(1)
   {
      c = getopt_long (argc, argv, "hvV",
                       long_options, &option_index);
      if (c==-1)
         break;
      
      switch(c)
      {
      case 0:
         /* Long options all return 0; dispatch on the option name. */
         if (strcmp(long_options[option_index].name,"bitrate")==0)
         {
            bitrate = atof (optarg);
         } else if (strcmp(long_options[option_index].name,"cbr")==0)
         {
            with_cbr=1;
         } else if (strcmp(long_options[option_index].name,"cvbr")==0)
         {
            with_cvbr=1;
         } else if (strcmp(long_options[option_index].name,"skeleton")==0)
         {
            with_skeleton=1;
         } else if (strcmp(long_options[option_index].name,"help")==0)
         {
            usage();
            exit(0);
         } else if (strcmp(long_options[option_index].name,"quiet")==0)
         {
            quiet = 1;
         } else if (strcmp(long_options[option_index].name,"version")==0)
         {
            version();
            exit(0);
         } else if (strcmp(long_options[option_index].name,"version-short")==0)
         {
            version_short();
            exit(0);
         } else if (strcmp(long_options[option_index].name,"le")==0)
         {
            lsb=1;
         } else if (strcmp(long_options[option_index].name,"be")==0)
         {
            lsb=0;
         } else if (strcmp(long_options[option_index].name,"8bit")==0)
         {
            fmt=8;
         } else if (strcmp(long_options[option_index].name,"16bit")==0)
         {
            fmt=16;
         } else if (strcmp(long_options[option_index].name,"stereo")==0)
         {
            chan=2;
         } else if (strcmp(long_options[option_index].name,"mono")==0)
         {
            chan=1;
         } else if (strcmp(long_options[option_index].name,"rate")==0)
         {
            rate=atoi (optarg);
         } else if (strcmp(long_options[option_index].name,"comp")==0)
         {
            complexity=atoi (optarg);
         } else if (strcmp(long_options[option_index].name,"framesize")==0)
         {
            frame_size=atoi (optarg);
         } else if (strcmp(long_options[option_index].name,"nopf")==0)
         {
            /* Only lowers the prediction level; never undoes --independent. */
            if (prediction>1)
              prediction=1;
         } else if (strcmp(long_options[option_index].name,"independent")==0)
         {
              prediction=0;
         } else if (strcmp(long_options[option_index].name,"comment")==0)
         {
            if (!strchr(optarg, '='))
            {
               fprintf (stderr, "Invalid comment: %s\n", optarg);
               fprintf (stderr, "Comments must be of the form name=value\n");
               exit(1);
            }
            comment_add(&comments, &comments_length, NULL, optarg); 
         } else if (strcmp(long_options[option_index].name,"author")==0)
         {
            comment_add(&comments, &comments_length, "author=", optarg); 
         } else if (strcmp(long_options[option_index].name,"title")==0)
         {
            comment_add(&comments, &comments_length, "title=", optarg); 
         }

         break;
      case 'h':
         usage();
         exit(0);
         break;
      case 'v':
         version();
         exit(0);
         break;
      case 'V':
         print_bitrate=1;
         break;
      case '?':
         usage();
         exit(1);
         break;
      }
   }
   /* Exactly two positional arguments are required: input and output. */
   if (argc-optind!=2)
   {
      usage();
      exit(1);
   }
   inFile=argv[optind];
   outFile=argv[optind+1];

   /*Initialize Ogg stream struct*/
   srand(time(NULL));
   if (ogg_stream_init(&os, rand())==-1)
   {
      fprintf(stderr,"Error: stream init failed\n");
      exit(1);
   }
   if (with_skeleton && ogg_stream_init(&so, rand())==-1)
   {
      fprintf(stderr,"Error: stream init failed\n");
      exit(1);
   }

   /* Open the input; "-" selects stdin (switched to binary mode where the
      platform distinguishes text and binary streams). */
   if (strcmp(inFile, "-")==0)
   {
#if defined WIN32 || defined _WIN32
         _setmode(_fileno(stdin), _O_BINARY);
#elif defined OS2
         _fsetmode(stdin,"b");
#endif
      fin=stdin;
   }
   else 
   {
      fin = fopen(inFile, "rb");
      if (!fin)
      {
         perror(inFile);
         exit(1);
      }
      close_in=1;
   }

   {
      /* Sniff the first 12 bytes to detect a RIFF container. For raw input
         these bytes are passed on to the first read_samples() call below.
         NOTE(review): only the "RIFF" tag is tested; the form type at bytes
         8..11 is not verified. */
      fread(first_bytes, 1, 12, fin);
      if (strncmp(first_bytes,"RIFF",4)==0)
      {
         if (read_wav_header(fin, &rate, &chan, &fmt, &size)==-1)
            exit(1);
         wave_input=1;
         lsb=1; /* CHECK: exists big-endian .wav ?? */
      }
   }

   /* No usable bitrate requested: pick a default from the channel count. */
   if (bitrate<=0.005)
   {
      if (chan==1)
         bitrate=64.0;
      else
         bitrate=128.0;
   }
     
   bytes_per_packet = MAX_FRAME_BYTES;
   
   mode = celt_mode_create(rate, frame_size, NULL);
   if (!mode)
      return 1;

   /*celt_mode_info(mode, CELT_GET_FRAME_SIZE, &frame_size);*/
   
   celt_header_init(&header, mode, frame_size, chan);
   header.nb_channels = chan;

   {
      char *st_string="mono";
      if (chan==2)
         st_string="stereo";
      if (!quiet)
      {
         if (with_cbr)
           fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet, CBR)\n",
               header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet);
         else      
           fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet maximum)\n",
               header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet);
      }
   }

   /*Initialize CELT encoder*/
   st = celt_encoder_create_custom(mode, chan, NULL);
   if (!st)
   {
      /* Every call below dereferences the encoder state. */
      fprintf (stderr, "Error: failed to create the CELT encoder\n");
      return 1;
   }

   {
      int tmp = (bitrate*1000);
      if (celt_encoder_ctl(st, CELT_SET_BITRATE(tmp)) != CELT_OK)
      {
         fprintf (stderr, "bitrate request failed\n");
         return 1;
      }
   }
   if (!with_cbr)
   {
     if (celt_encoder_ctl(st, CELT_SET_VBR(1)) != CELT_OK)
     {
        fprintf (stderr, "VBR request failed\n");
        return 1;
     }
     if (!with_cvbr)
     {
        if (celt_encoder_ctl(st, CELT_SET_VBR_CONSTRAINT(0)) != CELT_OK)
        {
           fprintf (stderr, "VBR constraint failed\n");
           return 1;
        }
     }
   }

   if (celt_encoder_ctl(st, CELT_SET_PREDICTION(prediction)) != CELT_OK)
   {
      fprintf (stderr, "Prediction request failed\n");
      return 1;
   }

   /* -127 is the "not set on the command line" sentinel. */
   if (complexity!=-127) {
     if (celt_encoder_ctl(st, CELT_SET_COMPLEXITY(complexity)) != CELT_OK)
     {
        fprintf (stderr, "Only complexity 0 through 10 is supported\n");
        return 1;
     }
   }

   /* Open the output; "-" selects stdout. */
   if (strcmp(outFile,"-")==0)
   {
#if defined WIN32 || defined _WIN32
      _setmode(_fileno(stdout), _O_BINARY);
#endif
      fout=stdout;
   }
   else 
   {
      fout = fopen(outFile, "wb");
      if (!fout)
      {
         perror(outFile);
         exit(1);
      }
      close_out=1;
   }

   if (with_skeleton) {
      fprintf (stderr, "Warning: Enabling skeleton output may cause some decoders to fail.\n");
   }

   /* first packet should be the skeleton header. */
   if (with_skeleton) {
      add_fishead_packet(&so);
      /* A non-zero return from flush_ogg_stream_to_file() is treated as
         failure here. */
      if ((ret = flush_ogg_stream_to_file(&so, fout))) {
         fprintf (stderr,"Error: failed skeleton (fishead) header to output stream\n");
         exit(1);
      } else
         bytes_written += ret;
   }

   /*Write header*/
   {
      unsigned char header_data[100];
      int packet_size = celt_header_to_packet(&header, header_data, 100);
      op.packet = header_data;
      op.bytes = packet_size;
      op.b_o_s = 1;
      op.e_o_s = 0;
      op.granulepos = 0;
      op.packetno = 0;
      ogg_stream_packetin(&os, &op);

      /* Force the header packet out onto its own page(s). */
      while (ogg_stream_flush(&os, &og))
      {
         ret = oe_write_page(&og, fout);
         if(ret != og.header_len + og.body_len)
         {
            fprintf (stderr,"Error: failed writing header to output stream\n");
            exit(1);
         }
         else
            bytes_written += ret;
      }

      /* Queue the comment packet; it is flushed after the fisbone below. */
      op.packet = (unsigned char *)comments;
      op.bytes = comments_length;
      op.b_o_s = 0;
      op.e_o_s = 0;
      op.granulepos = 0;
      op.packetno = 1;
      ogg_stream_packetin(&os, &op);
   }

   /* fisbone packet should be write after all bos pages */
   if (with_skeleton) {
      add_fisbone_packet(&so, os.serialno, &header);
      if ((ret = flush_ogg_stream_to_file(&so, fout))) {
         fprintf (stderr,"Error: failed writing skeleton (fisbone )header to output stream\n");
         exit(1);
      } else
         bytes_written += ret;
   }

   /* writing the rest of the celt header packets */
   while (ogg_stream_flush(&os, &og))
   {
      ret = oe_write_page(&og, fout);
      if(ret != og.header_len + og.body_len)
      {
         fprintf (stderr,"Error: failed writing header to output stream\n");
         exit(1);
      }
      else
         bytes_written += ret;
   }

   free(comments);

   /* write the skeleton eos packet */
   if (with_skeleton) {
      add_eos_packet_to_stream(&so);
      if ((ret = flush_ogg_stream_to_file(&so, fout))) {
         fprintf (stderr,"Error: failed writing skeleton header to output stream\n");
         exit(1);
      } else
         bytes_written += ret;
   }


   /* Prime the loop with the first frame. Raw input re-uses the 12 sniffed
      bytes; WAV input passes &size to read_samples() instead. */
   if (!wave_input)
   {
      nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, first_bytes, NULL);
   } else {
      nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size);
   }
   if (nb_samples==0)
      eos=1;
   total_samples += nb_samples;
   nb_encoded = -lookahead;
   /*Main encoding loop (one frame per iteration)*/
   while (!eos || total_samples>nb_encoded)
   {
      id++;
      /*Encode current frame*/

      nbBytes = celt_encode(st, input, frame_size, bits, bytes_per_packet);
      if (nbBytes<0)
      {
         fprintf(stderr, "Got error %d while encoding. Aborting.\n", nbBytes);
         break;
      }
      nb_encoded += frame_size;
      total_bytes += nbBytes;
      peak_bytes=IMAX(nbBytes,peak_bytes);

      /* Read the NEXT frame now, so that the end-of-stream flag of the
         packet being emitted can be decided below. */
      if (wave_input)
      {
         nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size);
      } else {
         nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, NULL);
      }
      if (nb_samples==0)
      {
         eos=1;
      }
      total_samples += nb_samples;

      op.packet = (unsigned char *)bits;
      op.bytes = nbBytes;
      op.b_o_s = 0;
      /* Last packet once the input is exhausted and everything read so far
         has been encoded. */
      if (eos && total_samples<=nb_encoded)
         op.e_o_s = 1;
      else
         op.e_o_s = 0;
      /* Granule position is clamped to the number of samples actually read. */
      op.granulepos = (id+1)*frame_size-lookahead;
      if (op.granulepos>total_samples)
         op.granulepos = total_samples;
      /*printf ("granulepos: %d %d %d %d %d %d\n", (int)op.granulepos, id, nframes, lookahead, 5, 6);*/
      /* Packets 0 and 1 are the header and the comments. */
      op.packetno = 2+id;
      ogg_stream_packetin(&os, &op);

      /*Write all new pages (most likely 0 or 1)*/
      while (ogg_stream_pageout(&os,&og))
      {
         ret = oe_write_page(&og, fout);
         if(ret != og.header_len + og.body_len)
         {
            fprintf (stderr,"Error: failed writing header to output stream\n");
            exit(1);
         }
         else
            bytes_written += ret;
      }
   }
   /*Flush all pages left to be written*/
   while (ogg_stream_flush(&os, &og))
   {
      ret = oe_write_page(&og, fout);
      if(ret != og.header_len + og.body_len)
      {
         fprintf (stderr,"Error: failed writing header to output stream\n");
         exit(1);
      }
      else
         bytes_written += ret;
   }

   /* Skip the summary when nothing was encoded: the duration would be zero
      and the average rate undefined. */
   if (!with_cbr && !quiet && nb_encoded>0)
     fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes);

   celt_encoder_destroy(st);
   celt_mode_destroy(mode);
   ogg_stream_clear(&os);
   /* The skeleton stream is only initialized when --skeleton was given. */
   if (with_skeleton)
      ogg_stream_clear(&so);

   if (close_in)
      fclose(fin);
   if (close_out)
      fclose(fout);
   return 0;
}
Пример #2
0
int opus_encode(OpusEncoder *st, const short *pcm, int frame_size,
		unsigned char *data, int max_data_bytes)
{
	void *silk_enc;
	CELTEncoder *celt_enc;
    int i;
	int ret=0;
	SKP_int32 nBytes;
	ec_enc enc;
	int framerate, period;
    int silk_internal_bandwidth=-1;
    int bytes_target;
    int prefill=0;
    int start_band = 0;
    int redundancy = 0;
    int redundancy_bytes = 0;
    int celt_to_silk = 0;
    /* TODO: This is 60 only so we can handle 60ms speech/audio switching 
       it shouldn't bee too hard to reduce to 20 ms if needed */
    short pcm_buf[60*48*2];
    int nb_compr_bytes;
    int to_celt = 0;
    celt_int32 mono_rate;

    silk_enc = (char*)st+st->silk_enc_offset;
    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);

    if (st->user_bitrate_bps==OPUS_BITRATE_AUTO)
        st->bitrate_bps = 60*st->Fs/frame_size + st->Fs*st->channels;
    else
        st->bitrate_bps = st->user_bitrate_bps;

    /* Rate-dependent mono-stereo decision */
    if (st->mode == MODE_CELT_ONLY && st->channels == 2)
    {
        celt_int32 decision_rate;
        decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio;
        /* Add some hysteresis */
        if (st->stream_channels == 2)
            decision_rate += 4000;
        else
            decision_rate -= 4000;
        if (decision_rate>48000)
            st->stream_channels = 2;
        else
            st->stream_channels = 1;
    }
    /* Equivalent bit-rate for mono */
    mono_rate = st->bitrate_bps;
    if (st->stream_channels==2)
        mono_rate = (mono_rate+10000)/2;
    /* Compensate for smaller frame sizes assuming an equivalent overhead
       of 60 bits/frame */
    mono_rate -= 60*(st->Fs/frame_size - 50);

    /* Mode selection */
    if (st->user_mode==OPUS_MODE_AUTO)
    {
        celt_int32 decision_rate;
        /* SILK/CELT threshold is higher for voice than for music */
        decision_rate = mono_rate - 3*st->voice_ratio*st->voice_ratio;
        /* Hysteresis */
        if (st->prev_mode == MODE_CELT_ONLY)
            decision_rate += 4000;
        else if (st->prev_mode>0)
            decision_rate -= 4000;
        if (decision_rate>24000)
            st->mode = MODE_CELT_ONLY;
        else
            st->mode = MODE_SILK_ONLY;
    } else if (st->user_mode==OPUS_MODE_VOICE)
    {
        st->mode = MODE_SILK_ONLY;
    } else {/* OPUS_AUDIO_MODE */
        st->mode = MODE_CELT_ONLY;
    }

    /* Automatic (rate-dependent) bandwidth selection */
    if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
    {
    	const int *bandwidth_thresholds;
    	int bandwidth = BANDWIDTH_FULLBAND;

    	bandwidth_thresholds = st->mode == MODE_CELT_ONLY ? audio_bandwidth_thresholds : voice_bandwidth_thresholds;
    	do {
    		int threshold, hysteresis;
    		threshold = bandwidth_thresholds[2*(bandwidth-BANDWIDTH_MEDIUMBAND)];
    		hysteresis = bandwidth_thresholds[2*(bandwidth-BANDWIDTH_MEDIUMBAND)+1];
    		if (!st->first)
    		{
    			if (st->bandwidth >= bandwidth)
    				threshold -= hysteresis;
    			else
    				threshold += hysteresis;
    		}
    		if (mono_rate >= threshold)
    			break;
    	} while (--bandwidth>BANDWIDTH_NARROWBAND);
    	st->bandwidth = bandwidth;
    	/* Prevents any transition to SWB/FB until the SILK layer has fully
    	   switched to WB mode and turned the variable LP filter off */
    	if (st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > BANDWIDTH_WIDEBAND)
    		st->bandwidth = BANDWIDTH_WIDEBAND;
    }

    /* Prevents Opus from wasting bits on frequencies that are above
       the Nyquist rate of the input signal */
    if (st->Fs <= 24000 && st->bandwidth > BANDWIDTH_SUPERWIDEBAND)
    	st->bandwidth = BANDWIDTH_SUPERWIDEBAND;
    if (st->Fs <= 16000 && st->bandwidth > BANDWIDTH_WIDEBAND)
    	st->bandwidth = BANDWIDTH_WIDEBAND;
    if (st->Fs <= 12000 && st->bandwidth > BANDWIDTH_MEDIUMBAND)
    	st->bandwidth = BANDWIDTH_MEDIUMBAND;
    if (st->Fs <= 8000 && st->bandwidth > BANDWIDTH_NARROWBAND)
    	st->bandwidth = BANDWIDTH_NARROWBAND;

    if (st->user_bandwidth != BANDWIDTH_AUTO)
    	st->bandwidth = st->user_bandwidth;

    /* Prevents nonsensical configurations, i.e. modes that don't exist */
    if (frame_size < st->Fs/100 && st->mode != MODE_CELT_ONLY)
        st->mode = MODE_CELT_ONLY;
    if (frame_size > st->Fs/50 && st->mode != MODE_SILK_ONLY)
        st->mode = MODE_SILK_ONLY;
    if (st->mode == MODE_CELT_ONLY && st->bandwidth == BANDWIDTH_MEDIUMBAND)
        st->bandwidth = BANDWIDTH_WIDEBAND;
    if (st->mode == MODE_SILK_ONLY && st->bandwidth > BANDWIDTH_WIDEBAND)
        st->mode = MODE_HYBRID;
    if (st->mode == MODE_HYBRID && st->bandwidth <= BANDWIDTH_WIDEBAND)
        st->mode = MODE_SILK_ONLY;

	bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1;

	data += 1;
	if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
	{
		silk_EncControlStruct dummy;
		silk_InitEncoder( st->silk_enc, &dummy);
		prefill=1;
	}
	if (st->prev_mode > 0 &&
	       ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
	        (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
	{
	    redundancy = 1;
	    celt_to_silk = (st->mode != MODE_CELT_ONLY);
	    if (!celt_to_silk)
	    {
	        /* Switch to SILK/hybrid if frame size is 10 ms or more*/
	        if (frame_size >= st->Fs/100)
	        {
		        st->mode = st->prev_mode;
		        to_celt = 1;
	        } else {
	        	redundancy=0;
	        }
	    }
	}

	ec_enc_init(&enc, data, max_data_bytes-1);

	/* SILK processing */
    if (st->mode != MODE_CELT_ONLY)
    {
        st->silk_mode.bitRate = st->bitrate_bps - 8*st->Fs/frame_size;
        if( st->mode == MODE_HYBRID ) {
            st->silk_mode.bitRate /= st->stream_channels;
            if( st->bandwidth == BANDWIDTH_SUPERWIDEBAND ) {
                if( st->Fs == 100 * frame_size ) {
                    /* 24 kHz, 10 ms */
                    st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 2000 + st->use_vbr * 1000 ) * 2 ) / 3;
                } else {
                    /* 24 kHz, 20 ms */
                    st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 1000 + st->use_vbr * 1000 ) * 2 ) / 3;
                }
            } else {
                if( st->Fs == 100 * frame_size ) {
                    /* 48 kHz, 10 ms */
                    st->silk_mode.bitRate = ( st->silk_mode.bitRate + 8000 + st->use_vbr * 3000 ) / 2;
                } else {
                    /* 48 kHz, 20 ms */
                    st->silk_mode.bitRate = ( st->silk_mode.bitRate + 9000 + st->use_vbr * 1000 ) / 2;
                }
            }
            st->silk_mode.bitRate *= st->stream_channels;
            /* don't let SILK use more than 80% */
            if( st->silk_mode.bitRate > ( st->bitrate_bps - 8*st->Fs/frame_size ) * 4/5 ) {
                st->silk_mode.bitRate = ( st->bitrate_bps - 8*st->Fs/frame_size ) * 4/5;
            }
        }

        st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
        st->silk_mode.nChannelsAPI = st->channels;
        st->silk_mode.nChannelsInternal = st->stream_channels;
        if (st->bandwidth == BANDWIDTH_NARROWBAND) {
        	st->silk_mode.desiredInternalSampleRate = 8000;
        } else if (st->bandwidth == BANDWIDTH_MEDIUMBAND) {
        	st->silk_mode.desiredInternalSampleRate = 12000;
        } else {
            SKP_assert( st->mode == MODE_HYBRID || st->bandwidth == BANDWIDTH_WIDEBAND );
            st->silk_mode.desiredInternalSampleRate = 16000;
        }
        if( st->mode == MODE_HYBRID ) {
            /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
            st->silk_mode.minInternalSampleRate = 16000;
        } else {
            st->silk_mode.minInternalSampleRate = 8000;
        }
        st->silk_mode.maxInternalSampleRate = 16000;

        /* Call SILK encoder for the low band */
        nBytes = max_data_bytes-1;
        if (prefill)
        {
            int zero=0;
        	silk_Encode( silk_enc, &st->silk_mode, st->delay_buffer, st->encoder_buffer, NULL, &zero, 1 );
        }

        ret = silk_Encode( silk_enc, &st->silk_mode, pcm, frame_size, &enc, &nBytes, 0 );
        if( ret ) {
            fprintf (stderr, "SILK encode error: %d\n", ret);
            /* Handle error */
        }
        if (nBytes==0)
            return 0;
        /* Extract SILK internal bandwidth for signaling in first byte */
        if( st->mode == MODE_SILK_ONLY ) {
            if( st->silk_mode.internalSampleRate == 8000 ) {
                silk_internal_bandwidth = BANDWIDTH_NARROWBAND;
            } else if( st->silk_mode.internalSampleRate == 12000 ) {
                silk_internal_bandwidth = BANDWIDTH_MEDIUMBAND;
            } else if( st->silk_mode.internalSampleRate == 16000 ) {
                silk_internal_bandwidth = BANDWIDTH_WIDEBAND;
            }
        } else {
            SKP_assert( st->silk_mode.internalSampleRate == 16000 );
        }
    }

    /* CELT processing */
	{
	    int endband=21;

	    switch(st->bandwidth)
	    {
	    case BANDWIDTH_NARROWBAND:
	    	endband = 13;
	    	break;
	    case BANDWIDTH_WIDEBAND:
	    	endband = 17;
	    	break;
	    case BANDWIDTH_SUPERWIDEBAND:
	    	endband = 19;
	    	break;
	    case BANDWIDTH_FULLBAND:
	    	endband = 21;
	    	break;
	    }
	    celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
	    celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
	}
	if (st->mode != MODE_SILK_ONLY)
	{
        celt_encoder_ctl(celt_enc, CELT_SET_VBR(0));
        celt_encoder_ctl(celt_enc, CELT_SET_BITRATE(510000));
        if (st->prev_mode == MODE_SILK_ONLY)
        {
        	unsigned char dummy[10];
        	celt_encoder_ctl(celt_enc, CELT_RESET_STATE);
        	celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
        	celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
        	/* TODO: This wastes CPU a bit compared to just prefilling the buffer */
        	celt_encode(celt_enc, &st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels], st->Fs/400, dummy, 10);
        } else {
        	celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(2));
        }

        if (st->mode == MODE_HYBRID)
        {
            int len;

            len = (ec_tell(&enc)+7)>>3;
            if( st->use_vbr ) {
                nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs);
            } else {
                /* check if SILK used up too much */
                nb_compr_bytes = len > bytes_target ? len : bytes_target;
            }
        } else {
Пример #3
0
/**
 * Encodes the microphone frame in psMic and hands the packet to flushCheck().
 *
 * Does nothing beyond bumping iFrameCounter when the input is not running.
 * The Speex preprocessor / echo canceller re-initialisation is currently
 * disabled (bResetProcessor is forced to false before it is tested), and
 * the level metering, echo cancellation and VAD stages are commented out,
 * so every frame is treated as speech.
 *
 * The frame is encoded with CELT unless umtType is UDPVoiceSpeex, in which
 * case Speex is used. iBitrate is updated from the size of the packet.
 */
void AudioInput::encodeAudioFrame() {
	int iArg;
	//ClientUser *p=ClientUser::get(g.uiSession);

	short *psSource;

	iFrameCounter++;

	if (! bRunning) {
		return;
	}

	// Disabled level metering. Re-enabling it requires locals
	// `int i; float sum; short max;`.
	/*sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psMic[i] * psMic[i]);

	iLevel = sqrtf(sum / static_cast<float>(iFrameSize)) * 9/32768.0f;
	dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
	if (dPeakMic < -96.0f)
		dPeakMic = -96.0f;

	max = 1;
	for (i=0;i<iFrameSize;i++)
		max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max);
	dMaxMic = max;

	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
		if (dPeakSpeaker < -96.0f)
			dPeakSpeaker = -96.0f;
	} else {
		dPeakSpeaker = 0.0;
	}*/

	MutexLocker l(&qmSpeex);

	// NOTE(review): forcing the flag to false makes the re-initialisation
	// branch below unreachable; kept as in the original since it looks
	// like a deliberate way of switching the Speex preprocessor off.
	bResetProcessor = false;

	if (bResetProcessor) {
		if (sppPreprocess)
			speex_preprocess_state_destroy(sppPreprocess);
		if (sesEcho)
			speex_echo_state_destroy(sesEcho);

		sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate);

		iArg = 1;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg);
		//speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg);

		iArg = 30000;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg);

		float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness);
		iArg = (floorf(20.0f * log10f(v)));
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg);

		iArg = g_struct.s.iNoiseSuppress;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

		if (iEchoChannels > 0) {
			sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1);
			iArg = iSampleRate;
			speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg);
			speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho);

			Trace("AudioInput: ECHO CANCELLER ACTIVE");
		} else {
			sesEcho = NULL;
		}

		bResetProcessor = false;
	}

	// With VAD disabled every frame counts as speech and the raw
	// microphone buffer is encoded directly.
	int iIsSpeech=1;
	psSource = psMic;
/*
	// Echo cancellation and audio quality processing
	if (bEcho && sesEcho && psSpeaker)
	{
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		iIsSpeech=speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} 
	else {
		iIsSpeech=speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}*/

	/*sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal=20.0f*log10f(micLevel / 32768.0f);
	if (dPeakSignal < -96.0f)
		dPeakSignal = -96.0f;

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f);

	if (level > g_struct.s.fVADmax)
		iIsSpeech = 1;
	else if (level > g_struct.s.fVADmin && bPreviousVoice)
		iIsSpeech = 1;
	else
		iIsSpeech = 0;

	if (! iIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g_struct.s.iVoiceHold)
			iIsSpeech=1;
	} else {
		iHoldFrames = 0;
	}*/

	//tIdle.restart();
	/*
	int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL));
	qWarning() << "Set Callback" << r;
	*/

	// Encode with Speex or CELT
	unsigned char buffer[512];
	int len = 0;

	if (umtType != MessageHandler::UDPVoiceSpeex) {
		if (cCodec == NULL)
		{
			// First CELT frame: create the codec and its encoder lazily.
			cCodec = new CELTCodec;
			umtType = MessageHandler::UDPVoiceCELT;
			ceEncoder = cCodec->encoderCreate();
		}
		else if (! bPreviousVoice) {
			// Restart the encoder state after a stretch without voice.
			cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE);
		}

		cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0));

		cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality));
		len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 100, buffer, 512);
		// A negative result (presumably an encoder error code) must not
		// be turned into a negative bitrate; treat it as an empty packet.
		if (len < 0)
			len = 0;
		// SAMPLE_RATE / 100 samples per frame -> 100 packets per second.
		iBitrate = len * 100 * 8;
	} 
	else {
		int vbr = 0;
		speex_encoder_ctl(esSpeex, SPEEX_GET_VBR_MAX_BITRATE, &vbr);
		if (vbr != iAudioQuality) {
			vbr = iAudioQuality;
			speex_encoder_ctl(esSpeex, SPEEX_SET_VBR_MAX_BITRATE, &vbr);
		}

		if (! bPreviousVoice)
			speex_encoder_ctl(esSpeex, SPEEX_RESET_STATE, NULL);

		speex_encode_int(esSpeex, psSource, &sbBits);
		len = speex_bits_write(&sbBits, reinterpret_cast<char *>(buffer), 127);
		if (len < 0)
			len = 0;
		iBitrate = len * 50 * 8;
		speex_bits_reset(&sbBits);
	}

	// Copy the encoded bytes into the packet passed to flushCheck().
	QByteArray qba;
	for (int idx = 0; idx < len; ++idx)
	{
		qba.push_back(buffer[idx]);
	}

	flushCheck(qba, false);

	if (! iIsSpeech)
		iBitrate = 0;

	bPreviousVoice = iIsSpeech;
}
Пример #4
0
/**
 * Encodes the microphone frame in psMic with CELT and hands the packet to
 * flushCheck().
 *
 * Does nothing beyond bumping iFrameCounter when the input is not running.
 * The Speex preprocessor / echo canceller re-initialisation is currently
 * disabled (bResetProcessor is forced to false before it is tested), and
 * the echo cancellation and VAD stages are commented out, so every frame
 * is treated as speech.
 *
 * Only umtType == UDPVoiceCELT is supported; any other type trips an
 * assert. In builds where the assert is compiled out, an empty packet is
 * passed to flushCheck() instead of reading an uninitialised length.
 */
void AudioInput::encodeAudioFrame() {
	int iArg;
	//ClientUser *p=ClientUser::get(g.uiSession);

	short *psSource;

	iFrameCounter++;

	if (! bRunning) {
		return;
	}

	MutexLocker l(&qmSpeex);

	// NOTE(review): forcing the flag to false makes the re-initialisation
	// branch below unreachable; kept as in the original since it looks
	// like a deliberate way of switching the Speex preprocessor off.
	bResetProcessor = false;

	if (bResetProcessor) {
		if (sppPreprocess)
			speex_preprocess_state_destroy(sppPreprocess);
		if (sesEcho)
			speex_echo_state_destroy(sesEcho);

		sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate);

		iArg = 1;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg);
		//speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg);

		iArg = 30000;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg);

		float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness);
		iArg = (floorf(20.0f * log10f(v)));
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg);

		iArg = g_struct.s.iNoiseSuppress;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

		if (iEchoChannels > 0) {
			sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1);
			iArg = iSampleRate;
			speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg);
			speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho);

			Trace("AudioInput: ECHO CANCELLER ACTIVE");
		} else {
			sesEcho = NULL;
		}

		bResetProcessor = false;
	}

	// With VAD disabled every frame counts as speech and the raw
	// microphone buffer is encoded directly.
	int iIsSpeech=1;
	psSource = psMic;
/*
	// Echo cancellation and audio quality processing
	if (bEcho && sesEcho && psSpeaker)
	{
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		iIsSpeech=speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} 
	else {
		iIsSpeech=speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}*/

	// Disabled signal-level / VAD logic. Re-enabling it requires locals
	// `int i; float sum;`.
	/*sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal=20.0f*log10f(micLevel / 32768.0f);
	if (dPeakSignal < -96.0f)
		dPeakSignal = -96.0f;

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f);

	if (level > g_struct.s.fVADmax)
		iIsSpeech = 1;
	else if (level > g_struct.s.fVADmin && bPreviousVoice)
		iIsSpeech = 1;
	else
		iIsSpeech = 0;

	if (! iIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g_struct.s.iVoiceHold)
			iIsSpeech=1;
	} else {
		iHoldFrames = 0;
	}*/

	//tIdle.restart();
	/*
	int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL));
	qWarning() << "Set Callback" << r;
	*/

	// Encode with CELT (the Speex path has been removed from this variant)
	unsigned char buffer[512];
	// Initialised so the copy loop below is well-defined even when the
	// unsupported-type assert is compiled out (NDEBUG).
	int len = 0;

	if (umtType == MessageHandler::UDPVoiceCELT) {
		if (cCodec == NULL)
		{
			// First frame: fetch the shared codec and create an encoder.
			cCodec = CELTCodec::instance();
			ceEncoder = cCodec->encoderCreate();
		}
		else if (! bPreviousVoice) {
			// Restart the encoder state after a stretch without voice.
			cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE);
		}

		cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0));

		cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality));
		len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 50, buffer, 512);
		// A negative result (presumably an encoder error code) must not
		// be turned into a negative bitrate; treat it as an empty packet.
		if (len < 0)
			len = 0;
		// SAMPLE_RATE / 50 samples per frame -> 50 packets per second.
		iBitrate = len * 50 * 8;
		
		/*////////////////////////////////////////////////////////////////////////

		if (m_de_cdDecoder == NULL) {
			m_de_cdDecoder = cCodec->decoderCreate();
		}
		
		celt_int16 fout2[2560]={0};

		if (cCodec)
		{
			int len3 = cCodec->decode(m_de_cdDecoder, buffer, len, fout2, SAMPLE_RATE / 50);
			len3++;

			UINT dwDataWrote;
			if( FAILED(g_pWaveFile.Write( SAMPLE_RATE / 50*2*2, (BYTE*)fout2, 
				&dwDataWrote ) ))
			{
				int a=0;
				a++;
			}
			else
			{
				OutputDebugString(L"plushuwav g_pWaveFile.Write 3");				
			}
		}

		///////////////////////////////////////////////////////////////////////*/
	} 
	else {
		// Only CELT is supported here.
		assert(0);
	}

	// Copy the encoded bytes into the packet passed to flushCheck().
	QByteArray qba;
	for (int idx = 0; idx < len; ++idx)
	{
		qba.push_back(buffer[idx]);
	}

	flushCheck(qba, false);

	if (! iIsSpeech)
		iBitrate = 0;

	bPreviousVoice = iIsSpeech;
}