Example #1
0
static void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2])
{
#if 0
    /* This code is slower because it multiplies memory accesses.
     * It is left for educational purposes and because it may offer
     * a better reference for writing arch-specific DSP functions. */
    autocorrelate(x, phi, 0);
    autocorrelate(x, phi, 1);
    autocorrelate(x, phi, 2);
#else
    float real_sum2 = x[0][0] * x[2][0] + x[0][1] * x[2][1];
    float imag_sum2 = x[0][0] * x[2][1] - x[0][1] * x[2][0];
    float real_sum1 = 0.0f, imag_sum1 = 0.0f, real_sum0 = 0.0f;
    int   i;
    for (i = 1; i < 38; i++) {
        real_sum0 += x[i][0] * x[i    ][0] + x[i][1] * x[i    ][1];
        real_sum1 += x[i][0] * x[i + 1][0] + x[i][1] * x[i + 1][1];
        imag_sum1 += x[i][0] * x[i + 1][1] - x[i][1] * x[i + 1][0];
        real_sum2 += x[i][0] * x[i + 2][0] + x[i][1] * x[i + 2][1];
        imag_sum2 += x[i][0] * x[i + 2][1] - x[i][1] * x[i + 2][0];
    }
    phi[2 - 2][1][0] = real_sum2;
    phi[2 - 2][1][1] = imag_sum2;
    phi[2    ][1][0] = real_sum0 + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1];
    phi[1    ][0][0] = real_sum0 + x[38][0] * x[38][0] + x[38][1] * x[38][1];
    phi[2 - 1][1][0] = real_sum1 + x[ 0][0] * x[ 1][0] + x[ 0][1] * x[ 1][1];
    phi[2 - 1][1][1] = imag_sum1 + x[ 0][0] * x[ 1][1] - x[ 0][1] * x[ 1][0];
    phi[0    ][0][0] = real_sum1 + x[38][0] * x[39][0] + x[38][1] * x[39][1];
    phi[0    ][0][1] = imag_sum1 + x[38][0] * x[39][1] - x[38][1] * x[39][0];
#endif
}
Example #2
0
void find_aks_for_lsp(
  float Sn[],	/* Nsam samples with order sample memory */
  float a[],	/* order+1 LPCs with first coeff 1.0 */
  int Nsam,	/* number of input speech samples */
  int order,	/* order of the LPC analysis */
  float *E	/* residual energy */
)
{
  float Wn[N];	/* windowed frame of Nsam speech samples */
  float R[P+1];	/* order+1 autocorrelation values of Sn[] */
  int i;

  hanning_window(Sn,Wn,Nsam);

  autocorrelate(Wn,R,Nsam,order);
  R[0] += LPC_FLOOR;
  assert(order == 10); /* lag window only defined for order == 10 */
  for(i=0; i<=order; i++)
      R[i] *= lag_window[i];
  levinson_durbin(R,a,order);

  *E = 0.0;
  for(i=0; i<=order; i++)
    *E += a[i]*R[i];
  if (*E < 0.0)
    *E = 1E-12;
}
Example #3
0
void find_aks(
  float Sn[],	/* Nsam samples with order sample memory */
  float a[],	/* order+1 LPCs with first coeff 1.0 */
  int Nsam,	/* number of input speech samples */
  int order,	/* order of the LPC analysis */
  float *E	/* residual energy */
)
{
  float Wn[LPC_MAX_N];	/* windowed frame of Nsam speech samples */
  float R[LPC_MAX+1];	/* order+1 autocorrelation values of Sn[] */
  int i;

  assert(order < LPC_MAX);
  assert(Nsam < LPC_MAX_N);

  hanning_window(Sn,Wn,Nsam);
  autocorrelate(Wn,R,Nsam,order);
  levinson_durbin(R,a,order);

  *E = 0.0;
  for(i=0; i<=order; i++)
    *E += a[i]*R[i];
  if (*E < 0.0)
    *E = 1E-12;
}
Example #4
0
void find_aks(
  scalar Sn[],	/* Nsam samples with order sample memory */
  scalar a[],	/* order+1 LPCs with first coeff 1.0 */
  int Nsam,	/* number of input speech samples */
  int order,	/* order of the LPC analysis */
  scalar *E	/* residual energy */
)
{
  scalar Wn[LPC_MAX_N];	/* windowed frame of Nsam speech samples */
  scalar R[order+1];	/* order+1 autocorrelation values of Sn[] */
  int i;

  assert(Nsam < LPC_MAX_N);

  hanning_window(Sn,Wn,Nsam);
  autocorrelate(Wn,R,Nsam,order);
  levinson_durbin(R,a,order);

  *E = fl_to_numb(0.0);
  for(i=0; i<=order; i++)
    *E = s_add(*E , s_mul(a[i],R[i]));
  if (*E < fl_to_numb(0.0)) {
	#ifdef MATH_Q16_16   
	 *E = 1;
	#else
	 *E = powf(2, -16);//fl_to_numb(1E-12); For debuging purposes. 
	#endif
  }
}
Example #5
0
float speech_to_uq_lsps(float lsp[],
			float ak[],
		        float Sn[],
		        float w[],
		        int   order
)
{
    int   i, roots;
    float Wn[M];
    float R[LPC_MAX+1];
    float e, E;

    e = 0.0;
    for(i=0; i<M; i++) {
	Wn[i] = Sn[i]*w[i];
	e += Wn[i]*Wn[i];
    }

    /* trap 0 energy case as LPC analysis will fail */

    if (e == 0.0) {
	for(i=0; i<order; i++)
	    lsp[i] = (PI/order)*(float)i;
	return 0.0;
    }

    autocorrelate(Wn, R, M, order);
    levinson_durbin(R, ak, order);

    E = 0.0;
    for(i=0; i<=order; i++)
	E += ak[i]*R[i];

    /* 15 Hz BW expansion as I can't hear the difference and it may help
       help occasional fails in the LSP root finding.  Important to do this
       after energy calculation to avoid -ve energy values.
    */

    for(i=0; i<=order; i++)
	ak[i] *= powf(0.994,(float)i);

    roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
    if (roots != order) {
	/* if root finding fails use some benign LSP values instead */
	for(i=0; i<order; i++)
	    lsp[i] = (PI/order)*(float)i;
    }

    return E;
}
Example #6
0
void Histogram::compute() {
    counts.resize( ceil( bin_size ) + 1 );
    std::fill( counts.begin(), counts.end(), 0 );

    Bins::Position low_border = Bins::Position::Constant( 0 * camera::pixel ),
                   high_border = bins.sizes().array() - 1 * camera::pixel;

    for ( Bins::iterator i = bins.begin(); i != bins.end(); ++i ) {
        if ( (i.position() == low_border).any() ||
             (i.position() == high_border).any() )
            continue;
        autocorrelate( *i );
        for (ForwardScan::iterator j = forward_scan.begin(); j != forward_scan.end(); ++j) {
            crosscorrelate( *i, bins( i.position() + *j ) );
        }
    }

    if ( periodic_boundary )
        counts.pop_back();
}
Example #7
0
float speech_to_uq_lsps(float lsp[],
			float ak[],
		        float Sn[], 
		        float w[],
		        int   order
)
{
    int   i, roots;
    float Wn[M];
    float R[LPC_MAX+1];
    float E;

    for(i=0; i<M; i++)
	Wn[i] = Sn[i]*w[i];
    autocorrelate(Wn, R, M, order);
    levinson_durbin(R, ak, order);
  
    E = 0.0;
    for(i=0; i<=order; i++)
	E += ak[i]*R[i];
 
    roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
    if (roots != order) {
	/* for some reason LSP roots could not be found   */
	/* some alpha testers are reporting this condition */
	fprintf(stderr, "LSP roots not found!\nroots = %d\n", roots);
	for(i=0; i<=order; i++)
	    fprintf(stderr, "a[%d] = %f\n", i, ak[i]);	
	
	/* some benign LSP values we can use instead */
	for(i=0; i<order; i++)
	    lsp[i] = (PI/order)*(float)i;
    }

    return E;
}
Example #8
0
static void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2])
{
    autocorrelate(x, phi, 0);
    autocorrelate(x, phi, 1);
    autocorrelate(x, phi, 2);
}
Example #9
0
int main(int argc, char *argv[])
{
    FILE *fout = NULL;	/* output speech file                    */
    FILE *fin;		/* input speech file                     */
    short buf[N];	/* input/output buffer                   */
    float buf_float[N];
    float Sn[M];	/* float input speech samples            */
    float Sn_pre[N];	/* pre-emphasised input speech samples   */
    COMP  Sw[FFT_ENC];	/* DFT of Sn[]                           */
    kiss_fft_cfg  fft_fwd_cfg;
    kiss_fft_cfg  fft_inv_cfg;
    float w[M];	        /* time domain hamming window            */
    COMP  W[FFT_ENC];	/* DFT of w[]                            */
    MODEL model;
    float Pn[2*N];	/* trapezoidal synthesis window          */
    float Sn_[2*N];	/* synthesised speech */
    int   i,m;		/* loop variable                         */
    int   frames;
    float prev_Wo, prev__Wo, prev_uq_Wo;
    float pitch;
    char  out_file[MAX_STR];
    char  ampexp_arg[MAX_STR];
    char  phaseexp_arg[MAX_STR];
    float snr;
    float sum_snr;
    int orderi;
    int lpc_model = 0, order = LPC_ORD;
    int lsp = 0, lspd = 0, lspvq = 0;
    int lspres = 0;
    int lspjvm = 0, lspjnd = 0, lspmel = 0;
    #ifdef __EXPERIMENTAL__
    int lspanssi = 0, 
    #endif
    int prede = 0;
    float pre_mem = 0.0, de_mem = 0.0;
    float ak[order];
    COMP  Sw_[FFT_ENC];
    COMP  Ew[FFT_ENC]; 
 
    int phase0 = 0;
    float ex_phase[MAX_AMP+1];

    int   postfilt;

    int   hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0;
    int   lpcpf = 0;
    FILE *fvoicing = 0;

    MODEL prev_model;
    int dec;
    int decimate = 1;
    float lsps[order];
    float e, prev_e;
    int   lsp_indexes[order];
    float lsps_[order];
    float Woe_[2];

    float lsps_dec[4][LPC_ORD], e_dec[4], weight, weight_inc, ak_dec[4][LPC_ORD];
    MODEL model_dec[4], prev_model_dec;
    float prev_lsps_dec[order], prev_e_dec;

    void *nlp_states;
    float hpf_states[2];
    int   scalar_quant_Wo_e = 0;
    int   vector_quant_Wo_e = 0;
    int   dump_pitch_e = 0;
    FILE *fjvm = NULL;
    #ifdef DUMP
    int   dump;
    #endif
    struct PEXP *pexp = NULL;
    struct AEXP *aexp = NULL;
    float gain = 1.0;
    int   bpf_en = 0;
    float bpf_buf[BPF_N+N];

    char* opt_string = "ho:";
    struct option long_options[] = {
        { "lpc", required_argument, &lpc_model, 1 },
        { "lspjnd", no_argument, &lspjnd, 1 },
        { "lspmel", no_argument, &lspmel, 1 },
        { "lsp", no_argument, &lsp, 1 },
        { "lspd", no_argument, &lspd, 1 },
        { "lspvq", no_argument, &lspvq, 1 },
        { "lspres", no_argument, &lspres, 1 },
        { "lspjvm", no_argument, &lspjvm, 1 },
        #ifdef __EXPERIMENTAL__
        { "lspanssi", no_argument, &lspanssi, 1 },
        #endif
        { "phase0", no_argument, &phase0, 1 },
        { "phaseexp", required_argument, &phaseexp, 1 },
        { "ampexp", required_argument, &ampexp, 1 },
        { "postfilter", no_argument, &postfilt, 1 },
        { "hand_voicing", required_argument, &hand_voicing, 1 },
        { "dec", required_argument, &dec, 1 },
        { "hi", no_argument, &hi, 1 },
        { "simlpcpf", no_argument, &simlpcpf, 1 },
        { "lpcpf", no_argument, &lpcpf, 1 },
        { "prede", no_argument, &prede, 1 },
        { "dump_pitch_e", required_argument, &dump_pitch_e, 1 },
        { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 },
        { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 },
        { "rate", required_argument, NULL, 0 },
        { "gain", required_argument, NULL, 0 },
        { "bpf", no_argument, &bpf_en, 1 },
        #ifdef DUMP
        { "dump", required_argument, &dump, 1 },
        #endif
        { "help", no_argument, NULL, 'h' },
        { NULL, no_argument, NULL, 0 }
    };
    int num_opts=sizeof(long_options)/sizeof(struct option);
    COMP Aw[FFT_ENC];

    for(i=0; i<M; i++) {
	Sn[i] = 1.0;
	Sn_pre[i] = 1.0;
    }
    for(i=0; i<2*N; i++)
	Sn_[i] = 0;

    prev_uq_Wo = prev_Wo = prev__Wo = TWO_PI/P_MAX;

    prev_model.Wo = TWO_PI/P_MIN;
    prev_model.L = floor(PI/prev_model.Wo);
    for(i=1; i<=prev_model.L; i++) {
	prev_model.A[i] = 0.0;
	prev_model.phi[i] = 0.0;
    }
    for(i=1; i<=MAX_AMP; i++) {
	//ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX;
	ex_phase[i] = 0.0;
    }
    e = prev_e = 1;
    hpf_states[0] = hpf_states[1] = 0.0;

    nlp_states = nlp_create(M);

    if (argc < 2) {
        print_help(long_options, num_opts, argv);
    }

    /*----------------------------------------------------------------*\

                     Interpret Command Line Arguments

    \*----------------------------------------------------------------*/

    while(1) {
        int option_index = 0;
        int opt = getopt_long(argc, argv, opt_string, 
                    long_options, &option_index);
        if (opt == -1)
            break;
        switch (opt) {
         case 0:
            if(strcmp(long_options[option_index].name, "lpc") == 0) {
                orderi = atoi(optarg);
                if((orderi < 4) || (orderi > order)) {
                    fprintf(stderr, "Error in LPC order (4 to %d): %s\n", order, optarg);
                    exit(1);
                }
                order = orderi;
            #ifdef DUMP
            } else if(strcmp(long_options[option_index].name, "dump") == 0) {
                if (dump) 
	            dump_on(optarg);
            #endif
            } else if(strcmp(long_options[option_index].name, "lsp") == 0
                  || strcmp(long_options[option_index].name, "lspd") == 0
                  || strcmp(long_options[option_index].name, "lspvq") == 0) {
	        assert(order == LPC_ORD);
            } else if(strcmp(long_options[option_index].name, "dec") == 0) {
               
                decimate = atoi(optarg);
	        if ((decimate != 2) && (decimate != 4)) {
	            fprintf(stderr, "Error in --dec, must be 2 or 4\n");
	            exit(1);
	        }

                if (!phase0) {
                    printf("needs --phase0 to resample phase when using --dec\n");
                    exit(1);
                }
                if (!lpc_model) {
                    printf("needs --lpc [order] to resample amplitudes when using --dec\n");
                    exit(1);
                }
 
            } else if(strcmp(long_options[option_index].name, "hand_voicing") == 0) {
	        if ((fvoicing = fopen(optarg,"rt")) == NULL) {
	            fprintf(stderr, "Error opening voicing file: %s: %s.\n",
		        optarg, strerror(errno));
                    exit(1);
                }
	    } else if(strcmp(long_options[option_index].name, "dump_pitch_e") == 0) {
	        if ((fjvm = fopen(optarg,"wt")) == NULL) {
	            fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n",
		        optarg, strerror(errno));
                    exit(1);
                }
	    } else if(strcmp(long_options[option_index].name, "phaseexp") == 0) {
		strcpy(phaseexp_arg, optarg);
	    } else if(strcmp(long_options[option_index].name, "ampexp") == 0) {
		strcpy(ampexp_arg, optarg);
	    } else if(strcmp(long_options[option_index].name, "gain") == 0) {
		gain = atof(optarg);
	    } else if(strcmp(long_options[option_index].name, "rate") == 0) {
                if(strcmp(optarg,"3200") == 0) {
	            lpc_model = 1;
		    scalar_quant_Wo_e = 1;
	            lspd = 1;
	            phase0 = 1;
	            postfilt = 1;
	            decimate = 1;
		    lpcpf = 1;
               } else if(strcmp(optarg,"2400") == 0) {
	            lpc_model = 1;
		    vector_quant_Wo_e = 1;
	            lsp = 1;
	            phase0 = 1;
	            postfilt = 1;
	            decimate = 2;
		    lpcpf = 1;
               } else if(strcmp(optarg,"1400") == 0) {
	            lpc_model = 1;
		    vector_quant_Wo_e = 1;
	            lsp = 1;
	            phase0 = 1;
	            postfilt = 1;
	            decimate = 4;
 		    lpcpf = 1;
               } else if(strcmp(optarg,"1300") == 0) {
	            lpc_model = 1;
		    scalar_quant_Wo_e = 1;
	            lsp = 1;
	            phase0 = 1;
	            postfilt = 1;
	            decimate = 4;
 		    lpcpf = 1;
               } else if(strcmp(optarg,"1200") == 0) {
	            lpc_model = 1;
		    scalar_quant_Wo_e = 1;
	            lspjvm = 1;
	            phase0 = 1;
	            postfilt = 1;
	            decimate = 4;
 		    lpcpf = 1;
                } else {
                    fprintf(stderr, "Error: invalid output rate (3200|2400|1400|1200) %s\n", optarg);
                    exit(1);
                }
            }
            break;

         case 'h':
            print_help(long_options, num_opts, argv);
            break;

         case 'o':
	     if (strcmp(optarg, "-") == 0) fout = stdout;
	     else if ((fout = fopen(optarg,"wb")) == NULL) {
	        fprintf(stderr, "Error opening output speech file: %s: %s.\n",
		    optarg, strerror(errno));
	        exit(1);
	     }
	     strcpy(out_file,optarg);
	     break;

         default:
            /* This will never be reached */
            break;
        }
    }

    /* Input file */

    if (strcmp(argv[optind], "-")  == 0) fin = stdin;
    else if ((fin = fopen(argv[optind],"rb")) == NULL) {
	fprintf(stderr, "Error opening input speech file: %s: %s.\n",
		argv[optind], strerror(errno));
	exit(1);
    }

    ex_phase[0] = 0;
    Woe_[0] = Woe_[1] = 1.0;

    /*
      printf("lspd: %d lspdt: %d lspdt_mode: %d  phase0: %d postfilt: %d "
	   "decimate: %d dt: %d\n",lspd,lspdt,lspdt_mode,phase0,postfilt,
	   decimate,dt);
    */

    /* Initialise ------------------------------------------------------------*/

    fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places   */
    fft_inv_cfg = kiss_fft_alloc(FFT_DEC, 1, NULL, NULL); /* inverse FFT, used just for synth */
    make_analysis_window(fft_fwd_cfg, w, W);
    make_synthesis_window(Pn);
    quantise_init();
    if (phaseexp)
	pexp = phase_experiment_create();
    if (ampexp)
	aexp = amp_experiment_create();

    if (bpf_en) {
        for(i=0; i<BPF_N; i++)
            bpf_buf[i] = 0.0;
    }

    for(i=0; i<LPC_ORD; i++) {
        prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
    }
    prev_e_dec = 1;
    for(m=1; m<=MAX_AMP; m++)
	prev_model_dec.A[m] = 0.0;
    prev_model_dec.Wo = TWO_PI/P_MAX;
    prev_model_dec.L = PI/prev_model_dec.Wo;
    prev_model_dec.voiced = 0;

    /*----------------------------------------------------------------* \

                            Main Loop

    \*----------------------------------------------------------------*/

    frames = 0;
    sum_snr = 0;
    while(fread(buf,sizeof(short),N,fin)) {
	frames++;

	for(i=0; i<N; i++)
	    buf_float[i] = buf[i];

	/* optionally filter input speech */
	
        if (prede) {
           pre_emp(Sn_pre, buf_float, &pre_mem, N);
           for(i=0; i<N; i++)
                buf_float[i] = Sn_pre[i];
        }
         
        if (bpf_en) {
            for(i=0; i<BPF_N; i++)
                bpf_buf[i] =  bpf_buf[N+i];
            for(i=0; i<N; i++)
                bpf_buf[BPF_N+i] = buf_float[i];
            inverse_filter(&bpf_buf[BPF_N], bpf, N, buf_float, BPF_N);
       }

        /* shift buffer of input samples, and insert new samples */

	for(i=0; i<M-N; i++) {
	    Sn[i] = Sn[i+N];
	}
	for(i=0; i<N; i++)
	    Sn[i+M-N] = buf_float[i];

	/*------------------------------------------------------------*\

                      Estimate Sinusoidal Model Parameters

	\*------------------------------------------------------------*/

	nlp(nlp_states,Sn,N,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo);
	model.Wo = TWO_PI/pitch;

	dft_speech(fft_fwd_cfg, Sw, Sn, w);
	two_stage_pitch_refinement(&model, Sw);
	estimate_amplitudes(&model, Sw, W, 1);
 
        #ifdef DUMP 
	dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
        #endif

	if (ampexp)
	    amp_experiment(aexp, &model, ampexp_arg);

	if (phaseexp) {
            #ifdef DUMP
	    dump_phase(&model.phi[0], model.L);
            #endif
	    phase_experiment(pexp, &model, phaseexp_arg);
            #ifdef DUMP
	    dump_phase_(&model.phi[0], model.L);
            #endif
	}
	
	if (hi) {
	    int m;
	    for(m=1; m<model.L/2; m++)
		model.A[m] = 0.0;
	    for(m=3*model.L/4; m<=model.L; m++)
		model.A[m] = 0.0;
	}

	/*------------------------------------------------------------*\

                            Zero-phase modelling

	\*------------------------------------------------------------*/

	if (phase0) {
	    float Wn[M];		        /* windowed speech samples */
	    float Rk[order+1];                  /* autocorrelation coeffs  */
            COMP a[FFT_ENC];	                

            #ifdef DUMP
	    dump_phase(&model.phi[0], model.L);
            #endif

	    /* find aks here, these are overwritten if LPC modelling is enabled */

            for(i=0; i<M; i++)
                Wn[i] = Sn[i]*w[i];
	    autocorrelate(Wn,Rk,M,order);
	    levinson_durbin(Rk,ak,order);

	    /* determine voicing */

	    snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew);

	    if (dump_pitch_e)
		fprintf(fjvm, "%f %f %d ", model.Wo, snr, model.voiced);

	    //printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced,
	    //	   model.Wo, prev_uq_Wo);
            #ifdef DUMP
	    dump_Sw_(Sw_);
	    dump_Ew(Ew);
	    dump_snr(snr);
            #endif

	    /* just to make sure we are not cheating - kill all phases */

	    for(i=0; i<=MAX_AMP; i++)
	    	model.phi[i] = 0;
	
            /* Determine DFT of A(exp(jw)), which is needed for phase0 model when
               LPC is not used, e.g. indecimate=1 (10ms) frames with no LPC */

            for(i=0; i<FFT_ENC; i++) {
                a[i].real = 0.0;
                a[i].imag = 0.0; 
            }

            for(i=0; i<=order; i++)
                a[i].real = ak[i];
            kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)a, (kiss_fft_cpx *)Aw);

	    if (hand_voicing) {
		fscanf(fvoicing,"%d\n",&model.voiced);
	    }
	}

	/*------------------------------------------------------------*\

	        LPC model amplitudes and LSP quantisation 

	\*------------------------------------------------------------*/

	if (lpc_model) {
	    
            e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
            for(i=0; i<LPC_ORD; i++)
                lsps_[i] = lsps[i];

            #ifdef DUMP
	    dump_ak(ak, order);
            dump_E(e);
            #endif
	
	    /* tracking down -ve energy values with BW expansion */
	    /*
	    if (e < 0.0) {
		int i;
		FILE*f=fopen("x.txt","wt");
		for(i=0; i<M; i++)
		    fprintf(f,"%f\n", Sn[i]);
		fclose(f);
		printf("e = %f frames = %d\n", e, frames);
		for(i=0; i<order; i++)
		    printf("%f ", ak[i]);
		exit(0);
	    }
	    */

	    if (dump_pitch_e)
		fprintf(fjvm, "%f\n", e);

            #ifdef DUMP
            dump_lsp(lsps);
            #endif

	    /* various LSP quantisation schemes */

	    if (lsp) {
		encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
		decode_lsps_scalar(lsps_, lsp_indexes, LPC_ORD);
		bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0);
		lsp_to_lpc(lsps_, ak, LPC_ORD);
	    }

	    if (lspd) {
		encode_lspds_scalar(lsp_indexes, lsps, LPC_ORD);
		decode_lspds_scalar(lsps_, lsp_indexes, LPC_ORD);
		lsp_to_lpc(lsps_, ak, LPC_ORD);
	    }

#ifdef __EXPERIMENTAL__
	    if (lspvq) {
		lspvq_quantise(lsps, lsps_, LPC_ORD);
		bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0);
		lsp_to_lpc(lsps_, ak, LPC_ORD);
	    }
#endif

	    if (lspjvm) {
		/* Jean-Marc's multi-stage, split VQ */
		lspjvm_quantise(lsps, lsps_, LPC_ORD);
		{ 
		    float lsps_bw[LPC_ORD];
		    memcpy(lsps_bw, lsps_, sizeof(float)*LPC_ORD);
		    bw_expand_lsps(lsps_bw, LPC_ORD, 50.0, 100.0);			    
		    lsp_to_lpc(lsps_bw, ak, LPC_ORD);
		}
	    }

#ifdef __EXPERIMENTAL__
	    if (lspanssi) {
		/*  multi-stage VQ from Anssi Ramo OH3GDD */

		lspanssi_quantise(lsps, lsps_, LPC_ORD, 5);
		bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0);			    
		lsp_to_lpc(lsps_, ak, LPC_ORD);
	    }
#endif

	    /* experimenting with non-linear LSP spacing to see if
	       it's just noticable */

	    if (lspjnd) {
		for(i=0; i<LPC_ORD; i++)
		    lsps_[i] = lsps[i];
		locate_lsps_jnd_steps(lsps_, LPC_ORD);
		lsp_to_lpc(lsps_, ak, LPC_ORD);
	    }

	    /* Another experiment with non-linear LSP spacing, this
	       time using a scaled version of mel frequency axis
	       warping.  The scaling is such that the integer output
	       can be directly sent over the channel.
	    */

	    if (lspmel) {
		float f, f_;
		float mel[LPC_ORD];
		int   mel_indexes[LPC_ORD];

		for(i=0; i<order; i++) {
		    f = (4000.0/PI)*lsps[i];
		    mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
		}

		for(i=1; i<order; i++) {
		    if (mel[i] == mel[i-1])
			mel[i]++;
		}

 		encode_mels_scalar(mel_indexes, mel, 6);
		decode_mels_scalar(mel, mel_indexes, 6);
                
                #ifdef DUMP
                dump_mel(mel, order);
                #endif

		for(i=0; i<LPC_ORD; i++) {
		    f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
		    lsps_[i] = f_*(PI/4000.0);
		}
 
		lsp_to_lpc(lsps_, ak, order);
	    }
	
	    if (scalar_quant_Wo_e) {

		e = decode_energy(encode_energy(e, E_BITS), E_BITS);
                model.Wo = decode_Wo(encode_Wo(model.Wo, WO_BITS), WO_BITS);
		model.L  = PI/model.Wo; /* if we quantise Wo re-compute L */
	    }

	    if (vector_quant_Wo_e) {

		/* JVM's experimental joint Wo & LPC energy quantiser */

		quantise_WoE(&model, &e, Woe_);
	    }
            
	}

	/*------------------------------------------------------------*\

          Synthesise and optional decimation to 20 or 40ms frame rate

	\*------------------------------------------------------------*/

        /* 
           if decimate == 2, we interpolate frame n from frame n-1 and n+1
           if decimate == 4, we interpolate frames n, n+1, n+2, from frames n-1 and n+3

           This is meant to give identical results to the implementations of various modes
           in codec2.c
        */

        /* delay line to keep frame by frame voicing decisions */

        for(i=0; i<decimate-1; i++)
            model_dec[i] = model_dec[i+1];
        model_dec[decimate-1] = model;

        if ((frames % decimate) == 0) {
            for(i=0; i<order; i++)
                lsps_dec[decimate-1][i] = lsps_[i];
            e_dec[decimate-1] = e;
            model_dec[decimate-1] = model;

            /* interpolate the model parameters */

            weight_inc = 1.0/decimate;
            for(i=0, weight=weight_inc; i<decimate-1; i++, weight += weight_inc) {
                //model_dec[i].voiced = model_dec[decimate-1].voiced;
                interpolate_lsp_ver2(&lsps_dec[i][0], prev_lsps_dec, &lsps_dec[decimate-1][0], weight, order);
                interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate-1], weight);
                e_dec[i] = interp_energy2(prev_e_dec, e_dec[decimate-1],weight);
            }

            /* then recover spectral amplitudes and synthesise */    

            for(i=0; i<decimate; i++) {
                if (lpc_model) {
                    lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order);                
                    aks_to_M2(fft_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i], 
                              &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw);
                    apply_lpc_correction(&model_dec[i]);
                    #ifdef DUMP
                    dump_lsp_(&lsps_dec[i][0]);
                    dump_ak_(&ak_dec[i][0], order);
                    sum_snr += snr;
                    dump_quantised_model(&model_dec[i]);
                    #endif
                }

                if (phase0)
                    phase_synth_zero_order(fft_fwd_cfg, &model_dec[i], ex_phase, Aw);	
                synth_one_frame(fft_inv_cfg, buf, &model_dec[i], Sn_, Pn, prede, &de_mem, gain);
                if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
            }
                    /*
            for(i=0; i<decimate; i++) {
                    printf("%d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced);
            }
            if (frames == 4*50)
                exit(0);
                    */
            /* update memories for next frame ----------------------------*/

            prev_model_dec = model_dec[decimate-1];
            prev_e_dec = e_dec[decimate-1];
            for(i=0; i<LPC_ORD; i++)
                prev_lsps_dec[i] = lsps_dec[decimate-1][i];        
       }

    }

    /*----------------------------------------------------------------*\

                            End Main Loop

    \*----------------------------------------------------------------*/

    fclose(fin);

    if (fout != NULL)
	fclose(fout);

    if (lpc_model)
    	printf("SNR av = %5.2f dB\n", sum_snr/frames);

    if (phaseexp)
	phase_experiment_destroy(pexp);
    if (ampexp)
	amp_experiment_destroy(aexp);
    #ifdef DUMP
    if (dump)
	dump_off();
    #endif

    if (hand_voicing)
	fclose(fvoicing);

    nlp_destroy(nlp_states);

    return 0;
}
Example #10
0
float lpc_model_amplitudes(
  float  Sn[],			/* Input frame of speech samples */
  float  w[],			
  MODEL *model,			/* sinusoidal model parameters */
  int    order,                 /* LPC model order */
  int    lsp_quant,             /* optional LSP quantisation if non-zero */
  float  ak[]                   /* output aks */
)
{
  float Wn[M];
  float R[LPC_MAX+1];
  float E;
  int   i,j;
  float snr;	
  float lsp[LPC_MAX];
  float lsp_hz[LPC_MAX];
  float lsp_[LPC_MAX];
  int   roots;                  /* number of LSP roots found */
  int   index;
  float se;
  int   k,m;
  const float * cb;
  float wt[LPC_MAX];

  for(i=0; i<M; i++)
    Wn[i] = Sn[i]*w[i];
  autocorrelate(Wn,R,M,order);
  levinson_durbin(R,ak,order);
  
  E = 0.0;
  for(i=0; i<=order; i++)
      E += ak[i]*R[i];
 
  for(i=0; i<order; i++)
      wt[i] = 1.0;

  if (lsp_quant) {
    roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
    if (roots != order)
	printf("LSP roots not found\n");

    /* convert from radians to Hz to make quantisers more
       human readable */

    for(i=0; i<order; i++)
	lsp_hz[i] = (4000.0/PI)*lsp[i];
    
    /* simple uniform scalar quantisers */

    for(i=0; i<10; i++) {
	k = lsp_cb[i].k;
	m = lsp_cb[i].m;
	cb = lsp_cb[i].cb;
	index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
	lsp_hz[i] = cb[index*k];
    }
    
    /* experiment: simulating uniform quantisation error
    for(i=0; i<order; i++)
	lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
    */

    for(i=0; i<order; i++)
	lsp[i] = (PI/4000.0)*lsp_hz[i];

    /* Bandwidth Expansion (BW).  Prevents any two LSPs getting too
       close together after quantisation.  We know from experiment
       that LSP quantisation errors < 12.5Hz (25Hz setp size) are
       inaudible so we use that as the minimum LSP separation.
    */

    for(i=1; i<5; i++) {
	if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
	    lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
    }

    /* as quantiser gaps increased, larger BW expansion was required
       to prevent twinkly noises */

    for(i=5; i<8; i++) {
	if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
	    lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
    }
    for(i=8; i<order; i++) {
	if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
	    lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
    }

    for(j=0; j<order; j++) 
	lsp_[j] = lsp[j];

    lsp_to_lpc(lsp_, ak, order);
#ifdef DUMP
    dump_lsp(lsp);
#endif
  }

#ifdef DUMP
  dump_E(E);
#endif
  #ifdef SIM_QUANT
  /* simulated LPC energy quantisation */
  {
      float e = 10.0*log10(E);
      e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX);
      E = pow(10.0,e/10.0);
  }
  #endif

  aks_to_M2(ak,order,model,E,&snr, 1);   /* {ak} -> {Am} LPC decode */

  return snr;
}
double PitchDetector::detectAcfPitchForBlock (float* samples, int numSamples)
{
    const int minSample = int (sampleRate / maxFrequency);
    const int maxSample = int (sampleRate / minFrequency);

    lowFilter.reset();
    highFilter.reset();
    lowFilter.processSamples (samples, numSamples);
    highFilter.processSamples (samples, numSamples);
    
    autocorrelate (samples, numSamples, buffer1.getData());
    normalise (buffer1.getData(), buffer1.getSize());

//    float max = 0.0f;
//    int sampleIndex = 0;
//    for (int i = minSample; i < maxSample; ++i)
//    {
//        const float sample = buffer1.getData()[i];
//        if (sample > max)
//        {
//            max = sample;
//            sampleIndex = i;
//        }
//    }

    float* bufferData = buffer1.getData();
//    const int bufferSize = buffer1.getSize();
    int firstNegativeZero = 0;
    
    // first peak method
    for (int i = 0; i < numSamples - 1; ++i)
    {
        if (bufferData[i] >= 0.0f && bufferData[i + 1] < 0.0f)
        {
            firstNegativeZero = i;
            break;
        }
    }
    
    // apply gain ramp
//    float rampDelta = 1.0f / numSamples;
//    float rampLevel = 1.0f;
//    for (int i = 0; i < numSamples - 1; ++i)
//    {
//        bufferData[i] *= cubeNumber (rampLevel);
//        rampLevel -= rampDelta;
//    }
    
    float max = -1.0f;
    int sampleIndex = 0;
    for (int i = jmax (firstNegativeZero, minSample); i < maxSample; ++i)
    {
        if (bufferData[i] > max)
        {
            max = bufferData[i];
            sampleIndex = i;
        }
    }
    
    
//    buffer2.setSizeQuick (numSamples);
/*    autocorrelate (buffer1.getData(), buffer1.getSize(), buffer2.getData());
    normalise (buffer2.getData(), buffer2.getSize());*/
    //buffer2.quickCopy (buffer1.getData(), buffer1.getSize());
//    differentiate (buffer1.getData(), buffer1.getSize(), buffer2.getData());
//    normalise (buffer2.getData()+2, buffer2.getSize()-2);
//    differentiate (buffer2.getData(), buffer2.getSize(), buffer2.getData());
    
/*    for (int i = minSample + 1; i < maxSample - 1; ++i)
    {
        const float previousSample = buffer2.getData()[i - 1];
        const float sample = buffer2.getData()[i];
        const float nextSample = buffer2.getData()[i + 1];

        if (sample > previousSample
            && sample > nextSample
            && sample > 0.5f)
            sampleIndex = i;
    }*/
    
    //differentiate (buffer2.getData(), buffer2.getSize(), buffer2.getData());
    //normalise (buffer2.getData() + minSample, buffer2.getSize() - minSample);
    
//    float min = 0.0f;
//    int sampleIndex = 0;
//    for (int i = minSample; i < maxSample; ++i)
//    {
//        const float sample = buffer2.getData()[i];
//        if (sample < min)
//        {
//            min = sample;
//            sampleIndex = i;
//        }
//    }


    if (sampleIndex > 0)
        return sampleRate / sampleIndex;
    else
        return 0.0;
}
main(int argc, char *argv[])
{ 

  FILE *fp;
  int fd,arg;
  snd_pcm_t *handle;
  snd_pcm_hw_params_t *hw_params;
  int rate=8000;
  float f[WINDOW],hann[WINDOW],w[WINDOW],w2[WINDOW],s0,s1=0,tot;
  float ac[ORDER+1],lcp[ORDER+1],lsp[ORDER],l[ORDER],weight[ORDER],delta,d;
  short sample,s[160],buf[2000];
  int i,j,n,b,toggle=1;
  float e,laste=0;
  int ebit=ETOPBIT, ebuff=0;
  int sound=0; // boolean start/stop
  float f2[FFT],min;
  float real[FFT],imag[FFT];
  int dummy[100000];
  float amp[WINDOW],pha[WINDOW];
  int frame=0;
  SpeexPreprocessState *st;
 
  for (i=0; i<8; i++) report[i]=0; 
  st = speex_preprocess_state_init(160, 8000);
  i=1;
  speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
//  i=1;
//  speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i);
//  e=.0;
//  speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &e);
//  e=.0;
//  speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &e);
  
  
  setup_twiddles(realtwiddle,imtwiddle,FFT);
  
  for (i=0; i<WINDOW; i++) f[i]=0;
  for (i=0; i<ORDER; i++) {last[i]=0; last2[i]=0;}
  
  for(i=0; i<WINDOW; i++) hann[i]=0.5-0.5*cos(2.0*M_PI*i/(WINDOW-1));
  
  if (argc==2) training=atoi(argv[1]);
  fprintf(stderr,"training=%i\n",training); //  exit(0);
  
  cbsize=0; start[0]=0; size[0]=0;
  if (training==0) if (fp=fopen("cb.txt","r")) {
    while(!feof(fp)) {
      fscanf(fp,"%i\n",&size[cbsize]);
      for (i=start[cbsize]; i<start[cbsize]+size[cbsize]; i++) {
        for (n=1; n<FF2-1; n++) fscanf(fp,"%f,",&cb[i][n]); fscanf(fp,"\n");
      }
      start[cbsize+1]=start[cbsize]+size[cbsize];  cbsize++; 
    }
    fclose(fp);
  }
  //for (i=0; i<cbsize; i++) printf("%i,\n",size[i]); exit(0);
  
  //---------------------------------
  
  
  fp=fopen("/tmp/b.raw","w");
  snd_pcm_open(&handle, "default", SND_PCM_STREAM_CAPTURE, 0);	
  snd_pcm_hw_params_malloc(&hw_params);			 
  snd_pcm_hw_params_any(handle, hw_params);
  snd_pcm_hw_params_set_access(handle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED);	
  snd_pcm_hw_params_set_format(handle, hw_params, SND_PCM_FORMAT_S16_LE);	
  snd_pcm_hw_params_set_rate_near(handle, hw_params, &rate, 0);
  snd_pcm_hw_params_set_channels(handle, hw_params, 2);
  snd_pcm_hw_params(handle, hw_params);
  snd_pcm_hw_params_free(hw_params);
  snd_pcm_prepare(handle);
  
  //printf("sleep 1...\n"); sleep(1); printf("OK, go....\n");
  while(1) {
    for (i=0; i<WINDOW-STEP; i++) f[i]=f[i+STEP]; // shift samples down
    if (toggle) {
      //read(fd,s,160*2);
      snd_pcm_readi(handle, buf, 160);
      for (i=0; i<160; i++) s[i]=buf[i*2];
      speex_preprocess_run(st, s);
    }
    else bcopy(&s[80],s,80*2);
    toggle=!toggle;
    for (i=WINDOW-STEP,j=0; i<WINDOW; i++,j++) {
      sample=s[j]; s0=(float)sample;
      f[i]=s0-s1*EMPH; s1=s0; // 1.0 pre-emphasis
      fwrite(&sample,2,1,fp);
  
    }
    for (i=0; i<WINDOW; i++) w[i]=f[i];
    // remove any DC level....
    tot=0; for (i=0; i<WINDOW; i++) tot+=w[i]; tot/=WINDOW;
    for (i=0; i<WINDOW; i++) w[i]-=tot;
    
    for (i=0; i<WINDOW; i++) w[i]*=hann[i]; // window data
 
    autocorrelate(w,ac,WINDOW,ORDER);
    wld(&lpc[1],ac,ORDER); lpc[0]=1.0;
        // e=ac[0];
    e=0;for(i=0; i<=ORDER; i++) e+=ac[i]*lpc[i];   if (e<0) e=0;

    if (e>TOL_OFF) ebuff|=ebit; else ebuff&=~ebit; // update energy bit-buffer
    ebit>>=1; if (ebit==0) ebit=ETOPBIT; // circular shift
 
    for (i=0; i<FFT; i++) {real[i]=0; imag[i]=0;}
    for (i=0; i<=ORDER; i++) real[i]=lpc[i];
    simple_fft(real,imag,realtwiddle,imtwiddle,FFT);
    for (i=0; i<FF2; i++) {
      b=bin[i];
      f2[i]=powf(real[b]*real[b]+imag[b]*imag[b],-0.5);
      //f2[i]=powf(f2[i],0.333333);
      //f2[i]=powf(f2[i],1.2);
      f2[i]=logf(f2[i]);
    }

   
    // spectral tilt compensation...
    for (i=1; i<FF2; i++) f2[i]=f2[i]*(float)(i+TILT)/TILT;

 
    // fold down to 9 bins...
/*
    if (f2[FF2-2]>f2[FF2-3]) f2[FF2-3]=f2[FF2-2]; f2[FF2-2]=0;
    if (f2[FF2-4]>f2[FF2-5]) f2[FF2-5]=f2[FF2-4]; f2[FF2-4]=0;
    if (f2[FF2-9]>f2[FF2-10]) f2[FF2-10]=f2[FF2-9]; f2[FF2-9]=0;
    if (f2[FF2-11]>f2[FF2-12]) f2[FF2-12]=f2[FF2-11]; f2[FF2-11]=0;
    if (f2[FF2-13]>f2[FF2-14]) f2[FF2-14]=f2[FF2-13]; f2[FF2-13]=0;
    if (f2[FF2-15]>f2[FF2-16]) f2[FF2-16]=f2[FF2-15]; f2[FF2-15]=0;
*/
    for (i=0; i<FF2; i++) {
      if (f2[i]>6.0) f2[i]=6.0;
      f2[i]*=100.0;
    }
    
if (TRACE) { fprintf(stderr,"%.f,",e);  for (i=1; i<FF2-1; i++) fprintf(stderr,"%.f,",f2[i]); fprintf(stderr,"\n");}

   
    // calculate frame delta....
    delta=0; for (i=1; i<FF2-1; i++) {d=f2[i]-last[i]; delta+=d*d;}
    //printf("delta=%f\n",delta);
 
    if (sound==0 && e>TOL_ON && frame>200)  { // start recording...
      bcopy(last2,&word[wsize],FF2*4); wsize++;
      bcopy(last,&word[wsize],FF2*4); wsize++;
      sound=1; wsize=0;
      bcopy(f2,&word[wsize],FF2*4); wsize++;
      bcopy(last,last2,FF2*4);
      bcopy(f2,last,FF2*4);
    }
    else if (sound==1 && e>TOL_OFF) { // continue reading word...
      bcopy(f2,&word[wsize],FF2*4); wsize++; if (wsize>200) wsize=200;
      bcopy(last,last2,FF2*4);
      bcopy(f2,last,FF2*4);
    }
    else if (sound==1 && ebuff==0) { // finised reading word
       // wsize-=8; // remove training silence (2 frame buffer)
       if (wsize>4 && wsize<50) {
         if (training>0) train();
         else   closest(); 
       }     
       sound=0; wsize=0; bcopy(last,last2,FF2*4); bcopy(f2,last,FF2*4);
    }
    
   //for (i=1; i<FF2-1; i++) printf("%.0f,",f2[i]); printf("  e=%f\n",e);
   laste=e;
   frame++; if (frame==37800) exit(0);
  }
}