/*
 * Quantise a narrowband LSP vector and write the codebook indices to `bits`.
 *
 * lsp   : input LSP vector (read only here).
 * qlsp  : work/output vector of at least `order` entries; on return
 *         qlsp[k] = lsp[k] - (rescaled residual left after the last stage).
 * order : number of LSP coefficients; the split stages below are hard-wired
 *         to two halves of 5, and the weight array holds 10 entries.
 * bits  : bit packer that receives five 6-bit indices, in stage order.
 *
 * NOTE(review): this relies on lsp_quant()/lsp_weight_quant() leaving the
 * remaining residual in the vector they are given -- confirm against their
 * definitions.
 */
void lsp_quant_nb(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)
{
   spx_word16_t quant_weight[10];
   spx_lsp_t *low = qlsp;                       /* coefficients 0..4 */
   spx_lsp_t *high = qlsp + 5;                  /* coefficients 5..9 */
   spx_word16_t *weight_low = quant_weight;
   spx_word16_t *weight_high = quant_weight + 5;
   int index;
   int k;

   /* Start from a copy of the input and derive the per-coefficient weights */
   for (k = 0; k < order; k++) {
      qlsp[k] = lsp[k];
   }
   compute_quant_weights(qlsp, quant_weight, order);

   /* Remove the linear trend before the first codebook search */
   for (k = 0; k < order; k++) {
      qlsp[k] = SUB16(qlsp[k], LSP_LINEAR(k));
   }

#ifndef FIXED_POINT
   for (k = 0; k < order; k++) {
      qlsp[k] = LSP_SCALE*qlsp[k];
   }
#endif

   /* Stage 1: whole vector */
   index = lsp_quant(qlsp, cdbk_nb, NB_CDBK_SIZE, order);
   speex_bits_pack(bits, index, 6);

   /* Double the residual before each refinement stage */
   for (k = 0; k < order; k++) {
      qlsp[k] *= 2;
   }

   /* Stage 2: lower half, first refinement */
   index = lsp_weight_quant(low, weight_low, cdbk_nb_low1, NB_CDBK_SIZE_LOW1, 5);
   speex_bits_pack(bits, index, 6);

   for (k = 0; k < 5; k++) {
      low[k] *= 2;
   }

   /* Stage 3: lower half, second refinement */
   index = lsp_weight_quant(low, weight_low, cdbk_nb_low2, NB_CDBK_SIZE_LOW2, 5);
   speex_bits_pack(bits, index, 6);

   /* Stage 4: upper half, first refinement */
   index = lsp_weight_quant(high, weight_high, cdbk_nb_high1, NB_CDBK_SIZE_HIGH1, 5);
   speex_bits_pack(bits, index, 6);

   for (k = 0; k < 5; k++) {
      high[k] *= 2;
   }

   /* Stage 5: upper half, second refinement */
   index = lsp_weight_quant(high, weight_high, cdbk_nb_high2, NB_CDBK_SIZE_HIGH2, 5);
   speex_bits_pack(bits, index, 6);

   /* Undo the accumulated scaling of the residual */
#ifdef FIXED_POINT
   for (k = 0; k < order; k++) {
      qlsp[k] = PSHR16(qlsp[k], 2);
   }
#else
   for (k = 0; k < order; k++) {
      qlsp[k] = qlsp[k] * .00097656;
   }
#endif

   /* Output = input minus what could not be represented */
   for (k = 0; k < order; k++) {
      qlsp[k] = lsp[k] - qlsp[k];
   }
}
/*
 * Encode one frame of linear PCM with Speex.
 *
 * codec            : codec instance; codec->private_info holds the speex_context.
 * decoded_data     : input samples (treated as an array of short).
 * encoded_data     : output buffer; at most context->encoder_frame_size bytes
 *                    are written to it.
 * encoded_data_len : receives the number of payload bytes, or 0 when the frame
 *                    is suppressed because silence is already in progress.
 * flag             : receives SWITCH_CODEC_FLAG_SILENCE_START / _STOP / _SILENCE
 *                    depending on the speech/silence transition.
 * other_codec, decoded_data_len, decoded_rate and encoded_rate are not used.
 *
 * Returns SWITCH_STATUS_FALSE when the codec has no private context and
 * SWITCH_STATUS_SUCCESS otherwise.
 */
static switch_status_t switch_speex_encode(switch_codec_t *codec, switch_codec_t *other_codec, void *decoded_data, uint32_t decoded_data_len, uint32_t decoded_rate, void *encoded_data, uint32_t *encoded_data_len, uint32_t *encoded_rate, unsigned int *flag)
{
	struct speex_context *context = codec->private_info;
	short *buf;
	int is_speech = 1;

	if (!context) {
		return SWITCH_STATUS_FALSE;
	}

	buf = decoded_data;

	/* Optional preprocessor decides whether this frame is speech */
	if (context->pp) {
		is_speech = speex_preprocess(context->pp, buf, NULL);
	}

	if (is_speech) {
		/* A zero return from the encoder only counts as silence when DTX is on */
		is_speech = speex_encode_int(context->encoder_state, buf, &context->encoder_bits)
			|| !context->codec_settings.dtx;
	} else {
		speex_bits_pack(&context->encoder_bits, 0, 5);
	}

	if (is_speech) {
		switch_clear_flag(context, SWITCH_CODEC_FLAG_SILENCE);
		*flag |= SWITCH_CODEC_FLAG_SILENCE_STOP;
	} else {
		if (switch_test_flag(context, SWITCH_CODEC_FLAG_SILENCE)) {
			/*
			 * Nothing is emitted for this frame, so drop the bits that were
			 * just packed; otherwise they accumulate for the whole silence
			 * period and end up in front of the next packet that is written.
			 */
			speex_bits_reset(&context->encoder_bits);
			*encoded_data_len = 0;
			*flag |= SWITCH_CODEC_FLAG_SILENCE;
			return SWITCH_STATUS_SUCCESS;
		}

		switch_set_flag(context, SWITCH_CODEC_FLAG_SILENCE);
		*flag |= SWITCH_CODEC_FLAG_SILENCE_START;
	}

	/* Terminate the bit stream, flush it to the caller and start afresh */
	speex_bits_pack(&context->encoder_bits, 15, 5);
	*encoded_data_len = speex_bits_write(&context->encoder_bits, (char *) encoded_data, context->encoder_frame_size);
	speex_bits_reset(&context->encoder_bits);

	/* Keep the historical "one byte less" length, but never wrap below zero */
	if (*encoded_data_len > 0) {
		(*encoded_data_len)--;
	}

	return SWITCH_STATUS_SUCCESS;
}
/*
 * Quantise a high-band LSP vector with two full-length codebook stages and
 * write the two 6-bit indices to `bits`.
 *
 * lsp   : input LSP vector (read only here).
 * qlsp  : work/output vector of at least `order` entries; on return
 *         qlsp[k] = lsp[k] - (rescaled residual left after the second stage).
 * order : number of LSP coefficients (the weight array holds 10 entries).
 * bits  : bit packer receiving the indices.
 *
 * NOTE(review): this relies on lsp_quant()/lsp_weight_quant() leaving the
 * remaining residual in the vector they are given -- confirm against their
 * definitions.
 */
void lsp_quant_high(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)
{
   spx_word16_t quant_weight[10];
   int index;
   int k;

   /* Work on a copy of the input and derive the weights from it */
   for (k = 0; k < order; k++) {
      qlsp[k] = lsp[k];
   }
   compute_quant_weights(qlsp, quant_weight, order);

   /* Remove the linear trend of the high-band LSPs */
   for (k = 0; k < order; k++) {
      qlsp[k] = SUB16(qlsp[k], LSP_LINEAR_HIGH(k));
   }

#ifndef FIXED_POINT
   for (k = 0; k < order; k++) {
      qlsp[k] = qlsp[k]*LSP_SCALE;
   }
#endif

   /* Stage 1: unweighted search, 64-entry codebook */
   index = lsp_quant(qlsp, high_lsp_cdbk, 64, order);
   speex_bits_pack(bits, index, 6);

   /* Double the residual before refining it */
   for (k = 0; k < order; k++) {
      qlsp[k] *= 2;
   }

   /* Stage 2: weighted search, 64-entry codebook */
   index = lsp_weight_quant(qlsp, quant_weight, high_lsp_cdbk2, 64, order);
   speex_bits_pack(bits, index, 6);

   /* Undo the scaling applied to the residual */
#ifdef FIXED_POINT
   for (k = 0; k < order; k++) {
      qlsp[k] = PSHR16(qlsp[k], 1);
   }
#else
   for (k = 0; k < order; k++) {
      qlsp[k] = qlsp[k]*0.0019531;
   }
#endif

   /* Output = input minus what could not be represented */
   for (k = 0; k < order; k++) {
      qlsp[k] = lsp[k] - qlsp[k];
   }
}
/**
 * Encode PCM samples into one Speex payload.
 *
 * @param st     Encoder state (encoder handle, bit packer, channels, frame size)
 * @param buf    Output buffer for the encoded payload
 * @param len    In: size of buf in bytes (must be at least 128).
 *               Out: number of bytes written
 * @param sampv  Input samples; NULL (or sampc == 0) produces a silence frame
 * @param sampc  Number of input samples; must be a whole number of
 *               (channels * frame_size) blocks
 *
 * @return 0 on success, ENOMEM if the output buffer is too small, EINVAL on
 *         missing arguments or a sample count that is not a whole number of
 *         frames
 *
 * NOTE(review): in the stereo case the input is passed to
 * speex_encode_stereo_int() through a cast that drops const, so the caller's
 * buffer may be modified -- confirm callers tolerate that.
 */
static int encode(struct auenc_state *st, uint8_t *buf, size_t *len,
		  const int16_t *sampv, size_t sampc)
{
	size_t n;
	int ret, r;

	if (!st || !buf || !len)
		return EINVAL;

	n = st->channels * st->frame_size;

	if (*len < 128)
		return ENOMEM;

	/* VAD */
	if (!sampv || !sampc) {
		/* 5 zeros interpreted by Speex as silence (submode 0) */
		speex_bits_pack(&st->bits, 0, 5);
		goto out;
	}

	/*
	 * sampc is unsigned: a partial trailing frame would make "sampc -= n"
	 * wrap around and the loop would read far beyond the input, and n == 0
	 * would never terminate. Reject both before touching the bit packer.
	 */
	if (n == 0 || (sampc % n) != 0)
		return EINVAL;

	/* Handle multiple Speex frames in one RTP packet */
	while (sampc > 0) {

		/* Assume stereo */
		if (2 == st->channels) {
			speex_encode_stereo_int((int16_t *)sampv,
						st->frame_size, &st->bits);
		}

		ret = speex_encode_int(st->enc, (int16_t *)sampv, &st->bits);
		if (1 != ret) {
			warning("speex: speex_encode_int: ret=%d\n", ret);
		}

		sampc -= n;
		sampv += n;
	}

 out:
	/* Terminate bit stream */
	speex_bits_pack(&st->bits, 15, 5);

	r = speex_bits_write(&st->bits, (char *)buf, (int)*len);
	*len = r;

	speex_bits_reset(&st->bits);

	return 0;
}
/*
 * Down-mix an interleaved stereo frame to mono in place and append the
 * in-band stereo side information to `bits`.
 *
 * data       : 2*frame_size interleaved samples (left, right, ...) on entry;
 *              the first frame_size entries hold the mono mix on return.
 * frame_size : number of samples per channel.
 * bits       : bit packer; receives 5+4 marker bits, 1 sign bit, a 5-bit
 *              balance magnitude and a 2-bit energy-ratio index.
 */
EXPORT void speex_encode_stereo(float *data, int frame_size, SpeexBits * bits)
{
   float e_left = 0;
   float e_right = 0;
   float e_tot = 0;
   float balance;
   float e_ratio;
   int ratio_index;
   int n;

   /* Accumulate channel energies while writing the mono mix over the input */
   for (n = 0; n < frame_size; n++) {
      float left = data[2 * n];
      float right = data[2 * n + 1];

      e_left += left * left;
      e_right += right * right;
      data[n] = .5 * (left + right);
      e_tot += data[n] * data[n];
   }

   balance = (e_left + 1) / (e_right + 1);
   e_ratio = e_tot / (1 + e_left + e_right);

   /* In-band request header */
   speex_bits_pack(bits, 14, 5);
   speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4);

   balance = 4 * log(balance);

   /* Sign bit: 0 when the log balance is strictly positive, 1 otherwise */
   speex_bits_pack(bits, (balance > 0) ? 0 : 1, 1);

   /* Rounded magnitude; anything above 30 is sent as 31 */
   balance = floor(.5 + fabs(balance));
   if (balance > 30) {
      balance = 31;
   }
   speex_bits_pack(bits, (int)balance, 5);

   /* FIXME: this is a hack */
   ratio_index = scal_quant(e_ratio * Q15_ONE, e_ratio_quant_bounds, 4);
   speex_bits_pack(bits, ratio_index, 2);
}
/*
 * Down-mix an interleaved stereo frame to mono in place and append the
 * in-band stereo side information to `bits`.
 *
 * data       : 2*frame_size interleaved samples (left, right, ...) on entry;
 *              the first frame_size entries hold the mono mix on return.
 * frame_size : number of samples per channel.
 * bits       : bit packer; receives 5+4 marker bits, 1 sign bit, a 5-bit
 *              balance magnitude and a 2-bit energy-ratio index.
 */
void speex_encode_stereo(float *data, int frame_size, SpeexBits *bits)
{
   float e_left = 0;
   float e_right = 0;
   float e_tot = 0;
   float balance;
   float e_ratio;
   int ratio_index;
   int n;

   /* Accumulate channel energies while writing the mono mix over the input */
   for (n = 0; n < frame_size; n++) {
      float left = data[2*n];
      float right = data[2*n+1];

      e_left += left*left;
      e_right += right*right;
      data[n] = .5*(left+right);
      e_tot += data[n]*data[n];
   }

   balance = (e_left+1)/(e_right+1);
   e_ratio = e_tot/(1+e_left+e_right);

   /* In-band request header */
   speex_bits_pack(bits, 14, 5);
   speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4);

   balance = 4*log(balance);

   /* Sign bit: 0 when the log balance is strictly positive, 1 otherwise */
   speex_bits_pack(bits, (balance > 0) ? 0 : 1, 1);

   /* Rounded magnitude; anything above 30 is sent as 31 */
   balance = floor(.5+fabs(balance));
   if (balance > 30) {
      balance = 31;
   }
   speex_bits_pack(bits, (int)balance, 5);

   /* Quantize energy ratio */
   ratio_index = vq_index(&e_ratio, e_ratio_quant, 1, 4);
   speex_bits_pack(bits, ratio_index, 2);
}
void split_cb_search_shape_sign( spx_sig_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_sig_t *r, SpeexBits *bits, char *stack, int complexity ) { int i,j,k,m,n,q; spx_word16_t *resp; #ifdef _USE_SSE __m128 *resp2; __m128 *E; #else spx_word16_t *resp2; spx_word32_t *E; #endif spx_word16_t *t; spx_sig_t *e, *r2; spx_word16_t *tmp; spx_word32_t *ndist, *odist; int *itmp; spx_word16_t **ot, **nt; int **nind, **oind; int *ind; const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; split_cb_params *params; int N=2; int *best_index; spx_word32_t *best_dist; int have_sign; N=complexity; if (N>10) N=10; ot=PUSH(stack, N, spx_word16_t*); nt=PUSH(stack, N, spx_word16_t*); oind=PUSH(stack, N, int*); nind=PUSH(stack, N, int*); params = (split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128); E = PUSH(stack, shape_cb_size>>2, __m128); #else resp2 = resp; E = PUSH(stack, shape_cb_size, spx_word32_t); #endif t = PUSH(stack, nsf, spx_word16_t); e = PUSH(stack, nsf, spx_sig_t); r2 = PUSH(stack, nsf, spx_sig_t); ind = PUSH(stack, nb_subvect, int); tmp = PUSH(stack, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot[i]=tmp; tmp += nsf; nt[i]=tmp; tmp += nsf; } best_index = PUSH(stack, N, int); best_dist = PUSH(stack, N, spx_word32_t); ndist = PUSH(stack, N, spx_word32_t); odist = PUSH(stack, N, spx_word32_t); itmp = PUSH(stack, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp; itmp+=nb_subvect; 
oind[i]=itmp; itmp+=nb_subvect; for (j=0;j<nb_subvect;j++) nind[i][j]=oind[i][j]=-1; } /* FIXME: make that adaptive? */ for (i=0;i<nsf;i++) t[i]=SHR(target[i],6); for (j=0;j<N;j++) for (i=0;i<nsf;i++) ot[j][i]=t[i]; /*for (i=0;i<nsf;i++) printf ("%d\n", (int)t[i]);*/ /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=-2; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; /*Find new n-best based on previous n-best j*/ if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { spx_word16_t *ct; spx_word32_t err=0; ct = ot[j]; /*update target*/ /*previous target*/ for (m=i*subvect_size;m<(i+1)*subvect_size;m++) t[m]=ct[m]; /* New code: update only enough of the target to calculate error*/ { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] -= res[m]; else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] += res[m]; } /*compute error (distance)*/ err=odist[j]; for (m=i*subvect_size;m<(i+1)*subvect_size;m++) err += t[m]*t[m]; /*update n-best list*/ if (err<ndist[N-1] || ndist[N-1]<-1) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) t[m]=ct[m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT 
g=sign*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q])); #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],g*r[q]); #endif } for (m=0;m<N;m++) { if (err < ndist[m] || ndist[m]<-1) { for (n=N-1;n>m;n--) { for (q=(i+1)*subvect_size;q<nsf;q++) nt[n][q]=nt[n-1][q]; for (q=0;q<nb_subvect;q++) nind[n][q]=nind[n-1][q]; ndist[n]=ndist[n-1]; } for (q=(i+1)*subvect_size;q<nsf;q++) nt[m][q]=t[q]; for (q=0;q<nb_subvect;q++) nind[m][q]=oind[j][q]; nind[m][i]=best_index[k]; ndist[m]=err; break; } } } } if (i==0) break; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]+=e[j]; /* Update target */ syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]-=r2[j]; }
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ int pitch_search_3tap( spx_word16_t target[], /* Target vector */ spx_word16_t* sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const void* par, int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits* bits, char* stack, spx_word16_t* exc2, spx_word16_t* r, int complexity, int cdbk_offset, int plc_tuning, spx_word32_t* cumul_gain ) { int i; int cdbk_index, pitch = 0, best_gain_index = 0; VARDECL(spx_sig_t * best_exc); VARDECL(spx_word16_t * new_target); VARDECL(spx_word16_t * best_target); int best_pitch = 0; spx_word32_t err, best_err = -1; int N; const ltp_params* params; const signed char* gain_cdbk; int gain_cdbk_size; int scaledown = 0; VARDECL(int * nbest); params = (const ltp_params*) par; gain_cdbk_size = 1 << params->gain_bits; gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset; N = complexity; if (N > 10) N = 10; if (N < 1) N = 1; ALLOC(nbest, N, int); params = (const ltp_params*) par; if (end < start) { speex_bits_pack(bits, 0, params->pitch_bits); speex_bits_pack(bits, 0, params->gain_bits); SPEEX_MEMSET(exc, 0, nsf); return start; } #ifdef FIXED_POINT /* Check if we need to scale everything down in the pitch search to avoid overflows */ for (i = 0; i < nsf; i++) { if (ABS16(target[i]) > 16383) { scaledown = 1; break; } } for (i = -end; i < nsf; i++) { if (ABS16(exc2[i]) > 16383) { scaledown = 1; break; } } #endif if (N > end - start + 1) N = end - start + 1; if (end != start) open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack); else nbest[0] = start; ALLOC(best_exc, nsf, spx_sig_t); ALLOC(new_target, nsf, 
spx_word16_t); ALLOC(best_target, nsf, spx_word16_t); for (i = 0; i < N; i++) { pitch = nbest[i]; SPEEX_MEMSET(exc, 0, nsf); err = pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf, bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown); if (err < best_err || best_err < 0) { SPEEX_COPY(best_exc, exc, nsf); SPEEX_COPY(best_target, new_target, nsf); best_err = err; best_pitch = pitch; best_gain_index = cdbk_index; } } /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/ speex_bits_pack(bits, best_pitch - start, params->pitch_bits); speex_bits_pack(bits, best_gain_index, params->gain_bits); #ifdef FIXED_POINT *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4 * best_gain_index + 3], 8), MAX32(1024, *cumul_gain)); #else *cumul_gain = 0.03125 * MAX32(1024, *cumul_gain) * params->gain_cdbk[4 * best_gain_index + 3]; #endif /*printf ("%f\n", cumul_gain);*/ /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/ SPEEX_COPY(exc, best_exc, nsf); SPEEX_COPY(target, best_target, nsf); #ifdef FIXED_POINT /* Scale target back up if needed */ if (scaledown) { for (i = 0; i < nsf; i++) target[i] = SHL16(target[i], 1); } #endif return pitch; }
/*
 * Integer-sample stereo encoder: down-mixes an interleaved stereo frame to
 * mono in place and appends the in-band stereo side information to `bits`.
 *
 * data       : 2*frame_size interleaved samples (left, right, ...) on entry;
 *              the first frame_size entries hold the mono mix on return.
 * frame_size : number of samples per channel.
 * bits       : bit packer; receives 5+4 marker bits, 1 bit telling which
 *              channel has more energy (0 = left strictly larger), a 5-bit
 *              balance index and a 2-bit energy-ratio index.
 */
EXPORT void speex_encode_stereo_int(spx_int16_t * data, int frame_size, SpeexBits * bits)
{
   spx_word32_t e_left = 0;
   spx_word32_t e_right = 0;
   spx_word32_t e_tot = 0;
   spx_word32_t balance, e_ratio;
   spx_word32_t largest, smallest;
   int balance_id;
   int ratio_id;
   int left_is_louder;
   int n;
#ifdef FIXED_POINT
   int shift;
#endif

   /* In band marker */
   speex_bits_pack(bits, 14, 5);
   /* Stereo marker */
   speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4);

   /* Channel energies (scaled down by 8 bits) and in-place mono mix */
   for (n = 0; n < frame_size; n++) {
      spx_int16_t left = data[2 * n];
      spx_int16_t right = data[2 * n + 1];

      e_left += SHR32(MULT16_16(left, left), 8);
      e_right += SHR32(MULT16_16(right, right), 8);
#ifdef FIXED_POINT
      /* I think this is actually unbiased */
      data[n] = SHR16(left, 1) + PSHR16(right, 1);
#else
      data[n] = .5 * (((float)left) + right);
#endif
      e_tot += SHR32(MULT16_16(data[n], data[n]), 8);
   }

   /* Which side dominates; ties are reported as "right" */
   left_is_louder = (e_left > e_right);
   speex_bits_pack(bits, left_is_louder ? 0 : 1, 1);
   if (left_is_louder) {
      largest = e_left;
      smallest = e_right;
   } else {
      largest = e_right;
      smallest = e_left;
   }

   /* Balance quantization */
#ifdef FIXED_POINT
   shift = spx_ilog2(largest) - 15;
   largest = VSHR32(largest, shift - 4);
   smallest = VSHR32(smallest, shift);
   balance = DIV32(largest, ADD32(smallest, 1));
   if (balance > 32767) {
      balance = 32767;
   }
   balance_id = scal_quant(EXTRACT16(balance), balance_bounds, 32);
#else
   balance = (largest + 1.) / (smallest + 1.);
   balance = 4 * log(balance);
   balance_id = floor(.5 + fabs(balance));
   if (balance_id > 30) {
      balance_id = 31;
   }
#endif

   speex_bits_pack(bits, balance_id, 5);

   /* "coherence" quantisation */
#ifdef FIXED_POINT
   shift = spx_ilog2(e_tot);
   e_tot = VSHR32(e_tot, shift - 25);
   e_left = VSHR32(e_left, shift - 10);
   e_right = VSHR32(e_right, shift - 10);
   e_ratio = DIV32(e_tot, e_left + e_right + 1);
#else
   e_ratio = e_tot / (1. + e_left + e_right);
#endif

   ratio_id = scal_quant(EXTRACT16(e_ratio), e_ratio_quant_bounds, 4);
   speex_bits_pack(bits, ratio_id, 2);
}
static void split_cb_search_shape_sign_N1( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int update_target ) { int i,j,m,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int best_index; spx_word32_t best_dist; int have_sign; params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); /* FIXME: Do we still need to copy the target? 
*/ SPEEX_COPY(t, target, nsf); compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (i=0;i<nb_subvect;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,params->shape_bits+have_sign); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, 
r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
/*!
 * \brief convert work buffer and produce output frame
 *
 * Encodes every complete frame currently held in the private sample buffer
 * into one Speex bit stream and hands it to ast_trans_frameout().
 *
 * \return NULL when less than one frame is buffered, or when the last frame
 *         processed was silence and silent_state was already set; an isolated
 *         AST_FRAME_CNG frame when silence has just started; otherwise the
 *         frame produced by ast_trans_frameout().
 */
static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
{
	struct speex_coder_pvt *tmp = pvt->pvt;
	int is_speech=1;
	int datalen = 0;	/* output bytes */
	int samples = 0;	/* output samples */

	/* We can't work on anything less than a frame in size */
	if (pvt->samples < tmp->framesize)
		return NULL;
	speex_bits_reset(&tmp->bits);
	/* Consume whole frames; is_speech ends up describing the last one only */
	while (pvt->samples >= tmp->framesize) {
#ifdef _SPEEX_TYPES_H
		/* Preprocess audio */
		if (preproc)
			is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
		/* Encode a frame of data */
		if (is_speech) {
			/* If DTX enabled speex_encode returns 0 during silence */
			is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
		} else {
			/* 5 zeros interpreted by Speex as silence (submode 0) */
			speex_bits_pack(&tmp->bits, 0, 5);
		}
#else
		{
			/* NOTE(review): assumes tmp->framesize never exceeds 1024 -- confirm */
			float fbuf[1024];
			int x;
			/* Convert to floating point */
			for (x = 0; x < tmp->framesize; x++)
				fbuf[x] = tmp->buf[samples + x];
			/* Encode a frame of data */
			is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
		}
#endif
		samples += tmp->framesize;
		pvt->samples -= tmp->framesize;
	}

	/* Move the data at the end of the buffer to the front */
	/* NOTE(review): the "* 2" presumably is the size of one sample in bytes -- confirm */
	if (pvt->samples)
		memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);

	/* Use AST_FRAME_CNG to signify the start of any silence period */
	if (is_speech) {
		tmp->silent_state = 0;
	} else {
		if (tmp->silent_state) {
			/* Silence already signalled: emit nothing for this batch */
			return NULL;
		} else {
			struct ast_frame frm = {
				.frametype = AST_FRAME_CNG,
				.src = pvt->t->name,
			};

			/*
			 * XXX I don't think the AST_FRAME_CNG code has ever
			 * really worked for speex.  There doesn't seem to be
			 * any consumers of the frame type.  Everyone that
			 * references the type seems to pass the frame on.
			 */
			tmp->silent_state = 1;

			/* XXX what now ? format etc... */
			return ast_frisolate(&frm);
		}
	}

	/* Terminate bit stream */
	speex_bits_pack(&tmp->bits, 15, 5);
	datalen = speex_bits_write(&tmp->bits, pvt->outbuf.c, pvt->t->buf_size);
	return ast_trans_frameout(pvt, datalen, samples);
}

/*! \brief release the Speex decoder and bit packer owned by a speextolin translator */
static void speextolin_destroy(struct ast_trans_pvt *arg)
{
	struct speex_coder_pvt *pvt = arg->pvt;

	speex_decoder_destroy(pvt->speex);
	speex_bits_destroy(&pvt->bits);
}

/*! \brief release the Speex encoder, bit packer and (if enabled) preprocessor owned by a lintospeex translator */
static void lintospeex_destroy(struct ast_trans_pvt *arg)
{
	struct speex_coder_pvt *pvt = arg->pvt;
#ifdef _SPEEX_TYPES_H
	/* The preprocessor state is only torn down when preprocessing is enabled */
	if (preproc)
		speex_preprocess_state_destroy(pvt->pp);
#endif
	speex_encoder_destroy(pvt->speex);
	speex_bits_destroy(&pvt->bits);
}

/* Translator descriptor: "speex" to "slin", both at 8000 Hz (definition continues beyond this excerpt) */
static struct ast_translator speextolin = {
	.name = "speextolin",
	.src_codec = {
		.name = "speex",
		.type = AST_MEDIA_TYPE_AUDIO,
		.sample_rate = 8000,
	},
	.dst_codec = {
		.name = "slin",
		.type = AST_MEDIA_TYPE_AUDIO,
		.sample_rate = 8000,
	},
	.format = "slin",
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ int pitch_search_3tap( spx_sig_t target[], /* Target vector */ spx_sig_t *sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const void *par, int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits *bits, char *stack, spx_sig_t *exc2, spx_word16_t *r, int complexity, int cdbk_offset, int plc_tuning ) { int i,j; int cdbk_index, pitch=0, best_gain_index=0; VARDECL(spx_sig_t *best_exc); VARDECL(spx_sig_t *new_target); VARDECL(spx_sig_t *best_target); int best_pitch=0; spx_word64_t err, best_err=-1; int N; const ltp_params *params; VARDECL(int *nbest); N=complexity; if (N>10) N=10; if (N<1) N=1; ALLOC(nbest, N, int); params = (const ltp_params*) par; if (end<start) { speex_bits_pack(bits, 0, params->pitch_bits); speex_bits_pack(bits, 0, params->gain_bits); for (i=0; i<nsf; i++) exc[i]=0; return start; } ALLOC(best_exc, nsf, spx_sig_t); ALLOC(new_target, nsf, spx_sig_t); ALLOC(best_target, nsf, spx_sig_t); if (N>end-start+1) N=end-start+1; open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack); for (i=0; i<N; i++) { pitch=nbest[i]; for (j=0; j<nsf; j++) exc[j]=0; err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, par, pitch, p, nsf, bits, stack, exc2, r, new_target, &cdbk_index, cdbk_offset, plc_tuning); if (err<best_err || best_err<0) { for (j=0; j<nsf; j++) best_exc[j]=exc[j]; for (j=0; j<nsf; j++) best_target[j]=new_target[j]; best_err=err; best_pitch=pitch; best_gain_index=cdbk_index; } } /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/ speex_bits_pack(bits, best_pitch-start, params->pitch_bits); speex_bits_pack(bits, best_gain_index, 
params->gain_bits); /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/ for (i=0; i<nsf; i++) exc[i]=best_exc[i]; for (i=0; i<nsf; i++) target[i]=best_target[i]; return pitch; }
int main(int argc, char **argv) { int nb_samples, total_samples=0, nb_encoded; int c; int option_index = 0; char *inFile, *outFile; FILE *fin, *fout; short input[MAX_FRAME_SIZE]; spx_int32_t frame_size; int quiet=0; spx_int32_t vbr_enabled=0; spx_int32_t vbr_max=0; int abr_enabled=0; spx_int32_t vad_enabled=0; spx_int32_t dtx_enabled=0; int nbBytes; const SpeexMode *mode=NULL; int modeID = -1; void *st; SpeexBits bits; char cbits[MAX_FRAME_BYTES]; int with_skeleton = 0; struct option long_options[] = { {"wideband", no_argument, NULL, 0}, {"ultra-wideband", no_argument, NULL, 0}, {"narrowband", no_argument, NULL, 0}, {"vbr", no_argument, NULL, 0}, {"vbr-max-bitrate", required_argument, NULL, 0}, {"abr", required_argument, NULL, 0}, {"vad", no_argument, NULL, 0}, {"dtx", no_argument, NULL, 0}, {"quality", required_argument, NULL, 0}, {"bitrate", required_argument, NULL, 0}, {"nframes", required_argument, NULL, 0}, {"comp", required_argument, NULL, 0}, #ifdef USE_SPEEXDSP {"denoise", no_argument, NULL, 0}, {"agc", no_argument, NULL, 0}, #endif {"no-highpass", no_argument, NULL, 0}, {"skeleton",no_argument,NULL, 0}, {"help", no_argument, NULL, 0}, {"quiet", no_argument, NULL, 0}, {"le", no_argument, NULL, 0}, {"be", no_argument, NULL, 0}, {"8bit", no_argument, NULL, 0}, {"16bit", no_argument, NULL, 0}, {"stereo", no_argument, NULL, 0}, {"rate", required_argument, NULL, 0}, {"version", no_argument, NULL, 0}, {"version-short", no_argument, NULL, 0}, {"comment", required_argument, NULL, 0}, {"author", required_argument, NULL, 0}, {"title", required_argument, NULL, 0}, {"print-rate", no_argument, NULL, 0}, {0, 0, 0, 0} }; int print_bitrate=0; spx_int32_t rate=0; spx_int32_t size; int chan=1; int fmt=16; spx_int32_t quality=-1; float vbr_quality=-1; int lsb=1; ogg_stream_state os; ogg_stream_state so; /* ogg stream for skeleton bitstream */ ogg_page og; ogg_packet op; int bytes_written=0, ret, result; int id=-1; SpeexHeader header; int nframes=1; spx_int32_t complexity=3; 
const char* speex_version; char vendor_string[64]; char *comments; int comments_length; int close_in=0, close_out=0; int eos=0; spx_int32_t bitrate=0; double cumul_bits=0, enc_frames=0; char first_bytes[12]; int wave_input=0; spx_int32_t tmp; #ifdef USE_SPEEXDSP SpeexPreprocessState *preprocess = NULL; int denoise_enabled=0, agc_enabled=0; #endif int highpass_enabled=1; int output_rate=0; spx_int32_t lookahead = 0; speex_lib_ctl(SPEEX_LIB_GET_VERSION_STRING, (void*)&speex_version); snprintf(vendor_string, sizeof(vendor_string), "Encoded with Speex %s", speex_version); comment_init(&comments, &comments_length, vendor_string); /*Process command-line options*/ while(1) { c = getopt_long (argc, argv, "nwuhvV", long_options, &option_index); if (c==-1) break; switch(c) { case 0: if (strcmp(long_options[option_index].name,"narrowband")==0) { modeID = SPEEX_MODEID_NB; } else if (strcmp(long_options[option_index].name,"wideband")==0) { modeID = SPEEX_MODEID_WB; } else if (strcmp(long_options[option_index].name,"ultra-wideband")==0) { modeID = SPEEX_MODEID_UWB; } else if (strcmp(long_options[option_index].name,"vbr")==0) { vbr_enabled=1; } else if (strcmp(long_options[option_index].name,"vbr-max-bitrate")==0) { vbr_max=atoi(optarg); if (vbr_max<1) { fprintf (stderr, "Invalid VBR max bit-rate value: %d\n", vbr_max); exit(1); } } else if (strcmp(long_options[option_index].name,"abr")==0) { abr_enabled=atoi(optarg); if (!abr_enabled) { fprintf (stderr, "Invalid ABR value: %d\n", abr_enabled); exit(1); } } else if (strcmp(long_options[option_index].name,"vad")==0) { vad_enabled=1; } else if (strcmp(long_options[option_index].name,"dtx")==0) { dtx_enabled=1; } else if (strcmp(long_options[option_index].name,"quality")==0) { quality = atoi (optarg); vbr_quality=atof(optarg); } else if (strcmp(long_options[option_index].name,"bitrate")==0) { bitrate = atoi (optarg); } else if (strcmp(long_options[option_index].name,"nframes")==0) { nframes = atoi (optarg); if (nframes<1) nframes=1; 
if (nframes>10) nframes=10; } else if (strcmp(long_options[option_index].name,"comp")==0) { complexity = atoi (optarg); #ifdef USE_SPEEXDSP } else if (strcmp(long_options[option_index].name,"denoise")==0) { denoise_enabled=1; } else if (strcmp(long_options[option_index].name,"agc")==0) { agc_enabled=1; #endif } else if (strcmp(long_options[option_index].name,"no-highpass")==0) { highpass_enabled=0; } else if (strcmp(long_options[option_index].name,"skeleton")==0) { with_skeleton=1; } else if (strcmp(long_options[option_index].name,"help")==0) { usage(); exit(0); } else if (strcmp(long_options[option_index].name,"quiet")==0) { quiet = 1; } else if (strcmp(long_options[option_index].name,"version")==0) { version(); exit(0); } else if (strcmp(long_options[option_index].name,"version-short")==0) { version_short(); exit(0); } else if (strcmp(long_options[option_index].name,"print-rate")==0) { output_rate=1; } else if (strcmp(long_options[option_index].name,"le")==0) { lsb=1; } else if (strcmp(long_options[option_index].name,"be")==0) { lsb=0; } else if (strcmp(long_options[option_index].name,"8bit")==0) { fmt=8; } else if (strcmp(long_options[option_index].name,"16bit")==0) { fmt=16; } else if (strcmp(long_options[option_index].name,"stereo")==0) { chan=2; } else if (strcmp(long_options[option_index].name,"rate")==0) { rate=atoi (optarg); } else if (strcmp(long_options[option_index].name,"comment")==0) { if (!strchr(optarg, '=')) { fprintf (stderr, "Invalid comment: %s\n", optarg); fprintf (stderr, "Comments must be of the form name=value\n"); exit(1); } comment_add(&comments, &comments_length, NULL, optarg); } else if (strcmp(long_options[option_index].name,"author")==0) { comment_add(&comments, &comments_length, "author=", optarg); } else if (strcmp(long_options[option_index].name,"title")==0) { comment_add(&comments, &comments_length, "title=", optarg); } break; case 'n': modeID = SPEEX_MODEID_NB; break; case 'h': usage(); exit(0); break; case 'v': version(); 
exit(0); break; case 'V': print_bitrate=1; break; case 'w': modeID = SPEEX_MODEID_WB; break; case 'u': modeID = SPEEX_MODEID_UWB; break; case '?': usage(); exit(1); break; } } if (argc-optind!=2) { usage(); exit(1); } inFile=argv[optind]; outFile=argv[optind+1]; /*Initialize Ogg stream struct*/ srand(time(NULL)); if (ogg_stream_init(&os, rand())==-1) { fprintf(stderr,"Error: stream init failed\n"); exit(1); } if (with_skeleton && ogg_stream_init(&so, rand())==-1) { fprintf(stderr,"Error: stream init failed\n"); exit(1); } if (strcmp(inFile, "-")==0) { #if defined WIN32 || defined _WIN32 _setmode(_fileno(stdin), _O_BINARY); #elif defined OS2 _fsetmode(stdin,"b"); #endif fin=stdin; } else { fin = fopen(inFile, "rb"); if (!fin) { perror(inFile); exit(1); } close_in=1; } { if (fread(first_bytes, 1, 12, fin) != 12) { perror("short file"); exit(1); } if (strncmp(first_bytes,"RIFF",4)==0 || strncmp(first_bytes,"riff",4)==0) { if (read_wav_header(fin, &rate, &chan, &fmt, &size)==-1) exit(1); wave_input=1; lsb=1; /* CHECK: exists big-endian .wav ?? */ } } if (modeID==-1 && !rate) { /* By default, use narrowband/8 kHz */ modeID = SPEEX_MODEID_NB; rate=8000; } else if (modeID!=-1 && rate) { mode = speex_lib_get_mode (modeID); if (rate>48000) { fprintf (stderr, "Error: sampling rate too high: %d Hz, try down-sampling\n", rate); exit(1); } else if (rate>25000) { if (modeID != SPEEX_MODEID_UWB) { fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. I'll do it but I suggest you try ultra-wideband instead\n", mode->modeName , rate); } } else if (rate>12500) { if (modeID != SPEEX_MODEID_WB) { fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. I'll do it but I suggest you try wideband instead\n", mode->modeName , rate); } } else if (rate>=6000) { if (modeID != SPEEX_MODEID_NB) { fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. 
I'll do it but I suggest you try narrowband instead\n", mode->modeName , rate); } } else { fprintf (stderr, "Error: sampling rate too low: %d Hz\n", rate); exit(1); } } else if (modeID==-1) { if (rate>48000) { fprintf (stderr, "Error: sampling rate too high: %d Hz, try down-sampling\n", rate); exit(1); } else if (rate>25000) { modeID = SPEEX_MODEID_UWB; } else if (rate>12500) { modeID = SPEEX_MODEID_WB; } else if (rate>=6000) { modeID = SPEEX_MODEID_NB; } else { fprintf (stderr, "Error: Sampling rate too low: %d Hz\n", rate); exit(1); } } else if (!rate) { if (modeID == SPEEX_MODEID_NB) rate=8000; else if (modeID == SPEEX_MODEID_WB) rate=16000; else if (modeID == SPEEX_MODEID_UWB) rate=32000; } if (!quiet) if (rate!=8000 && rate!=16000 && rate!=32000) fprintf (stderr, "Warning: Speex is only optimized for 8, 16 and 32 kHz. It will still work at %d Hz but your mileage may vary\n", rate); if (!mode) mode = speex_lib_get_mode (modeID); speex_init_header(&header, rate, 1, mode); header.frames_per_packet=nframes; header.vbr=vbr_enabled; header.nb_channels = chan; { char *st_string="mono"; if (chan==2) st_string="stereo"; if (!quiet) fprintf (stderr, "Encoding %d Hz audio using %s mode (%s)\n", header.rate, mode->modeName, st_string); } /*fprintf (stderr, "Encoding %d Hz audio at %d bps using %s mode\n", header.rate, mode->bitrate, mode->modeName);*/ /*Initialize Speex encoder*/ st = speex_encoder_init(mode); if (strcmp(outFile,"-")==0) { #if defined WIN32 || defined _WIN32 _setmode(_fileno(stdout), _O_BINARY); #endif fout=stdout; } else { fout = fopen(outFile, "wb"); if (!fout) { perror(outFile); exit(1); } close_out=1; } speex_encoder_ctl(st, SPEEX_GET_FRAME_SIZE, &frame_size); speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &complexity); speex_encoder_ctl(st, SPEEX_SET_SAMPLING_RATE, &rate); if (quality >= 0) { if (vbr_enabled) { if (vbr_max>0) speex_encoder_ctl(st, SPEEX_SET_VBR_MAX_BITRATE, &vbr_max); speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_quality); } else 
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &quality); } if (bitrate) { if (quality >= 0 && vbr_enabled) fprintf (stderr, "Warning: --bitrate option is overriding --quality\n"); speex_encoder_ctl(st, SPEEX_SET_BITRATE, &bitrate); } if (vbr_enabled) { tmp=1; speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp); } else if (vad_enabled) { tmp=1; speex_encoder_ctl(st, SPEEX_SET_VAD, &tmp); } if (dtx_enabled) speex_encoder_ctl(st, SPEEX_SET_DTX, &tmp); if (dtx_enabled && !(vbr_enabled || abr_enabled || vad_enabled)) { fprintf (stderr, "Warning: --dtx is useless without --vad, --vbr or --abr\n"); } else if ((vbr_enabled || abr_enabled) && (vad_enabled)) { fprintf (stderr, "Warning: --vad is already implied by --vbr or --abr\n"); } if (with_skeleton) { fprintf (stderr, "Warning: Enabling skeleton output may cause some decoders to fail.\n"); } if (abr_enabled) { speex_encoder_ctl(st, SPEEX_SET_ABR, &abr_enabled); } speex_encoder_ctl(st, SPEEX_SET_HIGHPASS, &highpass_enabled); speex_encoder_ctl(st, SPEEX_GET_LOOKAHEAD, &lookahead); #ifdef USE_SPEEXDSP if (denoise_enabled || agc_enabled) { preprocess = speex_preprocess_state_init(frame_size, rate); speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_SET_DENOISE, &denoise_enabled); speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_SET_AGC, &agc_enabled); lookahead += frame_size; } #endif /* first packet should be the skeleton header. 
*/ if (with_skeleton) { add_fishead_packet(&so); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed skeleton (fishead) header to output stream\n"); exit(1); } else bytes_written += ret; } /*Write header*/ { int packet_size; op.packet = (unsigned char *)speex_header_to_packet(&header, &packet_size); op.bytes = packet_size; op.b_o_s = 1; op.e_o_s = 0; op.granulepos = 0; op.packetno = 0; ogg_stream_packetin(&os, &op); free(op.packet); while((result = ogg_stream_flush(&os, &og))) { if(!result) break; ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } op.packet = (unsigned char *)comments; op.bytes = comments_length; op.b_o_s = 0; op.e_o_s = 0; op.granulepos = 0; op.packetno = 1; ogg_stream_packetin(&os, &op); } /* fisbone packet should be write after all bos pages */ if (with_skeleton) { add_fisbone_packet(&so, os.serialno, &header); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed writing skeleton (fisbone )header to output stream\n"); exit(1); } else bytes_written += ret; } /* writing the rest of the speex header packets */ while((result = ogg_stream_flush(&os, &og))) { if(!result) break; ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } free(comments); /* write the skeleton eos packet */ if (with_skeleton) { add_eos_packet_to_stream(&so); if ((ret = flush_ogg_stream_to_file(&so, fout))) { fprintf (stderr,"Error: failed writing skeleton header to output stream\n"); exit(1); } else bytes_written += ret; } speex_bits_init(&bits); if (!wave_input) { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, first_bytes, NULL); } else { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size); } if (nb_samples==0) eos=1; 
total_samples += nb_samples; nb_encoded = -lookahead; /*Main encoding loop (one frame per iteration)*/ while (!eos || total_samples>nb_encoded) { id++; /*Encode current frame*/ if (chan==2) speex_encode_stereo_int(input, frame_size, &bits); #ifdef USE_SPEEXDSP if (preprocess) speex_preprocess(preprocess, input, NULL); #endif speex_encode_int(st, input, &bits); nb_encoded += frame_size; if (print_bitrate) { int tmp; char ch=13; speex_encoder_ctl(st, SPEEX_GET_BITRATE, &tmp); fputc (ch, stderr); cumul_bits += tmp; enc_frames += 1; if (!quiet) { if (vad_enabled || vbr_enabled || abr_enabled) fprintf (stderr, "Bitrate is use: %d bps (average %d bps) ", tmp, (int)(cumul_bits/enc_frames)); else fprintf (stderr, "Bitrate is use: %d bps ", tmp); if (output_rate) printf ("%d\n", tmp); } } if (wave_input) { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size); } else { nb_samples = read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, NULL); } if (nb_samples==0) { eos=1; } if (eos && total_samples<=nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; total_samples += nb_samples; if ((id+1)%nframes!=0) continue; speex_bits_insert_terminator(&bits); nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES); speex_bits_reset(&bits); op.packet = (unsigned char *)cbits; op.bytes = nbBytes; op.b_o_s = 0; /*Is this redundent?*/ if (eos && total_samples<=nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; op.granulepos = (id+1)*frame_size-lookahead; if (op.granulepos>total_samples) op.granulepos = total_samples; /*printf ("granulepos: %d %d %d %d %d %d\n", (int)op.granulepos, id, nframes, lookahead, 5, 6);*/ op.packetno = 2+id/nframes; ogg_stream_packetin(&os, &op); /*Write all new pages (most likely 0 or 1)*/ while (ogg_stream_pageout(&os,&og)) { ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } } if ((id+1)%nframes!=0) { while ((id+1)%nframes!=0) 
{ id++; speex_bits_pack(&bits, 15, 5); } nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES); op.packet = (unsigned char *)cbits; op.bytes = nbBytes; op.b_o_s = 0; op.e_o_s = 1; op.granulepos = (id+1)*frame_size-lookahead; if (op.granulepos>total_samples) op.granulepos = total_samples; op.packetno = 2+id/nframes; ogg_stream_packetin(&os, &op); } /*Flush all pages left to be written*/ while (ogg_stream_flush(&os, &og)) { ret = oe_write_page(&og, fout); if(ret != og.header_len + og.body_len) { fprintf (stderr,"Error: failed writing header to output stream\n"); exit(1); } else bytes_written += ret; } speex_encoder_destroy(st); speex_bits_destroy(&bits); ogg_stream_clear(&os); if (close_in) fclose(fin); if (close_out) fclose(fout); return 0; }
void AudioInput::encodeAudioFrame() { int iArg; ClientPlayer *p=ClientPlayer::get(g.uiSession); int i; float sum; short max; short *psSource; iFrameCounter++; if (! bRunning) { return; } sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psMic[i] * psMic[i]); dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakMic < -96.0f) dPeakMic = -96.0f; max = 1; for (i=0;i<iFrameSize;i++) max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max); dMaxMic = max; if (g.bEchoTest) { STACKVAR(float, fft, iFrameSize); STACKVAR(float, power, iFrameSize); float scale = 1.f / static_cast<float>(iFrameSize); for (i=0;i<iFrameSize;i++) fft[i] = static_cast<float>(psMic[i]) * scale; mumble_drft_forward(&fftTable, fft); float mp = 0.0f; int bin = 0; power[0]=power[1]=0.0f; for (i=2;i < iFrameSize / 2;i++) { power[i] = sqrtf(fft[2*i]*fft[2*i]+fft[2*i-1]*fft[2*i-1]); if (power[i] > mp) { bin = i; mp = power[i]; } } for (i=2;i< iFrameSize / 2;i++) { if (power[i] * 2 > mp) { if (i != bin) bin = 0; } } iBestBin = bin * 2; } if (iEchoChannels > 0) { sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]); dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakSpeaker < -96.0f) dPeakSpeaker = -96.0f; } else { dPeakSpeaker = 0.0; } QMutexLocker l(&qmSpeex); if (bResetProcessor) { if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, SAMPLE_RATE); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / 
static_cast<float>(g.s.iMinLoudness); iArg = lroundf(floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = g.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init(iFrameSize, iFrameSize*10); iArg = SAMPLE_RATE; speex_echo_ctl(sesEcho, SPEEX_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); jitter_buffer_reset(jb); qWarning("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } iFrames = 0; speex_bits_reset(&sbBits); bResetProcessor = false; } int iIsSpeech; if (sesEcho) { speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean); iIsSpeech=speex_preprocess_run(sppPreprocess, psClean); psSource = psClean; } else { iIsSpeech=speex_preprocess_run(sppPreprocess, psMic); psSource = psMic; } sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSource[i] * psSource[i]); float micLevel = sqrtf(sum / static_cast<float>(iFrameSize)); dPeakSignal=20.0f*log10f(micLevel / 32768.0f); if (dPeakSignal < -96.0f) dPeakSignal = -96.0f; spx_int32_t prob = 0; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob); fSpeechProb = static_cast<float>(prob) / 100.0f; float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f); if (level > g.s.fVADmax) iIsSpeech = 1; else if (level > g.s.fVADmin && bPreviousVoice) iIsSpeech = 1; else iIsSpeech = 0; if (! 
iIsSpeech) { iHoldFrames++; if (iHoldFrames < g.s.iVoiceHold) iIsSpeech=1; } else { iHoldFrames = 0; } if (g.s.atTransmit == Settings::Continous) iIsSpeech = 1; else if (g.s.atTransmit == Settings::PushToTalk) iIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush)); iIsSpeech = iIsSpeech || (g.iPushToTalk > 0) || (g.iAltSpeak > 0); if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && p->bMute) || g.bPushToMute) { iIsSpeech = 0; } if (iIsSpeech) { iSilentFrames = 0; } else { iSilentFrames++; if (iSilentFrames > 200) iFrameCounter = 0; } if (p) p->setTalking(iIsSpeech, (g.iAltSpeak > 0)); if (g.s.bPushClick && (g.s.atTransmit == Settings::PushToTalk)) { AudioOutputPtr ao = g.ao; if (iIsSpeech && ! bPreviousVoice && ao) ao->playSine(400.0f,1200.0f,5); else if (ao && !iIsSpeech && bPreviousVoice && ao) ao->playSine(620.0f,-1200.0f,5); } if (! iIsSpeech && ! bPreviousVoice) { iBitrate = 0; if (g.s.iIdleTime && ! g.s.bMute && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) { emit doMute(); tIdle.restart(); } return; } bPreviousVoice = iIsSpeech; tIdle.restart(); if (! iIsSpeech) { memset(psMic, 0, sizeof(short) * iFrameSize); } if (g.s.bTransmitPosition && g.p && ! g.bCenterPosition && (iFrames == 0) && g.p->fetch()) { QByteArray q; QDataStream ds(&q, QIODevice::WriteOnly); ds << g.p->fPosition[0]; ds << g.p->fPosition[1]; ds << g.p->fPosition[2]; speex_bits_pack(&sbBits, 13, 5); speex_bits_pack(&sbBits, q.size(), 4); const unsigned char *d=reinterpret_cast<const unsigned char*>(q.data()); for (i=0;i<q.size();i++) { speex_bits_pack(&sbBits, d[i], 8); } } speex_encode_int(esEncState, psSource, &sbBits); iFrames++; speex_encoder_ctl(esEncState, SPEEX_GET_BITRATE, &iBitrate); flushCheck(); }
static void close_output(void) { int i; char cbits[MAX_FRAME_BYTES]; Speex_ctx *ctx = speex_ctx; int nbBytes; int ret; if (ctx == NULL) return; if (dpm.fd < 0) return; /* Write last frame */ if (speex_ctx != NULL) { if ((ctx->ogg_packetid + 1) % ctx->nframes != 0) { while ((ctx->ogg_packetid + 1) % ctx->nframes != 0) { ctx->ogg_packetid++; speex_bits_pack(&ctx->bits, 15, 5); } nbBytes = speex_bits_write(&ctx->bits, cbits, MAX_FRAME_BYTES); ctx->op.packet = (unsigned char *)cbits; ctx->op.bytes = nbBytes; ctx->op.b_o_s = 0; ctx->op.e_o_s = 1; ctx->op.granulepos = (ctx->ogg_packetid + ctx->nframes) * ctx->frame_size; ctx->op.packetno = 2 + ctx->ogg_packetid / ctx->nframes; ogg_stream_packetin(&ctx->os, &ctx->op); } for (i = ctx->input_idx; i < ctx->frame_size * ctx->channels; i++) { /* left is zero-cleaned */ ctx->input[i] = 0; } if (ctx->channels == 2) speex_encode_stereo(ctx->input, ctx->frame_size, &ctx->bits); /* Encode the frame */ speex_encode(ctx->state, ctx->input, &ctx->bits); speex_bits_insert_terminator(&ctx->bits); /* Copy the bits to an array of char that can be written */ nbBytes = speex_bits_write(&ctx->bits, cbits, MAX_FRAME_BYTES); /* Flush all the bits in the struct so we can encode a new frame */ speex_bits_reset(&ctx->bits); /* ogg packet setup */ ctx->op.packet = (unsigned char *)cbits; ctx->op.bytes = nbBytes; ctx->op.b_o_s = 0; ctx->op.e_o_s = 1; ctx->op.granulepos = (ctx->ogg_packetid + ctx->nframes) * ctx->frame_size; ctx->op.packetno = 2 + ctx->ogg_packetid / ctx->nframes; ogg_stream_packetin(&ctx->os, &ctx->op); /* Write all new pages (most likely 0 or 1) */ while (ogg_stream_pageout(&ctx->os, &ctx->og)) { ret = oe_write_page(&ctx->og, dpm.fd); if (ret != ctx->og.header_len + ctx->og.body_len) { ctl->cmsg(CMSG_ERROR, VERB_NORMAL, "failed writing header to output stream"); return; } else ctx->out_bytes += ret; } ogg_stream_clear(&speex_ctx->os); speex_bits_destroy(&speex_ctx->bits); speex_encoder_destroy(speex_ctx->state); close(dpm.fd); 
dpm.fd = -1; free(speex_ctx->input); ctl->cmsg(CMSG_INFO, VERB_NORMAL, "Wrote %lu/%lu bytes(%g%% compressed)", ctx->out_bytes, ctx->in_bytes, ((double)ctx->out_bytes / (double)ctx->in_bytes)) * 100.; speex_ctx->input = NULL; free(speex_ctx); speex_ctx = NULL; } return; }
/*! \brief convert work buffer and produce output frame */ static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt) { struct speex_coder_pvt *tmp = pvt->pvt; int is_speech=1; int datalen = 0; /* output bytes */ int samples = 0; /* output samples */ /* We can't work on anything less than a frame in size */ if (pvt->samples < tmp->framesize) return NULL; speex_bits_reset(&tmp->bits); while (pvt->samples >= tmp->framesize) { #ifdef _SPEEX_TYPES_H /* Preprocess audio */ if (preproc) is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL); /* Encode a frame of data */ if (is_speech) { /* If DTX enabled speex_encode returns 0 during silence */ is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx; } else { /* 5 zeros interpreted by Speex as silence (submode 0) */ speex_bits_pack(&tmp->bits, 0, 5); } #else { float fbuf[1024]; int x; /* Convert to floating point */ for (x = 0; x < tmp->framesize; x++) fbuf[x] = tmp->buf[samples + x]; /* Encode a frame of data */ is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx; } #endif samples += tmp->framesize; pvt->samples -= tmp->framesize; } /* Move the data at the end of the buffer to the front */ if (pvt->samples) memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2); /* Use AST_FRAME_CNG to signify the start of any silence period */ if (is_speech) { tmp->silent_state = 0; } else { if (tmp->silent_state) { return NULL; } else { tmp->silent_state = 1; speex_bits_reset(&tmp->bits); memset(&pvt->f, 0, sizeof(pvt->f)); pvt->f.frametype = AST_FRAME_CNG; pvt->f.samples = samples; /* XXX what now ? format etc... */ } } /* Terminate bit stream */ speex_bits_pack(&tmp->bits, 15, 5); datalen = speex_bits_write(&tmp->bits, pvt->outbuf.c, pvt->t->buf_size); return ast_trans_frameout(pvt, datalen, samples); }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,m,q; const signed char *shape_cb; int shape_cb_size = 32, subvect_size = 10; int best_index; spx_word32_t best_dist; spx_word16_t resp[320]; spx_word16_t *resp2 = resp; spx_word32_t E[32]; spx_word16_t t[40]; spx_sig_t e[40]; shape_cb=exc_10_32_table; /* FIXME: Do we still need to copy the target? */ SPEEX_COPY(t, target, nsf); //compute_weighted_codebook { int i, k; spx_word16_t shape[10]; for (i=0;i<shape_cb_size;i++) { spx_word16_t *res; res = resp+i*subvect_size; for (k=0;k<subvect_size;k++) shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k]; E[i]=0; /* Compute codeword response using convolution with impulse response */ { spx_word32_t resj; spx_word16_t res16; // 0 resj = MULT16_16(shape[0],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[0] = res16; //++++++++++++++++++++++++++ // 1 resj = MULT16_16(shape[0],r[1]); resj = MAC16_16(resj,shape[1],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[1] = res16; //++++++++++++++++++++++++++ // 2 resj = MULT16_16(shape[0],r[2]); resj = MAC16_16(resj,shape[1],r[1]); resj = MAC16_16(resj,shape[2],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[2] = res16; //++++++++++++++++++++++++++ // 3 resj = MULT16_16(shape[0],r[3]); resj = MAC16_16(resj,shape[1],r[2]); resj = MAC16_16(resj,shape[2],r[1]); resj = MAC16_16(resj,shape[3],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute 
codeword energy E[i]=MAC16_16(E[i],res16,res16); res[3] = res16; //++++++++++++++++++++++++++ // 4 resj = MULT16_16(shape[0],r[4]); resj = MAC16_16(resj,shape[1],r[3]); resj = MAC16_16(resj,shape[2],r[2]); resj = MAC16_16(resj,shape[3],r[1]); resj = MAC16_16(resj,shape[4],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[4] = res16; //++++++++++++++++++++++++++ // 5 resj = MULT16_16(shape[0],r[5]); resj = MAC16_16(resj,shape[1],r[4]); resj = MAC16_16(resj,shape[2],r[3]); resj = MAC16_16(resj,shape[3],r[2]); resj = MAC16_16(resj,shape[4],r[1]); resj = MAC16_16(resj,shape[5],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[5] = res16; //++++++++++++++++++++++++++ // 6 resj = MULT16_16(shape[0],r[6]); resj = MAC16_16(resj,shape[1],r[5]); resj = MAC16_16(resj,shape[2],r[4]); resj = MAC16_16(resj,shape[3],r[3]); resj = MAC16_16(resj,shape[4],r[2]); resj = MAC16_16(resj,shape[5],r[1]); resj = MAC16_16(resj,shape[6],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[6] = res16; //++++++++++++++++++++++++++ // 7 resj = MULT16_16(shape[0],r[7]); resj = MAC16_16(resj,shape[1],r[6]); resj = MAC16_16(resj,shape[2],r[5]); resj = MAC16_16(resj,shape[3],r[4]); resj = MAC16_16(resj,shape[4],r[3]); resj = MAC16_16(resj,shape[5],r[2]); resj = MAC16_16(resj,shape[6],r[1]); resj = MAC16_16(resj,shape[7],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[7] = res16; //++++++++++++++++++++++++++ // 8 resj = MULT16_16(shape[0],r[8]); resj = MAC16_16(resj,shape[1],r[7]); resj = MAC16_16(resj,shape[2],r[6]); resj = MAC16_16(resj,shape[3],r[5]); resj = MAC16_16(resj,shape[4],r[4]); resj = MAC16_16(resj,shape[5],r[3]); resj = MAC16_16(resj,shape[6],r[2]); resj = MAC16_16(resj,shape[7],r[1]); resj = MAC16_16(resj,shape[8],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // 
Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[8] = res16; //++++++++++++++++++++++++++ // 9 resj = MULT16_16(shape[0],r[9]); resj = MAC16_16(resj,shape[1],r[8]); resj = MAC16_16(resj,shape[2],r[7]); resj = MAC16_16(resj,shape[3],r[6]); resj = MAC16_16(resj,shape[4],r[5]); resj = MAC16_16(resj,shape[5],r[4]); resj = MAC16_16(resj,shape[6],r[3]); resj = MAC16_16(resj,shape[7],r[2]); resj = MAC16_16(resj,shape[8],r[1]); resj = MAC16_16(resj,shape[9],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[9] = res16; //++++++++++++++++++++++++++ } } } for (i=0;i<4;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,5); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; g=sign*shape_cb[rind*subvect_size+m]; target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); }
/*
 * Split-codebook shape (and optional sign) search that keeps several
 * candidate paths per sub-vector.
 *
 * The target is processed as nb_subvect sub-vectors of subvect_size samples.
 * For each sub-vector, the N best codewords are looked up for every candidate
 * path kept so far, and the N combinations with the smallest accumulated
 * distance survive.  After the last sub-vector the indices of path 0 (the
 * list is kept sorted by distance, so this is the smallest one) are packed
 * into `bits`, the selected codewords are added to `exc` and, when
 * update_target is non-zero, their filtered contribution is subtracted from
 * `target`.
 *
 * N is derived from `complexity`; when it comes out as 1 the call is handed
 * over to split_cb_search_shape_sign_N1() and nothing else is done here.
 *
 * NOTE(review): e[] is only filled for nb_subvect*subvect_size samples while
 * all nsf samples are added to exc -- this assumes
 * nb_subvect*subvect_size == nsf; confirm against the codebook tables.
 */
void split_cb_search_shape_sign(
spx_word16_t target[],			/* target vector */
spx_coef_t ak[],			/* LPCs for this subframe */
spx_coef_t awk1[],			/* Weighted LPCs for this subframe */
spx_coef_t awk2[],			/* Weighted LPCs for this subframe */
const void *par,                      /* Codebook/search parameters (cast to split_cb_params) */
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,                 /* excitation; the selected codewords are added to it */
spx_word16_t *r,                /* passed to compute_weighted_codebook()/target_update(); presumably the impulse response -- TODO confirm */
SpeexBits *bits,                /* receives one packed index per sub-vector */
char *stack,                    /* handed on to the helper routines called below */
int   complexity,               /* controls N, the number of candidates kept */
int   update_target             /* non-zero: subtract the coded contribution from target[] */
)
{
   int i,j,k,m,n,q;
   VARDECL(spx_word16_t *resp);
#ifdef _USE_SSE
   VARDECL(__m128 *resp2);
   VARDECL(__m128 *E);
#else
   spx_word16_t *resp2;
   VARDECL(spx_word32_t *E);
#endif
   VARDECL(spx_word16_t *t);
   VARDECL(spx_sig_t *e);
   VARDECL(spx_word16_t *tmp);
   VARDECL(spx_word32_t *ndist);
   VARDECL(spx_word32_t *odist);
   VARDECL(int *itmp);
   VARDECL(spx_word16_t **ot2);
   VARDECL(spx_word16_t **nt2);
   spx_word16_t **ot, **nt;
   VARDECL(int **nind);
   VARDECL(int **oind);
   VARDECL(int *ind);
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   const split_cb_params *params;
   int N=2;
   VARDECL(int *best_index);
   VARDECL(spx_word32_t *best_dist);
   VARDECL(int *best_nind);
   VARDECL(int *best_ntarget);
   int have_sign;

   /* N = 2/3 of the complexity (capped at 10), but never less than 1 */
   N=complexity;
   if (N>10)
      N=10;
   /* Complexity isn't as important for the codebooks as it is for the pitch */
   N=(2*N)/3;
   if (N<1)
      N=1;
   /* A single candidate needs no n-best bookkeeping: use the dedicated routine */
   if (N==1)
   {
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
      return;
   }
   ALLOC(ot2, N, spx_word16_t*);
   ALLOC(nt2, N, spx_word16_t*);
   ALLOC(oind, N, int*);
   ALLOC(nind, N, int*);

   params = (const split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
   ALLOC(E, shape_cb_size>>2, __m128);
#else
   /* Without SSE the search routines read the responses straight from resp */
   resp2 = resp;
   ALLOC(E, shape_cb_size, spx_word32_t);
#endif
   ALLOC(t, nsf, spx_word16_t);
   ALLOC(e, nsf, spx_sig_t);
   ALLOC(ind, nb_subvect, int);

   /* tmp holds 2*N target buffers of nsf samples: the "old" and "new" target
      of candidate i are stored next to each other */
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot2[i]=tmp+2*i*nsf;
      nt2[i]=tmp+(2*i+1)*nsf;
   }
   ot=ot2;
   nt=nt2;
   ALLOC(best_index, N, int);
   ALLOC(best_dist, N, spx_word32_t);
   ALLOC(best_nind, N, int);
   ALLOC(best_ntarget, N, int);
   ALLOC(ndist, N, spx_word32_t);
   ALLOC(odist, N, spx_word32_t);

   /* itmp holds 2*N index lists of nb_subvect entries, laid out like tmp */
   ALLOC(itmp, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp+2*i*nb_subvect;
      oind[i]=itmp+(2*i+1)*nb_subvect;
   }

   /* Every candidate starts from the same copy of the target */
   SPEEX_COPY(t, target, nsf);

   for (j=0;j<N;j++)
      SPEEX_COPY(&ot[j][0], t, nsf);

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   /* No distance has been accumulated yet */
   for (j=0;j<N;j++)
      odist[j]=0;

   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      for (j=0;j<N;j++)
         ndist[j]=VERY_LARGE32;
      /* This is not strictly necessary, but it provides an additional safety
         to prevent crashes in case something goes wrong in the previous
         steps (e.g. NaNs) */
      for (j=0;j<N;j++)
         best_nind[j] = best_ntarget[j] = 0;
      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /* tener: half the energy of the current sub-vector of candidate j */
         spx_word32_t tener = 0;
         for (m=0;m<subvect_size;m++)
            tener = MAC16_16(tener, x[m],x[m]);
#ifdef FIXED_POINT
         tener = SHR32(tener,1);
#else
         tener *= .5;
#endif
         /*Find new n-best based on previous n-best j*/
         /* When DISABLE_WIDEBAND is defined, vq_nbest() is used even if have_sign is set */
#ifndef DISABLE_WIDEBAND
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
#endif /* DISABLE_WIDEBAND */
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            /* Compute total distance (including previous sub-vectors) */
            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);

            /*update n-best list*/
            /* Insert err into the ascending list ndist[], dropping the last entry */
            if (err<ndist[N-1])
            {
               for (m=0;m<N;m++)
               {
                  if (err < ndist[m])
                  {
                     for (n=N-1;n>m;n--)
                     {
                        ndist[n] = ndist[n-1];
                        best_nind[n] = best_nind[n-1];
                        best_ntarget[n] = best_ntarget[n-1];
                     }
                     /* n is equal to m here, so they're interchangeable */
                     ndist[m] = err;
                     best_nind[n] = best_index[k];
                     best_ntarget[n] = j;
                     break;
                  }
               }
            }
         }
         /* For the first sub-vector all ot[j] are identical copies of the
            target, so searching candidate 0 is enough */
         if (i==0)
            break;
      }
      for (j=0;j<N;j++)
      {
         /*previous target (we don't care what happened before)*/
         for (m=(i+1)*subvect_size;m<nsf;m++)
            nt[j][m]=ot[best_ntarget[j]][m];

         /* New code: update the rest of the target only if it's worth it */
         for (m=0;m<subvect_size;m++)
         {
            spx_word16_t g;
            int rind;
            spx_word16_t sign=1;
            /* Indices at or above shape_cb_size select the negated codeword */
            rind = best_nind[j];
            if (rind>=shape_cb_size)
            {
               sign=-1;
               rind-=shape_cb_size;
            }

            q=subvect_size-m;
#ifdef FIXED_POINT
            g=sign*shape_cb[rind*subvect_size+m];
#else
            g=sign*0.03125*shape_cb[rind*subvect_size+m];
#endif
            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
         }

         /* New path j = path of its parent candidate plus the index chosen
            for this sub-vector */
         for (q=0;q<nb_subvect;q++)
            nind[j][q]=oind[best_ntarget[j]][q];
         nind[j][i]=best_nind[j];
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   /* Each index is written with shape_bits+have_sign bits */
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }

   /* Put everything back together */
   /* e[] receives the selected codewords: shifted left by SIG_SHIFT-5 in
      fixed point, multiplied by 0.03125 (1/32) otherwise */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }
   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]=ADD32(exc[j],e[j]);

   /* Update target: only update target if necessary */
   if (update_target)
   {
      VARDECL(spx_word16_t *r2);
      ALLOC(r2, nsf, spx_word16_t);
      /* r2 = e scaled down via PSHR32(.,6), then filtered in place by
         syn_percep_zero16() before being subtracted from the target */
      for (j=0;j<nsf;j++)
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
      for (j=0;j<nsf;j++)
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
   }
}
int SpeexEncoder::EncodeFromFile(FILE *fin) { int id = -1; int nframes = 1; int lsb = 1; int fmt = 16; spx_int32_t size; nb_samples = read_samples(fin, frame_size, fmt, chan, lsb, input, &size); if (nb_samples == 0) eos = 1; total_samples += nb_samples; nb_encoded = -lookahead; /*Main encoding loop (one frame per iteration)*/ while (!eos || total_samples > nb_encoded) { id++; /*Encode current frame*/ if (chan == 2) speex_encode_stereo_int(input, frame_size, &bits); if (preprocess) speex_preprocess(preprocess, input, NULL); speex_encode_int(st, input, &bits); nb_encoded += frame_size; nb_samples = read_samples(fin, frame_size, fmt, chan, lsb, input, NULL); if (nb_samples == 0) { eos = 1; } if (eos && total_samples <= nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; total_samples += nb_samples; if ((id + 1) % nframes != 0) continue; speex_bits_insert_terminator(&bits); nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES); speex_bits_reset(&bits); op.packet = (unsigned char *)cbits; op.bytes = nbBytes; op.b_o_s = 0; /*Is this redundent?*/ if (eos && total_samples <= nb_encoded) op.e_o_s = 1; else op.e_o_s = 0; op.granulepos = (id + 1)*frame_size - lookahead; if (op.granulepos > total_samples) op.granulepos = total_samples; //printf("granulepos: %d %d %d %d %d %d\n", (int)op.granulepos, id, 2 + id / nframes, lookahead, 5, 6); op.packetno = 2 + id / nframes; ogg_stream_packetin(&os, &op); /*Write all new pages (most likely 0 or 1)*/ while (ogg_stream_pageout(&os, &og)) { ret = oe_write_page(&og, fout); if (ret != og.header_len + og.body_len) { fprintf(stderr, "Error: failed writing header to output stream\n"); fclose(fin); exit(1); } else bytes_written += ret; } } if ((id + 1) % nframes != 0) { while ((id + 1) % nframes != 0) { id++; speex_bits_pack(&bits, 15, 5); } nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES); op.packet = (unsigned char *)cbits; op.bytes = nbBytes; op.b_o_s = 0; op.e_o_s = 1; op.granulepos = (id + 1)*frame_size - lookahead; if 
(op.granulepos > total_samples) op.granulepos = total_samples; op.packetno = 2 + id / nframes; ogg_stream_packetin(&os, &op); } /*Flush all pages left to be written*/ while (ogg_stream_flush(&os, &og)) { ret = oe_write_page(&og, fout); if (ret != og.header_len + og.body_len) { fprintf(stderr, "Error: failed writing header to output stream\n"); fclose(fin); exit(1); } else bytes_written += ret; } int durationInSec = frame_size *id / rate; fprintf(stderr, "Duration: %d\n", durationInSec); int minutes = (int)(frame_size *id / rate)/60; int seconds = (frame_size *id / rate) - (minutes * 60); fprintf(stderr, "Duration Minutes: %d\n", minutes); fprintf(stderr, "Duration Seconds: %d\n", seconds); char duration[5]; sprintf(duration, "%05d", durationInSec); fseek(fout, durationIndex, SEEK_SET); fputs(duration, fout); fclose(fin); return 0; }