void segment_audio() { FILE *file; int16 pcm_buf[BLOCKSIZE]; mfcc_t **cep_buf; int16 voiced_buf = NULL; int32 voiced_nsamps, out_frameidx, uttstart = 0; char file_name[1024]; uint8 cur_vad_state, vad_state, writing; int uttno, uttlen, sample_rate; int32 nframes, nframes_tmp; int16 frame_size, frame_shift, frame_rate; size_t k; sample_rate = (int) cmd_ln_float32_r(config, "-samprate"); frame_rate = cmd_ln_int32_r(config, "-frate"); frame_size = (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5); frame_shift = (int32) (sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5); nframes = (BLOCKSIZE - frame_size) / frame_shift; cep_buf = (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe), sizeof(mfcc_t)); uttno = 0; uttlen = 0; cur_vad_state = 0; voiced_nsamps = 0; writing = 0; file = NULL; fe_start_stream(fe); fe_start_utt(fe); while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) { int16 const *pcm_buf_tmp; pcm_buf_tmp = &pcm_buf[0]; while (k) { nframes_tmp = nframes; fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf, &nframes_tmp, voiced_buf, &voiced_nsamps, &out_frameidx); if (out_frameidx > 0) { uttstart = out_frameidx; } vad_state = fe_get_vad_state(fe); if (!cur_vad_state && vad_state) { /* silence->speech transition, time to start new file */ uttno++; if (!singlefile) { sprintf(file_name, "%s%04d.raw", infile_path, uttno); if ((file = fopen(file_name, "wb")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } else { sprintf(file_name, "%s.raw", infile_path); if ((file = fopen(file_name, "ab")) == NULL) E_FATAL_SYSTEM("Failed to open '%s' for writing", file_name); } writing = 1; } if (writing && file && voiced_nsamps > 0) { fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file); uttlen += voiced_nsamps; } if (cur_vad_state && !vad_state) { /* speech -> silence transition, time to finish file */ fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, ((double) uttstart) / 
frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); fe_end_utt(fe, cep_buf[0], &nframes_tmp); writing = 0; uttlen = 0; voiced_nsamps = 0; fe_start_utt(fe); } cur_vad_state = vad_state; } } if (writing) { fclose(file); printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n", uttno, file_name, ((double) uttstart) / frame_rate, uttlen, ((double) uttlen) / sample_rate); fflush(stdout); } fe_end_utt(fe, cep_buf[0], &nframes); ckd_free_2d(cep_buf); }
/**
 * Query the voice activity state of a decoder's front end.
 *
 * @param ps Decoder to query.  It is dereferenced as ps->acmod->fe without
 *           any NULL checks, so ps and ps->acmod must be valid.
 * @return The value of fe_get_vad_state() for that front end, as a uint8
 *         (presumably non-zero while speech is detected -- confirm against
 *         the front end documentation).
 */
uint8 ps_get_in_speech(ps_decoder_t *ps) {
    uint8 vad_state = fe_get_vad_state(ps->acmod->fe);

    return vad_state;
}