/*
 * Close the output file of the utterance that just ended and print its
 * one-line summary to stdout.
 *
 * out             - open output file; closed by this call.
 * utt_index       - utterance number shown in the summary.
 * path            - name of the file that was written.
 * start_frame     - frame index at which the utterance started.
 * frames_per_sec  - frame rate used to convert start_frame to seconds.
 * n_samples       - number of samples written for this utterance.
 * samples_per_sec - sampling rate used to convert n_samples to seconds.
 */
static void
seg_finish_utterance(FILE *out, int utt_index, const char *path,
                     int32 start_frame, int32 frames_per_sec,
                     int n_samples, int samples_per_sec)
{
    fclose(out);
    printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
           utt_index,
           path,
           ((double) start_frame) / frames_per_sec,
           n_samples,
           ((double) n_samples) / samples_per_sec);
    fflush(stdout);
}

/*
 * Build the output file name for utterance `utt_index` into `path`
 * (capacity `path_size` bytes) and open it.
 *
 * When `singlefile` is unset every utterance gets its own numbered file,
 * opened with "wb"; otherwise all utterances are appended ("ab") to one
 * file.  Returns the opened stream.
 */
static FILE *
seg_open_utterance(char *path, size_t path_size, int utt_index)
{
    FILE *out;
    const char *mode;
    int len;

    if (!singlefile) {
        len = snprintf(path, path_size, "%s%04d.raw", infile_path, utt_index);
        mode = "wb";
    }
    else {
        len = snprintf(path, path_size, "%s.raw", infile_path);
        mode = "ab";
    }

    /* snprintf never overruns `path`, but a truncated name would make us
     * write to the wrong file, so treat truncation as an error.
     * E_FATAL_SYSTEM is the only error facility this code already uses;
     * the system error text it appends is not meaningful for this case. */
    if (len < 0 || (size_t) len >= path_size)
        E_FATAL_SYSTEM("Output file name for '%s' is too long", infile_path);

    if ((out = fopen(path, mode)) == NULL)
        E_FATAL_SYSTEM("Failed to open '%s' for writing", path);

    return out;
}

/*
 * Read audio block by block with read_audio(), feed it to the front end
 * `fe`, and use the state returned by fe_get_vad_state() to cut the stream
 * into utterances: a 0 -> non-zero transition opens a new output file, a
 * non-zero -> 0 transition closes it and prints a summary line.
 *
 * Uses the file-level `config`, `fe`, `singlefile` and `infile_path`.
 */
void
segment_audio(void)
{
    FILE *file = NULL;
    int16 pcm_buf[BLOCKSIZE];
    mfcc_t **cep_buf;
    /* NOTE(review): this must be a pointer (it is passed as the output
     * sample buffer and given to fwrite), but nothing in this function
     * allocates it.  Whether fe_process_frames_ext() expects a caller-owned
     * buffer here, and of what size, has to be confirmed against the
     * front-end API; until then samples are only written when it is
     * non-NULL. */
    int16 *voiced_buf = NULL;
    int32 voiced_nsamps = 0, out_frameidx = 0, uttstart = 0;
    char file_name[1024];
    uint8 cur_vad_state = 0, vad_state, writing = 0;
    int uttno = 0, uttlen = 0, sample_rate;
    int32 nframes, nframes_tmp;
    /* int32 rather than int16: these are computed as int32 values and
     * should not be silently narrowed. */
    int32 frame_size, frame_shift, frame_rate;
    size_t k;

    sample_rate = (int) cmd_ln_float32_r(config, "-samprate");
    frame_rate = cmd_ln_int32_r(config, "-frate");
    frame_size =
        (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5);
    /* Divide in floating point so that the + 0.5 actually rounds; with
     * integer division the quotient was already truncated. */
    frame_shift = (int32) ((double) sample_rate / frame_rate + 0.5);
    nframes = (BLOCKSIZE - frame_size) / frame_shift;
    cep_buf =
        (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe),
                                  sizeof(mfcc_t));

    fe_start_stream(fe);
    fe_start_utt(fe);
    while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) {
        int16 const *pcm_buf_tmp = &pcm_buf[0];

        /* k and pcm_buf_tmp are passed by address and the loop runs until
         * k reaches zero, i.e. until the whole block has been consumed. */
        while (k) {
            nframes_tmp = nframes;
            fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf,
                                  &nframes_tmp, voiced_buf,
                                  &voiced_nsamps, &out_frameidx);
            if (out_frameidx > 0) {
                uttstart = out_frameidx;
            }
            vad_state = fe_get_vad_state(fe);

            if (!cur_vad_state && vad_state) {
                /* silence->speech transition, time to start new file */
                uttno++;
                file = seg_open_utterance(file_name, sizeof(file_name), uttno);
                writing = 1;
            }

            if (writing && file && voiced_buf != NULL && voiced_nsamps > 0) {
                size_t n_written = fwrite(voiced_buf, sizeof(int16),
                                          (size_t) voiced_nsamps, file);
                if (n_written != (size_t) voiced_nsamps)
                    E_FATAL_SYSTEM("Failed to write to '%s'", file_name);
                uttlen += voiced_nsamps;
            }

            if (cur_vad_state && !vad_state) {
                /* speech -> silence transition, time to finish file */
                seg_finish_utterance(file, uttno, file_name, uttstart,
                                     frame_rate, uttlen, sample_rate);
                file = NULL;    /* do not keep a dangling stream around */
                fe_end_utt(fe, cep_buf[0], &nframes_tmp);
                writing = 0;
                uttlen = 0;
                voiced_nsamps = 0;
                fe_start_utt(fe);
            }
            cur_vad_state = vad_state;
        }
    }

    /* Input ended while an utterance was still open: close and report it. */
    if (writing) {
        seg_finish_utterance(file, uttno, file_name, uttstart,
                             frame_rate, uttlen, sample_rate);
        file = NULL;
    }
    fe_end_utt(fe, cep_buf[0], &nframes);
    ckd_free_2d(cep_buf);
}
/* Example #2 */
/*
 * Report the current voice-activity state of the decoder's front end.
 *
 * ps - decoder whose acoustic model front end (ps->acmod->fe) is queried.
 *
 * Returns whatever fe_get_vad_state() reports for that front end.
 */
uint8
ps_get_in_speech(ps_decoder_t *ps)
{
    uint8 in_speech;

    in_speech = fe_get_vad_state(ps->acmod->fe);
    return in_speech;
}