Esempio n. 1
0
int32
fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
{
    /* Process any remaining data, not very accurate for the VAD */
    *nframes = 0;
    if (fe->num_overflow_samps > 0) {
        fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
        fe_write_frame(fe, cepvector, FALSE);
        if (fe->vad_data->in_speech)
            *nframes = 1;
    }

    /* reset overflow buffers... */
    fe->num_overflow_samps = 0;
    fe->start_flag = 0;

    return 0;
}
Esempio n. 2
0
int32
fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
{
    /* Process any remaining data. */
    if (fe->num_overflow_samps > 0) {
        fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
        *nframes = fe_write_frame(fe, cepvector);
    }
    else {
        *nframes = 0;
    }

    /* reset overflow buffers... */
    fe->num_overflow_samps = 0;
    fe->start_flag = 0;

    return 0;
}
Esempio n. 3
0
int32
fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes)
{
    /* Process any remaining data. */
    *nframes = 0;
    if (fe->num_overflow_samps > 0) {
        fe_read_frame(fe, fe->overflow_samps, fe->num_overflow_samps);
        fe_write_frame(fe, cepvector);
        if (!fe->vad_data->state_changed && fe->vad_data->global_state)
            (*nframes)++;
    }

    /* reset overflow buffers... */
    fe->num_overflow_samps = 0;
    fe->start_flag = 0;

    return 0;
}
Esempio n. 4
0
int 
fe_process_frames_ext(fe_t *fe,
                  int16 const **inout_spch,
                  size_t *inout_nsamps,
                  mfcc_t **buf_cep,
                  int32 *inout_nframes,
                  int16 *voiced_spch,
                  int32 *voiced_spch_nsamps,
                  int32 *out_frameidx)
{
    int outidx, n_overflow, orig_n_overflow;
    int16 const *orig_spch;
    size_t orig_nsamps;
    
    /* The logic here is pretty complex, please be careful with modifications */

    /* FIXME: Dump PCM data if needed */

    /* In the special case where there is no output buffer, return the
     * maximum number of frames which would be generated. */
    if (buf_cep == NULL) {
        if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
            *inout_nframes = 0;
        else 
            *inout_nframes = 1
                + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
                   / fe->frame_shift);
        if (!fe->vad_data->in_speech)
            *inout_nframes += fe_prespch_ncep(fe->vad_data->prespch_buf);
        return *inout_nframes;
    }

    if (out_frameidx)
        *out_frameidx = 0;

    /* Are there not enough samples to make at least 1 frame? */
    if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
        if (*inout_nsamps > 0) {
            /* Append them to the overflow buffer. */
            memcpy(fe->overflow_samps + fe->num_overflow_samps,
                   *inout_spch, *inout_nsamps * (sizeof(int16)));
            fe->num_overflow_samps += *inout_nsamps;
            /* Update input-output pointers and counters. */
            *inout_spch += *inout_nsamps;
            *inout_nsamps = 0;
        }
        /* We produced no frames of output, sorry! */
        *inout_nframes = 0;
        return 0;
    }

    /* Can't write a frame?  Then do nothing! */
    if (*inout_nframes < 1) {
        *inout_nframes = 0;
        return 0;
    }

    /* Index of output frame. */
    outidx = 0;

    /* Try to read from prespeech buffer */
    if (fe->vad_data->in_speech && fe_prespch_ncep(fe->vad_data->prespch_buf) > 0) {
    	outidx = fe_copy_from_prespch(fe, inout_nframes, buf_cep, outidx);
        if ((*inout_nframes) < 1) {
            /* mfcc buffer is filled from prespeech buffer */
            *inout_nframes = outidx;
            return 0;
        }
    }

    /* Keep track of the original start of the buffer. */
    orig_spch = *inout_spch;
    orig_nsamps = *inout_nsamps;
    orig_n_overflow = fe->num_overflow_samps;

    /* Start processing, taking care of any incoming overflow. */
    if (fe->num_overflow_samps > 0) {
        int offset = fe->frame_size - fe->num_overflow_samps;
        /* Append start of spch to overflow samples to make a full frame. */
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               *inout_spch, offset * sizeof(**inout_spch));
        fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
        /* Update input-output pointers and counters. */
        *inout_spch += offset;
        *inout_nsamps -= offset;
    } else {
        fe_read_frame(fe, *inout_spch, fe->frame_size);
        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_size;
        *inout_nsamps -= fe->frame_size;
    }

    fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);
    outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);

    /* Process all remaining frames. */
    while (*inout_nframes > 0 && *inout_nsamps >= (size_t)fe->frame_shift) {
        fe_shift_frame(fe, *inout_spch, fe->frame_shift);
        fe_write_frame(fe, buf_cep[outidx], voiced_spch != NULL);

	outidx = fe_check_prespeech(fe, inout_nframes, buf_cep, outidx, out_frameidx, inout_nsamps, orig_nsamps);

        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_shift;
        *inout_nsamps -= fe->frame_shift;
    }

    /* How many relevant overflow samples are there left? */
    if (fe->num_overflow_samps <= 0) {
        /* Maximum number of overflow samples past *inout_spch to save. */
        n_overflow = *inout_nsamps;
        if (n_overflow > fe->frame_shift)
            n_overflow = fe->frame_shift;
        fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
        /* Make sure this isn't an illegal read! */
        if (fe->num_overflow_samps > *inout_spch - orig_spch)
            fe->num_overflow_samps = *inout_spch - orig_spch;
        fe->num_overflow_samps += n_overflow;
        if (fe->num_overflow_samps > 0) {
            memcpy(fe->overflow_samps,
                   *inout_spch - (fe->frame_size - fe->frame_shift),
                   fe->num_overflow_samps * sizeof(**inout_spch));
            /* Update the input pointer to cover this stuff. */
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    } else {
        /* There is still some relevant data left in the overflow buffer. */
        /* Shift existing data to the beginning. */
        memmove(fe->overflow_samps,
                fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
                fe->num_overflow_samps * sizeof(*fe->overflow_samps));
        /* Copy in whatever we had in the original speech buffer. */
        n_overflow = *inout_spch - orig_spch + *inout_nsamps;
        if (n_overflow > fe->frame_size - fe->num_overflow_samps)
            n_overflow = fe->frame_size - fe->num_overflow_samps;
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               orig_spch, n_overflow * sizeof(*orig_spch));
        fe->num_overflow_samps += n_overflow;
        /* Advance the input pointers. */
        if (n_overflow > *inout_spch - orig_spch) {
            n_overflow -= (*inout_spch - orig_spch);
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    }

    /* Finally update the frame counter with the number of frames
     * and global sample counter with number of samples we procesed*/
    *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
    fe->sample_counter += orig_nsamps - *inout_nsamps;

    return 0;
}
Esempio n. 5
0
int
fe_process_frames(fe_t *fe,
                  int16 const **inout_spch,
                  size_t *inout_nsamps,
                  mfcc_t **buf_cep,
                  int32 *inout_nframes)
{
    int32 frame_count;
    int outidx, i, n, n_overflow, orig_n_overflow;
    int16 const *orig_spch;

    /* In the special case where there is no output buffer, return the
     * maximum number of frames which would be generated. */
    if (buf_cep == NULL) {
        if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size)
            *inout_nframes = 0;
        else 
            *inout_nframes = 1
                + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
                   / fe->frame_shift);
        return *inout_nframes;
    }

    /* Are there not enough samples to make at least 1 frame? */
    if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) {
        if (*inout_nsamps > 0) {
            /* Append them to the overflow buffer. */
            memcpy(fe->overflow_samps + fe->num_overflow_samps,
                   *inout_spch, *inout_nsamps * (sizeof(int16)));
            fe->num_overflow_samps += *inout_nsamps;
            /* Update input-output pointers and counters. */
            *inout_spch += *inout_nsamps;
            *inout_nsamps = 0;
        }
        /* We produced no frames of output, sorry! */
        *inout_nframes = 0;
        return 0;
    }

    /* Can't write a frame?  Then do nothing! */
    if (*inout_nframes < 1) {
        *inout_nframes = 0;
        return 0;
    }

    /* Keep track of the original start of the buffer. */
    orig_spch = *inout_spch;
    orig_n_overflow = fe->num_overflow_samps;
    /* How many frames will we be able to get? */
    frame_count = 1
        + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size)
           / fe->frame_shift);
    /* Limit it to the number of output frames available. */
    if (frame_count > *inout_nframes)
        frame_count = *inout_nframes;
    /* Index of output frame. */
    outidx = 0;

    /* Start processing, taking care of any incoming overflow. */
    if (fe->num_overflow_samps) {
        int offset = fe->frame_size - fe->num_overflow_samps;

        /* Append start of spch to overflow samples to make a full frame. */
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               *inout_spch, offset * sizeof(**inout_spch));
        fe_read_frame(fe, fe->overflow_samps, fe->frame_size);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += offset;
        *inout_nsamps -= offset;
        fe->num_overflow_samps -= fe->frame_shift;
    }
    else {
        fe_read_frame(fe, *inout_spch, fe->frame_size);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_size;
        *inout_nsamps -= fe->frame_size;
    }

    /* Process all remaining frames. */
    for (i = 1; i < frame_count; ++i) {
        assert(*inout_nsamps >= (size_t)fe->frame_shift);

        fe_shift_frame(fe, *inout_spch, fe->frame_shift);
        assert(outidx < frame_count);
        if ((n = fe_write_frame(fe, buf_cep[outidx])) < 0)
            return -1;
        outidx += n;
        /* Update input-output pointers and counters. */
        *inout_spch += fe->frame_shift;
        *inout_nsamps -= fe->frame_shift;
        /* Amount of data behind the original input which is still needed. */
        if (fe->num_overflow_samps > 0)
            fe->num_overflow_samps -= fe->frame_shift;
    }

    /* How many relevant overflow samples are there left? */
    if (fe->num_overflow_samps <= 0) {
        /* Maximum number of overflow samples past *inout_spch to save. */
        n_overflow = *inout_nsamps;
        if (n_overflow > fe->frame_shift)
            n_overflow = fe->frame_shift;
        fe->num_overflow_samps = fe->frame_size - fe->frame_shift;
        /* Make sure this isn't an illegal read! */
        if (fe->num_overflow_samps > *inout_spch - orig_spch)
            fe->num_overflow_samps = *inout_spch - orig_spch;
        fe->num_overflow_samps += n_overflow;
        if (fe->num_overflow_samps > 0) {
            memcpy(fe->overflow_samps,
                   *inout_spch - (fe->frame_size - fe->frame_shift),
                   fe->num_overflow_samps * sizeof(**inout_spch));
            /* Update the input pointer to cover this stuff. */
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    }
    else {
        /* There is still some relevant data left in the overflow buffer. */
        /* Shift existing data to the beginning. */
        memmove(fe->overflow_samps,
                fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps,
                fe->num_overflow_samps * sizeof(*fe->overflow_samps));
        /* Copy in whatever we had in the original speech buffer. */
        n_overflow = *inout_spch - orig_spch + *inout_nsamps;
        if (n_overflow > fe->frame_size - fe->num_overflow_samps)
            n_overflow = fe->frame_size - fe->num_overflow_samps;
        memcpy(fe->overflow_samps + fe->num_overflow_samps,
               orig_spch, n_overflow * sizeof(*orig_spch));
        fe->num_overflow_samps += n_overflow;
        /* Advance the input pointers. */
        if (n_overflow > *inout_spch - orig_spch) {
            n_overflow -= (*inout_spch - orig_spch);
            *inout_spch += n_overflow;
            *inout_nsamps -= n_overflow;
        }
    }

    /* Finally update the frame counter with the number of frames we procesed. */
    *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */
    return 0;
}
Esempio n. 6
0
int32
fe_process_frame(fe_t * fe, int16 const *spch, int32 nsamps, mfcc_t * fr_cep)
{
    fe_read_frame(fe, spch, nsamps);
    return fe_write_frame(fe, fr_cep);
}