/*********************************************************************
   FUNCTION: fe_end_utt
   PARAMETERS: fe_t *FE, mfcc_t *cepvector, int32 *nframes
   RETURNS: FE_SUCCESS when nothing had to be flushed,
            FE_MEM_ALLOC_ERROR when the scratch frame cannot be
            allocated, otherwise the status of fe_frame_to_fea()
   DESCRIPTION: if samples are still waiting in the overflow buffer,
   pads them with zeros up to one full frame, runs that frame through
   the front end and writes the result to cepvector.  *nframes is set
   to the number of frames produced (0 or 1).  On normal completion
   the overflow count and the start flag of FE are cleared.  If the
   scratch allocation fails, *nframes is set to 0 and FE is left
   untouched.
**********************************************************************/
int32
fe_end_utt(fe_t * FE, mfcc_t * cepvector, int32 * nframes)
{
    int32 pad_len = 0, frame_count = 0;
    frame_t *spbuf;
    int32 return_value = FE_SUCCESS;

    /* if there are any samples left in overflow buffer, pad zeros to
       make a frame and then process that frame */
    if (FE->NUM_OVERFLOW_SAMPS > 0) {
        /* Allocate before modifying FE so that a failed call does not
           leave the overflow buffer padded but unprocessed. */
        spbuf = (frame_t *) calloc(FE->FRAME_SIZE, sizeof(frame_t));
        if (spbuf == NULL) {
            E_WARN("memory alloc failed in fe_end_utt()\n");
            *nframes = 0;       /* never leave the output unset */
            return (FE_MEM_ALLOC_ERROR);
        }

        pad_len = FE->FRAME_SIZE - FE->NUM_OVERFLOW_SAMPS;
        memset(FE->OVERFLOW_SAMPS + (FE->NUM_OVERFLOW_SAMPS), 0,
               pad_len * sizeof(int16));
        FE->NUM_OVERFLOW_SAMPS += pad_len;
        assert(FE->NUM_OVERFLOW_SAMPS == FE->FRAME_SIZE);

        if (FE->dither) {
            fe_dither(FE->OVERFLOW_SAMPS, FE->FRAME_SIZE);
        }

        if (FE->PRE_EMPHASIS_ALPHA != 0.0) {
            fe_pre_emphasis(FE->OVERFLOW_SAMPS, spbuf,
                            FE->FRAME_SIZE, FE->PRE_EMPHASIS_ALPHA,
                            FE->PRIOR);
        }
        else {
            fe_short_to_frame(FE->OVERFLOW_SAMPS, spbuf, FE->FRAME_SIZE);
        }

        fe_hamming_window(spbuf, FE->HAMMING_WINDOW, FE->FRAME_SIZE,
                          FE->remove_dc);
        return_value = fe_frame_to_fea(FE, spbuf, cepvector);
        frame_count = 1;
        free(spbuf);            /* RAH */
    }
    else {
        frame_count = 0;
    }

    /* reset overflow buffers... */
    FE->NUM_OVERFLOW_SAMPS = 0;
    FE->START_FLAG = 0;

    *nframes = frame_count;
    return return_value;
}
/*********************************************************************
   FUNCTION: fe_end_utt
   PARAMETERS: fe_t *FE, float *cepvector
   RETURNS: number of frames processed (0 or 1)
   DESCRIPTION: if there are overflow samples remaining, it will pad
   with zeros to make a complete frame and then process to cepstra,
   writing FE->NUM_CEPSTRA values into cepvector.  When nothing is
   pending, cepvector is not written.  Also deactivates the start flag
   of FE and resets the overflow buffer count.  A failed allocation
   prints a message to stderr and terminates the process with a
   non-zero status.
**********************************************************************/
int32 fe_end_utt(fe_t *FE, float32 *cepvector)
{
    int32 pad_len = 0, frame_count = 0;
    int32 i;
    double *spbuf, *fr_fea = NULL;

    /* if there are any samples left in overflow buffer, pad zeros to
       make a frame and then process that frame */
    if (FE->NUM_OVERFLOW_SAMPS > 0) {
        pad_len = FE->FRAME_SIZE - FE->NUM_OVERFLOW_SAMPS;
        memset(FE->OVERFLOW_SAMPS + (FE->NUM_OVERFLOW_SAMPS), 0,
               pad_len * sizeof(int16));
        FE->NUM_OVERFLOW_SAMPS += pad_len;
        assert(FE->NUM_OVERFLOW_SAMPS == FE->FRAME_SIZE);

        if ((spbuf = (double *) calloc(FE->FRAME_SIZE, sizeof(double))) == NULL) {
            fprintf(stderr, "memory alloc failed in fe_end_utt()\n...exiting\n");
            exit(1);            /* failure must not be reported as success */
        }

        if (FE->PRE_EMPHASIS_ALPHA != 0.0) {
            fe_pre_emphasis(FE->OVERFLOW_SAMPS, spbuf, FE->FRAME_SIZE,
                            FE->PRE_EMPHASIS_ALPHA, FE->PRIOR);
        }
        else {
            fe_short_to_double(FE->OVERFLOW_SAMPS, spbuf, FE->FRAME_SIZE);
        }

        /* again, who should implement cep vector? this can be implemented
           easily from outside or easily from in here */
        if ((fr_fea = (double *) calloc(FE->NUM_CEPSTRA, sizeof(double))) == NULL) {
            fprintf(stderr, "memory alloc failed in fe_end_utt()\n...exiting\n");
            exit(1);            /* failure must not be reported as success */
        }

        fe_hamming_window(spbuf, FE->HAMMING_WINDOW, FE->FRAME_SIZE);
        fe_frame_to_fea(FE, spbuf, fr_fea);

        /* narrow the double-precision features to the caller's float32 */
        for (i = 0; i < FE->NUM_CEPSTRA; i++)
            cepvector[i] = (float32) fr_fea[i];

        frame_count = 1;
        free(fr_fea);           /* RAH - moved up */
        free(spbuf);            /* RAH */
    }
    else {
        /* Nothing pending.  The previous "cepvector = NULL" only changed
           the local copy of the pointer and has been dropped. */
        frame_count = 0;
    }

    /* reset overflow buffers... */
    FE->NUM_OVERFLOW_SAMPS = 0;
    FE->START_FLAG = 0;

    return frame_count;
}
/********************************************************************* FUNCTION: fe_process_frame PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t *fr_cep RETURNS: status, successful or not DESCRIPTION: processes the given speech data and returns features. Modified to process one frame of speech only. **********************************************************************/ int32 fe_process_frame(fe_t * FE, int16 * spch, int32 nsamps, mfcc_t * fr_cep) { int32 spbuf_len, i; frame_t *spbuf; int32 return_value = FE_SUCCESS; spbuf_len = FE->FRAME_SIZE; /* assert(spbuf_len <= nsamps); */ if ((spbuf = (frame_t *) calloc(spbuf_len, sizeof(frame_t))) == NULL) { E_FATAL("memory alloc failed in fe_process_frame()...exiting\n"); } /* Added byte-swapping for Endian-ness compatibility */ if (FE->swap) for (i = 0; i < nsamps; i++) SWAP_INT16(&spch[i]); /* Add dither, if need. Warning: this may add dither twice to the samples in overlapping frames. */ if (FE->dither) { fe_dither(spch, spbuf_len); } /* pre-emphasis if needed,convert from int16 to float64 */ if (FE->PRE_EMPHASIS_ALPHA != 0.0) { fe_pre_emphasis(spch, spbuf, spbuf_len, FE->PRE_EMPHASIS_ALPHA, FE->PRIOR); FE->PRIOR = spch[FE->FRAME_SHIFT - 1]; /* Z.A.B for frame by frame analysis */ } else { fe_short_to_frame(spch, spbuf, spbuf_len); } /* frame based processing - let's make some cepstra... */ fe_hamming_window(spbuf, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc); return_value = fe_frame_to_fea(FE, spbuf, fr_cep); free(spbuf); return return_value; }
/********************************************************************* FUNCTION: fe_process_utt PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, float **cep RETURNS: number of frames of cepstra computed DESCRIPTION: processes the given speech data and returns features. will prepend overflow data from last call and store new overflow data within the FE **********************************************************************/ int32 fe_process_utt(fe_t *FE, int16 *spch, int32 nsamps, float32 ***cep_block) /* RAH, upgraded cep_block to float32 */ { int32 frame_start, frame_count=0, whichframe=0; int32 i, spbuf_len, offset=0; double *spbuf, *fr_data, *fr_fea; int16 *tmp_spch = spch; float32 **cep=NULL; /* are there enough samples to make at least 1 frame? */ if (nsamps+FE->NUM_OVERFLOW_SAMPS >= FE->FRAME_SIZE){ /* if there are previous samples, pre-pend them to input speech samps */ if ((FE->NUM_OVERFLOW_SAMPS > 0)) { if ((tmp_spch = (int16 *) malloc (sizeof(int16)*(FE->NUM_OVERFLOW_SAMPS +nsamps)))==NULL){ fprintf(stderr,"memory alloc failed in fe_process_utt()\n...exiting\n"); exit(0); } /* RAH */ memcpy (tmp_spch,FE->OVERFLOW_SAMPS,FE->NUM_OVERFLOW_SAMPS*(sizeof(int16))); /* RAH */ memcpy(tmp_spch+FE->NUM_OVERFLOW_SAMPS, spch, nsamps*(sizeof(int16))); /* RAH */ /* memcpy(FE->OVERFLOW_SAMPS + FE->NUM_OVERFLOW_SAMPS, spch, nsamps*(sizeof(int16))); */ /* */ /* spch = FE->OVERFLOW_SAMPS; */ /* */ nsamps += FE->NUM_OVERFLOW_SAMPS; FE->NUM_OVERFLOW_SAMPS = 0; /*reset overflow samps count */ } /* compute how many complete frames can be processed and which samples correspond to those samps */ frame_count=0; for (frame_start=0; frame_start+FE->FRAME_SIZE <= nsamps; frame_start += FE->FRAME_SHIFT) frame_count++; /* if (cep!=NULL) fe_free_2d((void**)cep); */ /* It should never not be NULL */ /* 01.14.01 RAH, added +1 Adding one gives us space to stick the last flushed buffer*/ if ((cep = (float32 **)fe_create_2d(frame_count+1,FE->NUM_CEPSTRA,sizeof(float32))) == NULL) { 
fprintf(stderr,"memory alloc for cep failed in fe_process_utt()\n\tfe_create_2d(%ld,%d,%d)\n...exiting\n",(long int) (frame_count+1),FE->NUM_CEPSTRA,sizeof(float32)); /* typecast to make the compiler happy - EBG */ exit(0); } spbuf_len = (frame_count-1)*FE->FRAME_SHIFT + FE->FRAME_SIZE; /* assert(spbuf_len <= nsamps);*/ if ((spbuf=(double *)calloc(spbuf_len, sizeof(double)))==NULL){ fprintf(stderr,"memory alloc failed in fe_process_utt()\n...exiting\n"); exit(0); } /* pre-emphasis if needed,convert from int16 to double */ if (FE->PRE_EMPHASIS_ALPHA != 0.0){ fe_pre_emphasis(tmp_spch, spbuf, spbuf_len, FE->PRE_EMPHASIS_ALPHA, FE->PRIOR); } else{ fe_short_to_double(tmp_spch, spbuf, spbuf_len); } /* frame based processing - let's make some cepstra... */ fr_data = (double *)calloc(FE->FRAME_SIZE, sizeof(double)); fr_fea = (double *)calloc(FE->NUM_CEPSTRA, sizeof(double)); if (fr_data==NULL || fr_fea==NULL){ fprintf(stderr,"memory alloc failed in fe_process_utt()\n...exiting\n"); exit(0); } for (whichframe=0;whichframe<frame_count;whichframe++){ for (i=0;i<FE->FRAME_SIZE;i++) fr_data[i] = spbuf[whichframe*FE->FRAME_SHIFT + i]; fe_hamming_window(fr_data, FE->HAMMING_WINDOW, FE->FRAME_SIZE); fe_frame_to_fea(FE, fr_data, fr_fea); for (i=0;i<FE->NUM_CEPSTRA;i++) cep[whichframe][i] = (float32)fr_fea[i]; } /* done making cepstra */ /* assign samples which don't fill an entire frame to FE overflow buffer for use on next pass */ if (spbuf_len < nsamps) { offset = ((frame_count)*FE->FRAME_SHIFT); memcpy(FE->OVERFLOW_SAMPS,tmp_spch+offset,(nsamps-offset)*sizeof(int16)); FE->NUM_OVERFLOW_SAMPS = nsamps-offset; FE->PRIOR = tmp_spch[offset-1]; assert(FE->NUM_OVERFLOW_SAMPS<FE->FRAME_SIZE); } if (spch != tmp_spch) free (tmp_spch); free(spbuf); free(fr_data); free(fr_fea); } /* if not enough total samps for a single frame, append new samps to previously stored overlap samples */ else { memcpy(FE->OVERFLOW_SAMPS+FE->NUM_OVERFLOW_SAMPS,tmp_spch, nsamps*(sizeof(int16))); 
FE->NUM_OVERFLOW_SAMPS += nsamps; assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE); frame_count=0; } *cep_block = cep; /* MLS */ return frame_count; }
/********************************************************************* FUNCTION: fe_process_utt PARAMETERS: fe_t *FE, int16 *spch, int32 nsamps, mfcc_t **cep, int32 nframes RETURNS: status, successful or not DESCRIPTION: processes the given speech data and returns features. will prepend overflow data from last call and store new overflow data within the FE **********************************************************************/ int32 fe_process_utt(fe_t * FE, int16 * spch, int32 nsamps, mfcc_t *** cep_block, int32 * nframes) { int32 frame_start, frame_count = 0, whichframe = 0; int32 i, spbuf_len, offset = 0; frame_t *spbuf, *fr_data; int16 *tmp_spch = spch; mfcc_t **cep = NULL; int32 return_value = FE_SUCCESS; int32 frame_return_value; /* Added byte-swapping for Endian-ness compatibility */ if (FE->swap) for (i = 0; i < nsamps; i++) SWAP_INT16(&spch[i]); /* are there enough samples to make at least 1 frame? */ if (nsamps + FE->NUM_OVERFLOW_SAMPS >= FE->FRAME_SIZE) { /* if there are previous samples, pre-pend them to input speech samps */ if ((FE->NUM_OVERFLOW_SAMPS > 0)) { if ((tmp_spch = (int16 *) malloc(sizeof(int16) * (FE->NUM_OVERFLOW_SAMPS + nsamps))) == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return FE_MEM_ALLOC_ERROR; } /* RAH */ memcpy(tmp_spch, FE->OVERFLOW_SAMPS, FE->NUM_OVERFLOW_SAMPS * (sizeof(int16))); /* RAH */ memcpy(tmp_spch + FE->NUM_OVERFLOW_SAMPS, spch, nsamps * (sizeof(int16))); /* RAH */ nsamps += FE->NUM_OVERFLOW_SAMPS; FE->NUM_OVERFLOW_SAMPS = 0; /*reset overflow samps count */ } /* compute how many complete frames can be processed and which samples correspond to those samps */ frame_count = 0; for (frame_start = 0; frame_start + FE->FRAME_SIZE <= nsamps; frame_start += FE->FRAME_SHIFT) frame_count++; if ((cep = (mfcc_t **) fe_create_2d(frame_count + 1, FE->FEATURE_DIMENSION, sizeof(mfcc_t))) == NULL) { E_WARN ("memory alloc for cep failed in fe_process_utt()\n\tfe_create_2d(%ld,%d,%d)\n", (long int) (frame_count + 1), 
FE->FEATURE_DIMENSION, sizeof(mfcc_t)); return (FE_MEM_ALLOC_ERROR); } spbuf_len = (frame_count - 1) * FE->FRAME_SHIFT + FE->FRAME_SIZE; if ((spbuf = (frame_t *) calloc(spbuf_len, sizeof(frame_t))) == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return (FE_MEM_ALLOC_ERROR); } /* Add dither, if requested */ if (FE->dither) { fe_dither(tmp_spch, spbuf_len); } /* pre-emphasis if needed, convert from int16 to float64 */ if (FE->PRE_EMPHASIS_ALPHA != 0.0) { fe_pre_emphasis(tmp_spch, spbuf, spbuf_len, FE->PRE_EMPHASIS_ALPHA, FE->PRIOR); } else { fe_short_to_frame(tmp_spch, spbuf, spbuf_len); } /* frame based processing - let's make some cepstra... */ fr_data = (frame_t *) calloc(FE->FRAME_SIZE, sizeof(frame_t)); if (fr_data == NULL) { E_WARN("memory alloc failed in fe_process_utt()\n"); return (FE_MEM_ALLOC_ERROR); } for (whichframe = 0; whichframe < frame_count; whichframe++) { for (i = 0; i < FE->FRAME_SIZE; i++) fr_data[i] = spbuf[whichframe * FE->FRAME_SHIFT + i]; fe_hamming_window(fr_data, FE->HAMMING_WINDOW, FE->FRAME_SIZE, FE->remove_dc); frame_return_value = fe_frame_to_fea(FE, fr_data, cep[whichframe]); if (FE_SUCCESS != frame_return_value) { return_value = frame_return_value; } } /* done making cepstra */ /* assign samples which don't fill an entire frame to FE overflow buffer for use on next pass */ if ((offset = ((frame_count) * FE->FRAME_SHIFT)) < nsamps) { memcpy(FE->OVERFLOW_SAMPS, tmp_spch + offset, (nsamps - offset) * sizeof(int16)); FE->NUM_OVERFLOW_SAMPS = nsamps - offset; FE->PRIOR = tmp_spch[offset - 1]; assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE); } if (spch != tmp_spch) free(tmp_spch); free(spbuf); free(fr_data); } /* if not enough total samps for a single frame, append new samps to previously stored overlap samples */ else { memcpy(FE->OVERFLOW_SAMPS + FE->NUM_OVERFLOW_SAMPS, tmp_spch, nsamps * (sizeof(int16))); FE->NUM_OVERFLOW_SAMPS += nsamps; assert(FE->NUM_OVERFLOW_SAMPS < FE->FRAME_SIZE); frame_count = 0; } *cep_block = 
cep; /* MLS */ *nframes = frame_count; return return_value; }