fextract_t *fextract_pitch_energy_create(int frame_len, char *m_e_params) { fextract_t *fex; dsp_fextract_t *mfcc; fex = (fextract_t *) rs_malloc(sizeof(fextract_t),"emotion feature extraction data"); fex->n_features = V2_N_FEATURES; /* Abtastrate etc. ... */ fex->samplerate = SAMPLERATE; fex->frame_len = frame_len; /* global frame length (in frames) */ fex->pitch = pitch_create(AC_GAUSS); fex->frame_shift = fex->frame_len - ((fex->pitch->method == AC_GAUSS? 2 : 1 ) * fex->pitch->periodsPerWindow / fex->pitch->minimumPitch - fex->pitch->dt) * fex->samplerate ; /* global shift (in frames) */ fex->hnr = NULL; fex->vq = NULL; /* MFCCs ... */ mfcc = (dsp_fextract_t *) rs_malloc(sizeof(dsp_fextract_t), "feature extraction data"); mfcc->type = dsp_fextype_MFCC; mfcc->version = DSP_MK_VERSION(1, 4); if (!dsp_mfcc_create(mfcc, m_e_params)) { rs_free(mfcc); mfcc = NULL; rs_warning("Could not initialize MFCC configuration!"); } fex->mfcc=mfcc; return(fex); }
// Default constructor: clears the bookkeeping members, then creates a
// version 1.0 VAD object for frames of EMOVOICEVAD_FRAME_SIZE samples and
// a sample buffer of the same size.
EmoVoiceVAD::EmoVoiceVAD ()
{
    const int vad_version = DSP_MK_VERSION(1,0);

    // bookkeeping starts out empty / "not inside a voice segment"
    steps = 0;
    no_va_counter = 0;
    in_va_segment = false;

    // detector first, then the frame buffer (same order as before)
    vad = dsp_vad_create (vad_version, EMOVOICEVAD_FRAME_SIZE);
    voice = new dsp_sample_t[EMOVOICEVAD_FRAME_SIZE];
}
/*
 * Reads all samples of one audio file and segments them.
 *
 * file            - path of the sample file, or "-" to read from stdin
 * method          - segmentation method data; if NULL a zero-initialised
 *                   one is allocated locally
 * type            - segmentation type; for 'vad' a version 1.0 detector is
 *                   created in method->vad when none is set yet
 * signal_segment  - receives the segments (filled in by emo_asegment())
 * segment_length  - receives the segment lengths (filled in by
 *                   emo_asegment())
 *
 * Returns the value of emo_asegment() (number of segments), or -1 if the
 * file could not be opened. If emo_asegment() itself reports -1,
 * rs_error() is called.
 *
 * NOTE(review): a 'method' allocated here (caller passed NULL) is never
 * released, and neither is a detector created inside it; releasing them
 * needs the matching destroy routine - confirm ownership with the callers.
 */
int emo_afile_segment(char *file, asegmentation_method_t *method, asegmentation_type_t type, dsp_sample_t ***signal_segment, int **segment_length)
{
    int size = SEGMENT_LENGTH;
    int n_samples = 0;
    int n_segments;
    int samples_read;
    int needed;
    FILE *fp;
    dsp_sample_t *signal = NULL;

    /* Open the input before allocating anything, so that a failure here
     * cannot leak the sample buffer. Sample data is binary, hence "rb". */
    if (strcmp(file, "-") == 0)
        fp = stdin;
    else
        fp = fopen(file, "rb");
    if (!fp) {
        rs_warning("Cannot open file %s!", file);
        return -1;
    }

    signal = (dsp_sample_t *) rs_malloc(sizeof(dsp_sample_t) * size, "Signal data");

    for (;;) {
        /* Guarantee room for a complete block BEFORE reading it; growing
         * only after the buffer was already full overruns it whenever
         * SEGMENT_LENGTH is not a multiple of BLOCKSIZE. */
        needed = n_samples + BLOCKSIZE;
        if (size < needed) {
            size += SEGMENT_LENGTH;
            if (size < needed)
                size = needed;
            signal = (dsp_sample_t *) rs_realloc(signal, sizeof(dsp_sample_t) * size, "Signal data");
        }

        samples_read = (int) fread(signal + n_samples, sizeof(dsp_sample_t), BLOCKSIZE, fp);
        if (samples_read <= 0)
            break;
        n_samples += samples_read;

        /* a short block means end of input (or a read error) */
        if (samples_read != BLOCKSIZE)
            break;
    }

    /* stdin is not ours to close */
    if (fp != stdin)
        fclose(fp);

    if (!method) {
        method = (asegmentation_method_t *) rs_malloc(sizeof(asegmentation_method_t), "Audio segmentation method");
        /* zero the fresh object so the member tests below and in
         * emo_asegment() never read indeterminate values */
        memset(method, 0, sizeof(asegmentation_method_t));
    }

    if (type == vad && !method->vad)
        method->vad = dsp_vad_create(DSP_MK_VERSION(1,0), VAD_FRAME_LEN);

    n_segments = emo_asegment(method, type, signal, n_samples, signal_segment, segment_length);
    if (n_segments == -1)
        rs_error("Aborting during procession of file %s!", file);

    rs_free(signal);
    return n_segments;
}
dsp_vad_t *dsp_vad_create(int version, int frame_len) { dsp_vad_t *vad; /* Parameter pruefen ... */ if (frame_len <= 0) return(NULL); /* ... speziellen Datenbereich erzeugen ... */ vad = rs_malloc(sizeof(dsp_vad_t), "VAD configuration data"); /* ... und gemaess Version fuellen ... */ vad->version = version; vad->frame_len = frame_len; vad->state = dsp_vad_no_decision; vad->n_no_va_frames = 0; vad->signal = rs_malloc(sizeof(dsp_sample_t) * vad->frame_len, "VAD frame buffer"); /* optionale Eintraege - noch nicht verfuegbar! */ vad->sigbuf = NULL; vad->ehist = NULL; switch(vad->version) { case DSP_MK_VERSION(1, 0): /* ... Signalpuffer hinzufuegen ... */ vad->sigbuf = dsp_delay_create(V1_0_BUFFER_LEN, sizeof(dsp_sample_t), vad->frame_len); /* ... und begrenztes Energiehistogramm erzeugen */ vad->ehist = mx_histogram_create( V1_0_EHIST_MIN, V1_0_EHIST_MAX, V1_0_EHIST_RES); mx_histogram_limit_set(vad->ehist, V1_0_EHIST_LEN); vad->last_idx = -1; break; default: dsp_vad_destroy(vad); return(NULL); } return(vad); }
/*
 * Puts a VAD configuration back into its initial state.
 *
 * The decision state and the no-voice-activity frame counter are reset
 * for every version; for version 1.0 the delay buffer is flushed, the
 * energy histogram is reset and 'last_idx' is set back to -1.
 * A NULL 'vad' is ignored.
 */
void dsp_vad_reset(dsp_vad_t *vad)
{
    /* validate parameters */
    if (vad == NULL)
        return;

    /* version independent part */
    vad->n_no_va_frames = 0;
    vad->state = dsp_vad_no_decision;

    /* version specific part - unknown versions have nothing more to reset */
    if (vad->version == DSP_MK_VERSION(1, 0)) {
        dsp_delay_flush(vad->sigbuf);
        mx_histogram_reset(vad->ehist);
        vad->last_idx = -1;
    }
}
/**
 * dsp_vad_calc(&voice[], vad, samples[])
 *	Performs a "voice activity detection". Depending on the internal
 *	state stored in 'vad' and on the new signal data 'samples[]', a
 *	state transition is carried out and the signal data is stored in
 *	the internal buffer.
 *
 *	Afterwards a signal frame is taken out of that buffer again where
 *	possible; in general this frame has been delayed by a few frames.
 *	'samples' may be NULL, in which case only the extraction step runs.
 *
 *	Returns 1 if a speech frame was extracted from the buffer,
 *	0 for a pause frame, and -1 if no data could be extracted or
 *	in case of an error.
 **/
int dsp_vad_calc(dsp_sample_t *voice, dsp_vad_t *vad, dsp_sample_t *samples)
{
    dsp_vad_state_t frame_mark;

    /* validate parameters */
    if (vad == NULL)
        return -1;

    /* new data available: compute the state change for this version */
    if (samples != NULL) {
        if (vad->version != DSP_MK_VERSION(1, 0))
            return -1;
        _dsp_vad_calc_1_0(voice, vad, samples);
    }

    /* look at the mark of the oldest buffered frame */
    if (dsp_delay_topm(NULL, &frame_mark, vad->sigbuf) < 0)
        return -1;

    /* a possibly starting speech section is held back; the silence test
     * keeps its priority over the 'starting' test */
    if (frame_mark > dsp_vad_silence && frame_mark == dsp_vad_starting)
        return -1;

    /* hand the delayed frame back: 0 for a pause, 1 for speech */
    dsp_delay_pop(voice, vad->sigbuf);
    return (frame_mark <= dsp_vad_silence) ? 0 : 1;
}
/* Segmentiert eine Liste von Dateien Argumente: filelist: Name der Datei, die die Dateien enthaelt, die segmentiert werden sollen method: dsp_vad_t* fuer voice activity detection, int fuer frames mit fester Laenge char* fuer Segmentierungsinfo type: vad, fixed oder info Rueckgabewert: Anzahl der Signalabschnitte in signal_segment; -1, falls ein Fehler aufgetreten ist */ int emo_afilelist_segment(char *filelist, asegmentation_method_t *method, asegmentation_type_t type,int audio_output){ FILE *fp; int n_segments=0; int *segment_length=NULL; dsp_sample_t **signal_segment=NULL; fp = fopen(filelist,"r"); if (!fp) { rs_warning("Cannot open input file: %s!",filelist); return -1; } switch (type) { case vad: { int stat=0, x=0; char filename[STRING_LENGTH], outdir[STRING_LENGTH]; if (!method) method = (asegmentation_method_t *) rs_malloc(sizeof(asegmentation_method_t),"Audio segmentation method"); if (!method->vad) method->vad = dsp_vad_create(DSP_MK_VERSION(1,0),VAD_FRAME_LEN); //rest der zeile ueberspringen while(fscanf(fp,"%s\t%s",filename,outdir)==2 && stat >=0) { if (output) fprintf(stderr,"%s\t%s\t",filename,outdir); stat=emo_afile_segment(filename,method,vad,&signal_segment,&segment_length); if (stat == -1){ rs_warning("Segmentation of file %s failed!",filename); return -1; } if (audio_output && stat != emo_afile_output(filename,outdir,stat,signal_segment,segment_length)) { rs_warning("Output of segments of file %s failed!",filename); return -1; } if (segment_length) rs_free(segment_length); if (signal_segment) { int i; for (i=0;i<stat;i++) if (signal_segment[i]) rs_free(signal_segment[i]); rs_free(signal_segment); } n_segments+=stat; while (x != '\n' && x != EOF) x=fgetc(fp); x=0; } break; } case fixed: { int stat=0, x=0; char filename[STRING_LENGTH], outdir[STRING_LENGTH]; if (!method->segmentation_info) { rs_warning("No segment length or shift info available!"); return -1; } while(fscanf(fp,"%s\t%s",filename,outdir)==2 && stat >=0) { 
stat=emo_afile_segment(filename,method,fixed,&signal_segment,&segment_length); if (stat == -1){ rs_warning("Segmentation of file %s failed!",filename); return -1; } if (audio_output) { stat = emo_afile_output(filename,outdir,stat,signal_segment,segment_length); if (stat == -1){ rs_warning("Output of segments of file %s failed!",filename); } return -1; } n_segments+=stat; while (x != '\n' && x != EOF) x=fgetc(fp); x=0; } break; } case info: { int stat=0; char filename[STRING_LENGTH], outdir[STRING_LENGTH], seg_info[10*STRING_LENGTH]; if (!method->segmentation_info) { rs_warning("No segment length or shift info available!"); return -1; } while(fscanf(fp,"%s\t%s\t%[^\n]s",filename,outdir,seg_info)==3 && stat >=0) { method->segmentation_info=seg_info; stat=emo_afile_segment(filename,method,info,&signal_segment,&segment_length); if (stat == -1){ rs_warning("Segmentation of file %s failed!",filename); return -1; } if (audio_output && stat != emo_afile_output(filename,outdir,stat,signal_segment,segment_length)) { rs_warning("Output of segments of file %s failed!",filename); return -1; } n_segments+=stat; } break; } default: rs_warning("Unknown segmentation type: %d",type); return -1; } fclose(fp); return n_segments; }