Exemplo n.º 1
0
// Runs the voice activity detector over the input stream, one frame at a time.
//
// stream_in.ptr is accessed as 16-bit samples and stream_out.ptr as ints (via
// ssi_pcast). Frames start every EMOVOICEVAD_FRAME_STEP samples, and for each
// frame the return value of dsp_vad_calc() is appended to the output buffer.
// The number of frames is
//   (stream_in.num - (EMOVOICEVAD_FRAME_SIZE - EMOVOICEVAD_FRAME_STEP)) / EMOVOICEVAD_FRAME_STEP.
//
// info, xtra_stream_in_num and xtra_stream_in are not used here.
//
// NOTE(review): nothing in this function checks that stream_out can hold one
// int per frame; presumably the caller sizes it accordingly - confirm.
void EmoVoiceVAD::transform (ITransformer::info info,
	ssi_stream_t &stream_in,
	ssi_stream_t &stream_out,
	ssi_size_t xtra_stream_in_num,
	ssi_stream_t xtra_stream_in[]) {

	ssi_size_t sample_number = stream_in.num;

	// Reject short input BEFORE doing the subtraction below. If ssi_size_t is
	// an unsigned type (it looks like one - confirm), the subtraction would
	// wrap around for short inputs, yielding a huge step count and reads and
	// writes far past both buffers. At least one full frame is required, which
	// is exactly the condition "steps >= 1".
	if (sample_number < static_cast<ssi_size_t> (EMOVOICEVAD_FRAME_SIZE)) {
		ssi_err ("Input vector too short (%d)", EMOVOICEVAD_FRAME_SIZE);
		// Only reached if ssi_err does not abort; nothing can be processed.
		return;
	}

	int16_t *inblock = ssi_pcast (int16_t, stream_in.ptr);
	int *outblock = ssi_pcast (int, stream_out.ptr);

	ssi_size_t steps = (sample_number - (EMOVOICEVAD_FRAME_SIZE - EMOVOICEVAD_FRAME_STEP)) / EMOVOICEVAD_FRAME_STEP;

	for (ssi_size_t i = 0; i < steps; i++) {
		// One detector result per frame; frame i starts at sample i * FRAME_STEP.
		*outblock++ = dsp_vad_calc((dsp_sample_t *)voice, (dsp_vad_t *)vad, (dsp_sample_t *)(inblock + i*EMOVOICEVAD_FRAME_STEP));
	}
}
Exemplo n.º 2
0
/*
 * Cuts a signal into segments according to the results of dsp_vad_calc().
 *
 * The function works on a private copy of `signal`, which is enlarged and
 * zero-padded when the last frame would reach past n_samples. The copy is fed
 * to dsp_vad_calc() in steps of VAD_FRAME_SHIFT samples. Once the input is
 * used up, dsp_vad_calc() is called with a NULL signal until it returns a
 * negative value.
 *
 * How the return value `va` of dsp_vad_calc() is used here:
 *   va == 1 : the frame delivered in `voice` is appended to the current segment
 *   va == 0 : the current segment (if any) is closed
 *   va <  0 : ends the main loop once all input frames have been consumed
 *
 * Parameters:
 *   vad            - detector state; must not be NULL
 *   signal         - input samples, n_samples of them are read
 *   n_samples      - number of samples in `signal`
 *   signal_segment - out: array of segment buffers allocated in this function
 *   length         - out: array with the number of samples of each segment
 *
 * Returns the number of completed segments, or -1 if `vad` is NULL. In the
 * -1 case neither output parameter is written.
 *
 * NOTE(review): the arrays handed back through signal_segment/length are
 * allocated with rs_malloc and not freed here; presumably the caller owns
 * them - confirm.
 */
int _asegment_vad(dsp_vad_t *vad, dsp_sample_t *signal, int n_samples, dsp_sample_t ***signal_segment, int **length) {
	int va=0, i, out_sample;
	int n_segments =0, last_va=0, samples=0;
	int max_segments=MAX_SEGMENTS, segment_length = SEGMENT_LENGTH;
	int delay_len;
	int *_length;
	dsp_sample_t *voice;
	dsp_sample_t **_segments;
	dsp_sample_t *_signal;

	/* Validate vad before it is dereferenced and before anything is
	   allocated, so the error path neither crashes nor leaks. */
	if (!vad) {
		rs_warning("No voice activity detection info available!");
		return -1;
	}
	delay_len=vad->sigbuf->length;

	/* Private copy of the input; it may be enlarged and zero-padded below. */
	_signal = (dsp_sample_t *) rs_malloc(n_samples*sizeof(dsp_sample_t),"signal copy");
	for (i=0;i<n_samples;i++)
		_signal[i]=signal[i];

	voice = (dsp_sample_t *) rs_malloc(vad->frame_len * sizeof(dsp_sample_t),"Voice frame");

	_segments = (dsp_sample_t **) rs_malloc(max_segments * sizeof(dsp_sample_t *),"Signal segments");
	_length = (int *) rs_malloc(max_segments * sizeof(int),"Segment lengths");

	/* Keep going while input frames remain, or - after the input is used up -
	   for as long as the detector still returns non-negative results. */
	for (i=0;i<=n_samples-VAD_FRAME_SHIFT || va >=0;i+=VAD_FRAME_SHIFT) {
		if (i > n_samples-VAD_FRAME_SHIFT) {
			/* No input left: call the detector without new samples. */
			va = dsp_vad_calc(voice,vad,NULL);
		}
		else {
			if (i>n_samples-vad->frame_len) {
				/* The frame starting at i would reach past the end of the
				   copy: enlarge it and fill the tail with zeros. */
				int j, new_len=i+vad->frame_len;
				_signal= (dsp_sample_t *) rs_realloc(_signal,new_len*sizeof(dsp_sample_t),"Signal buffer");
				for (j=n_samples;j<new_len;j++)
					_signal[j]=0;
			}
			va = dsp_vad_calc(voice, vad, _signal+i);
		}
		/* React to a change between "voice" and "no voice". */
		if (va >=0 && ((va && !last_va) || (!va && last_va)) && samples >0) {
			/* out_sample is only used for the diagnostic output below.
			   NOTE(review): 160 is hard-coded; presumably it equals
			   VAD_FRAME_SHIFT - confirm. */
			out_sample=i-(delay_len-vad->sigbuf->need_elems)*160;
			if (out_sample>=n_samples)
				out_sample=n_samples-1;
			if (va) {
				/* A segment starts: grow the bookkeeping arrays if they are
				   full, then allocate the buffer for the new segment. */
				if (n_segments >= max_segments) {
					max_segments += MAX_SEGMENTS;
					_segments = (dsp_sample_t **) rs_realloc(_segments,max_segments * sizeof(dsp_sample_t *),"Signal segments");
					_length = (int *) rs_realloc(_length,max_segments * sizeof(int),"Segment lengths");
				}
				_segments[n_segments] = (dsp_sample_t *) rs_malloc(segment_length * sizeof(dsp_sample_t),"Signal segment");
				if (output)
					fprintf(stderr,"[%d..",out_sample);
			}
			else {
				/* A segment ends: store its length (last_va frames were
				   copied into it) and reset the capacity for the next one. */
				if (output)
					fprintf(stderr,"%d] ",out_sample-1);
				_length[n_segments]=last_va*VAD_FRAME_SHIFT;
				n_segments++;
				segment_length=SEGMENT_LENGTH;
			}
		}


		if (!va) 
			last_va=0;
	
		if (va==1 && samples>0) {
			/* Append VAD_FRAME_SHIFT samples of the returned frame to the
			   current segment, enlarging the segment buffer when needed. */
			if ((last_va+1)*VAD_FRAME_SHIFT > segment_length) {
				segment_length += SEGMENT_LENGTH;
				_segments[n_segments] = (dsp_sample_t *) rs_realloc(_segments[n_segments],segment_length * sizeof(dsp_sample_t),"Signal segment");
			}
			samples+=VAD_FRAME_SHIFT;
			memcpy(_segments[n_segments]+last_va*VAD_FRAME_SHIFT,voice,VAD_FRAME_SHIFT*sizeof(dsp_sample_t));
			last_va++;
		}
	
		/* While the detector returns 0, keep calling it without new input.
		   NOTE(review): this is also the only place where `samples` can
		   become positive for the first time, which the conditions above
		   depend on - confirm the intent with the dsp_vad_calc() contract. */
		while (va==0) {
			samples+=VAD_FRAME_SHIFT;
			va = dsp_vad_calc(voice, vad, NULL);
		}
	}

	/* Close a segment that was still open when the loop ended. */
	if (last_va && samples >0) {
		out_sample=i-(delay_len-vad->sigbuf->need_elems)*160;
		/* NOTE(review): clamps to n_samples here but to n_samples-1 inside
		   the loop; this only affects the printed value. */
		if (out_sample>=n_samples)
			out_sample=n_samples;
		if (output)
			fprintf(stderr,"%d] ",out_sample-1);
		_length[n_segments]=last_va*VAD_FRAME_SHIFT;
		n_segments++;
	}
	if (output)
		fprintf(stderr,";\n");
    
	/* Release the working buffers; the segment arrays go to the caller. */
	rs_free(voice);
	rs_free(_signal);
	*signal_segment=_segments;
	*length=_length;
	return n_segments;
}