예제 #1
0
void AUD_SuperposeReader::read(int& length, bool& eos, sample_t* buffer)
{
	AUD_Specs specs = m_reader1->getSpecs();
	AUD_Specs s2 = m_reader2->getSpecs();
	if(!AUD_COMPARE_SPECS(specs, s2))
		AUD_THROW(AUD_ERROR_SPECS, specs_error);

	int samplesize = AUD_SAMPLE_SIZE(specs);

	m_buffer.assureSize(length * samplesize);

	int len1 = length;
	m_reader1->read(len1, eos, buffer);

	if(len1 < length)
		memset(buffer + len1 * specs.channels, 0, (length - len1) * samplesize);

	int len2 = length;
	bool eos2;
	sample_t* buf = m_buffer.getBuffer();
	m_reader2->read(len2, eos2, buf);

	for(int i = 0; i < len2 * specs.channels; i++)
		buffer[i] += buf[i];

	length = AUD_MAX(len1, len2);
	eos &= eos2;
}
예제 #2
0
void AUD_SuperposeReader::read(int & length, sample_t* & buffer)
{
	AUD_Specs specs = m_reader1->getSpecs();
	int samplesize = AUD_SAMPLE_SIZE(specs);

	if(m_buffer.getSize() < length * samplesize)
		m_buffer.resize(length * samplesize);
	buffer = m_buffer.getBuffer();

	int len1 = length;
	sample_t* buf;
	m_reader1->read(len1, buf);
	memcpy(buffer, buf, len1 * samplesize);

	if(len1 < length)
		memset(buffer + len1 * specs.channels, 0, (length - len1) * samplesize);

	int len2 = length;
	m_reader2->read(len2, buf);

	for(int i = 0; i < len2 * specs.channels; i++)
		buffer[i] += buf[i];

	length = AUD_MAX(len1, len2);
}
예제 #3
0
파일: gmm.cpp 프로젝트: yao-matrix/mProto
/* compute probability for Mixture of Gaussians */
AUD_Double gmm_prob( void *hGmmHandle, AUD_Matrix *pFeatMatrix, AUD_Int32s rowIndex, AUD_Vector *pComponentProb )
{
	AUD_ASSERT( hGmmHandle && pFeatMatrix );
	AUD_ASSERT( pFeatMatrix->dataType == AUD_DATATYPE_INT32S );
	AUD_ASSERT( rowIndex < pFeatMatrix->rows );
	AUD_ASSERT( !pComponentProb || ( pComponentProb && pComponentProb->dataType == AUD_DATATYPE_DOUBLE ) );

	GmmModel    *pState = (GmmModel*)hGmmHandle;
	AUD_Int32s  i;

	AUD_ASSERT( pFeatMatrix->cols == pState->width );

	AUD_Double  *llr   = (AUD_Double*)calloc( pState->numMix * sizeof(AUD_Double), 1 );
	AUD_ASSERT( llr );
	AUD_Double  maxLLR = 0.;

	for ( i = 0; i < pState->numMix; i++ )
	{
		loggauss( pFeatMatrix->pInt32s + rowIndex * pFeatMatrix->cols, pState->means.pInt32s + i * pState->step, pState->cvars.pInt64s + i * pState->step,
		          pState->width, &llr[i], (pState->dets.pDouble)[i] );

		if ( pComponentProb != NULL )
		{
			pComponentProb->pDouble[i] = exp( llr[i] );
		}

		if ( i == 0 )
		{
			maxLLR = llr[i];
		}
		else
		{
			maxLLR = AUD_MAX( maxLLR, llr[i] );
		}
	}

	AUD_Double expDiffSum = 0., totalLLR, prob;
	for ( i = 0; i < pState->numMix; i++ )
	{
		expDiffSum += exp( llr[i] - maxLLR );
	}
	totalLLR = log( expDiffSum ) + maxLLR;

	prob = exp( totalLLR );
	// AUDLOG( "totalLLR[%f], prob[%f]\n", totalLLR, prob );

	free( llr );
	llr = NULL;

	return prob;
}
예제 #4
0
void AUD_FFMPEGWriter::write(unsigned int length, sample_t* buffer)
{
	unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);

	if(m_input_size)
	{
		sample_t* inbuf = m_input_buffer.getBuffer();

		while(length)
		{
			unsigned int len = AUD_MIN(m_input_size - m_input_samples, length);

			memcpy(inbuf + m_input_samples * m_specs.channels, buffer, len * samplesize);

			buffer += len * m_specs.channels;
			m_input_samples += len;
			m_position += len;
			length -= len;

			if(m_input_samples == m_input_size)
			{
				encode(inbuf);

				m_input_samples = 0;
			}
		}
	}
	else // PCM data, can write directly!
	{
		int samplesize = AUD_SAMPLE_SIZE(m_specs);
		if(m_output_buffer.getSize() != length * m_specs.channels * m_codecCtx->bits_per_coded_sample / 8)
			m_output_buffer.resize(length * m_specs.channels * m_codecCtx->bits_per_coded_sample / 8);
		m_input_buffer.assureSize(length * AUD_MAX(AUD_DEVICE_SAMPLE_SIZE(m_specs), samplesize));

		sample_t* buf = m_input_buffer.getBuffer();
		m_convert(reinterpret_cast<data_t*>(buf), reinterpret_cast<data_t*>(buffer), length * m_specs.channels);

		encode(buf);

		m_position += length;
	}
}
예제 #5
0
파일: gmm.cpp 프로젝트: yao-matrix/mProto
/* compute LLR for Mixture of Gaussians */
AUD_Double gmm_llr( void *hGmmHandle, AUD_Matrix *pFeatMatrix, AUD_Int32s rowIndex, AUD_Vector *pComponentLLR )
{
	AUD_ASSERT( hGmmHandle && pFeatMatrix );
	AUD_ASSERT( pFeatMatrix->dataType == AUD_DATATYPE_INT32S );
	AUD_ASSERT( rowIndex < pFeatMatrix->rows );
	AUD_ASSERT( !pComponentLLR || ( pComponentLLR && pComponentLLR->dataType == AUD_DATATYPE_DOUBLE ) );

	GmmModel    *pState = (GmmModel*)hGmmHandle;
	AUD_Int32s  i;

	AUD_ASSERT( pFeatMatrix->cols == pState->width );

	AUD_Double  *llr = (AUD_Double*)calloc( pState->numMix * sizeof(AUD_Double), 1 );
	AUD_ASSERT( llr );

	AUD_Double  maxLLR = 0., totalLLR;

	if ( pState->dets.len <= 0 )
	{
		pState->dets.len      = pState->numMix;
		pState->dets.dataType = AUD_DATATYPE_DOUBLE;
		AUD_Int32s ret = createVector( &(pState->dets) );
		AUD_ASSERT( ret == 0 );

		calcdet( &(pState->cvars), &(pState->weights), &(pState->dets) );
	}


	for ( i = 0; i < pState->numMix; i++ )
	{
		loggauss( pFeatMatrix->pInt32s + rowIndex * pFeatMatrix->cols, pState->means.pInt32s + i * pState->step, pState->cvars.pInt64s + i * pState->step,
		          pFeatMatrix->cols, &llr[i], (pState->dets.pDouble)[i] );

		if ( pComponentLLR != NULL )
		{
			pComponentLLR->pDouble[i] = llr[i];
		}

		if ( i == 0 )
		{
			maxLLR = llr[i];
		}
		else
		{
			maxLLR = AUD_MAX( maxLLR, llr[i] );
		}
	}


	AUD_Double expDiffSum = 0.;
	for ( i = 0; i < pState->numMix; i++ )
	{
		expDiffSum += exp( llr[i] - maxLLR );
	}
	totalLLR = log( expDiffSum ) + maxLLR;

	free( llr );
	llr = NULL;

	if ( isnan( totalLLR ) || totalLLR < LOG_ZERO )
	{
		totalLLR = LOG_ZERO;
	}

	return totalLLR;
}
예제 #6
0
void AUD_SoftwareDevice::AUD_SoftwareHandle::update()
{
	int flags = 0;

	AUD_Vector3 SL;
	if(m_relative)
		SL = -m_location;
	else
		SL = m_device->m_location - m_location;
	float distance = SL * SL;

	if(distance > 0)
		distance = sqrt(distance);
	else
		flags |= AUD_RENDER_DOPPLER | AUD_RENDER_DISTANCE;

	if(m_pitch->getSpecs().channels != AUD_CHANNELS_MONO)
	{
		m_volume = m_user_volume;
		m_pitch->setPitch(m_user_pitch);
		return;
	}

	flags = ~(flags | m_flags | m_device->m_flags);

	// Doppler and Pitch

	if(flags & AUD_RENDER_DOPPLER)
	{
		float vls;
		if(m_relative)
			vls = 0;
		else
			vls = SL * m_device->m_velocity / distance;
		float vss = SL * m_velocity / distance;
		float max = m_device->m_speed_of_sound / m_device->m_doppler_factor;
		if(vss >= max)
		{
			m_pitch->setPitch(AUD_PITCH_MAX);
		}
		else
		{
			if(vls > max)
				vls = max;

			m_pitch->setPitch((m_device->m_speed_of_sound - m_device->m_doppler_factor * vls) / (m_device->m_speed_of_sound - m_device->m_doppler_factor * vss) * m_user_pitch);
		}
	}
	else
		m_pitch->setPitch(m_user_pitch);

	if(flags & AUD_RENDER_VOLUME)
	{
		// Distance

		if(flags & AUD_RENDER_DISTANCE)
		{
			if(m_device->m_distance_model == AUD_DISTANCE_MODEL_INVERSE_CLAMPED || m_device->m_distance_model == AUD_DISTANCE_MODEL_LINEAR_CLAMPED || m_device->m_distance_model == AUD_DISTANCE_MODEL_EXPONENT_CLAMPED)
			{
				distance = AUD_MAX(AUD_MIN(m_distance_max, distance), m_distance_reference);
			}

			switch(m_device->m_distance_model)
			{
			case AUD_DISTANCE_MODEL_INVERSE:
			case AUD_DISTANCE_MODEL_INVERSE_CLAMPED:
				m_volume = m_distance_reference / (m_distance_reference + m_attenuation * (distance - m_distance_reference));
				break;
			case AUD_DISTANCE_MODEL_LINEAR:
			case AUD_DISTANCE_MODEL_LINEAR_CLAMPED:
			{
				float temp = m_distance_max - m_distance_reference;
				if(temp == 0)
				{
					if(distance > m_distance_reference)
						m_volume = 0.0f;
					else
						m_volume = 1.0f;
				}
				else
					m_volume = 1.0f - m_attenuation * (distance - m_distance_reference) / (m_distance_max - m_distance_reference);
				break;
			}
			case AUD_DISTANCE_MODEL_EXPONENT:
			case AUD_DISTANCE_MODEL_EXPONENT_CLAMPED:
				if(m_distance_reference == 0)
					m_volume = 0;
				else
					m_volume = pow(distance / m_distance_reference, -m_attenuation);
				break;
			default:
				m_volume = 1.0f;
			}
		}
		else
			m_volume = 1.0f;

		// Cone

		if(flags & AUD_RENDER_CONE)
		{
			AUD_Vector3 SZ = m_orientation.getLookAt();

			float phi = acos(float(SZ * SL / (SZ.length() * SL.length())));
			float t = (phi - m_cone_angle_inner)/(m_cone_angle_outer - m_cone_angle_inner);

			if(t > 0)
			{
				if(t > 1)
					m_volume *= m_cone_volume_outer;
				else
					m_volume *= 1 + t * (m_cone_volume_outer - 1);
			}
		}

		if(m_volume > m_volume_max)
			m_volume = m_volume_max;
		else if(m_volume < m_volume_min)
			m_volume = m_volume_min;

		// Volume

		m_volume *= m_user_volume;
	}

	// 3D Cue

	AUD_Quaternion orientation;

	if(!m_relative)
		orientation = m_device->m_orientation;

	AUD_Vector3 Z = orientation.getLookAt();
	AUD_Vector3 N = orientation.getUp();
	AUD_Vector3 A = N * ((SL * N) / (N * N)) - SL;

	float Asquare = A * A;

	if(Asquare > 0)
	{
		float phi = acos(float(Z * A / (Z.length() * sqrt(Asquare))));
		if(N.cross(Z) * A > 0)
			phi = -phi;

		m_mapper->setMonoAngle(phi);
	}
	else
		m_mapper->setMonoAngle(m_relative ? m_user_pan * M_PI / 2.0 : 0);
}
예제 #7
0
void AUD_SequencerReader::read(int& length, bool& eos, sample_t* buffer)
{
	AUD_MutexLock lock(*m_sequence);

	if(m_sequence->m_status != m_status)
	{
		m_device.changeSpecs(m_sequence->m_specs);
		m_device.setSpeedOfSound(m_sequence->m_speed_of_sound);
		m_device.setDistanceModel(m_sequence->m_distance_model);
		m_device.setDopplerFactor(m_sequence->m_doppler_factor);

		m_status = m_sequence->m_status;
	}

	if(m_sequence->m_entry_status != m_entry_status)
	{
		std::list<boost::shared_ptr<AUD_SequencerHandle> > handles;

		AUD_HandleIterator hit = m_handles.begin();
		AUD_EntryIterator  eit = m_sequence->m_entries.begin();

		int result;
		boost::shared_ptr<AUD_SequencerHandle> handle;

		while(hit != m_handles.end() && eit != m_sequence->m_entries.end())
		{
			handle = *hit;
			boost::shared_ptr<AUD_SequencerEntry> entry = *eit;

			result = handle->compare(entry);

			if(result < 0)
			{
				try
				{
					handle = boost::shared_ptr<AUD_SequencerHandle>(new AUD_SequencerHandle(entry, m_device));
					handles.push_back(handle);
				}
				catch(AUD_Exception&)
				{
				}
				eit++;
			}
			else if(result == 0)
			{
				handles.push_back(handle);
				hit++;
				eit++;
			}
			else
			{
				handle->stop();
				hit++;
			}
		}

		while(hit != m_handles.end())
		{
			(*hit)->stop();
			hit++;
		}

		while(eit != m_sequence->m_entries.end())
		{
			try
			{
				handle = boost::shared_ptr<AUD_SequencerHandle>(new AUD_SequencerHandle(*eit, m_device));
				handles.push_back(handle);
			}
			catch(AUD_Exception&)
			{
			}
			eit++;
		}

		m_handles = handles;

		m_entry_status = m_sequence->m_entry_status;
	}

	AUD_Specs specs = m_sequence->m_specs;
	int pos = 0;
	float time = float(m_position) / float(specs.rate);
	float volume, frame;
	int len, cfra;
	AUD_Vector3 v, v2;
	AUD_Quaternion q;


	while(pos < length)
	{
		frame = time * m_sequence->m_fps;
		cfra = int(floor(frame));

		len = int(ceil((cfra + 1) / m_sequence->m_fps * specs.rate)) - m_position;
		len = AUD_MIN(length - pos, len);
		len = AUD_MAX(len, 1);

		for(AUD_HandleIterator it = m_handles.begin(); it != m_handles.end(); it++)
		{
			(*it)->update(time, frame, m_sequence->m_fps);
		}

		m_sequence->m_volume.read(frame, &volume);
		if(m_sequence->m_muted)
			volume = 0.0f;
		m_device.setVolume(volume);

		m_sequence->m_orientation.read(frame, q.get());
		m_device.setListenerOrientation(q);
		m_sequence->m_location.read(frame, v.get());
		m_device.setListenerLocation(v);
		m_sequence->m_location.read(frame + 1, v2.get());
		v2 -= v;
		m_device.setListenerVelocity(v2 * m_sequence->m_fps);

		m_device.read(reinterpret_cast<data_t*>(buffer + specs.channels * pos), len);

		pos += len;
		time += float(len) / float(specs.rate);
	}

	m_position += length;

	eos = false;
}
예제 #8
0
int AUD_SuperposeReader::getPosition() const
{
	int pos1 = m_reader1->getPosition();
	int pos2 = m_reader2->getPosition();
	return AUD_MAX(pos1, pos2);
}
예제 #9
0
AUD_Int32s wov_adapt_gmm_si()
{
	AUD_Error  error = AUD_ERROR_NONE;
	AUD_Int32s ret   = 0;
	AUD_Int8s  wavPath[256] = { 0, };
	AUD_Int32s data;

	setbuf( stdout, NULL );
	setbuf( stdin, NULL );
	AUDLOG( "pls give adapt wav stream's folder path:\n" );
	wavPath[0] = '\0';
	data = scanf( "%s", wavPath );
	AUDLOG( "adapt wav stream's folder path is: %s\n", wavPath );

	// step 1: read UBM model from file
	void *hUbm = NULL;
	FILE *fpUbm = fopen( WOV_UBM_GMMMODEL_FILE, "rb" );
	if ( fpUbm == NULL )
	{
		AUDLOG( "cannot open ubm model file: [%s]\n", WOV_UBM_GMMMODEL_FILE );
		return AUD_ERROR_IOFAILED;
	}
	error = gmm_import( &hUbm, fpUbm );
	AUD_ASSERT( error == AUD_ERROR_NONE );
	fclose( fpUbm );
	fpUbm = NULL;

	// AUDLOG( "ubm GMM as:\n" );
	// gmm_show( hUbm );

	AUD_Int32s i = 0, j = 0;
	entry      *pEntry = NULL;
	dir        *pDir   = NULL;

	AUD_Int32s totalWinNum = 0;
	pDir = openDir( (const char*)wavPath );
	if ( pDir == NULL )
	{
		AUDLOG( "cannot open folder: %s\n", wavPath );
		return -1;
	}

	while ( ( pEntry = scanDir( pDir ) ) )
	{
		AUD_Int8s   keywordFile[256] = { 0, };
		AUD_Summary fileSummary;
		AUD_Int32s  sampleNum = 0;

		snprintf( (char*)keywordFile, 256, "%s/%s", wavPath, pEntry->name );
		// AUDLOG( "%s\n", keywordFile );

		ret = parseWavFromFile( keywordFile, &fileSummary );
		if ( ret < 0 )
		{
			continue;
		}
		AUD_ASSERT( fileSummary.channelNum == CHANNEL_NUM && fileSummary.bytesPerSample == BYTES_PER_SAMPLE && fileSummary.sampleRate == SAMPLE_RATE );

		// request memeory for template
		sampleNum = fileSummary.dataChunkBytes / fileSummary.bytesPerSample;
		for ( j = 0; j * FRAME_STRIDE + FRAME_LEN <= sampleNum; j++ )
		{
			;
		}
		j = j - MFCC_DELAY;

		totalWinNum += j;
	}
	closeDir( pDir );
	pDir = NULL;

	AUD_Matrix featureMatrix;
	featureMatrix.rows     = totalWinNum;
	featureMatrix.cols     = MFCC_FEATDIM;
	featureMatrix.dataType = AUD_DATATYPE_INT32S;
	ret = createMatrix( &featureMatrix );
	AUD_ASSERT( ret == 0 );

	AUD_Int32s currentRow  = 0;
	pDir = openDir( (const char*)wavPath );
	while ( ( pEntry = scanDir( pDir ) ) )
	{
		AUD_Int8s   keywordFile[256] = { 0, };
		AUD_Summary fileSummary;
		AUD_Int32s  sampleNum        = 0;
		void        *hMfccHandle     = NULL;

		snprintf( (char*)keywordFile, 256, "%s/%s", wavPath, pEntry->name );
		// AUDLOG( "%s\n", keywordFile );

		ret = parseWavFromFile( keywordFile, &fileSummary );
		if ( ret < 0 )
		{
			continue;
		}
		AUD_ASSERT( fileSummary.channelNum == CHANNEL_NUM && fileSummary.bytesPerSample == BYTES_PER_SAMPLE && fileSummary.sampleRate == SAMPLE_RATE );

		AUD_Int32s bufLen = fileSummary.dataChunkBytes;
		AUD_Int16s *pBuf  = (AUD_Int16s*)calloc( bufLen, 1 );
		AUD_ASSERT( pBuf );

		sampleNum = readWavFromFile( (AUD_Int8s*)keywordFile, pBuf, bufLen );
		AUD_ASSERT( sampleNum > 0 );

		// pre-processing

		 // pre-emphasis
		sig_preemphasis( pBuf, pBuf, sampleNum );

		 // calc framing number
		for ( j = 0; j * FRAME_STRIDE + FRAME_LEN <= sampleNum; j++ )
		{
			;
		}

		 // XXX: select salient frames
		AUD_Feature feature;
		feature.featureMatrix.rows     = j - MFCC_DELAY;
		feature.featureMatrix.cols     = MFCC_FEATDIM;
		feature.featureMatrix.dataType = AUD_DATATYPE_INT32S;
		feature.featureMatrix.pInt32s  = featureMatrix.pInt32s + currentRow * feature.featureMatrix.cols;

		feature.featureNorm.len      = j - MFCC_DELAY;
		feature.featureNorm.dataType = AUD_DATATYPE_INT64S;
		ret = createVector( &(feature.featureNorm) );
		AUD_ASSERT( ret == 0 );

		error = mfcc16s32s_init( &hMfccHandle, FRAME_LEN, WINDOW_TYPE, MFCC_ORDER, FRAME_STRIDE, SAMPLE_RATE, COMPRESS_TYPE );
		AUD_ASSERT( error == AUD_ERROR_NONE );

		error = mfcc16s32s_calc( hMfccHandle, pBuf, sampleNum, &feature );
		AUD_ASSERT( error == AUD_ERROR_NONE );

		error = mfcc16s32s_deinit( &hMfccHandle );
		AUD_ASSERT( error == AUD_ERROR_NONE );

		free( pBuf );
		pBuf   = NULL;
		bufLen = 0;

		ret = destroyVector( &(feature.featureNorm) );
		AUD_ASSERT( ret == 0 );

		currentRow += feature.featureMatrix.rows;
	}
	closeDir( pDir );
	pDir = NULL;

	AUD_Matrix llrMatrix;
	llrMatrix.rows     = totalWinNum;
	llrMatrix.cols     = gmm_getmixnum( hUbm );
	llrMatrix.dataType = AUD_DATATYPE_DOUBLE;
	ret = createMatrix( &llrMatrix );
	AUD_ASSERT( ret == 0 );

	AUD_Double llr = 0.;
	for ( j = 0; j < featureMatrix.rows; j++ )
	{
		AUD_Vector componentLLR;
		componentLLR.len      = llrMatrix.cols;
		componentLLR.dataType = AUD_DATATYPE_DOUBLE;
		componentLLR.pDouble  = llrMatrix.pDouble + j * llrMatrix.cols;

		llr = gmm_llr( hUbm, &(featureMatrix), j, &componentLLR );
	}

	AUD_Vector sumLlr;
	sumLlr.len      = llrMatrix.cols;
	sumLlr.dataType = AUD_DATATYPE_DOUBLE;
	ret = createVector( &sumLlr );
	AUD_ASSERT( ret == 0 );

	AUD_Double *pSumLlr = sumLlr.pDouble;
	for ( j = 0; j < llrMatrix.cols; j++ )
	{
		pSumLlr[j] = llrMatrix.pDouble[j];
	}
	for ( i = 1; i < llrMatrix.rows; i++ )
	{
		for ( j = 0; j < llrMatrix.cols; j++ )
		{
			pSumLlr[j] = logadd( pSumLlr[j],  *(llrMatrix.pDouble + i * llrMatrix.cols + j) );
		}
	}

#if 0
	AUD_Vector bestIndex;
	bestIndex.len      = TOP_N;
	bestIndex.dataType = AUD_DATATYPE_INT32S;
	ret = createVector( &bestIndex );
	AUD_ASSERT( ret == 0 );

	// get top TOP_N component
	ret = sortVector( &sumLlr, &bestIndex );
	AUD_ASSERT( ret == 0 );
#else
	llr = pSumLlr[0];
	for ( j = 1; j < sumLlr.len; j++ )
	{
		llr = logadd( llr, pSumLlr[j] );
	}
	// AUDLOG( "llr: %.f\n", llr );

	AUD_Vector sortIndex;
	sortIndex.len      = sumLlr.len;
	sortIndex.dataType = AUD_DATATYPE_INT32S;
	ret = createVector( &sortIndex );
	AUD_ASSERT( ret == 0 );

	ret = sortVector( &sumLlr, &sortIndex );
	AUD_ASSERT( ret == 0 );

	int    num = 0;
	double val = 0.;
	for ( i = 0; i < sortIndex.len; i++ )
	{
		// ln( 0.001 ) ~= -7.
		val =  pSumLlr[sortIndex.pInt32s[i]] - llr + 7.;
		// AUDLOG( "%f, \n", val );
		if ( val < 0 )
		{
			break;
		}
		num++;
	}
	// AUDLOG( "\n" );
	AUD_ASSERT( num > 0 );

	AUDLOG( "computed component num: %d\n", num );

	num = AUD_MAX( num, TOP_N );
	AUDLOG( "normalized component num: %d\n", num );

	AUD_Vector bestIndex;
	bestIndex.len      = num;
	bestIndex.dataType = AUD_DATATYPE_INT32S;
	bestIndex.pInt32s  = sortIndex.pInt32s;
#endif

	int slash = '/';
	char *ptr = strrchr( (char*)wavPath, slash );
	ptr++;

	// select imposter GMM
	void      *hImposterGmm        = NULL;
	AUD_Int8s imposterGmmName[256] = { 0, };
	snprintf( (char*)imposterGmmName, 256, "%s-imposter", ptr );
	error = gmm_select( &hImposterGmm, hUbm, &bestIndex, 0 | GMM_INVERTSELECT_MASK, imposterGmmName );
	AUD_ASSERT( error == AUD_ERROR_NONE );

	gmm_show( hImposterGmm );

	// export gmm
	char imposterFile[256] = { 0 };
	snprintf( imposterFile, 256, "%s/%s-imposter.gmm", WOV_IMPOSTER_GMMMODEL_DIR, ptr );
	AUDLOG( "Export imposter GMM Model to: %s\n", imposterFile );
	FILE *fpImposterGmm = fopen( imposterFile, "wb" );
	AUD_ASSERT( fpImposterGmm );

	error = gmm_export( hImposterGmm, fpImposterGmm );
	AUD_ASSERT( error == AUD_ERROR_NONE );
	AUDLOG( "Export imposter GMM Model File Done\n" );

	fclose( fpImposterGmm );
	fpImposterGmm = NULL;

	error = gmm_free( &hImposterGmm );
	AUD_ASSERT( error == AUD_ERROR_NONE );

	// select keyword GMM
	void      *hAdaptedGmm        = NULL;
	AUD_Int8s adaptedGmmName[256] = { 0, };
	snprintf( (char*)adaptedGmmName, 256, "%s", ptr );
	// AUDLOG( "%s\n", adaptedGmmName );
	error = gmm_select( &hAdaptedGmm, hUbm, &bestIndex, 0, adaptedGmmName );
	AUD_ASSERT( error == AUD_ERROR_NONE );

	ret = destroyVector( &sumLlr );
	AUD_ASSERT( ret == 0 );

	ret = destroyMatrix( &llrMatrix );
	AUD_ASSERT( ret == 0 );

#if 0
	ret = destroyVector( &bestIndex );
	AUD_ASSERT( ret == 0 );
#else
	ret = destroyVector( &sortIndex );
	AUD_ASSERT( ret == 0 );
#endif

#if 1
	// adapt GMM
	error = gmm_adapt( hAdaptedGmm, &featureMatrix );
	AUD_ASSERT( error == AUD_ERROR_NONE );
#endif

	gmm_show( hAdaptedGmm );

	// export gmm
	char modelFile[256] = { 0 };
	snprintf( modelFile, 256, "%s/%s.gmm", WOV_KEYWORD_GMMMODEL_DIR, ptr );
	AUDLOG( "Export GMM Model to: %s\n", modelFile );
	FILE *fpGmm = fopen( modelFile, "wb" );
	AUD_ASSERT( fpGmm );

	error = gmm_export( hAdaptedGmm, fpGmm );
	AUD_ASSERT( error == AUD_ERROR_NONE );
	AUDLOG( "Export GMM Model File Done\n" );

	fclose( fpGmm );
	fpGmm = NULL;

	ret = destroyMatrix( &featureMatrix );
	AUD_ASSERT( ret == 0 );

	error = gmm_free( &hAdaptedGmm );
	AUD_ASSERT( error == AUD_ERROR_NONE );

	error = gmm_free( &hUbm );
	AUD_ASSERT( error == AUD_ERROR_NONE );

	AUDLOG( "keyword model adapt2 done\n" );

	return 0;
}
예제 #10
0
AUD_FFMPEGWriter::AUD_FFMPEGWriter(std::string filename, AUD_DeviceSpecs specs, AUD_Container format, AUD_Codec codec, unsigned int bitrate) :
	m_position(0),
	m_specs(specs),
	m_input_samples(0)
{
	static const char* formats[] = { NULL, "ac3", "flac", "matroska", "mp2", "mp3", "ogg", "wav" };

	m_formatCtx = avformat_alloc_context();
	if (!m_formatCtx) AUD_THROW(AUD_ERROR_FFMPEG, context_error);

	strcpy(m_formatCtx->filename, filename.c_str());
	m_outputFmt = m_formatCtx->oformat = av_guess_format(formats[format], filename.c_str(), NULL);
	if (!m_outputFmt) {
		avformat_free_context(m_formatCtx);
		AUD_THROW(AUD_ERROR_FFMPEG, context_error);
	}

	switch(codec)
	{
	case AUD_CODEC_AAC:
		m_outputFmt->audio_codec = AV_CODEC_ID_AAC;
		break;
	case AUD_CODEC_AC3:
		m_outputFmt->audio_codec = AV_CODEC_ID_AC3;
		break;
	case AUD_CODEC_FLAC:
		m_outputFmt->audio_codec = AV_CODEC_ID_FLAC;
		break;
	case AUD_CODEC_MP2:
		m_outputFmt->audio_codec = AV_CODEC_ID_MP2;
		break;
	case AUD_CODEC_MP3:
		m_outputFmt->audio_codec = AV_CODEC_ID_MP3;
		break;
	case AUD_CODEC_PCM:
		switch(specs.format)
		{
		case AUD_FORMAT_U8:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_U8;
			break;
		case AUD_FORMAT_S16:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_S16LE;
			break;
		case AUD_FORMAT_S24:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_S24LE;
			break;
		case AUD_FORMAT_S32:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_S32LE;
			break;
		case AUD_FORMAT_FLOAT32:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_F32LE;
			break;
		case AUD_FORMAT_FLOAT64:
			m_outputFmt->audio_codec = AV_CODEC_ID_PCM_F64LE;
			break;
		default:
			m_outputFmt->audio_codec = AV_CODEC_ID_NONE;
			break;
		}
		break;
	case AUD_CODEC_VORBIS:
		m_outputFmt->audio_codec = AV_CODEC_ID_VORBIS;
		break;
	default:
		m_outputFmt->audio_codec = AV_CODEC_ID_NONE;
		break;
	}

	try
	{
		if(m_outputFmt->audio_codec == AV_CODEC_ID_NONE)
			AUD_THROW(AUD_ERROR_SPECS, codec_error);

		m_stream = avformat_new_stream(m_formatCtx, NULL);
		if(!m_stream)
			AUD_THROW(AUD_ERROR_FFMPEG, stream_error);

		m_codecCtx = m_stream->codec;
		m_codecCtx->codec_id = m_outputFmt->audio_codec;
		m_codecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
		m_codecCtx->bit_rate = bitrate;
		m_codecCtx->sample_rate = int(m_specs.rate);
		m_codecCtx->channels = m_specs.channels;
		m_codecCtx->time_base.num = 1;
		m_codecCtx->time_base.den = m_codecCtx->sample_rate;

		switch(m_specs.format)
		{
		case AUD_FORMAT_U8:
			m_convert = AUD_convert_float_u8;
			m_codecCtx->sample_fmt = AV_SAMPLE_FMT_U8;
			break;
		case AUD_FORMAT_S16:
			m_convert = AUD_convert_float_s16;
			m_codecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
			break;
		case AUD_FORMAT_S32:
			m_convert = AUD_convert_float_s32;
			m_codecCtx->sample_fmt = AV_SAMPLE_FMT_S32;
			break;
		case AUD_FORMAT_FLOAT32:
			m_convert = AUD_convert_copy<float>;
			m_codecCtx->sample_fmt = AV_SAMPLE_FMT_FLT;
			break;
		case AUD_FORMAT_FLOAT64:
			m_convert = AUD_convert_float_double;
			m_codecCtx->sample_fmt = AV_SAMPLE_FMT_DBL;
			break;
		default:
			AUD_THROW(AUD_ERROR_FFMPEG, format_error);
		}

		try
		{
			if(m_formatCtx->oformat->flags & AVFMT_GLOBALHEADER)
				m_codecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

			AVCodec* codec = avcodec_find_encoder(m_codecCtx->codec_id);
			if(!codec)
				AUD_THROW(AUD_ERROR_FFMPEG, codec_error);

			if(codec->sample_fmts) {
				// Check if the prefered sample format for this codec is supported.
				const enum AVSampleFormat *p = codec->sample_fmts;
				for(; *p != -1; p++) {
					if(*p == m_stream->codec->sample_fmt)
						break;
				}
				if(*p == -1) {
					// Sample format incompatible with codec. Defaulting to a format known to work.
					m_stream->codec->sample_fmt = codec->sample_fmts[0];
				}
			}

			if(avcodec_open2(m_codecCtx, codec, NULL))
				AUD_THROW(AUD_ERROR_FFMPEG, codec_error);

			m_output_buffer.resize(FF_MIN_BUFFER_SIZE);
			int samplesize = AUD_MAX(AUD_SAMPLE_SIZE(m_specs), AUD_DEVICE_SAMPLE_SIZE(m_specs));

			if(m_codecCtx->frame_size <= 1) {
				m_input_size = FF_MIN_BUFFER_SIZE * 8 / m_codecCtx->bits_per_coded_sample / m_codecCtx->channels;
				m_input_buffer.resize(m_input_size * samplesize);
			}
			else
			{
				m_input_buffer.resize(m_codecCtx->frame_size * samplesize);
				m_input_size = m_codecCtx->frame_size;
			}

#ifdef FFMPEG_HAVE_ENCODE_AUDIO2
			m_frame = av_frame_alloc();
			if (!m_frame)
				AUD_THROW(AUD_ERROR_FFMPEG, codec_error);
			avcodec_get_frame_defaults(m_frame);
			m_frame->linesize[0]    = m_input_size * samplesize;
			m_frame->format         = m_codecCtx->sample_fmt;
			m_frame->nb_samples     = m_input_size;
#  ifdef FFMPEG_HAVE_AVFRAME_SAMPLE_RATE
			m_frame->sample_rate    = m_codecCtx->sample_rate;
#  endif
#  ifdef FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT
			m_frame->channel_layout = m_codecCtx->channel_layout;
#  endif
			m_sample_size = av_get_bytes_per_sample(m_codecCtx->sample_fmt);
			m_frame_pts = 0;
			m_deinterleave = av_sample_fmt_is_planar(m_codecCtx->sample_fmt);
			if(m_deinterleave)
				m_deinterleave_buffer.resize(m_input_size * m_codecCtx->channels * m_sample_size);
#endif

			try
			{
				if(avio_open(&m_formatCtx->pb, filename.c_str(), AVIO_FLAG_WRITE))
					AUD_THROW(AUD_ERROR_FILE, file_error);

				avformat_write_header(m_formatCtx, NULL);
			}
			catch(AUD_Exception&)
			{
				avcodec_close(m_codecCtx);
				av_freep(&m_formatCtx->streams[0]->codec);
				throw;
			}
		}
		catch(AUD_Exception&)
		{
			av_freep(&m_formatCtx->streams[0]);
			throw;
		}
	}
	catch(AUD_Exception&)
	{
		av_free(m_formatCtx);
		throw;
	}
}
예제 #11
0
AUD_Int32s denoise_aud( AUD_Int16s *pInBuf, AUD_Int16s *pOutBuf, AUD_Int32s inLen )
{
    Fft_16s        *hFft        = NULL;
    Ifft_16s       *hIfft       = NULL;
    AUD_Window16s  *hWin        = NULL;

    AUD_Int32s     frameSize    = 512;
    AUD_Int32s     frameStride  = 256;
    AUD_Int32s     frameOverlap = 256;
    AUD_Int32s     nFFT         = frameSize;
    AUD_Int32s     nSpecLen     = nFFT / 2 + 1;
    AUD_Int32s     nNoiseFrame  = 6; // (AUD_Int32s)( ( 0.25 * SAMPLE_RATE - frameSize ) / frameStride + 1 );

    AUD_Int32s     i, j, k, m, n, ret;

    AUD_Int32s     cleanLen     = 0;

    // pre-emphasis
    // sig_preemphasis( pInBuf, pInBuf, inLen );

    // init hamming module
    win16s_init( &hWin, AUD_WIN_HAMM, frameSize, 14 );
    AUD_ASSERT( hWin );

    // init fft handle
    fft_init( &hFft, nFFT, 15 );
    AUD_ASSERT( hFft );

    // init ifft handle
    ifft_init( &hIfft, nFFT, 15 );
    AUD_ASSERT( hIfft );

    AUD_Int16s *pFrame  = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pFrame );

    // FFT
    AUD_Int32s *pFFTMag  = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTMag );
    AUD_Int32s *pFFTRe   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTRe );
    AUD_Int32s *pFFTIm   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTIm );

    AUD_Int32s *pFFTCleanRe = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanRe );
    AUD_Int32s *pFFTCleanIm = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanIm );

    // noise spectrum
    AUD_Double *pNoiseEn = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pNoiseEn );

    AUD_Double *pNoiseB = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pNoiseB );

    AUD_Double *pXPrev = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pXPrev );

    AUD_Double *pAb = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pAb );

    AUD_Double *pH = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pH );

    AUD_Double *pGammak = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pGammak );

    AUD_Double *pKsi = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pKsi );

    AUD_Double *pLogSigmak = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pLogSigmak );

    AUD_Double *pAlpha = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pAlpha );

    AUD_Int32s *pLinToBark = (AUD_Int32s*)calloc( nSpecLen * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pLinToBark );

    AUD_Int16s *pxOld   = (AUD_Int16s*)calloc( frameOverlap * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pxOld );
    AUD_Int16s *pxClean = (AUD_Int16s*)calloc( nFFT * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pxClean );

    /*
    AUD_Int32s critBandEnds[22] = { 0, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270,
                                    1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700 };
    */
    AUD_Int32s critFFTEnds[CRITICAL_BAND_NUM + 1]  = { 0, 4, 7, 10, 13, 17, 21, 25, 30, 35, 41, 48, 56, 64,
                                                       75, 87, 101, 119, 141, 170, 205, 247, 257 };

    // generate linear->bark transform mapping
    k = 0;
    for ( i = 0; i < CRITICAL_BAND_NUM; i++ )
    {
        while ( k >= critFFTEnds[i] &&  k < critFFTEnds[i + 1] )
        {
            pLinToBark[k] = i;
            k++;
        }
    }

    AUD_Double absThr[CRITICAL_BAND_NUM] = { 38, 31, 22, 18.5, 15.5, 13, 11, 9.5, 8.75, 7.25, 4.75, 2.75,
                                             1.5, 0.5, 0, 0, 0, 0, 2, 7, 12, 15.5 };

    AUD_Double dbOffset[CRITICAL_BAND_NUM];
    AUD_Double sumn[CRITICAL_BAND_NUM];
    AUD_Double spread[CRITICAL_BAND_NUM];

    for ( i = 0; i < CRITICAL_BAND_NUM; i++ )
    {
        absThr[i]   = pow( 10., absThr[i] / 10. ) / nFFT / ( 65535. * 65535. );
        dbOffset[i] = 10. + i;

        sumn[i]   = 0.474 + i;
        spread[i] = pow( 10., ( 15.81 + 7.5 * sumn[i] - 17.5 * sqrt( 1. + sumn[i] * sumn[i] ) ) / 10. );
    }

    AUD_Double dcGain[CRITICAL_BAND_NUM];
    for ( i = 0; i < CRITICAL_BAND_NUM; i++ )
    {
        dcGain[i] = 0.;
        for ( j = 0; j < CRITICAL_BAND_NUM; j++ )
        {
            dcGain[i] += spread[MABS( i - j )];
        }
    }

    AUD_Matrix exPatMatrix;
    exPatMatrix.rows     = CRITICAL_BAND_NUM;
    exPatMatrix.cols     = nSpecLen;
    exPatMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &exPatMatrix );
    AUD_ASSERT( ret == 0 );

    // excitation pattern
    AUD_Int32s index = 0;
    for ( i = 0; i < exPatMatrix.rows; i++ )
    {
        AUD_Double *pExpatRow = exPatMatrix.pDouble + i * exPatMatrix.cols;

        for ( j = 0; j < exPatMatrix.cols; j++ )
        {
            index = MABS( i - pLinToBark[j] );
            pExpatRow[j] = spread[index];
        }
    }

    AUD_Int32s frameNum = (inLen - frameSize) / frameStride + 1;
    AUD_ASSERT( frameNum > nNoiseFrame );

    // compute noise mean
    for ( i = 0; i < nNoiseFrame; i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameSize, pFrame );

        fft_mag( hFft, pFrame, frameSize, pFFTMag );

        for ( j = 0; j < nSpecLen; j++ )
        {
            pNoiseEn[j] += pFFTMag[j] / 32768. * pFFTMag[j] / 32768.;
        }
    }
    for ( j = 0; j < nSpecLen; j++ )
    {
        pNoiseEn[j] /= nNoiseFrame;
    }

    // get cirtical band mean filtered noise power
    AUD_Int32s k1 = 0, k2 = 0;
    for ( i = 0; i < CRITICAL_BAND_NUM; i++ )
    {
        k1 = k2;
        AUD_Double segSum = 0.;
        while ( k2 >= critFFTEnds[i] &&  k2 < critFFTEnds[i + 1] )
        {
            segSum += pNoiseEn[k2];
            k2++;
        }
        segSum /= ( k2 - k1 );

        for ( m = k1; m < k2; m++ )
        {
            pNoiseB[m] = segSum;
        }
    }

#if 0
    AUDLOG( "noise band spectrum:\n" );
    for ( j = 0; j < nSpecLen; j++ )
    {
        AUDLOG( "%.2f, ", pNoiseB[j] );
    }
    AUDLOG( "\n" );
#endif

    AUD_Matrix frameMatrix;
    frameMatrix.rows     = nSpecLen;
    frameMatrix.cols     = 1;
    frameMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &frameMatrix );
    AUD_ASSERT( ret == 0 );
    AUD_Double *pFrameEn = frameMatrix.pDouble;

    AUD_Matrix xMatrix;
    xMatrix.rows     = nSpecLen;
    xMatrix.cols     = 1;
    xMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &xMatrix );
    AUD_ASSERT( ret == 0 );
    AUD_Double *pX = xMatrix.pDouble;

    AUD_Matrix cMatrix;
    cMatrix.rows     = CRITICAL_BAND_NUM;
    cMatrix.cols     = 1;
    cMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &cMatrix );
    AUD_ASSERT( ret == 0 );
    AUD_Double *pC = cMatrix.pDouble;

    AUD_Matrix tMatrix;
    tMatrix.rows     = 1;
    tMatrix.cols     = CRITICAL_BAND_NUM;
    tMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &tMatrix );
    AUD_ASSERT( ret == 0 );
    AUD_Double *pT = tMatrix.pDouble;

    AUD_Matrix tkMatrix;
    tkMatrix.rows     = 1;
    tkMatrix.cols     = nSpecLen;
    tkMatrix.dataType = AUD_DATATYPE_DOUBLE;
    ret = createMatrix( &tkMatrix );
    AUD_ASSERT( ret == 0 );
    AUD_Double *pTk = tkMatrix.pDouble;

    AUD_Double dB0[CRITICAL_BAND_NUM];
    AUD_Double epsilon = pow( 2, -52 );

    #define ESTIMATE_MASKTHRESH( sigMatrix, tkMatrix )\
    do {\
        AUD_Double *pSig = sigMatrix.pDouble; \
        for ( m = 0; m < exPatMatrix.rows; m++ ) \
        { \
            AUD_Double suma = 0.; \
            AUD_Double *pExpatRow = exPatMatrix.pDouble + m * exPatMatrix.cols; \
            for ( n = 0; n < exPatMatrix.cols; n++ ) \
            { \
                suma += pExpatRow[n] * pSig[n]; \
            } \
            pC[m] = suma; \
        } \
        AUD_Double product = 1.; \
        AUD_Double sum     = 0.; \
        for ( m = 0; m < sigMatrix.rows; m++ ) \
        { \
            product *= pSig[m]; \
            sum     += pSig[m]; \
        } \
        AUD_Double power = 1. / sigMatrix.rows;\
        AUD_Double sfmDB = 10. * log10( pow( product, power ) / sum / sigMatrix.rows + epsilon ); \
        AUD_Double alpha = AUD_MIN( 1., sfmDB / (-60.) ); \
        for ( m = 0; m < tMatrix.cols; m++ ) \
        { \
            dB0[m] = dbOffset[m] * alpha + 5.5; \
            pT[m]  = pC[m] / pow( 10., dB0[m] / 10. ) / dcGain[m]; \
            pT[m]  = AUD_MAX( pT[m], absThr[m] ); \
        } \
        for ( m = 0; m < tkMatrix.cols; m++ ) \
        { \
            pTk[m] = pT[pLinToBark[m]]; \
        } \
       } while ( 0 )

    AUD_Double aa    = 0.98;
    AUD_Double mu    = 0.98;
    AUD_Double eta   = 0.15;
    AUD_Double vadDecision;

    k = 0;
    // start processing
    for ( i = 0; i < frameNum; i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameStride, pFrame );

        fft_calc( hFft, pFrame, frameSize, pFFTRe, pFFTIm );

        // compute SNR
        vadDecision = 0.;
        for ( j = 0; j < nSpecLen; j++ )
        {
            pFrameEn[j] = pFFTRe[j] / 32768. * pFFTRe[j] / 32768. + pFFTIm[j] / 32768. * pFFTIm[j] / 32768.;

            pGammak[j]  = AUD_MIN( pFrameEn[j] / pNoiseEn[j], 40. );

            if ( i > 0 )
            {
                pKsi[j] = aa * pXPrev[j] / pNoiseEn[j] + ( 1 - aa ) * AUD_MAX( pGammak[j] - 1., 0. );
            }
            else
            {
                pKsi[j] = aa + ( 1. - aa ) * AUD_MAX( pGammak[j] - 1., 0. );
            }

            pLogSigmak[j] = pGammak[j] * pKsi[j] / ( 1. + pKsi[j] ) - log( 1. + pKsi[j] );

            vadDecision += ( j > 0 ? 2 : 1 ) * pLogSigmak[j];
        }
        vadDecision /= nFFT;

#if 0
        AUDLOG( "X prev:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pXPrev[j] );
        }
        AUDLOG( "\n" );
#endif

#if 0
        AUDLOG( "gamma k:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pGammak[j] );
        }
        AUDLOG( "\n" );
#endif

#if 0
        AUDLOG( "ksi:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pKsi[j] );
        }
        AUDLOG( "\n" );
#endif

#if 0
        AUDLOG( "log sigma k:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pLogSigmak[j] );
        }
        AUDLOG( "\n" );
#endif

        // AUDLOG( "vadDecision: %.2f\n", vadDecision );
        // re-estimate noise
        if ( vadDecision < eta )
        {
            for ( j = 0; j < nSpecLen; j++ )
            {
                pNoiseEn[j] = mu * pNoiseEn[j] + ( 1. - mu ) * pFrameEn[j];
            }

            // re-estimate crital band based noise
            AUD_Int32s k1 = 0, k2 = 0;
            for ( int band = 0; band < CRITICAL_BAND_NUM; band++ )
            {
                k1 = k2;
                AUD_Double segSum = 0.;
                while ( k2 >= critFFTEnds[band] &&  k2 < critFFTEnds[band + 1] )
                {
                    segSum += pNoiseEn[k2];
                    k2++;
                }
                segSum /= ( k2 - k1 );

                for ( m = k1; m < k2; m++ )
                {
                    pNoiseB[m] = segSum;
                }
            }

#if 0
            AUDLOG( "noise band spectrum:\n" );
            for ( j = 0; j < nSpecLen; j++ )
            {
                AUDLOG( "%.2f, ", pNoiseB[j] );
            }
            AUDLOG( "\n" );
#endif
        }

        for ( j = 0; j < nSpecLen; j++ )
        {
            pX[j]     = AUD_MAX( pFrameEn[j] - pNoiseEn[j], 0.001 );
            pXPrev[j] = pFrameEn[j];
        }

        ESTIMATE_MASKTHRESH( xMatrix, tkMatrix );
        for ( int iter = 0; iter < 2; iter++ )
        {
            for ( j = 0; j < nSpecLen; j++ )
            {
                pAb[j]      = pNoiseB[j] + pNoiseB[j] * pNoiseB[j] / pTk[j];
                pFrameEn[j] = pFrameEn[j] * pFrameEn[j] / ( pFrameEn[j] + pAb[j] );
                ESTIMATE_MASKTHRESH( frameMatrix, tkMatrix );

#if 0
                showMatrix( &tMatrix );
#endif
            }
        }

#if 0
        AUDLOG( "tk:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pTk[j] );
        }
        AUDLOG( "\n" );
#endif

        pAlpha[0]      = ( pNoiseB[0] + pTk[0] ) * ( pNoiseB[0] / pTk[0] );
        pH[0]          = pFrameEn[0] / ( pFrameEn[0] + pAlpha[0] );
        pXPrev[0]     *= pH[0] * pH[0];
        pFFTCleanRe[0] = 0;
        pFFTCleanIm[0] = 0;
        for ( j = 1; j < nSpecLen; j++ )
        {
            pAlpha[j] = ( pNoiseB[j] + pTk[j] ) * ( pNoiseB[j] / pTk[j] );

            pH[j]     = pFrameEn[j] / ( pFrameEn[j] + pAlpha[j] );

            pFFTCleanRe[j] = pFFTCleanRe[nFFT - j] = (AUD_Int32s)round( pH[j] * pFFTRe[j] );
            pFFTCleanIm[j]        = (AUD_Int32s)round( pH[j] * pFFTIm[j] );
            pFFTCleanIm[nFFT - j] = -pFFTCleanIm[j];

            pXPrev[j] *= pH[j] * pH[j];
        }

#if 0
        AUDLOG( "denoise transfer function:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pH[j] );
        }
        AUDLOG( "\n" );
#endif

#if 0
        AUDLOG( "clean FFT with phase:\n" );
        for ( j = 0; j < nFFT; j++ )
        {
            AUDLOG( "%d + j%d, ", pFFTCleanRe[j], pFFTCleanIm[j] );
        }
        AUDLOG( "\n" );
#endif

        ifft_real( hIfft, pFFTCleanRe, pFFTCleanIm, nFFT, pxClean );

#if 0
        AUDLOG( "clean speech:\n" );
        for ( j = 0; j < nFFT; j++ )
        {
            AUDLOG( "%d, ", pxClean[j] );
        }
        AUDLOG( "\n" );
#endif

        for ( j = 0; j < frameStride; j++ )
        {
            if ( j < frameOverlap )
            {
                pOutBuf[k + j] = pxOld[j] + pxClean[j];
                pxOld[j] = pxClean[frameStride + j];
            }
            else
            {
                pOutBuf[k + j] = pxClean[j];
            }
        }

        k        += frameStride;
        cleanLen += frameStride;
    }

    // de-emphasis
    // sig_deemphasis( pOutBuf, pOutBuf, cleanLen );

    win16s_free( &hWin );
    fft_free( &hFft );
    ifft_free( &hIfft );

    free( pFrame );
    free( pNoiseEn );
    free( pNoiseB );

    free( pFFTMag );
    free( pFFTRe );
    free( pFFTIm );
    free( pXPrev );
    free( pAb );
    free( pH );

    free( pFFTCleanRe );
    free( pFFTCleanIm );

    free( pxOld );
    free( pxClean );

    free( pGammak );
    free( pKsi );
    free( pLogSigmak );
    free( pAlpha );
    free( pLinToBark );

    ret = createMatrix( &xMatrix );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &exPatMatrix );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &frameMatrix );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &cMatrix );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &tMatrix );
    AUD_ASSERT( ret == 0 );

    ret = destroyMatrix( &tkMatrix );
    AUD_ASSERT( ret == 0 );

    return cleanLen;
}
예제 #12
0
AUD_Int32s denoise_mmse( AUD_Int16s *pInBuf, AUD_Int16s *pOutBuf, AUD_Int32s inLen )
{
    Fft_16s        *hFft        = NULL;
    Ifft_16s       *hIfft       = NULL;
    AUD_Window16s  *hWin        = NULL;

    AUD_Int32s     frameSize    = 512;
    AUD_Int32s     frameStride  = 256;
    AUD_Int32s     frameOverlap = 256;
    AUD_Int32s     nFFT         = frameSize;
    AUD_Int32s     nSpecLen     = nFFT / 2 + 1;
    AUD_Int32s     nNoiseFrame  = (AUD_Int32s)( ( 0.25 * SAMPLE_RATE - frameSize ) / frameStride + 1 );;

    AUD_Int32s     i, j, k;

    AUD_Int32s     cleanLen = 0;

    // pre-emphasis
    // sig_preemphasis( pInBuf, pInBuf, inLen );

    // init hamming module
    win16s_init( &hWin, AUD_WIN_HAMM, frameSize, 14 );
    AUD_ASSERT( hWin );

    // init fft handle
    fft_init( &hFft, nFFT, 15 );
    AUD_ASSERT( hFft );

    // init ifft handle
    ifft_init( &hIfft, nFFT, 15 );
    AUD_ASSERT( hIfft );

    AUD_Int16s *pFrame  = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pFrame );

    // FFT
    AUD_Int32s *pFFTMag  = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTMag );
    AUD_Int32s *pFFTRe   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTRe );
    AUD_Int32s *pFFTIm   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTIm );

    AUD_Int32s *pFFTCleanRe = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanRe );
    AUD_Int32s *pFFTCleanIm = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanIm );

    AUD_Double *pNoiseMean = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pNoiseMean );
    AUD_Int32s *pFFTCleanMag = (AUD_Int32s*)calloc( nSpecLen * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanMag );

    AUD_Int16s *pxOld   = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pxOld );
    AUD_Int16s *pxClean = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pxClean );


    for ( i = 0; (i < nNoiseFrame) && ( (i * frameStride + frameSize) < inLen ); i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameSize, pFrame );

        fft_mag( hFft, pFrame, frameSize, pFFTMag );

        for ( j = 0; j < nSpecLen; j++ )
        {
            pNoiseMean[j] += (AUD_Double)pFFTMag[j];
        }
    }
    // compute noise mean
    for ( j = 0; j < nSpecLen; j++ )
    {
        pNoiseMean[j] /= nNoiseFrame;
    }

    AUD_Double thres = 3.;
    AUD_Double alpha = 2.;
    AUD_Double flr   = 0.002;
    AUD_Double G     = 0.9;
    AUD_Double snr, beta;
    AUD_Double tmp;
    AUD_Double sigEnergy, noiseEnergy;

    k = 0;
    // start processing
    for ( i = 0; (i * frameStride + frameSize) < inLen; i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameStride, pFrame );

        fft_calc( hFft, pFrame, frameSize, pFFTRe, pFFTIm );

        // compute SNR
        sigEnergy   = 0.;
        noiseEnergy = 0.;
        for ( j = 0; j < nSpecLen; j++ )
        {
            tmp          = pFFTRe[j] / 32768. * pFFTRe[j] / 32768. + pFFTIm[j] / 32768. * pFFTIm[j] / 32768.;
            sigEnergy   += tmp;
            noiseEnergy += pNoiseMean[j] / 32768. * pNoiseMean[j] / 32768.;

            tmp = sqrt( tmp ) * 32768.;
            if ( tmp > MAX_INT32S )
            {
                AUD_ECHO
                pFFTMag[j] = MAX_INT32S;
            }
            else
            {
                pFFTMag[j] = (AUD_Int32s)round( tmp );
            }
        }

        snr  = 10. * log10( sigEnergy / noiseEnergy );
        beta = berouti( snr );

        // AUDLOG( "signal energy: %.2f, noise energy: %.2f, snr: %.2f, beta: %.2f\n", sigEnergy, noiseEnergy, snr, beta );

#if 0
        AUDLOG( "noisy FFT:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%d, ", pFFTMag[j] );
        }
        AUDLOG( "\n" );
#endif

        tmp = 0.;
        for ( j = 0; j < nSpecLen; j++ )
        {
            tmp  = AUD_MAX( pow( pFFTMag[j], alpha ) - beta * pow( pNoiseMean[j], alpha ), flr * pow( pNoiseMean[j], alpha ) );

            pFFTCleanMag[j] = (AUD_Int32s)round( pow( tmp, 1. / alpha ) );
        }

        // re-estimate noise
        if ( snr < thres )
        {
            AUD_Double tmpNoise;
            for ( j = 0; j < nSpecLen; j++ )
            {
                tmpNoise = G * pow( pNoiseMean[j], alpha ) + ( 1 - G ) * pow( pFFTMag[j], alpha );
                pNoiseMean[j] = pow( tmpNoise, 1. / alpha );
            }
        }

#if 0
        AUDLOG( "clean FFT:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%d, ", pFFTCleanMag[j] );
        }
        AUDLOG( "\n" );
#endif

        pFFTCleanRe[0] = pFFTCleanMag[0];
        pFFTCleanIm[0] = 0;
        AUD_Double costheta, sintheta;
        for ( j = 1; j < nSpecLen; j++ )
        {
            if ( pFFTMag[j] != 0 )
            {
                costheta = (AUD_Double)pFFTRe[j] / (AUD_Double)pFFTMag[j];
                sintheta = (AUD_Double)pFFTIm[j] / (AUD_Double)pFFTMag[j];
                pFFTCleanRe[nFFT - j] = pFFTCleanRe[j] = (AUD_Int32s)round( costheta * pFFTCleanMag[j] );
                pFFTCleanIm[j]        = (AUD_Int32s)round( sintheta * pFFTCleanMag[j] );
                pFFTCleanIm[nFFT - j] = -pFFTCleanIm[j];
            }
            else
            {
                pFFTCleanRe[nFFT - j] = pFFTCleanRe[j] = pFFTCleanMag[j];
                pFFTCleanIm[nFFT - j] = pFFTCleanIm[j] = 0;
            }
        }

#if 0
        AUDLOG( "clean FFT with phase:\n" );
        for ( j = 0; j < nFFT; j++ )
        {
            AUDLOG( "%d + j%d, ", pFFTCleanRe[j], pFFTCleanIm[j] );
        }
        AUDLOG( "\n" );
#endif

        ifft_real( hIfft, pFFTCleanRe, pFFTCleanIm, nFFT, pxClean );

#if 0
        AUDLOG( "clean speech:\n" );
        for ( j = 0; j < nFFT; j++ )
        {
            AUDLOG( "%d, ", pxClean[j] );
        }
        AUDLOG( "\n" );
#endif

        for ( j = 0; j < frameStride; j++ )
        {
            pOutBuf[k + j] = pxOld[j] + pxClean[j];
            pxOld[j]       = pxClean[frameOverlap + j];
        }

        k        += frameStride;
        cleanLen += frameStride;
    }

    // de-emphasis
    // sig_deemphasis( pOutBuf, pOutBuf, cleanLen );

    win16s_free( &hWin );
    fft_free( &hFft );
    ifft_free( &hIfft );

    free( pFrame );
    free( pNoiseMean );

    free( pFFTMag );
    free( pFFTRe );
    free( pFFTIm );

    free( pFFTCleanMag );
    free( pFFTCleanRe );
    free( pFFTCleanIm );

    free( pxOld );
    free( pxClean );

    return cleanLen;
}
예제 #13
0
AUD_Int32s denoise_wiener( AUD_Int16s *pInBuf, AUD_Int16s *pOutBuf, AUD_Int32s inLen )
{
    Fft_16s        *hFft        = NULL;
    Ifft_16s       *hIfft       = NULL;
    AUD_Window16s  *hWin        = NULL;
    _VAD_Nest      vadState;

    AUD_Int32s     frameSize    = 512;
    AUD_Int32s     frameStride  = 256;
    AUD_Int32s     nFFT         = frameSize;
    AUD_Int32s     nSpecLen     = nFFT / 2 + 1;
    AUD_Int32s     nNoiseFrame  = (AUD_Int32s)( ( 0.25 * SAMPLE_RATE - frameSize ) / frameStride + 1 );

    AUD_Int32s     i, j, k;

    AUD_Int32s     cleanLen = 0;

    vadState.meanEn        = 280;
    vadState.nbSpeechFrame = 0;
    vadState.hangOver      = 0;

    AUD_Int16s *pFrame  = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pFrame );

    AUD_Double *pNoiseMean = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pNoiseMean );

    AUD_Double *pLamdaD = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pLamdaD );

    AUD_Double *pXi = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pXi );

    AUD_Double *pGamma = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pGamma );

    AUD_Double *pG = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pG );

    AUD_Double *pGammaNew = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pGammaNew );

    for ( j = 0; j < nSpecLen; j++ )
    {
        pG[j]         = 1.;
        pGamma[j]     = 1.;
    }

    // FFT
    AUD_Int32s *pFFTMag  = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTMag );
    AUD_Int32s *pFFTRe   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTRe );
    AUD_Int32s *pFFTIm   = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTIm );

    AUD_Double *pFFTCleanMag = (AUD_Double*)calloc( nFFT * sizeof(AUD_Double), 1 );
    AUD_ASSERT( pFFTCleanMag );

    AUD_Int32s *pFFTCleanRe = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanRe );
    AUD_Int32s *pFFTCleanIm = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 );
    AUD_ASSERT( pFFTCleanIm );

    AUD_Int16s *pxClean = (AUD_Int16s*)calloc( nFFT * sizeof(AUD_Int16s), 1 );
    AUD_ASSERT( pxClean );

    memset( pOutBuf, 0, inLen * sizeof(AUD_Int16s) );

    // init hamming module
    win16s_init( &hWin, AUD_WIN_HAMM, frameSize, 14 );
    AUD_ASSERT( hWin );

    // init fft handle
    fft_init( &hFft, nFFT, 15 );
    AUD_ASSERT( hFft );

    // init ifft handle
    ifft_init( &hIfft, nFFT, 15 );
    AUD_ASSERT( hIfft );

    // noise manipulation
    for ( i = 0; (i < nNoiseFrame) && ( (i * frameStride + frameSize) < inLen ); i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameStride, pFrame );

        fft_mag( hFft, pFrame, frameSize, pFFTMag );

        for ( j = 0; j < nSpecLen; j++ )
        {
            pNoiseMean[j] += (AUD_Double)pFFTMag[j];
            pLamdaD[j]    += (AUD_Double)pFFTMag[j] * (AUD_Double)pFFTMag[j];
        }
    }
    // compute noise mean
    for ( j = 0; j < nSpecLen; j++ )
    {
        pNoiseMean[j] /= nNoiseFrame;
        pLamdaD[j]    /= nNoiseFrame;
    }

    AUD_Int32s vadFlag  = 0;
    AUD_Double noiseLen = 9.;
    AUD_Double alpha    = 0.99;
    k = 0;
    for ( i = 0; (i * frameStride + frameSize) < inLen; i++ )
    {
        win16s_calc( hWin, pInBuf + i * frameStride, pFrame );

        fft_calc( hFft, pFrame, frameSize, pFFTRe, pFFTIm );

        for ( j = 0; j < nSpecLen; j++ )
        {
            pFFTMag[j] = (AUD_Int32s)round( sqrt( (AUD_Double)pFFTRe[j] * pFFTRe[j] + (AUD_Double)pFFTIm[j] * pFFTIm[j] ) );
        }

#if 0
        AUDLOG( "noisy FFT:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%d, ", pFFTMag[j] );
        }
        AUDLOG( "\n" );
#endif

        vadFlag = vad_nest( &vadState, pFrame, frameSize );
        if ( vadFlag == 0 )
        {
            for ( j = 0; j < nSpecLen; j++ )
            {
                pNoiseMean[j] = ( noiseLen * pNoiseMean[j] + (AUD_Double)pFFTMag[j] ) / ( noiseLen + 1. );
                pLamdaD[j]    = ( noiseLen * pLamdaD[j] + (AUD_Double)pFFTMag[j] * pFFTMag[j] ) / ( noiseLen + 1. );
            }
        }

        for ( j = 0; j < nSpecLen; j++ )
        {
            pGammaNew[j] = (AUD_Double)pFFTMag[j] * pFFTMag[j] / pLamdaD[j];
            pXi[j]       = alpha * pG[j] * pG[j] * pGamma[j] + ( 1. - alpha ) * AUD_MAX( pGammaNew[j] - 1., 0. );
            pGamma[j]    = pGammaNew[j];
            pG[j]        = pXi[j]  / ( pXi[j] + 1. );

            pFFTCleanMag[j] = pG[j] * pFFTMag[j];
        }

#if 0
        AUDLOG( "clean FFT:\n" );
        for ( j = 0; j < nSpecLen; j++ )
        {
            AUDLOG( "%.2f, ", pFFTCleanMag[j] );
        }
        AUDLOG( "\n" );
#endif

        // combine to real/im part of IFFT
        pFFTCleanRe[0] = pFFTCleanMag[0];
        pFFTCleanIm[0] = 0;
        AUD_Double costheta, sintheta;
        for ( j = 1; j < nSpecLen; j++ )
        {
            if ( pFFTMag[j] != 0 )
            {
                costheta = (AUD_Double)pFFTRe[j] / (AUD_Double)pFFTMag[j];
                sintheta = (AUD_Double)pFFTIm[j] / (AUD_Double)pFFTMag[j];
                pFFTCleanRe[nFFT - j] = pFFTCleanRe[j] = (AUD_Int32s)round( costheta * pFFTCleanMag[j] );
                pFFTCleanIm[j] = (AUD_Int32s)round( sintheta * pFFTCleanMag[j] );
                pFFTCleanIm[nFFT - j] = -pFFTCleanIm[j];
            }
            else
            {
                pFFTCleanRe[nFFT - j] = pFFTCleanRe[j] = pFFTCleanMag[j];
                pFFTCleanIm[nFFT - j] = pFFTCleanIm[j] = 0;
            }
        }

        ifft_real( hIfft, pFFTCleanRe, pFFTCleanIm, nFFT, pxClean );

#if 0
        AUDLOG( "clean FFT with phase:\n" );
        for ( j = 0; j < nFFT; j++ )
        {
            AUDLOG( "%d + j%d, ", pFFTCleanRe[j], pFFTCleanIm[j] );
        }
        AUDLOG( "\n" );
#endif

        for ( j = 0; j < frameSize; j++ )
        {
            pOutBuf[k + j] = pOutBuf[k + j] + pxClean[j];
        }

        k        += frameStride;
        cleanLen += frameStride;
    }

    win16s_free( &hWin );
    fft_free( &hFft );
    ifft_free( &hIfft );

    free( pFrame );
    free( pNoiseMean );
    free( pLamdaD );
    free( pXi );
    free( pGamma );
    free( pG );
    free( pGammaNew );

    free( pFFTMag );
    free( pFFTRe );
    free( pFFTIm );

    free( pFFTCleanMag );
    free( pFFTCleanRe );
    free( pFFTCleanIm );

    free( pxClean );

    return cleanLen;
}