void AUD_FFMPEGReader::read(int& length, bool& eos, sample_t* buffer) { // read packages and decode them AVPacket packet; int data_size = 0; int pkgbuf_pos; int left = length; int sample_size = AUD_DEVICE_SAMPLE_SIZE(m_specs); sample_t* buf = buffer; pkgbuf_pos = m_pkgbuf_left; m_pkgbuf_left = 0; // there may still be data in the buffer from the last call if(pkgbuf_pos > 0) { data_size = AUD_MIN(pkgbuf_pos, left * sample_size); m_convert((data_t*) buf, (data_t*) m_pkgbuf.getBuffer(), data_size / AUD_FORMAT_SIZE(m_specs.format)); buf += data_size / AUD_FORMAT_SIZE(m_specs.format); left -= data_size/sample_size; } // for each frame read as long as there isn't enough data already while((left > 0) && (av_read_frame(m_formatCtx, &packet) >= 0)) { // is it a frame from the audio stream? if(packet.stream_index == m_stream) { // decode the package pkgbuf_pos = decode(packet, m_pkgbuf); // copy to output buffer data_size = AUD_MIN(pkgbuf_pos, left * sample_size); m_convert((data_t*) buf, (data_t*) m_pkgbuf.getBuffer(), data_size / AUD_FORMAT_SIZE(m_specs.format)); buf += data_size / AUD_FORMAT_SIZE(m_specs.format); left -= data_size/sample_size; } av_free_packet(&packet); } // read more data than necessary? if(pkgbuf_pos > data_size) { m_pkgbuf_left = pkgbuf_pos-data_size; memmove(m_pkgbuf.getBuffer(), ((data_t*)m_pkgbuf.getBuffer())+data_size, pkgbuf_pos-data_size); } if((eos = (left > 0))) length -= left; m_position += length; }
// Returns the smaller of the two wrapped readers' lengths, or -1 as soon as
// either reader reports a negative length.
int AUD_SuperposeReader::getLength() const
{
	const int first = m_reader1->getLength();
	const int second = m_reader2->getLength();

	if(first >= 0 && second >= 0)
		return AUD_MIN(first, second);

	return -1;
}
// Adds `buffer`, scaled by `volume`, onto the internal mixing buffer.
//
// buffer: interleaved source samples, read from index 0.
// start:  first frame of the mixing buffer that is written.
// length: number of frames to mix; the range is cut off at m_length frames.
// volume: factor applied to every source sample.
void AUD_Mixer::mix(sample_t* buffer, int start, int length, float volume)
{
	sample_t* target = m_buffer.getBuffer();

	// frames -> interleaved sample counts
	const int count = (AUD_MIN(m_length, length + start) - start) * m_specs.channels;
	const int offset = start * m_specs.channels;

	for(int n = 0; n < count; ++n)
		target[offset + n] += buffer[n] * volume;
}
int AUD_FFMPEGReader::read_packet(void* opaque, uint8_t* buf, int buf_size) { AUD_FFMPEGReader* reader = reinterpret_cast<AUD_FFMPEGReader*>(opaque); int size = AUD_MIN(buf_size, reader->m_membuffer->getSize() - reader->m_membufferpos); if(size < 0) return -1; memcpy(buf, ((data_t*)reader->m_membuffer->getBuffer()) + reader->m_membufferpos, size); reader->m_membufferpos += size; return size; }
void AUD_Buffer::resize(int size, bool keep) { if(keep) { data_t* buffer = (data_t*) malloc(size + 16); memcpy(AUD_ALIGN(buffer), AUD_ALIGN(m_buffer), AUD_MIN(size, m_size)); free(m_buffer); m_buffer = buffer; } else m_buffer = (data_t*) realloc(m_buffer, size + 16); m_size = size; }
// Ranks the values of pVector in descending order and stores the indices of
// the pSortedIdx->len largest ones in pSortedIdx->pInt32s (rank 0 = largest).
//
// pVector:    input values, dataType AUD_DATATYPE_DOUBLE.
// pSortedIdx: output indices, dataType AUD_DATATYPE_INT32S,
//             0 < pSortedIdx->len <= pVector->len.
// returns:    always 0.
//
// A value only moves ahead of an already ranked one when it is strictly
// greater, so among equal values the lower index keeps the better rank.
AUD_Int32s sortVector( AUD_Vector *pVector, AUD_Vector *pSortedIdx )
{
    AUD_ASSERT( pVector && pSortedIdx );
    AUD_ASSERT( pSortedIdx->len <= pVector->len && pSortedIdx->len > 0 );
    AUD_ASSERT( pVector->dataType == AUD_DATATYPE_DOUBLE );
    AUD_ASSERT( pSortedIdx->dataType == AUD_DATATYPE_INT32S );

    AUD_Int32s i, j, k;
    AUD_Int32s filled = 1; // number of ranks written so far

    pSortedIdx->pInt32s[0] = 0;

    for ( i = 1; i < pVector->len; i++ )
    {
        // first rank whose value is strictly smaller than the candidate
        for ( j = 0; j < filled; j++ )
        {
            if ( pVector->pDouble[i] > pVector->pDouble[pSortedIdx->pInt32s[j]] )
            {
                break;
            }
        }

        // candidate does not make it into the ranking
        if ( j >= pSortedIdx->len )
        {
            continue;
        }

        // shift only the ranks written so far; when the ranking is full the
        // last one drops out. Slots that were never written are not touched.
        k = ( filled < pSortedIdx->len ) ? filled : pSortedIdx->len - 1;
        for ( ; k > j; k-- )
        {
            pSortedIdx->pInt32s[k] = pSortedIdx->pInt32s[k - 1];
        }
        pSortedIdx->pInt32s[j] = i;

        if ( filled < pSortedIdx->len )
        {
            filled++;
        }
    }

#if 0
    AUDLOG( "\n" );
    for ( i = 0; i < pSortedIdx->len; i++ )
    {
        AUDLOG( "%f, ", pVector->pDouble[pSortedIdx->pInt32s[i]] );
    }
    AUDLOG( "\n" );
#endif

    return 0;
}
void AUD_FFMPEGWriter::write(unsigned int length, sample_t* buffer) { unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs); if(m_input_size) { sample_t* inbuf = m_input_buffer.getBuffer(); while(length) { unsigned int len = AUD_MIN(m_input_size - m_input_samples, length); memcpy(inbuf + m_input_samples * m_specs.channels, buffer, len * samplesize); buffer += len * m_specs.channels; m_input_samples += len; m_position += len; length -= len; if(m_input_samples == m_input_size) { encode(inbuf); m_input_samples = 0; } } } else // PCM data, can write directly! { int samplesize = AUD_SAMPLE_SIZE(m_specs); if(m_output_buffer.getSize() != length * m_specs.channels * m_codecCtx->bits_per_coded_sample / 8) m_output_buffer.resize(length * m_specs.channels * m_codecCtx->bits_per_coded_sample / 8); m_input_buffer.assureSize(length * AUD_MAX(AUD_DEVICE_SAMPLE_SIZE(m_specs), samplesize)); sample_t* buf = m_input_buffer.getBuffer(); m_convert(reinterpret_cast<data_t*>(buf), reinterpret_cast<data_t*>(buffer), length * m_specs.channels); encode(buf); m_position += length; } }
// Callback that fills the output port buffers with `length` frames.
//
// length:  number of frames requested for every port.
// data:    the AUD_JackDevice instance.
// returns: always 0.
//
// While m_sync is set, silence is written. Otherwise each port receives as
// many float samples as every ring buffer can supply (at most `length`), the
// rest of the port buffer is zeroed, and the mixing condition is signalled if
// the mixing lock can be taken without blocking.
int AUD_JackDevice::jack_mix(jack_nframes_t length, void *data)
{
	AUD_JackDevice* self = (AUD_JackDevice*) data;
	const unsigned int channels = self->m_specs.channels;

	if(self->m_sync)
	{
		// play silence while syncing
		for(unsigned int c = 0; c < channels; c++)
			memset(jack_port_get_buffer(self->m_ports[c], length), 0, length * sizeof(float));

		return 0;
	}

	// smallest number of readable bytes over all ring buffers
	size_t readsamples = jack_ringbuffer_read_space(self->m_ringbuffers[0]);
	for(unsigned int c = 1; c < channels; c++)
	{
		const size_t space = jack_ringbuffer_read_space(self->m_ringbuffers[c]);
		if(space < readsamples)
			readsamples = space;
	}

	// bytes -> samples, limited to what was requested
	readsamples = AUD_MIN(readsamples / sizeof(float), length);

	for(unsigned int c = 0; c < channels; c++)
	{
		char* port = (char*) jack_port_get_buffer(self->m_ports[c], length);

		jack_ringbuffer_read(self->m_ringbuffers[c], port, readsamples * sizeof(float));

		// pad with silence when the ring buffers ran short
		if(readsamples < length)
			memset(port + readsamples * sizeof(float), 0, (length - readsamples) * sizeof(float));
	}

	// wake the mixing side, but never block in here
	if(pthread_mutex_trylock(&(self->m_mixingLock)) == 0)
	{
		pthread_cond_signal(&(self->m_mixingCondition));
		pthread_mutex_unlock(&(self->m_mixingLock));
	}

	return 0;
}
void AUD_SoftwareDevice::AUD_SoftwareHandle::update() { int flags = 0; AUD_Vector3 SL; if(m_relative) SL = -m_location; else SL = m_device->m_location - m_location; float distance = SL * SL; if(distance > 0) distance = sqrt(distance); else flags |= AUD_RENDER_DOPPLER | AUD_RENDER_DISTANCE; if(m_pitch->getSpecs().channels != AUD_CHANNELS_MONO) { m_volume = m_user_volume; m_pitch->setPitch(m_user_pitch); return; } flags = ~(flags | m_flags | m_device->m_flags); // Doppler and Pitch if(flags & AUD_RENDER_DOPPLER) { float vls; if(m_relative) vls = 0; else vls = SL * m_device->m_velocity / distance; float vss = SL * m_velocity / distance; float max = m_device->m_speed_of_sound / m_device->m_doppler_factor; if(vss >= max) { m_pitch->setPitch(AUD_PITCH_MAX); } else { if(vls > max) vls = max; m_pitch->setPitch((m_device->m_speed_of_sound - m_device->m_doppler_factor * vls) / (m_device->m_speed_of_sound - m_device->m_doppler_factor * vss) * m_user_pitch); } } else m_pitch->setPitch(m_user_pitch); if(flags & AUD_RENDER_VOLUME) { // Distance if(flags & AUD_RENDER_DISTANCE) { if(m_device->m_distance_model == AUD_DISTANCE_MODEL_INVERSE_CLAMPED || m_device->m_distance_model == AUD_DISTANCE_MODEL_LINEAR_CLAMPED || m_device->m_distance_model == AUD_DISTANCE_MODEL_EXPONENT_CLAMPED) { distance = AUD_MAX(AUD_MIN(m_distance_max, distance), m_distance_reference); } switch(m_device->m_distance_model) { case AUD_DISTANCE_MODEL_INVERSE: case AUD_DISTANCE_MODEL_INVERSE_CLAMPED: m_volume = m_distance_reference / (m_distance_reference + m_attenuation * (distance - m_distance_reference)); break; case AUD_DISTANCE_MODEL_LINEAR: case AUD_DISTANCE_MODEL_LINEAR_CLAMPED: { float temp = m_distance_max - m_distance_reference; if(temp == 0) { if(distance > m_distance_reference) m_volume = 0.0f; else m_volume = 1.0f; } else m_volume = 1.0f - m_attenuation * (distance - m_distance_reference) / (m_distance_max - m_distance_reference); break; } case AUD_DISTANCE_MODEL_EXPONENT: case 
AUD_DISTANCE_MODEL_EXPONENT_CLAMPED: if(m_distance_reference == 0) m_volume = 0; else m_volume = pow(distance / m_distance_reference, -m_attenuation); break; default: m_volume = 1.0f; } } else m_volume = 1.0f; // Cone if(flags & AUD_RENDER_CONE) { AUD_Vector3 SZ = m_orientation.getLookAt(); float phi = acos(float(SZ * SL / (SZ.length() * SL.length()))); float t = (phi - m_cone_angle_inner)/(m_cone_angle_outer - m_cone_angle_inner); if(t > 0) { if(t > 1) m_volume *= m_cone_volume_outer; else m_volume *= 1 + t * (m_cone_volume_outer - 1); } } if(m_volume > m_volume_max) m_volume = m_volume_max; else if(m_volume < m_volume_min) m_volume = m_volume_min; // Volume m_volume *= m_user_volume; } // 3D Cue AUD_Quaternion orientation; if(!m_relative) orientation = m_device->m_orientation; AUD_Vector3 Z = orientation.getLookAt(); AUD_Vector3 N = orientation.getUp(); AUD_Vector3 A = N * ((SL * N) / (N * N)) - SL; float Asquare = A * A; if(Asquare > 0) { float phi = acos(float(Z * A / (Z.length() * sqrt(Asquare)))); if(N.cross(Z) * A > 0) phi = -phi; m_mapper->setMonoAngle(phi); } else m_mapper->setMonoAngle(m_relative ? m_user_pan * M_PI / 2.0 : 0); }
void AUD_SequencerReader::read(int& length, bool& eos, sample_t* buffer) { AUD_MutexLock lock(*m_sequence); if(m_sequence->m_status != m_status) { m_device.changeSpecs(m_sequence->m_specs); m_device.setSpeedOfSound(m_sequence->m_speed_of_sound); m_device.setDistanceModel(m_sequence->m_distance_model); m_device.setDopplerFactor(m_sequence->m_doppler_factor); m_status = m_sequence->m_status; } if(m_sequence->m_entry_status != m_entry_status) { std::list<boost::shared_ptr<AUD_SequencerHandle> > handles; AUD_HandleIterator hit = m_handles.begin(); AUD_EntryIterator eit = m_sequence->m_entries.begin(); int result; boost::shared_ptr<AUD_SequencerHandle> handle; while(hit != m_handles.end() && eit != m_sequence->m_entries.end()) { handle = *hit; boost::shared_ptr<AUD_SequencerEntry> entry = *eit; result = handle->compare(entry); if(result < 0) { try { handle = boost::shared_ptr<AUD_SequencerHandle>(new AUD_SequencerHandle(entry, m_device)); handles.push_back(handle); } catch(AUD_Exception&) { } eit++; } else if(result == 0) { handles.push_back(handle); hit++; eit++; } else { handle->stop(); hit++; } } while(hit != m_handles.end()) { (*hit)->stop(); hit++; } while(eit != m_sequence->m_entries.end()) { try { handle = boost::shared_ptr<AUD_SequencerHandle>(new AUD_SequencerHandle(*eit, m_device)); handles.push_back(handle); } catch(AUD_Exception&) { } eit++; } m_handles = handles; m_entry_status = m_sequence->m_entry_status; } AUD_Specs specs = m_sequence->m_specs; int pos = 0; float time = float(m_position) / float(specs.rate); float volume, frame; int len, cfra; AUD_Vector3 v, v2; AUD_Quaternion q; while(pos < length) { frame = time * m_sequence->m_fps; cfra = int(floor(frame)); len = int(ceil((cfra + 1) / m_sequence->m_fps * specs.rate)) - m_position; len = AUD_MIN(length - pos, len); len = AUD_MAX(len, 1); for(AUD_HandleIterator it = m_handles.begin(); it != m_handles.end(); it++) { (*it)->update(time, frame, m_sequence->m_fps); } m_sequence->m_volume.read(frame, 
&volume); if(m_sequence->m_muted) volume = 0.0f; m_device.setVolume(volume); m_sequence->m_orientation.read(frame, q.get()); m_device.setListenerOrientation(q); m_sequence->m_location.read(frame, v.get()); m_device.setListenerLocation(v); m_sequence->m_location.read(frame + 1, v2.get()); v2 -= v; m_device.setListenerVelocity(v2 * m_sequence->m_fps); m_device.read(reinterpret_cast<data_t*>(buffer + specs.channels * pos), len); pos += len; time += float(len) / float(specs.rate); } m_position += length; eos = false; }
AUD_Int32s denoise_aud( AUD_Int16s *pInBuf, AUD_Int16s *pOutBuf, AUD_Int32s inLen ) { Fft_16s *hFft = NULL; Ifft_16s *hIfft = NULL; AUD_Window16s *hWin = NULL; AUD_Int32s frameSize = 512; AUD_Int32s frameStride = 256; AUD_Int32s frameOverlap = 256; AUD_Int32s nFFT = frameSize; AUD_Int32s nSpecLen = nFFT / 2 + 1; AUD_Int32s nNoiseFrame = 6; // (AUD_Int32s)( ( 0.25 * SAMPLE_RATE - frameSize ) / frameStride + 1 ); AUD_Int32s i, j, k, m, n, ret; AUD_Int32s cleanLen = 0; // pre-emphasis // sig_preemphasis( pInBuf, pInBuf, inLen ); // init hamming module win16s_init( &hWin, AUD_WIN_HAMM, frameSize, 14 ); AUD_ASSERT( hWin ); // init fft handle fft_init( &hFft, nFFT, 15 ); AUD_ASSERT( hFft ); // init ifft handle ifft_init( &hIfft, nFFT, 15 ); AUD_ASSERT( hIfft ); AUD_Int16s *pFrame = (AUD_Int16s*)calloc( frameSize * sizeof(AUD_Int16s), 1 ); AUD_ASSERT( pFrame ); // FFT AUD_Int32s *pFFTMag = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pFFTMag ); AUD_Int32s *pFFTRe = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pFFTRe ); AUD_Int32s *pFFTIm = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pFFTIm ); AUD_Int32s *pFFTCleanRe = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pFFTCleanRe ); AUD_Int32s *pFFTCleanIm = (AUD_Int32s*)calloc( nFFT * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pFFTCleanIm ); // noise spectrum AUD_Double *pNoiseEn = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pNoiseEn ); AUD_Double *pNoiseB = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pNoiseB ); AUD_Double *pXPrev = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pXPrev ); AUD_Double *pAb = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pAb ); AUD_Double *pH = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pH ); AUD_Double *pGammak = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pGammak ); AUD_Double 
*pKsi = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pKsi ); AUD_Double *pLogSigmak = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pLogSigmak ); AUD_Double *pAlpha = (AUD_Double*)calloc( nSpecLen * sizeof(AUD_Double), 1 ); AUD_ASSERT( pAlpha ); AUD_Int32s *pLinToBark = (AUD_Int32s*)calloc( nSpecLen * sizeof(AUD_Int32s), 1 ); AUD_ASSERT( pLinToBark ); AUD_Int16s *pxOld = (AUD_Int16s*)calloc( frameOverlap * sizeof(AUD_Int16s), 1 ); AUD_ASSERT( pxOld ); AUD_Int16s *pxClean = (AUD_Int16s*)calloc( nFFT * sizeof(AUD_Int16s), 1 ); AUD_ASSERT( pxClean ); /* AUD_Int32s critBandEnds[22] = { 0, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700 }; */ AUD_Int32s critFFTEnds[CRITICAL_BAND_NUM + 1] = { 0, 4, 7, 10, 13, 17, 21, 25, 30, 35, 41, 48, 56, 64, 75, 87, 101, 119, 141, 170, 205, 247, 257 }; // generate linear->bark transform mapping k = 0; for ( i = 0; i < CRITICAL_BAND_NUM; i++ ) { while ( k >= critFFTEnds[i] && k < critFFTEnds[i + 1] ) { pLinToBark[k] = i; k++; } } AUD_Double absThr[CRITICAL_BAND_NUM] = { 38, 31, 22, 18.5, 15.5, 13, 11, 9.5, 8.75, 7.25, 4.75, 2.75, 1.5, 0.5, 0, 0, 0, 0, 2, 7, 12, 15.5 }; AUD_Double dbOffset[CRITICAL_BAND_NUM]; AUD_Double sumn[CRITICAL_BAND_NUM]; AUD_Double spread[CRITICAL_BAND_NUM]; for ( i = 0; i < CRITICAL_BAND_NUM; i++ ) { absThr[i] = pow( 10., absThr[i] / 10. ) / nFFT / ( 65535. * 65535. ); dbOffset[i] = 10. + i; sumn[i] = 0.474 + i; spread[i] = pow( 10., ( 15.81 + 7.5 * sumn[i] - 17.5 * sqrt( 1. + sumn[i] * sumn[i] ) ) / 10. 
); } AUD_Double dcGain[CRITICAL_BAND_NUM]; for ( i = 0; i < CRITICAL_BAND_NUM; i++ ) { dcGain[i] = 0.; for ( j = 0; j < CRITICAL_BAND_NUM; j++ ) { dcGain[i] += spread[MABS( i - j )]; } } AUD_Matrix exPatMatrix; exPatMatrix.rows = CRITICAL_BAND_NUM; exPatMatrix.cols = nSpecLen; exPatMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &exPatMatrix ); AUD_ASSERT( ret == 0 ); // excitation pattern AUD_Int32s index = 0; for ( i = 0; i < exPatMatrix.rows; i++ ) { AUD_Double *pExpatRow = exPatMatrix.pDouble + i * exPatMatrix.cols; for ( j = 0; j < exPatMatrix.cols; j++ ) { index = MABS( i - pLinToBark[j] ); pExpatRow[j] = spread[index]; } } AUD_Int32s frameNum = (inLen - frameSize) / frameStride + 1; AUD_ASSERT( frameNum > nNoiseFrame ); // compute noise mean for ( i = 0; i < nNoiseFrame; i++ ) { win16s_calc( hWin, pInBuf + i * frameSize, pFrame ); fft_mag( hFft, pFrame, frameSize, pFFTMag ); for ( j = 0; j < nSpecLen; j++ ) { pNoiseEn[j] += pFFTMag[j] / 32768. * pFFTMag[j] / 32768.; } } for ( j = 0; j < nSpecLen; j++ ) { pNoiseEn[j] /= nNoiseFrame; } // get cirtical band mean filtered noise power AUD_Int32s k1 = 0, k2 = 0; for ( i = 0; i < CRITICAL_BAND_NUM; i++ ) { k1 = k2; AUD_Double segSum = 0.; while ( k2 >= critFFTEnds[i] && k2 < critFFTEnds[i + 1] ) { segSum += pNoiseEn[k2]; k2++; } segSum /= ( k2 - k1 ); for ( m = k1; m < k2; m++ ) { pNoiseB[m] = segSum; } } #if 0 AUDLOG( "noise band spectrum:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pNoiseB[j] ); } AUDLOG( "\n" ); #endif AUD_Matrix frameMatrix; frameMatrix.rows = nSpecLen; frameMatrix.cols = 1; frameMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &frameMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double *pFrameEn = frameMatrix.pDouble; AUD_Matrix xMatrix; xMatrix.rows = nSpecLen; xMatrix.cols = 1; xMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &xMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double *pX = xMatrix.pDouble; AUD_Matrix cMatrix; cMatrix.rows = CRITICAL_BAND_NUM; 
cMatrix.cols = 1; cMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &cMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double *pC = cMatrix.pDouble; AUD_Matrix tMatrix; tMatrix.rows = 1; tMatrix.cols = CRITICAL_BAND_NUM; tMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &tMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double *pT = tMatrix.pDouble; AUD_Matrix tkMatrix; tkMatrix.rows = 1; tkMatrix.cols = nSpecLen; tkMatrix.dataType = AUD_DATATYPE_DOUBLE; ret = createMatrix( &tkMatrix ); AUD_ASSERT( ret == 0 ); AUD_Double *pTk = tkMatrix.pDouble; AUD_Double dB0[CRITICAL_BAND_NUM]; AUD_Double epsilon = pow( 2, -52 ); #define ESTIMATE_MASKTHRESH( sigMatrix, tkMatrix )\ do {\ AUD_Double *pSig = sigMatrix.pDouble; \ for ( m = 0; m < exPatMatrix.rows; m++ ) \ { \ AUD_Double suma = 0.; \ AUD_Double *pExpatRow = exPatMatrix.pDouble + m * exPatMatrix.cols; \ for ( n = 0; n < exPatMatrix.cols; n++ ) \ { \ suma += pExpatRow[n] * pSig[n]; \ } \ pC[m] = suma; \ } \ AUD_Double product = 1.; \ AUD_Double sum = 0.; \ for ( m = 0; m < sigMatrix.rows; m++ ) \ { \ product *= pSig[m]; \ sum += pSig[m]; \ } \ AUD_Double power = 1. / sigMatrix.rows;\ AUD_Double sfmDB = 10. * log10( pow( product, power ) / sum / sigMatrix.rows + epsilon ); \ AUD_Double alpha = AUD_MIN( 1., sfmDB / (-60.) ); \ for ( m = 0; m < tMatrix.cols; m++ ) \ { \ dB0[m] = dbOffset[m] * alpha + 5.5; \ pT[m] = pC[m] / pow( 10., dB0[m] / 10. ) / dcGain[m]; \ pT[m] = AUD_MAX( pT[m], absThr[m] ); \ } \ for ( m = 0; m < tkMatrix.cols; m++ ) \ { \ pTk[m] = pT[pLinToBark[m]]; \ } \ } while ( 0 ) AUD_Double aa = 0.98; AUD_Double mu = 0.98; AUD_Double eta = 0.15; AUD_Double vadDecision; k = 0; // start processing for ( i = 0; i < frameNum; i++ ) { win16s_calc( hWin, pInBuf + i * frameStride, pFrame ); fft_calc( hFft, pFrame, frameSize, pFFTRe, pFFTIm ); // compute SNR vadDecision = 0.; for ( j = 0; j < nSpecLen; j++ ) { pFrameEn[j] = pFFTRe[j] / 32768. * pFFTRe[j] / 32768. + pFFTIm[j] / 32768. 
* pFFTIm[j] / 32768.; pGammak[j] = AUD_MIN( pFrameEn[j] / pNoiseEn[j], 40. ); if ( i > 0 ) { pKsi[j] = aa * pXPrev[j] / pNoiseEn[j] + ( 1 - aa ) * AUD_MAX( pGammak[j] - 1., 0. ); } else { pKsi[j] = aa + ( 1. - aa ) * AUD_MAX( pGammak[j] - 1., 0. ); } pLogSigmak[j] = pGammak[j] * pKsi[j] / ( 1. + pKsi[j] ) - log( 1. + pKsi[j] ); vadDecision += ( j > 0 ? 2 : 1 ) * pLogSigmak[j]; } vadDecision /= nFFT; #if 0 AUDLOG( "X prev:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pXPrev[j] ); } AUDLOG( "\n" ); #endif #if 0 AUDLOG( "gamma k:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pGammak[j] ); } AUDLOG( "\n" ); #endif #if 0 AUDLOG( "ksi:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pKsi[j] ); } AUDLOG( "\n" ); #endif #if 0 AUDLOG( "log sigma k:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pLogSigmak[j] ); } AUDLOG( "\n" ); #endif // AUDLOG( "vadDecision: %.2f\n", vadDecision ); // re-estimate noise if ( vadDecision < eta ) { for ( j = 0; j < nSpecLen; j++ ) { pNoiseEn[j] = mu * pNoiseEn[j] + ( 1. 
- mu ) * pFrameEn[j]; } // re-estimate crital band based noise AUD_Int32s k1 = 0, k2 = 0; for ( int band = 0; band < CRITICAL_BAND_NUM; band++ ) { k1 = k2; AUD_Double segSum = 0.; while ( k2 >= critFFTEnds[band] && k2 < critFFTEnds[band + 1] ) { segSum += pNoiseEn[k2]; k2++; } segSum /= ( k2 - k1 ); for ( m = k1; m < k2; m++ ) { pNoiseB[m] = segSum; } } #if 0 AUDLOG( "noise band spectrum:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pNoiseB[j] ); } AUDLOG( "\n" ); #endif } for ( j = 0; j < nSpecLen; j++ ) { pX[j] = AUD_MAX( pFrameEn[j] - pNoiseEn[j], 0.001 ); pXPrev[j] = pFrameEn[j]; } ESTIMATE_MASKTHRESH( xMatrix, tkMatrix ); for ( int iter = 0; iter < 2; iter++ ) { for ( j = 0; j < nSpecLen; j++ ) { pAb[j] = pNoiseB[j] + pNoiseB[j] * pNoiseB[j] / pTk[j]; pFrameEn[j] = pFrameEn[j] * pFrameEn[j] / ( pFrameEn[j] + pAb[j] ); ESTIMATE_MASKTHRESH( frameMatrix, tkMatrix ); #if 0 showMatrix( &tMatrix ); #endif } } #if 0 AUDLOG( "tk:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pTk[j] ); } AUDLOG( "\n" ); #endif pAlpha[0] = ( pNoiseB[0] + pTk[0] ) * ( pNoiseB[0] / pTk[0] ); pH[0] = pFrameEn[0] / ( pFrameEn[0] + pAlpha[0] ); pXPrev[0] *= pH[0] * pH[0]; pFFTCleanRe[0] = 0; pFFTCleanIm[0] = 0; for ( j = 1; j < nSpecLen; j++ ) { pAlpha[j] = ( pNoiseB[j] + pTk[j] ) * ( pNoiseB[j] / pTk[j] ); pH[j] = pFrameEn[j] / ( pFrameEn[j] + pAlpha[j] ); pFFTCleanRe[j] = pFFTCleanRe[nFFT - j] = (AUD_Int32s)round( pH[j] * pFFTRe[j] ); pFFTCleanIm[j] = (AUD_Int32s)round( pH[j] * pFFTIm[j] ); pFFTCleanIm[nFFT - j] = -pFFTCleanIm[j]; pXPrev[j] *= pH[j] * pH[j]; } #if 0 AUDLOG( "denoise transfer function:\n" ); for ( j = 0; j < nSpecLen; j++ ) { AUDLOG( "%.2f, ", pH[j] ); } AUDLOG( "\n" ); #endif #if 0 AUDLOG( "clean FFT with phase:\n" ); for ( j = 0; j < nFFT; j++ ) { AUDLOG( "%d + j%d, ", pFFTCleanRe[j], pFFTCleanIm[j] ); } AUDLOG( "\n" ); #endif ifft_real( hIfft, pFFTCleanRe, pFFTCleanIm, nFFT, pxClean ); #if 0 AUDLOG( "clean speech:\n" ); for ( j = 0; j < nFFT; 
j++ ) { AUDLOG( "%d, ", pxClean[j] ); } AUDLOG( "\n" ); #endif for ( j = 0; j < frameStride; j++ ) { if ( j < frameOverlap ) { pOutBuf[k + j] = pxOld[j] + pxClean[j]; pxOld[j] = pxClean[frameStride + j]; } else { pOutBuf[k + j] = pxClean[j]; } } k += frameStride; cleanLen += frameStride; } // de-emphasis // sig_deemphasis( pOutBuf, pOutBuf, cleanLen ); win16s_free( &hWin ); fft_free( &hFft ); ifft_free( &hIfft ); free( pFrame ); free( pNoiseEn ); free( pNoiseB ); free( pFFTMag ); free( pFFTRe ); free( pFFTIm ); free( pXPrev ); free( pAb ); free( pH ); free( pFFTCleanRe ); free( pFFTCleanIm ); free( pxOld ); free( pxClean ); free( pGammak ); free( pKsi ); free( pLogSigmak ); free( pAlpha ); free( pLinToBark ); ret = createMatrix( &xMatrix ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &exPatMatrix ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &frameMatrix ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &cMatrix ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &tMatrix ); AUD_ASSERT( ret == 0 ); ret = destroyMatrix( &tkMatrix ); AUD_ASSERT( ret == 0 ); return cleanLen; }