void AudioScope::addLastFrameRepeatedWithFadeToScope(int samplesPerChannel) {
    const int16_t* lastFrameData = reinterpret_cast<const int16_t*>(_scopeLastFrame.data());

    int samplesRemaining = samplesPerChannel;
    int indexOfRepeat = 0;
    do {
        int samplesToWriteThisIteration = std::min(samplesRemaining,
                                                   (int) AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
        float fade = calculateRepeatedFrameFadeFactor(indexOfRepeat);
        addBufferToScope(_scopeOutputLeft, _scopeOutputOffset, lastFrameData,
                         samplesToWriteThisIteration, 0, STEREO_FACTOR, fade);
        _scopeOutputOffset = addBufferToScope(_scopeOutputRight, _scopeOutputOffset, lastFrameData,
                                              samplesToWriteThisIteration, 1, STEREO_FACTOR, fade);

        samplesRemaining -= samplesToWriteThisIteration;
        indexOfRepeat++;
    } while (samplesRemaining > 0);
}
int InboundAudioStream::writeLastFrameRepeatedWithFade(int samples) {
    AudioRingBuffer::ConstIterator frameToRepeat = _ringBuffer.lastFrameWritten();
    int frameSize = _ringBuffer.getNumFrameSamples();
    int samplesToWrite = samples;
    int indexOfRepeat = 0;
    do {
        int samplesToWriteThisIteration = std::min(samplesToWrite, frameSize);
        float fade = calculateRepeatedFrameFadeFactor(indexOfRepeat);
        if (fade == 1.0f) {
            samplesToWrite -= _ringBuffer.writeSamples(frameToRepeat, samplesToWriteThisIteration);
        } else {
            samplesToWrite -= _ringBuffer.writeSamplesWithFade(frameToRepeat, samplesToWriteThisIteration, fade);
        }
        indexOfRepeat++;
    } while (samplesToWrite > 0);

    return samples;
}
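Every function in this section calls calculateRepeatedFrameFadeFactor(), but its definition is not part of this section. The following is a minimal sketch of what such a helper could look like, assuming a linear fade from full volume to silence over a fixed number of consecutive repeats; the constant name and value and the linear curve are illustrative assumptions, not the codebase's confirmed implementation.

float calculateRepeatedFrameFadeFactor(int indexOfRepeat) {
    // hypothetical constant: how many consecutive repeats before the frame is fully silent
    const int MAX_REPEATS_BEFORE_SILENCE = 10;

    // the first repeat (indexOfRepeat == 0) plays at full volume; later repeats get quieter
    float fade = 1.0f - (float)indexOfRepeat / (float)MAX_REPEATS_BEFORE_SILENCE;

    // clamp at zero so callers that test for (fade == 0.0f) know to stop repeating
    return (fade > 0.0f) ? fade : 0.0f;
}

The exact values returned at the endpoints matter: writeLastFrameRepeatedWithFade() above branches on the factor being exactly 1.0f, and the mixer functions below stop mixing a starved stream once it reaches exactly 0.0f.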
int AudioMixer::addStreamToMixForListeningNodeWithStream(PositionalAudioStream* streamToAdd,
                                                         AvatarAudioStream* listeningNodeStream) {
    // If repetition with fade is enabled:
    // If streamToAdd could not provide a frame (it was starved), then we'll mix its previously-mixed frame
    // This is preferable to not mixing it at all since that's equivalent to inserting silence.
    // Basically, we'll repeat that last frame until it has a frame to mix. Depending on how many times
    // we've repeated that frame in a row, we'll gradually fade that repeated frame into silence.
    // This improves the perceived quality of the audio slightly.

    float repeatedFrameFadeFactor = 1.0f;

    if (!streamToAdd->lastPopSucceeded()) {
        if (_streamSettings._repetitionWithFade && !streamToAdd->getLastPopOutput().isNull()) {
            // repetition with fade is enabled, and we do have a valid previous frame to repeat.
            // calculate its fade factor, which depends on how many times it's already been repeated.
            repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd->getConsecutiveNotMixedCount() - 1);
            if (repeatedFrameFadeFactor == 0.0f) {
                return 0;
            }
        } else {
            return 0;
        }
    }

    // at this point, we know streamToAdd's last pop output is valid

    // if the frame we're about to mix is silent, bail
    if (streamToAdd->getLastPopOutputLoudness() == 0.0f) {
        return 0;
    }

    float bearingRelativeAngleToSource = 0.0f;
    float attenuationCoefficient = 1.0f;
    int numSamplesDelay = 0;
    float weakChannelAmplitudeRatio = 1.0f;

    bool shouldAttenuate = (streamToAdd != listeningNodeStream);

    if (shouldAttenuate) {
        // if the two stream pointers do not match then these are different streams
        glm::vec3 relativePosition = streamToAdd->getPosition() - listeningNodeStream->getPosition();

        float distanceBetween = glm::length(relativePosition);

        if (distanceBetween < EPSILON) {
            distanceBetween = EPSILON;
        }

        if (streamToAdd->getLastPopOutputTrailingLoudness() / distanceBetween <= _minAudibilityThreshold) {
            // according to mixer performance we have decided this does not get to be mixed in
            // bail out
            return 0;
        }

        ++_sumMixes;

        if (streamToAdd->getListenerUnattenuatedZone()) {
            shouldAttenuate = !streamToAdd->getListenerUnattenuatedZone()->contains(listeningNodeStream->getPosition());
        }

        if (streamToAdd->getType() == PositionalAudioStream::Injector) {
            attenuationCoefficient *= reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getAttenuationRatio();
        }

        shouldAttenuate = shouldAttenuate && distanceBetween > ATTENUATION_EPSILON_DISTANCE;

        if (shouldAttenuate) {
            glm::quat inverseOrientation = glm::inverse(listeningNodeStream->getOrientation());

            float distanceSquareToSource = glm::dot(relativePosition, relativePosition);
            float radius = 0.0f;

            if (streamToAdd->getType() == PositionalAudioStream::Injector) {
                radius = reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getRadius();
            }

            if (radius == 0 || (distanceSquareToSource > radius * radius)) {
                // this is either not a spherical source, or the listener is outside the sphere

                if (radius > 0) {
                    // this is a spherical source - the distance used for the coefficient
                    // needs to be the closest point on the boundary to the source

                    // override the distance to the node with the distance to the point on the
                    // boundary of the sphere
                    distanceSquareToSource -= (radius * radius);
                } else {
                    // calculate the angle of delivery for off-axis attenuation
                    glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd->getOrientation()) * relativePosition;
                    float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                       glm::normalize(rotatedListenerPosition));

                    const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
                    const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;

                    float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
                                               (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));

                    // multiply the current attenuation coefficient by the calculated off axis coefficient
                    attenuationCoefficient *= offAxisCoefficient;
                }

                glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;

                if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) {
                    // calculate the distance coefficient using the distance to this node
                    float distanceCoefficient = 1 - (logf(distanceBetween / ATTENUATION_BEGINS_AT_DISTANCE) / logf(2.0f)
                                                     * ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE);

                    if (distanceCoefficient < 0) {
                        distanceCoefficient = 0;
                    }

                    // multiply the current attenuation coefficient by the distance coefficient
                    attenuationCoefficient *= distanceCoefficient;
                }

                // project the rotated source position vector onto the XZ plane
                rotatedSourcePosition.y = 0.0f;

                // produce an oriented angle about the y-axis
                bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                                  glm::normalize(rotatedSourcePosition),
                                                                  glm::vec3(0.0f, 1.0f, 0.0f));

                const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5;

                // figure out the number of samples of delay and the ratio of the amplitude
                // in the weak channel for audio spatialization
                float sinRatio = fabsf(sinf(bearingRelativeAngleToSource));
                numSamplesDelay = SAMPLE_PHASE_DELAY_AT_90 * sinRatio;
                weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
            }
        }
    }

    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd->getLastPopOutput();

    if (!streamToAdd->isStereo() && shouldAttenuate) {
        // this is a mono stream, which means it gets full attenuation and spatialization

        // if the bearing relative angle to source is > 0 then the delayed channel is the right one
        int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
        int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;

        int16_t correctStreamSample[2], delayStreamSample[2];
        int delayedChannelIndex = 0;

        const int SINGLE_STEREO_OFFSET = 2;
        float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;

        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {
            // setup the int16_t variables for the two sample sets
            correctStreamSample[0] = streamPopOutput[s / 2] * attenuationAndFade;
            correctStreamSample[1] = streamPopOutput[(s / 2) + 1] * attenuationAndFade;

            delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;

            delayStreamSample[0] = correctStreamSample[0] * weakChannelAmplitudeRatio;
            delayStreamSample[1] = correctStreamSample[1] * weakChannelAmplitudeRatio;

            _clientSamples[s + goodChannelOffset] += correctStreamSample[0];
            _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] += correctStreamSample[1];
            _clientSamples[delayedChannelIndex] += delayStreamSample[0];
            _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] += delayStreamSample[1];
        }

        if (numSamplesDelay > 0) {
            // if there was a sample delay for this stream, we need to pull samples prior to the popped output
            // to stick at the beginning
            float attenuationAndWeakChannelRatioAndFade = attenuationCoefficient * weakChannelAmplitudeRatio
                                                          * repeatedFrameFadeFactor;
            AudioRingBuffer::ConstIterator delayStreamPopOutput = streamPopOutput - numSamplesDelay;

            // TODO: delayStreamPopOutput may be inside the last frame written if the ringbuffer is completely full
            // maybe make AudioRingBuffer have 1 extra frame in its buffer
            for (int i = 0; i < numSamplesDelay; i++) {
                int parentIndex = i * 2;
                _clientSamples[parentIndex + delayedChannelOffset] += *delayStreamPopOutput
                                                                      * attenuationAndWeakChannelRatioAndFade;
                ++delayStreamPopOutput;
            }
        }
    } else {
        int stereoDivider = streamToAdd->isStereo() ? 1 : 2;

        if (!shouldAttenuate) {
            attenuationCoefficient = 1.0f;
        }

        float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;

        for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s++) {
            _clientSamples[s] = glm::clamp(_clientSamples[s] + (int)(streamPopOutput[s / stereoDivider]
                                                                     * attenuationAndFade),
                                           MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
        }
    }

    if (_enableFilter && shouldAttenuate) {
        glm::vec3 relativePosition = streamToAdd->getPosition() - listeningNodeStream->getPosition();
        if (relativePosition.z < 0) {
            // if the source is behind us
            AudioFilterHSF1s& penumbraFilter = streamToAdd->getFilter();

            // calculate penumbra angle
            float headPenumbraAngle = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(relativePosition));

            // normalize penumbra angle
            float normalizedHeadPenumbraAngle = headPenumbraAngle / PI_OVER_TWO;

            if (normalizedHeadPenumbraAngle < EPSILON) {
                normalizedHeadPenumbraAngle = EPSILON;
            }

            const float SQUARE_ROOT_OF_TWO_OVER_TWO = 0.71f;
            const float FILTER_CUTOFF_FREQUENCY_HZ = 4000.0f;

            float penumbraFilterGain;
            float penumbraFilterFrequency;
            float penumbraFilterSlope;

            // calculate the updated gain. this will be tuned over time.
            // consider this only a crude first pass at correlating gain, freq and slope with penumbra angle.
            penumbraFilterGain = SQUARE_ROOT_OF_TWO_OVER_TWO * (normalizedHeadPenumbraAngle + SQUARE_ROOT_OF_TWO_OVER_TWO);
            penumbraFilterFrequency = FILTER_CUTOFF_FREQUENCY_HZ; // constant frequency
            penumbraFilterSlope = SQUARE_ROOT_OF_TWO_OVER_TWO; // constant slope

            qDebug() << "penumbra gain=" << penumbraFilterGain << ", penumbraAngle=" << normalizedHeadPenumbraAngle;

            // set the gain on both filter channels
            penumbraFilter.setParameters(0, 0, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGain, penumbraFilterSlope);
            penumbraFilter.setParameters(0, 1, SAMPLE_RATE, penumbraFilterFrequency, penumbraFilterGain, penumbraFilterSlope);
            penumbraFilter.render(_clientSamples, _clientSamples, NETWORK_BUFFER_LENGTH_SAMPLES_STEREO / 2);
        }
    }

    return 1;
}
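The distance attenuation applied above cuts the coefficient by a fixed fraction per doubling of distance once the source is beyond ATTENUATION_BEGINS_AT_DISTANCE. Isolated as a standalone sketch with assumed constant values (the real values are defined elsewhere in the codebase):

#include <cmath>

float distanceAttenuationSketch(float distanceBetween) {
    const float ATTENUATION_BEGINS_AT_DISTANCE = 1.0f;               // assumed value
    const float ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE = 0.18f; // assumed value

    if (distanceBetween < ATTENUATION_BEGINS_AT_DISTANCE) {
        return 1.0f; // no distance attenuation inside the starting radius
    }

    // log2(d / start) counts how many doublings of distance the source is past the starting radius
    float doublings = logf(distanceBetween / ATTENUATION_BEGINS_AT_DISTANCE) / logf(2.0f);
    float coefficient = 1.0f - doublings * ATTENUATION_AMOUNT_PER_DOUBLING_IN_DISTANCE;
    return (coefficient < 0.0f) ? 0.0f : coefficient;
}

With these assumed constants, a source 4m away sits two doublings out, giving 1 - 2 * 0.18 = 0.64; the clamp keeps very distant sources from producing a negative coefficient.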
void AudioMixerSlave::addStream(AudioMixerClientData& listenerNodeData, const QUuid& sourceNodeID,
                                const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
                                bool throttle) {
    ++stats.totalMixes;

    // to reduce artifacts we call the HRTF functor for every source, even if throttled or silent
    // this ensures the correct tail from the last mixed block and the correct spatialization of the next first block

    // check if this is a server echo of a source back to itself
    bool isEcho = (&streamToAdd == &listeningNodeStream);

    glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();

    float distance = glm::max(glm::length(relativePosition), EPSILON);
    float gain = computeGain(listenerNodeData, listeningNodeStream, streamToAdd, relativePosition, isEcho);
    float azimuth = isEcho ? 0.0f : computeAzimuth(listeningNodeStream, listeningNodeStream, relativePosition);

    const int HRTF_DATASET_INDEX = 1;

    if (!streamToAdd.lastPopSucceeded()) {
        bool forceSilentBlock = true;

        if (!streamToAdd.getLastPopOutput().isNull()) {
            bool isInjector = dynamic_cast<const InjectedAudioStream*>(&streamToAdd);

            // if this is an injector, just go silent - the injector has likely ended
            // for other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence
            if (!isInjector) {
                // calculate its fade factor, which depends on how many times it's already been repeated.
                float fadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
                if (fadeFactor > 0.0f) {
                    // apply the fadeFactor to the gain
                    gain *= fadeFactor;
                    forceSilentBlock = false;
                }
            }
        }

        if (forceSilentBlock) {
            // call renderSilent with a forced silent block to reduce artifacts
            // (this is not done for stereo streams since they do not go through the HRTF)
            if (!streamToAdd.isStereo() && !isEcho) {
                // get the existing listener-source HRTF object, or create a new one
                auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

                static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
                hrtf.renderSilent(silentMonoBlock, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                                  AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

                ++stats.hrtfSilentRenders;
            }
            return;
        }
    }

    // grab the stream from the ring buffer
    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();

    // stereo sources are not passed through HRTF
    if (streamToAdd.isStereo()) {
        for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
            _mixSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
        }
        ++stats.manualStereoMixes;
        return;
    }

    // echo sources are not passed through HRTF
    if (isEcho) {
        for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
            auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
            _mixSamples[i] += monoSample;
            _mixSamples[i + 1] += monoSample;
        }
        ++stats.manualEchoMixes;
        return;
    }

    // get the existing listener-source HRTF object, or create a new one
    auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

    streamPopOutput.readSamples(_bufferSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

    if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
        // call renderSilent to reduce artifacts
        hrtf.renderSilent(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
        ++stats.hrtfSilentRenders;
        return;
    }

    if (throttle) {
        // call renderSilent with actual frame data and a gain of 0.0f to reduce artifacts
        hrtf.renderSilent(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
        ++stats.hrtfThrottleRenders;
        return;
    }

    hrtf.render(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
    ++stats.hrtfRenders;
}
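computeAzimuth() is called above but its body is not shown in this section. The sketch below is an assumption about its behavior, based on the convention the older mono phase-panning code in this section uses: rotate the relative position into the listener's frame, project onto the horizontal plane, and take the signed angle from straight ahead (-Z) about +Y. It is not the confirmed implementation of the helper.

#include <glm/glm.hpp>
#include <glm/gtc/quaternion.hpp>
#include <glm/gtx/vector_angle.hpp>

float computeAzimuthSketch(const glm::quat& listenerOrientation, const glm::vec3& relativePosition) {
    // rotate the source's relative position into the listener's local frame
    glm::vec3 rotated = glm::inverse(listenerOrientation) * relativePosition;

    // project onto the listener's horizontal (XZ) plane
    rotated.y = 0.0f;

    if (glm::length(rotated) < 1.0e-6f) {
        return 0.0f; // source is directly above or below; treat as straight ahead
    }

    // signed angle from the forward (-Z) axis, measured about the +Y axis
    return glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotated),
                              glm::vec3(0.0f, 1.0f, 0.0f));
}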
int AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData* listenerNodeData, const QUuid& streamUUID,
                                                         PositionalAudioStream* streamToAdd,
                                                         AvatarAudioStream* listeningNodeStream) {
    // If repetition with fade is enabled:
    // If streamToAdd could not provide a frame (it was starved), then we'll mix its previously-mixed frame
    // This is preferable to not mixing it at all since that's equivalent to inserting silence.
    // Basically, we'll repeat that last frame until it has a frame to mix. Depending on how many times
    // we've repeated that frame in a row, we'll gradually fade that repeated frame into silence.
    // This improves the perceived quality of the audio slightly.

    bool showDebug = false; // (randFloat() < 0.05f);

    float repeatedFrameFadeFactor = 1.0f;

    if (!streamToAdd->lastPopSucceeded()) {
        if (_streamSettings._repetitionWithFade && !streamToAdd->getLastPopOutput().isNull()) {
            // repetition with fade is enabled, and we do have a valid previous frame to repeat.
            // calculate its fade factor, which depends on how many times it's already been repeated.
            repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd->getConsecutiveNotMixedCount() - 1);
            if (repeatedFrameFadeFactor == 0.0f) {
                return 0;
            }
        } else {
            return 0;
        }
    }

    // at this point, we know streamToAdd's last pop output is valid

    // if the frame we're about to mix is silent, bail
    if (streamToAdd->getLastPopOutputLoudness() == 0.0f) {
        return 0;
    }

    float bearingRelativeAngleToSource = 0.0f;
    float attenuationCoefficient = 1.0f;
    int numSamplesDelay = 0;
    float weakChannelAmplitudeRatio = 1.0f;

    // Is the source that I am mixing my own?
    bool sourceIsSelf = (streamToAdd == listeningNodeStream);

    glm::vec3 relativePosition = streamToAdd->getPosition() - listeningNodeStream->getPosition();

    float distanceBetween = glm::length(relativePosition);

    if (distanceBetween < EPSILON) {
        distanceBetween = EPSILON;
    }

    if (streamToAdd->getLastPopOutputTrailingLoudness() / distanceBetween <= _minAudibilityThreshold) {
        // according to mixer performance we have decided this does not get to be mixed in
        // bail out
        return 0;
    }

    ++_sumMixes;

    if (streamToAdd->getType() == PositionalAudioStream::Injector) {
        attenuationCoefficient *= reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getAttenuationRatio();
        if (showDebug) {
            qDebug() << "AttenuationRatio: " << reinterpret_cast<InjectedAudioStream*>(streamToAdd)->getAttenuationRatio();
        }
    }

    if (showDebug) {
        qDebug() << "distance: " << distanceBetween;
    }

    glm::quat inverseOrientation = glm::inverse(listeningNodeStream->getOrientation());

    if (!sourceIsSelf && (streamToAdd->getType() == PositionalAudioStream::Microphone)) {
        // source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener
        glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd->getOrientation()) * relativePosition;

        float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedListenerPosition));

        const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
        const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;

        float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
                                   (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));

        if (showDebug) {
            qDebug() << "angleOfDelivery" << angleOfDelivery << "offAxisCoefficient: " << offAxisCoefficient;
        }

        // multiply the current attenuation coefficient by the calculated off axis coefficient
        attenuationCoefficient *= offAxisCoefficient;
    }

    float attenuationPerDoublingInDistance = _attenuationPerDoublingInDistance;
    for (int i = 0; i < _zonesSettings.length(); ++i) {
        if (_audioZones[_zonesSettings[i].source].contains(streamToAdd->getPosition()) &&
            _audioZones[_zonesSettings[i].listener].contains(listeningNodeStream->getPosition())) {
            attenuationPerDoublingInDistance = _zonesSettings[i].coefficient;
            break;
        }
    }

    if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) {
        // calculate the distance coefficient using the distance to this node
        float distanceCoefficient = 1 - (logf(distanceBetween / ATTENUATION_BEGINS_AT_DISTANCE) / logf(2.0f)
                                         * attenuationPerDoublingInDistance);

        if (distanceCoefficient < 0) {
            distanceCoefficient = 0;
        }

        // multiply the current attenuation coefficient by the distance coefficient
        attenuationCoefficient *= distanceCoefficient;
        if (showDebug) {
            qDebug() << "distanceCoefficient: " << distanceCoefficient;
        }
    }

    if (!sourceIsSelf) {
        // Compute sample delay for the two ears to create phase panning
        glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;

        // project the rotated source position vector onto the XZ plane
        rotatedSourcePosition.y = 0.0f;

        // produce an oriented angle about the y-axis
        bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                          glm::normalize(rotatedSourcePosition),
                                                          glm::vec3(0.0f, 1.0f, 0.0f));

        const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5;

        // figure out the number of samples of delay and the ratio of the amplitude
        // in the weak channel for audio spatialization
        float sinRatio = fabsf(sinf(bearingRelativeAngleToSource));
        numSamplesDelay = SAMPLE_PHASE_DELAY_AT_90 * sinRatio;
        weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);

        if (distanceBetween < RADIUS_OF_HEAD) {
            // Diminish phase panning if source would be inside head
            numSamplesDelay *= distanceBetween / RADIUS_OF_HEAD;
            weakChannelAmplitudeRatio += (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio) * distanceBetween / RADIUS_OF_HEAD;
        }
    }

    if (showDebug) {
        qDebug() << "attenuation: " << attenuationCoefficient;
        qDebug() << "bearingRelativeAngleToSource: " << bearingRelativeAngleToSource
                 << " numSamplesDelay: " << numSamplesDelay;
    }

    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd->getLastPopOutput();

    if (!streamToAdd->isStereo()) {
        // this is a mono stream, which means it gets full attenuation and spatialization

        // we need to do several things in this process:
        //   1) convert from mono to stereo by copying each input sample into the left and right output samples
        //   2) apply an attenuation AND fade to all samples (left and right)
        //   3) based on the bearing relative angle to the source we will weaken and delay either the left or
        //      right channel of the input into the output
        //   4) because one of these channels is delayed, we will need to use historical samples from
        //      the input stream for that delayed channel

        // Mono input to stereo output (item 1 above)
        int OUTPUT_SAMPLES_PER_INPUT_SAMPLE = 2;
        int inputSampleCount = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO / OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
        int maxOutputIndex = AudioConstants::NETWORK_FRAME_SAMPLES_STEREO;

        // attenuation and fade applied to all samples (item 2 above)
        float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;

        // determine which side is weak and delayed (item 3 above)
        bool rightSideWeakAndDelayed = (bearingRelativeAngleToSource > 0.0f);

        // since we're converting from mono to stereo, we'll use these two indices to step through
        // the output samples. we'll increment each index independently in the loop
        int leftDestinationIndex = 0;
        int rightDestinationIndex = 1;

        // One of our two channels will be delayed (determined below). We'll use this index to step
        // through filling in our output with the historical samples for the delayed channel. (item 4 above)
        int delayedChannelHistoricalAudioOutputIndex;

        // All samples will be attenuated by at least this much
        float leftSideAttenuation = attenuationAndFade;
        float rightSideAttenuation = attenuationAndFade;

        // The weak/delayed channel will be attenuated by this additional amount
        float attenuationAndWeakChannelRatioAndFade = attenuationAndFade * weakChannelAmplitudeRatio;

        // Now, based on the determination of which side is weak and delayed, set up our true starting point
        // for our indexes, as well as the appropriate attenuation for each channel
        if (rightSideWeakAndDelayed) {
            delayedChannelHistoricalAudioOutputIndex = rightDestinationIndex;
            rightSideAttenuation = attenuationAndWeakChannelRatioAndFade;
            rightDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE);
        } else {
            delayedChannelHistoricalAudioOutputIndex = leftDestinationIndex;
            leftSideAttenuation = attenuationAndWeakChannelRatioAndFade;
            leftDestinationIndex += (numSamplesDelay * OUTPUT_SAMPLES_PER_INPUT_SAMPLE);
        }

        // If there was a sample delay for this stream, we need to pull samples prior to the official start of the input
        // and stick those samples at the beginning of the output. We only need to loop through this for the weak/delayed
        // side, since the normal side is fully handled below. (item 4 above)
        if (numSamplesDelay > 0) {
            // TODO: delayStreamSourceSamples may be inside the last frame written if the ringbuffer is completely full
            // maybe make AudioRingBuffer have 1 extra frame in its buffer
            AudioRingBuffer::ConstIterator delayStreamSourceSamples = streamPopOutput - numSamplesDelay;

            for (int i = 0; i < numSamplesDelay; i++) {
                int16_t originalHistoricalSample = *delayStreamSourceSamples;

                _preMixSamples[delayedChannelHistoricalAudioOutputIndex] +=
                    originalHistoricalSample * attenuationAndWeakChannelRatioAndFade;
                ++delayStreamSourceSamples; // move our input pointer
                delayedChannelHistoricalAudioOutputIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE; // move our output sample
            }
        }

        // Here's where we copy the MONO input to the STEREO output, and account for delay and weak side attenuation
        for (int inputSample = 0; inputSample < inputSampleCount; inputSample++) {
            int16_t originalSample = streamPopOutput[inputSample];
            int16_t leftSideSample = originalSample * leftSideAttenuation;
            int16_t rightSideSample = originalSample * rightSideAttenuation;

            // since we might be delayed, don't write beyond our maxOutputIndex
            if (leftDestinationIndex <= maxOutputIndex) {
                _preMixSamples[leftDestinationIndex] += leftSideSample;
            }
            if (rightDestinationIndex <= maxOutputIndex) {
                _preMixSamples[rightDestinationIndex] += rightSideSample;
            }

            leftDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
            rightDestinationIndex += OUTPUT_SAMPLES_PER_INPUT_SAMPLE;
        }
    } else {
        int stereoDivider = streamToAdd->isStereo() ? 1 : 2;

        float attenuationAndFade = attenuationCoefficient * repeatedFrameFadeFactor;

        for (int s = 0; s < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; s++) {
            _preMixSamples[s] = glm::clamp(_preMixSamples[s] + (int)(streamPopOutput[s / stereoDivider]
                                                                     * attenuationAndFade),
                                           AudioConstants::MIN_SAMPLE_VALUE, AudioConstants::MAX_SAMPLE_VALUE);
        }
    }

    if (!sourceIsSelf && _enableFilter && !streamToAdd->ignorePenumbraFilter()) {
        const float TWO_OVER_PI = 2.0f / PI;

        const float ZERO_DB = 1.0f;
        const float NEGATIVE_ONE_DB = 0.891f;
        const float NEGATIVE_THREE_DB = 0.708f;

        const float FILTER_GAIN_AT_0 = ZERO_DB;             // source is in front
        const float FILTER_GAIN_AT_90 = NEGATIVE_ONE_DB;    // source is incident to left or right ear
        const float FILTER_GAIN_AT_180 = NEGATIVE_THREE_DB; // source is behind

        const float FILTER_CUTOFF_FREQUENCY_HZ = 1000.0f;

        const float penumbraFilterFrequency = FILTER_CUTOFF_FREQUENCY_HZ; // constant frequency
        const float penumbraFilterSlope = NEGATIVE_THREE_DB; // constant slope

        float penumbraFilterGainL;
        float penumbraFilterGainR;

        // variable gain calculation broken down by quadrant
        if (-bearingRelativeAngleToSource < -PI_OVER_TWO && -bearingRelativeAngleToSource > -PI) {
            penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_0 - FILTER_GAIN_AT_180)
                                  * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_0;
            penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_90 - FILTER_GAIN_AT_180)
                                  * (-bearingRelativeAngleToSource + PI_OVER_TWO) + FILTER_GAIN_AT_90;
        } else if (-bearingRelativeAngleToSource <= PI && -bearingRelativeAngleToSource > PI_OVER_TWO) {
            penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_180 - FILTER_GAIN_AT_90)
                                  * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180;
            penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_180 - FILTER_GAIN_AT_0)
                                  * (-bearingRelativeAngleToSource - PI) + FILTER_GAIN_AT_180;
        } else if (-bearingRelativeAngleToSource <= PI_OVER_TWO && -bearingRelativeAngleToSource > 0) {
            penumbraFilterGainL = TWO_OVER_PI * (FILTER_GAIN_AT_90 - FILTER_GAIN_AT_0)
                                  * (-bearingRelativeAngleToSource - PI_OVER_TWO) + FILTER_GAIN_AT_90;
            penumbraFilterGainR = FILTER_GAIN_AT_0;
        } else {
            penumbraFilterGainL = FILTER_GAIN_AT_0;
            penumbraFilterGainR = TWO_OVER_PI * (FILTER_GAIN_AT_0 - FILTER_GAIN_AT_90)
                                  * (-bearingRelativeAngleToSource) + FILTER_GAIN_AT_0;
        }

        if (distanceBetween < RADIUS_OF_HEAD) {
            // Diminish effect if source would be inside head
            penumbraFilterGainL += (1.0f - penumbraFilterGainL) * (1.0f - distanceBetween / RADIUS_OF_HEAD);
            penumbraFilterGainR += (1.0f - penumbraFilterGainR) * (1.0f - distanceBetween / RADIUS_OF_HEAD);
        }

        bool wantDebug = false;
        if (wantDebug) {
            qDebug() << "gainL=" << penumbraFilterGainL
                     << "gainR=" << penumbraFilterGainR
                     << "angle=" << -bearingRelativeAngleToSource;
        }

        // Get our per listener/source data so we can get our filter
        AudioFilterHSF1s& penumbraFilter = listenerNodeData->getListenerSourcePairData(streamUUID)->getPenumbraFilter();

        // set the gain on both filter channels
        penumbraFilter.setParameters(0, 0, AudioConstants::SAMPLE_RATE, penumbraFilterFrequency,
                                     penumbraFilterGainL, penumbraFilterSlope);
        penumbraFilter.setParameters(0, 1, AudioConstants::SAMPLE_RATE, penumbraFilterFrequency,
                                     penumbraFilterGainR, penumbraFilterSlope);
        penumbraFilter.render(_preMixSamples, _preMixSamples, AudioConstants::NETWORK_FRAME_SAMPLES_STEREO / 2);
    }

    // Actually mix the _preMixSamples into the _mixSamples here.
    for (int s = 0; s < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; s++) {
        _mixSamples[s] = glm::clamp(_mixSamples[s] + _preMixSamples[s],
                                    AudioConstants::MIN_SAMPLE_VALUE, AudioConstants::MAX_SAMPLE_VALUE);
    }

    return 1;
}
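The mono path above derives its spatialization from two quantities: an inter-ear sample delay and an amplitude ratio for the far ear, both driven by |sin(bearing)|. Pulled out as a standalone sketch (SAMPLE_PHASE_DELAY_AT_90 is given an assumed value here; the actual constant is defined elsewhere in the codebase):

#include <cmath>

struct PhasePan {
    int samplesDelay;       // delay applied to the ear further from the source
    float weakChannelRatio; // amplitude ratio applied to that same ear
    bool rightSideDelayed;  // positive bearing delays/weakens the right channel
};

PhasePan computePhasePanSketch(float bearingRelativeAngleToSource) {
    const int SAMPLE_PHASE_DELAY_AT_90 = 20;        // assumed value
    const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5f; // matches the constant used above

    float sinRatio = fabsf(sinf(bearingRelativeAngleToSource));

    PhasePan result;
    result.samplesDelay = (int)(SAMPLE_PHASE_DELAY_AT_90 * sinRatio);
    result.weakChannelRatio = 1.0f - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
    result.rightSideDelayed = (bearingRelativeAngleToSource > 0.0f);
    return result;
}

A source 90 degrees to one side gets the full assumed 20-sample delay and a 0.5 amplitude ratio in the far ear; a source dead ahead gets no delay and equal amplitude in both ears.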
void AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData,
                                                          const PositionalAudioStream& streamToAdd,
                                                          const QUuid& sourceNodeID,
                                                          const AvatarAudioStream& listeningNodeStream) {
    // to reduce artifacts we calculate the gain and azimuth for every source for this listener
    // even if we are not going to end up mixing in this source
    ++_totalMixes;

    // this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct

    // check if this is a server echo of a source back to itself
    bool isEcho = (&streamToAdd == &listeningNodeStream);

    glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();

    // figure out the distance between source and listener
    float distance = glm::max(glm::length(relativePosition), EPSILON);

    // figure out the gain for this source at the listener
    float gain = gainForSource(streamToAdd, listeningNodeStream, relativePosition, isEcho);

    // figure out the azimuth to this source at the listener
    float azimuth = isEcho ? 0.0f : azimuthForSource(streamToAdd, listeningNodeStream, relativePosition);

    float repeatedFrameFadeFactor = 1.0f;

    static const int HRTF_DATASET_INDEX = 1;

    if (!streamToAdd.lastPopSucceeded()) {
        bool forceSilentBlock = true;

        if (_streamSettings._repetitionWithFade && !streamToAdd.getLastPopOutput().isNull()) {
            // repetition with fade is enabled, and we do have a valid previous frame to repeat
            // so we mix the previously-mixed block
            // this is preferable to not mixing it at all to avoid the harsh jump to silence
            // we'll repeat the last block until it has a block to mix
            // and we'll gradually fade that repeated block into silence.

            // calculate its fade factor, which depends on how many times it's already been repeated.
            repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
            if (repeatedFrameFadeFactor > 0.0f) {
                // apply the repeatedFrameFadeFactor to the gain
                gain *= repeatedFrameFadeFactor;
                forceSilentBlock = false;
            }
        }

        if (forceSilentBlock) {
            // we're deciding not to repeat, either since we've already done it enough times or repetition with fade is disabled
            // in this case we will call renderSilent with a forced silent block
            // this ensures the correct tail from the previously mixed block and the correct spatialization of the first block
            // of any upcoming audio
            if (!streamToAdd.isStereo() && !isEcho) {
                // get the existing listener-source HRTF object, or create a new one
                auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

                // this is not done for stereo streams since they do not go through the HRTF
                static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
                hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                                  AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

                ++_hrtfSilentRenders;
            }

            return;
        }
    }

    // grab the stream from the ring buffer
    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();

    if (streamToAdd.isStereo() || isEcho) {
        // this is a stereo source or server echo so we do not pass it through the HRTF
        // simply apply our calculated gain to each sample
        if (streamToAdd.isStereo()) {
            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
                _mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
            }
            ++_manualStereoMixes;
        } else {
            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
                auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
                _mixedSamples[i] += monoSample;
                _mixedSamples[i + 1] += monoSample;
            }
            ++_manualEchoMixes;
        }
        return;
    }

    // get the existing listener-source HRTF object, or create a new one
    auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

    static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];
    streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

    // if the frame we're about to mix is silent, simply call renderSilent and move on
    if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
        // silent frame from source
        // we still need to call renderSilent via the HRTF for a mono source
        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

        ++_hrtfSilentRenders;
        return;
    }

    if (_performanceThrottlingRatio > 0.0f
        && streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= _minAudibilityThreshold) {
        // the mixer is struggling so we're going to drop off some streams
        // we call renderSilent via the HRTF with the actual frame data and a gain of 0.0
        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

        ++_hrtfStruggleRenders;
        return;
    }

    ++_hrtfRenders;

    // mono stream, call the HRTF with our block and calculated azimuth and gain
    hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}
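In this final version the non-HRTF paths accumulate floats normalized by MAX_SAMPLE_VALUE into _mixedSamples. The step that converts the finished mix back to int16_t for the network packet is not shown in this section; the following is a hedged sketch of what that conversion presumably involves (the function and buffer names here are illustrative, not from the codebase):

#include <cstdint>
#include <algorithm>

void convertMixToPacketSketch(const float* mixedSamples, int16_t* packetSamples, int numSamples) {
    const float MAX_SAMPLE_VALUE = 32767.0f;
    const float MIN_SAMPLE_VALUE = -32768.0f;

    for (int i = 0; i < numSamples; ++i) {
        // scale the normalized mix back to the int16 range and clamp to avoid wrap-around
        // when many loud sources have been accumulated
        float scaled = mixedSamples[i] * MAX_SAMPLE_VALUE;
        scaled = std::min(std::max(scaled, MIN_SAMPLE_VALUE), MAX_SAMPLE_VALUE);
        packetSamples[i] = (int16_t)scaled;
    }
}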