int AudioMixer::prepareMixForListeningNode(Node* node) {
    // Build the stereo client mix for `node` by visiting every node's positional
    // audio streams and mixing in each one that should be audible to this listener.
    // Returns the number of streams that were mixed in.
    // NOTE(review): assumes node->getLinkedData() is non-null for listeners - confirm against caller.
    AudioMixerClientData* listenerData = static_cast<AudioMixerClientData*>(node->getLinkedData());
    AvatarAudioStream* nodeAudioStream = listenerData->getAvatarAudioStream();

    // zero out the client mix for this node
    memset(_clientSamples, 0, NETWORK_BUFFER_LENGTH_BYTES_STEREO);

    // loop through all other nodes that have sufficient audio to mix
    int streamsMixed = 0;
    foreach (const SharedNodePointer& otherNode, NodeList::getInstance()->getNodeHash()) {
        if (otherNode->getLinkedData()) {
            AudioMixerClientData* otherNodeClientData = static_cast<AudioMixerClientData*>(otherNode->getLinkedData());

            // enumerate the ARBs attached to the otherNode and add all that should be added to mix
            const QHash<QUuid, PositionalAudioStream*>& otherNodeAudioStreams = otherNodeClientData->getAudioStreams();
            QHash<QUuid, PositionalAudioStream*>::ConstIterator i;
            for (i = otherNodeAudioStreams.constBegin(); i != otherNodeAudioStreams.constEnd(); ++i) {
                PositionalAudioStream* otherNodeStream = i.value();

                // the listener's own streams are only mixed back when server-side echo is requested
                if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
                    streamsMixed += addStreamToMixForListeningNodeWithStream(otherNodeStream, nodeAudioStream);
                }
            }
        }
    }
    return streamsMixed;
}
float computeGain(const AudioMixerClientData& listenerNodeData, const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd, const glm::vec3& relativePosition, bool isEcho) { float gain = 1.0f; // injector: apply attenuation if (streamToAdd.getType() == PositionalAudioStream::Injector) { gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio(); // avatar: apply fixed off-axis attenuation to make them quieter as they turn away } else if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) { glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition; // source directivity is based on angle of emission, in local coordinates glm::vec3 direction = glm::normalize(rotatedListenerPosition); float angleOfDelivery = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward" const float MAX_OFF_AXIS_ATTENUATION = 0.2f; const float OFF_AXIS_ATTENUATION_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f; float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION + (angleOfDelivery * (OFF_AXIS_ATTENUATION_STEP / PI_OVER_TWO)); gain *= offAxisCoefficient; // apply master gain, only to avatars gain *= listenerNodeData.getMasterAvatarGain(); } auto& audioZones = AudioMixer::getAudioZones(); auto& zoneSettings = AudioMixer::getZoneSettings(); // find distance attenuation coefficient float attenuationPerDoublingInDistance = AudioMixer::getAttenuationPerDoublingInDistance(); for (int i = 0; i < zoneSettings.length(); ++i) { if (audioZones[zoneSettings[i].source].contains(streamToAdd.getPosition()) && audioZones[zoneSettings[i].listener].contains(listeningNodeStream.getPosition())) { attenuationPerDoublingInDistance = zoneSettings[i].coefficient; break; } } // distance attenuation const float ATTENUATION_START_DISTANCE = 1.0f; float distance = glm::length(relativePosition); assert(ATTENUATION_START_DISTANCE > EPSILON); if (distance >= ATTENUATION_START_DISTANCE) 
{ // translate the zone setting to gain per log2(distance) float g = 1.0f - attenuationPerDoublingInDistance; g = glm::clamp(g, EPSILON, 1.0f); // calculate the distance coefficient using the distance to this node float distanceCoefficient = fastExp2f(fastLog2f(g) * fastLog2f(distance/ATTENUATION_START_DISTANCE)); // multiply the current attenuation coefficient by the distance coefficient gain *= distanceCoefficient; } return gain; }
float AudioMixer::gainForSource(const PositionalAudioStream& streamToAdd, const AvatarAudioStream& listeningNodeStream, const glm::vec3& relativePosition, bool isEcho) { float gain = 1.0f; float distanceBetween = glm::length(relativePosition); if (distanceBetween < EPSILON) { distanceBetween = EPSILON; } if (streamToAdd.getType() == PositionalAudioStream::Injector) { gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio(); } if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) { // source is another avatar, apply fixed off-axis attenuation to make them quieter as they turn away from listener glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition; float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f), glm::normalize(rotatedListenerPosition)); const float MAX_OFF_AXIS_ATTENUATION = 0.2f; const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f; float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION + (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO)); // multiply the current attenuation coefficient by the calculated off axis coefficient gain *= offAxisCoefficient; } float attenuationPerDoublingInDistance = _attenuationPerDoublingInDistance; for (int i = 0; i < _zonesSettings.length(); ++i) { if (_audioZones[_zonesSettings[i].source].contains(streamToAdd.getPosition()) && _audioZones[_zonesSettings[i].listener].contains(listeningNodeStream.getPosition())) { attenuationPerDoublingInDistance = _zonesSettings[i].coefficient; break; } } if (distanceBetween >= ATTENUATION_BEGINS_AT_DISTANCE) { // translate the zone setting to gain per log2(distance) float g = 1.0f - attenuationPerDoublingInDistance; g = (g < EPSILON) ? EPSILON : g; g = (g > 1.0f) ? 
1.0f : g; // calculate the distance coefficient using the distance to this node float distanceCoefficient = exp2f(log2f(g) * log2f(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE)); // multiply the current attenuation coefficient by the distance coefficient gain *= distanceCoefficient; } return gain; }
void AudioMixerClientData::checkBuffersBeforeFrameSend() { QHash<QUuid, PositionalAudioStream*>::ConstIterator i; for (i = _audioStreams.constBegin(); i != _audioStreams.constEnd(); i++) { PositionalAudioStream* stream = i.value(); if (stream->popFrames(1, true) > 0) { stream->updateLastPopOutputLoudnessAndTrailingLoudness(); } } }
void AudioMixerClientData::sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode) {
    // Sends this client's per-stream statistics to `destinationNode`, split across as
    // many AudioStreamStats packets as the packet capacity requires.
    auto nodeList = DependencyManager::get<NodeList>();

    // The append flag is a boolean value that will be packed right after the header.
    // This flag allows the client to know when it has received all stats packets, so it can group any downstream effects,
    // and clear its cache of injector stream stats; it helps to prevent buildup of dead audio stream stats in the client.
    quint8 appendFlag = AudioStreamStats::START;

    // snapshot the streams so the send loop is not affected by concurrent stream changes
    auto streamsCopy = getAudioStreams();

    // pack and send stream stats packets until all audio streams' stats are sent
    int numStreamStatsRemaining = int(streamsCopy.size());
    auto it = streamsCopy.cbegin();

    while (numStreamStatsRemaining > 0) {
        auto statsPacket = NLPacket::create(PacketType::AudioStreamStats);

        // how many stats entries fit after the append flag and the count field
        // NOTE(review): presumably size() reports the writable payload capacity here - confirm against NLPacket
        int numStreamStatsRoomFor = (int)(statsPacket->size() - sizeof(quint8) - sizeof(quint16)) / sizeof(AudioStreamStats);

        // calculate the number of stream stats to follow
        quint16 numStreamStatsToPack = std::min(numStreamStatsRemaining, numStreamStatsRoomFor);

        // is this the terminal packet?
        if (numStreamStatsRemaining <= numStreamStatsToPack) {
            appendFlag |= AudioStreamStats::END;
        }

        // pack the append flag in this packet; subsequent packets carry 0 until the END bit
        statsPacket->writePrimitive(appendFlag);
        appendFlag = 0;

        // pack the number of stream stats to follow
        statsPacket->writePrimitive(numStreamStatsToPack);

        // pack the calculated number of stream stats
        for (int i = 0; i < numStreamStatsToPack; i++) {
            PositionalAudioStream* stream = it->second.get();

            // refresh once-per-second derived statistics before reading them
            stream->perSecondCallbackForUpdatingStats();

            AudioStreamStats streamStats = stream->getAudioStreamStats();
            statsPacket->writePrimitive(streamStats);

            ++it;
        }

        numStreamStatsRemaining -= numStreamStatsToPack;

        // send the current packet
        nodeList->sendPacket(std::move(statsPacket), *destinationNode);
    }
}
void AudioMixerClientData::sendAudioStreamStatsPackets(const SharedNodePointer& destinationNode) {
    // Sends this client's per-stream statistics to `destinationNode`, split across as
    // many AudioStreamStats packets as needed.

    // since audio stream stats packets are sent periodically, this is a good place to remove our dead injected streams.
    removeDeadInjectedStreams();

    auto nodeList = DependencyManager::get<NodeList>();

    // The append flag is a boolean value that will be packed right after the header. The first packet sent
    // inside this method will have 0 for this flag, while every subsequent packet will have 1 for this flag.
    // The sole purpose of this flag is so the client can clear its map of injected audio stream stats when
    // it receives a packet with an appendFlag of 0. This prevents the buildup of dead audio stream stats in the client.
    quint8 appendFlag = 0;

    // pack and send stream stats packets until all audio streams' stats are sent
    int numStreamStatsRemaining = _audioStreams.size();
    QHash<QUuid, PositionalAudioStream*>::ConstIterator audioStreamsIterator = _audioStreams.constBegin();
    while (numStreamStatsRemaining > 0) {
        auto statsPacket = NLPacket::create(PacketType::AudioStreamStats);

        // pack the append flag in this packet
        statsPacket->writePrimitive(appendFlag);
        appendFlag = 1;

        // how many stats entries fit after the append flag and the count field
        // NOTE(review): presumably size() reports the writable payload capacity here - confirm against NLPacket
        int numStreamStatsRoomFor = (statsPacket->size() - sizeof(quint8) - sizeof(quint16)) / sizeof(AudioStreamStats);

        // calculate and pack the number of stream stats to follow
        quint16 numStreamStatsToPack = std::min(numStreamStatsRemaining, numStreamStatsRoomFor);
        statsPacket->writePrimitive(numStreamStatsToPack);

        // pack the calculated number of stream stats
        for (int i = 0; i < numStreamStatsToPack; i++) {
            PositionalAudioStream* stream = audioStreamsIterator.value();

            // refresh once-per-second derived statistics before reading them
            stream->perSecondCallbackForUpdatingStats();

            AudioStreamStats streamStats = stream->getAudioStreamStats();
            statsPacket->writePrimitive(streamStats);

            audioStreamsIterator++;
        }

        numStreamStatsRemaining -= numStreamStatsToPack;

        // send the current packet
        nodeList->sendPacket(std::move(statsPacket), *destinationNode);
    }
}
void AudioMixerSlave::mixStream(AudioMixerClientData& listenerNodeData, const QUuid& sourceNodeID,
                                const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd) {
    // A stream without a valid position cannot be spatialized, so it is skipped entirely.
    if (!streamToAdd.hasValidPosition()) {
        return;
    }

    // un-throttled mix of this stream into the listener's buffer
    addStream(listenerNodeData, sourceNodeID, listeningNodeStream, streamToAdd, false);
}
float approximateGain(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd) { float gain = 1.0f; // injector: apply attenuation if (streamToAdd.getType() == PositionalAudioStream::Injector) { gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio(); } // avatar: skip attenuation - it is too costly to approximate // distance attenuation: approximate, ignore zone-specific attenuations glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition(); float distance = glm::length(relativePosition); return gain / distance; // avatar: skip master gain - it is constant for all streams }
void AudioMixerClientData::removeDeadInjectedStreams() { const int INJECTOR_CONSECUTIVE_NOT_MIXED_AFTER_STARTED_THRESHOLD = 100; // we have this second threshold in case the injected audio is so short that the injected stream // never even reaches its desired size, which means it will never start. const int INJECTOR_CONSECUTIVE_NOT_MIXED_THRESHOLD = 1000; QHash<QUuid, PositionalAudioStream*>::Iterator i = _audioStreams.begin(), end = _audioStreams.end(); while (i != end) { PositionalAudioStream* audioStream = i.value(); if (audioStream->getType() == PositionalAudioStream::Injector && audioStream->isStarved()) { int notMixedThreshold = audioStream->hasStarted() ? INJECTOR_CONSECUTIVE_NOT_MIXED_AFTER_STARTED_THRESHOLD : INJECTOR_CONSECUTIVE_NOT_MIXED_THRESHOLD; if (audioStream->getConsecutiveNotMixedCount() >= notMixedThreshold) { delete audioStream; i = _audioStreams.erase(i); continue; } } ++i; } }
int AudioMixer::prepareMixForListeningNode(Node* node) {
    // Build the stereo mix for `node` by visiting every node's positional audio
    // streams and mixing in each one that should be audible to this listener.
    // Returns the number of streams that were mixed in.
    // NOTE(review): assumes node->getLinkedData() is non-null for listeners - confirm against caller.
    AudioMixerClientData* listenerNodeData = static_cast<AudioMixerClientData*>(node->getLinkedData());
    AvatarAudioStream* nodeAudioStream = listenerNodeData->getAvatarAudioStream();

    // zero out the client mix for this node
    memset(_preMixSamples, 0, sizeof(_preMixSamples));
    memset(_mixSamples, 0, sizeof(_mixSamples));

    // loop through all other nodes that have sufficient audio to mix
    int streamsMixed = 0;
    DependencyManager::get<NodeList>()->eachNode([&](const SharedNodePointer& otherNode){
        if (otherNode->getLinkedData()) {
            AudioMixerClientData* otherNodeClientData = static_cast<AudioMixerClientData*>(otherNode->getLinkedData());

            // enumerate the ARBs attached to the otherNode and add all that should be added to mix
            const QHash<QUuid, PositionalAudioStream*>& otherNodeAudioStreams = otherNodeClientData->getAudioStreams();
            QHash<QUuid, PositionalAudioStream*>::ConstIterator i;
            for (i = otherNodeAudioStreams.constBegin(); i != otherNodeAudioStreams.constEnd(); ++i) {
                PositionalAudioStream* otherNodeStream = i.value();
                QUuid streamUUID = i.key();

                // microphone streams are keyed by the owning node's UUID, not the (null) stream identifier
                if (otherNodeStream->getType() == PositionalAudioStream::Microphone) {
                    streamUUID = otherNode->getUUID();
                }

                // the listener's own streams are only mixed back when server-side echo is requested
                if (*otherNode != *node || otherNodeStream->shouldLoopbackForNode()) {
                    streamsMixed += addStreamToMixForListeningNodeWithStream(listenerNodeData, streamUUID,
                                                                             otherNodeStream, nodeAudioStream);
                }
            }
        }
    });
    return streamsMixed;
}
float approximateGain(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
                      const glm::vec3& relativePosition) {
    // Cheap gain estimate used when throttling: injector attenuation (if any)
    // divided by the source-listener distance.
    float gain = 1.0f;

    // injector: apply attenuation
    if (streamToAdd.getType() == PositionalAudioStream::Injector) {
        // static_cast: an Injector-typed stream is an InjectedAudioStream in this hierarchy
        gain *= static_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();
    }

    // avatar: skip attenuation - it is too costly to approximate

    // distance attenuation: approximate, ignore zone-specific attenuations
    // this is a good approximation for streams further than ATTENUATION_START_DISTANCE
    // those streams closer will be amplified; amplifying close streams is acceptable
    // when throttling, as close streams are expected to be heard by a user
    float distance = glm::length(relativePosition);

    // guard against a zero-length relativePosition (co-located source and listener),
    // which would otherwise yield an infinite gain
    const float MIN_DISTANCE = 1e-6f;
    return gain / std::max(distance, MIN_DISTANCE);

    // avatar: skip master gain - it is constant for all streams
}
void AudioMixerSlave::addStream(AudioMixerClientData& listenerNodeData, const QUuid& sourceNodeID, const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd, bool throttle) { ++stats.totalMixes; // to reduce artifacts we call the HRTF functor for every source, even if throttled or silent // this ensures the correct tail from last mixed block and the correct spatialization of next first block // check if this is a server echo of a source back to itself bool isEcho = (&streamToAdd == &listeningNodeStream); glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition(); float distance = glm::max(glm::length(relativePosition), EPSILON); float gain = computeGain(listenerNodeData, listeningNodeStream, streamToAdd, relativePosition, isEcho); float azimuth = isEcho ? 0.0f : computeAzimuth(listeningNodeStream, listeningNodeStream, relativePosition); const int HRTF_DATASET_INDEX = 1; if (!streamToAdd.lastPopSucceeded()) { bool forceSilentBlock = true; if (!streamToAdd.getLastPopOutput().isNull()) { bool isInjector = dynamic_cast<const InjectedAudioStream*>(&streamToAdd); // in an injector, just go silent - the injector has likely ended // in other inputs (microphone, &c.), repeat with fade to avoid the harsh jump to silence if (!isInjector) { // calculate its fade factor, which depends on how many times it's already been repeated. 
float fadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1); if (fadeFactor > 0.0f) { // apply the fadeFactor to the gain gain *= fadeFactor; forceSilentBlock = false; } } } if (forceSilentBlock) { // call renderSilent with a forced silent block to reduce artifacts // (this is not done for stereo streams since they do not go through the HRTF) if (!streamToAdd.isStereo() && !isEcho) { // get the existing listener-source HRTF object, or create a new one auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier()); static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {}; hrtf.renderSilent(silentMonoBlock, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); ++stats.hrtfSilentRenders; } return; } } // grab the stream from the ring buffer AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput(); // stereo sources are not passed through HRTF if (streamToAdd.isStereo()) { for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) { _mixSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE); } ++stats.manualStereoMixes; return; } // echo sources are not passed through HRTF if (isEcho) { for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) { auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE); _mixSamples[i] += monoSample; _mixSamples[i + 1] += monoSample; } ++stats.manualEchoMixes; return; } // get the existing listener-source HRTF object, or create a new one auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier()); streamPopOutput.readSamples(_bufferSamples, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); if (streamToAdd.getLastPopOutputLoudness() == 0.0f) { // call renderSilent to reduce artifacts hrtf.renderSilent(_bufferSamples, _mixSamples, 
HRTF_DATASET_INDEX, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); ++stats.hrtfSilentRenders; return; } if (throttle) { // call renderSilent with actual frame data and a gain of 0.0f to reduce artifacts hrtf.renderSilent(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); ++stats.hrtfThrottleRenders; return; } hrtf.render(_bufferSamples, _mixSamples, HRTF_DATASET_INDEX, azimuth, distance, gain, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL); ++stats.hrtfRenders; }
float computeGain(float masterListenerGain, const AvatarAudioStream& listeningNodeStream,
                  const PositionalAudioStream& streamToAdd, const glm::vec3& relativePosition,
                  float distance, bool isEcho) {
    // Computes the scalar gain for `streamToAdd` as heard by the listener:
    // per-source attenuation (injector ratio, or avatar off-axis + master listener gain)
    // multiplied by a zone-configurable distance attenuation that is either LINEAR
    // (negative zone coefficient = hard distance limit) or LOGARITHMIC (positive
    // coefficient = attenuation per doubling of distance).
    float gain = 1.0f;

    // injector: apply attenuation
    if (streamToAdd.getType() == PositionalAudioStream::Injector) {
        gain *= reinterpret_cast<const InjectedAudioStream*>(&streamToAdd)->getAttenuationRatio();

    // avatar: apply fixed off-axis attenuation to make them quieter as they turn away
    } else if (!isEcho && (streamToAdd.getType() == PositionalAudioStream::Microphone)) {
        glm::vec3 rotatedListenerPosition = glm::inverse(streamToAdd.getOrientation()) * relativePosition;

        // source directivity is based on angle of emission, in local coordinates
        glm::vec3 direction = glm::normalize(rotatedListenerPosition);
        float angleOfDelivery = fastAcosf(glm::clamp(-direction.z, -1.0f, 1.0f)); // UNIT_NEG_Z is "forward"

        const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
        const float OFF_AXIS_ATTENUATION_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;
        float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
            (angleOfDelivery * (OFF_AXIS_ATTENUATION_STEP / PI_OVER_TWO));

        gain *= offAxisCoefficient;

        // apply master gain, only to avatars
        gain *= masterListenerGain;
    }

    auto& audioZones = AudioMixer::getAudioZones();
    auto& zoneSettings = AudioMixer::getZoneSettings();

    // find distance attenuation coefficient: the first zone pairing that contains both
    // the source position and the listener position overrides the global default
    float attenuationPerDoublingInDistance = AudioMixer::getAttenuationPerDoublingInDistance();
    for (const auto& settings : zoneSettings) {
        if (audioZones[settings.source].area.contains(streamToAdd.getPosition()) &&
            audioZones[settings.listener].area.contains(listeningNodeStream.getPosition())) {
            attenuationPerDoublingInDistance = settings.coefficient;
            break;
        }
    }

    if (attenuationPerDoublingInDistance < 0.0f) {
        // translate a negative zone setting to distance limit
        const float MIN_DISTANCE_LIMIT = ATTN_DISTANCE_REF + 1.0f; // silent after 1m
        float distanceLimit = std::max(-attenuationPerDoublingInDistance, MIN_DISTANCE_LIMIT);

        // calculate the LINEAR attenuation using the distance to this node
        // reference attenuation of 0dB at distance = ATTN_DISTANCE_REF
        float d = distance - ATTN_DISTANCE_REF;
        gain *= std::max(1.0f - d / (distanceLimit - ATTN_DISTANCE_REF), 0.0f);
        gain = std::min(gain, ATTN_GAIN_MAX);

    } else {
        // translate a positive zone setting to gain per log2(distance)
        const float MIN_ATTENUATION_COEFFICIENT = 0.001f; // -60dB per log2(distance)
        float g = glm::clamp(1.0f - attenuationPerDoublingInDistance, MIN_ATTENUATION_COEFFICIENT, 1.0f);

        // calculate the LOGARITHMIC attenuation using the distance to this node
        // reference attenuation of 0dB at distance = ATTN_DISTANCE_REF
        // (distance is floored at HRTF_NEARFIELD_MIN so very close sources are not over-amplified)
        float d = (1.0f / ATTN_DISTANCE_REF) * std::max(distance, HRTF_NEARFIELD_MIN);
        gain *= fastExp2f(fastLog2f(g) * fastLog2f(d));
        gain = std::min(gain, ATTN_GAIN_MAX);
    }

    return gain;
}
int AudioMixerClientData::parseData(NLPacket& packet) {
    // Routes an incoming audio packet either into the downstream stats struct or
    // into the matching positional audio stream (creating the stream on first use).
    // Returns the number of bytes consumed.
    PacketType packetType = packet.getType();

    if (packetType == PacketType::AudioStreamStats) {
        // skip over header, appendFlag, and num stats packed
        packet.seek(sizeof(quint8) + sizeof(quint16));

        // read the downstream audio stream stats
        packet.readPrimitive(&_downstreamAudioStreamStats);

        return packet.pos();
    } else {
        PositionalAudioStream* matchingStream = NULL;

        if (packetType == PacketType::MicrophoneAudioWithEcho
            || packetType == PacketType::MicrophoneAudioNoEcho
            || packetType == PacketType::SilentAudioFrame) {

            // the mic stream is keyed by the null UUID
            QUuid nullUUID = QUuid();
            if (!_audioStreams.contains(nullUUID)) {
                // we don't have a mic stream yet, so add it

                // read the channel flag to see if our stream is stereo or not
                packet.seek(sizeof(quint16));
                quint8 channelFlag;
                packet.readPrimitive(&channelFlag);
                bool isStereo = channelFlag == 1;

                _audioStreams.insert(nullUUID,
                                     matchingStream = new AvatarAudioStream(isStereo, AudioMixer::getStreamSettings()));
            } else {
                matchingStream = _audioStreams.value(nullUUID);
            }
        } else if (packetType == PacketType::InjectAudio) {
            // this is injected audio

            // grab the stream identifier for this injected audio
            packet.seek(sizeof(quint16));
            QUuid streamIdentifier = QUuid::fromRfc4122(packet.readWithoutCopy(NUM_BYTES_RFC4122_UUID));

            bool isStereo;
            packet.readPrimitive(&isStereo);

            if (!_audioStreams.contains(streamIdentifier)) {
                // we don't have this injected stream yet, so add it
                _audioStreams.insert(streamIdentifier,
                        matchingStream = new InjectedAudioStream(streamIdentifier, isStereo, AudioMixer::getStreamSettings()));
            } else {
                matchingStream = _audioStreams.value(streamIdentifier);
            }
        }

        // FIX: guard against unhandled packet types - previously any other type
        // reaching this branch dereferenced a null matchingStream
        if (!matchingStream) {
            return 0;
        }

        // seek to the beginning of the packet so that the next reader is in the right spot
        packet.seek(0);

        return matchingStream->parseData(packet);
    }
    return 0;
}
int AudioMixerClientData::parseData(ReceivedMessage& message) {
    // Routes an incoming audio message either into the downstream stats struct or
    // into the matching positional audio stream (creating the stream on first use),
    // logging when the parse overflows the stream's ring buffer.
    // Returns the number of bytes consumed.
    PacketType packetType = message.getType();

    if (packetType == PacketType::AudioStreamStats) {
        // skip over header, appendFlag, and num stats packed
        message.seek(sizeof(quint8) + sizeof(quint16));

        // read the downstream audio stream stats
        message.readPrimitive(&_downstreamAudioStreamStats);

        return message.getPosition();
    } else {
        PositionalAudioStream* matchingStream = NULL;
        bool isMicStream = false;

        if (packetType == PacketType::MicrophoneAudioWithEcho
            || packetType == PacketType::MicrophoneAudioNoEcho
            || packetType == PacketType::SilentAudioFrame) {

            // the mic stream is keyed by the null UUID
            QUuid nullUUID = QUuid();
            if (!_audioStreams.contains(nullUUID)) {
                // we don't have a mic stream yet, so add it

                // read the channel flag to see if our stream is stereo or not
                message.seek(sizeof(quint16));
                quint8 channelFlag;
                message.readPrimitive(&channelFlag);
                bool isStereo = channelFlag == 1;

                _audioStreams.insert(nullUUID,
                                     matchingStream = new AvatarAudioStream(isStereo, AudioMixer::getStreamSettings()));
            } else {
                matchingStream = _audioStreams.value(nullUUID);
            }

            isMicStream = true;
        } else if (packetType == PacketType::InjectAudio) {
            // this is injected audio

            // grab the stream identifier for this injected audio
            message.seek(sizeof(quint16));
            QUuid streamIdentifier = QUuid::fromRfc4122(message.readWithoutCopy(NUM_BYTES_RFC4122_UUID));

            bool isStereo;
            message.readPrimitive(&isStereo);

            if (!_audioStreams.contains(streamIdentifier)) {
                // we don't have this injected stream yet, so add it
                _audioStreams.insert(streamIdentifier,
                        matchingStream = new InjectedAudioStream(streamIdentifier, isStereo, AudioMixer::getStreamSettings()));
            } else {
                matchingStream = _audioStreams.value(streamIdentifier);
            }
        }

        // FIX: guard against unhandled packet types - previously any other type
        // reaching this branch dereferenced a null matchingStream
        if (!matchingStream) {
            return 0;
        }

        // seek to the beginning of the packet so that the next reader is in the right spot
        message.seek(0);

        // check the overflow count before we parse data
        auto overflowBefore = matchingStream->getOverflowCount();
        auto parseResult = matchingStream->parseData(message);

        if (matchingStream->getOverflowCount() > overflowBefore) {
            qDebug() << "Just overflowed on stream from" << message.getSourceID() << "at" << message.getSenderSockAddr();
            qDebug() << "This stream is for" << (isMicStream ? "microphone audio" : "injected audio");
        }

        return parseResult;
    }
    return 0;
}
void AudioMixer::addStreamToMixForListeningNodeWithStream(AudioMixerClientData& listenerNodeData,
                                                          const PositionalAudioStream& streamToAdd,
                                                          const QUuid& sourceNodeID,
                                                          const AvatarAudioStream& listeningNodeStream) {
    // Mixes one source stream into _mixedSamples for the given listener. Stereo and
    // echo sources are mixed manually; mono sources go through the HRTF. Silent,
    // repeated, and throttled frames still drive the HRTF (via renderSilent) so its
    // tail and spatialization stay correct.

    // to reduce artifacts we calculate the gain and azimuth for every source for this listener
    // even if we are not going to end up mixing in this source
    ++_totalMixes;

    // this ensures that the tail of any previously mixed audio or the first block of new audio sounds correct

    // check if this is a server echo of a source back to itself
    bool isEcho = (&streamToAdd == &listeningNodeStream);

    glm::vec3 relativePosition = streamToAdd.getPosition() - listeningNodeStream.getPosition();

    // figure out the distance between source and listener
    float distance = glm::max(glm::length(relativePosition), EPSILON);

    // figure out the gain for this source at the listener
    float gain = gainForSource(streamToAdd, listeningNodeStream, relativePosition, isEcho);

    // figure out the azimuth to this source at the listener
    float azimuth = isEcho ? 0.0f : azimuthForSource(streamToAdd, listeningNodeStream, relativePosition);

    float repeatedFrameFadeFactor = 1.0f;

    static const int HRTF_DATASET_INDEX = 1;

    if (!streamToAdd.lastPopSucceeded()) {
        bool forceSilentBlock = true;

        if (_streamSettings._repetitionWithFade && !streamToAdd.getLastPopOutput().isNull()) {

            // reptition with fade is enabled, and we do have a valid previous frame to repeat
            // so we mix the previously-mixed block
            // this is preferable to not mixing it at all to avoid the harsh jump to silence
            // we'll repeat the last block until it has a block to mix
            // and we'll gradually fade that repeated block into silence.

            // calculate its fade factor, which depends on how many times it's already been repeated.
            repeatedFrameFadeFactor = calculateRepeatedFrameFadeFactor(streamToAdd.getConsecutiveNotMixedCount() - 1);
            if (repeatedFrameFadeFactor > 0.0f) {
                // apply the repeatedFrameFadeFactor to the gain
                gain *= repeatedFrameFadeFactor;

                forceSilentBlock = false;
            }
        }

        if (forceSilentBlock) {
            // we're deciding not to repeat either since we've already done it enough times or repetition with fade is disabled
            // in this case we will call renderSilent with a forced silent block
            // this ensures the correct tail from the previously mixed block and the correct spatialization of first block
            // of any upcoming audio

            if (!streamToAdd.isStereo() && !isEcho) {
                // get the existing listener-source HRTF object, or create a new one
                auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

                // this is not done for stereo streams since they do not go through the HRTF
                static int16_t silentMonoBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL] = {};
                hrtf.renderSilent(silentMonoBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                                  AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

                ++_hrtfSilentRenders;;
            }

            return;
        }
    }

    // grab the stream from the ring buffer
    AudioRingBuffer::ConstIterator streamPopOutput = streamToAdd.getLastPopOutput();

    if (streamToAdd.isStereo() || isEcho) {
        // this is a stereo source or server echo so we do not pass it through the HRTF
        // simply apply our calculated gain to each sample
        if (streamToAdd.isStereo()) {
            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; ++i) {
                _mixedSamples[i] += float(streamPopOutput[i] * gain / AudioConstants::MAX_SAMPLE_VALUE);
            }

            ++_manualStereoMixes;
        } else {
            // mono echo: duplicate each source sample into both output channels
            for (int i = 0; i < AudioConstants::NETWORK_FRAME_SAMPLES_STEREO; i += 2) {
                auto monoSample = float(streamPopOutput[i / 2] * gain / AudioConstants::MAX_SAMPLE_VALUE);
                _mixedSamples[i] += monoSample;
                _mixedSamples[i + 1] += monoSample;
            }

            ++_manualEchoMixes;
        }

        return;
    }

    // get the existing listener-source HRTF object, or create a new one
    auto& hrtf = listenerNodeData.hrtfForStream(sourceNodeID, streamToAdd.getStreamIdentifier());

    static int16_t streamBlock[AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL];

    streamPopOutput.readSamples(streamBlock, AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

    // if the frame we're about to mix is silent, simply call render silent and move on
    if (streamToAdd.getLastPopOutputLoudness() == 0.0f) {
        // silent frame from source

        // we still need to call renderSilent via the HRTF for mono source
        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

        ++_hrtfSilentRenders;

        return;
    }

    if (_performanceThrottlingRatio > 0.0f
        && streamToAdd.getLastPopOutputTrailingLoudness() / glm::length(relativePosition) <= _minAudibilityThreshold) {
        // the mixer is struggling so we're going to drop off some streams

        // we call renderSilent via the HRTF with the actual frame data and a gain of 0.0
        hrtf.renderSilent(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, 0.0f,
                          AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);

        ++_hrtfStruggleRenders;

        return;
    }

    ++_hrtfRenders;

    // mono stream, call the HRTF with our block and calculated azimuth and gain
    hrtf.render(streamBlock, _mixedSamples, HRTF_DATASET_INDEX, azimuth, distance, gain,
                AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
}