int AudioMixerClientData::parseData(const QByteArray& packet) {
    PacketType packetType = packetTypeForPacket(packet);

    if (packetType == PacketTypeMicrophoneAudioWithEcho
        || packetType == PacketTypeMicrophoneAudioNoEcho
        || packetType == PacketTypeSilentAudioFrame) {

        // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
        AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();

        // read the first byte after the header to see if this is a stereo or mono buffer
        quint8 channelFlag = packet.at(numBytesForPacketHeader(packet));
        bool isStereo = channelFlag == 1;

        if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) {
            // there's a mismatch in the buffer channels for the incoming and current buffer
            // so delete our current buffer and create a new one
            _ringBuffers.removeOne(avatarRingBuffer);
            avatarRingBuffer->deleteLater();
            avatarRingBuffer = NULL;
        }

        if (!avatarRingBuffer) {
            // we don't have an AvatarAudioRingBuffer yet, so add it
            avatarRingBuffer = new AvatarAudioRingBuffer(isStereo);
            _ringBuffers.push_back(avatarRingBuffer);
        }

        // ask the AvatarAudioRingBuffer instance to parse the data
        avatarRingBuffer->parseData(packet);
    } else {
        // this is injected audio

        // grab the stream identifier for this injected audio
        QUuid streamIdentifier = QUuid::fromRfc4122(packet.mid(numBytesForPacketHeader(packet),
                                                               NUM_BYTES_RFC4122_UUID));

        InjectedAudioRingBuffer* matchingInjectedRingBuffer = NULL;

        for (unsigned int i = 0; i < _ringBuffers.size(); i++) {
            if (_ringBuffers[i]->getType() == PositionalAudioRingBuffer::Injector
                && ((InjectedAudioRingBuffer*) _ringBuffers[i])->getStreamIdentifier() == streamIdentifier) {
                matchingInjectedRingBuffer = (InjectedAudioRingBuffer*) _ringBuffers[i];
            }
        }

        if (!matchingInjectedRingBuffer) {
            // we don't have a matching injected audio ring buffer, so add it
            matchingInjectedRingBuffer = new InjectedAudioRingBuffer(streamIdentifier);
            _ringBuffers.push_back(matchingInjectedRingBuffer);
        }

        matchingInjectedRingBuffer->parseData(packet);
    }

    return 0;
}
int AudioMixerClientData::parseData(const QByteArray& packet) {
    PacketType packetType = packetTypeForPacket(packet);

    if (packetType == PacketTypeMicrophoneAudioWithEcho
        || packetType == PacketTypeMicrophoneAudioNoEcho
        || packetType == PacketTypeSilentAudioFrame) {

        // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
        AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();

        if (!avatarRingBuffer) {
            // we don't have an AvatarAudioRingBuffer yet, so add it
            avatarRingBuffer = new AvatarAudioRingBuffer();
            _ringBuffers.push_back(avatarRingBuffer);
        }

        // ask the AvatarAudioRingBuffer instance to parse the data
        avatarRingBuffer->parseData(packet);
    } else {
        // this is injected audio

        // grab the stream identifier for this injected audio
        QUuid streamIdentifier = QUuid::fromRfc4122(packet.mid(numBytesForPacketHeader(packet),
                                                               NUM_BYTES_RFC4122_UUID));

        InjectedAudioRingBuffer* matchingInjectedRingBuffer = NULL;

        for (unsigned int i = 0; i < _ringBuffers.size(); i++) {
            if (_ringBuffers[i]->getType() == PositionalAudioRingBuffer::Injector
                && ((InjectedAudioRingBuffer*) _ringBuffers[i])->getStreamIdentifier() == streamIdentifier) {
                matchingInjectedRingBuffer = (InjectedAudioRingBuffer*) _ringBuffers[i];
            }
        }

        if (!matchingInjectedRingBuffer) {
            // we don't have a matching injected audio ring buffer, so add it
            matchingInjectedRingBuffer = new InjectedAudioRingBuffer(streamIdentifier);
            _ringBuffers.push_back(matchingInjectedRingBuffer);
        }

        matchingInjectedRingBuffer->parseData(packet);
    }

    return 0;
}
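// (Added) A minimal standalone sketch of the two payload layouts the parsers above
// distinguish: a one-byte channel flag for microphone audio (first version) versus a
// 16-byte RFC 4122 stream UUID for injected audio. Qt types are real; sketchParse and
// the trivial numBytesForPacketHeader body are hypothetical stand-ins, not the real API.
#include <QByteArray>
#include <QUuid>

const int NUM_BYTES_RFC4122_UUID = 16;

static int numBytesForPacketHeader(const QByteArray&) { return 1; } // stand-in; real headers vary

struct ParsedAudioPayload {
    bool isStereo;          // microphone audio: a one-byte channel flag follows the header
    QUuid streamIdentifier; // injected audio: a 16-byte RFC 4122 UUID follows the header
};

ParsedAudioPayload sketchParse(const QByteArray& packet, bool isMicrophoneAudio) {
    ParsedAudioPayload parsed = { false, QUuid() };
    int headerBytes = numBytesForPacketHeader(packet);

    if (isMicrophoneAudio) {
        // payload byte 0 is the mono/stereo flag (1 == stereo)
        parsed.isStereo = quint8(packet.at(headerBytes)) == 1;
    } else {
        // payload bytes 0..15 identify the injector stream
        parsed.streamIdentifier = QUuid::fromRfc4122(packet.mid(headerBytes, NUM_BYTES_RFC4122_UUID));
    }
    return parsed;
}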
void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
                                                          AvatarAudioRingBuffer* listeningNodeBuffer) {
    float bearingRelativeAngleToSource = 0.0f;
    float attenuationCoefficient = 1.0f;
    int numSamplesDelay = 0;
    float weakChannelAmplitudeRatio = 1.0f;

    const int PHASE_DELAY_AT_90 = 20;

    if (bufferToAdd != listeningNodeBuffer) {
        // if the two buffer pointers do not match then these are different buffers
        glm::vec3 listenerPosition = listeningNodeBuffer->getPosition();
        glm::vec3 relativePosition = bufferToAdd->getPosition() - listeningNodeBuffer->getPosition();
        glm::quat inverseOrientation = glm::inverse(listeningNodeBuffer->getOrientation());

        float distanceSquareToSource = glm::dot(relativePosition, relativePosition);
        float radius = 0.0f;

        if (bufferToAdd->getType() == PositionalAudioRingBuffer::Injector) {
            InjectedAudioRingBuffer* injectedBuffer = (InjectedAudioRingBuffer*) bufferToAdd;
            radius = injectedBuffer->getRadius();
            attenuationCoefficient *= injectedBuffer->getAttenuationRatio();
        }

        if (radius == 0 || (distanceSquareToSource > radius * radius)) {
            // this is either not a spherical source, or the listener is outside the sphere

            if (radius > 0) {
                // this is a spherical source - the distance used for the coefficient
                // needs to be the closest point on the boundary to the source

                // override the distance to the node with the distance to the point on the
                // boundary of the sphere
                distanceSquareToSource -= (radius * radius);
            } else {
                // calculate the angle of delivery for off-axis attenuation
                glm::vec3 rotatedListenerPosition = glm::inverse(bufferToAdd->getOrientation()) * relativePosition;

                float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                   glm::normalize(rotatedListenerPosition));

                const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
                const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;

                float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
                    (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / 90.0f));

                // multiply the current attenuation coefficient by the calculated off axis coefficient
                attenuationCoefficient *= offAxisCoefficient;
            }

            glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;

            const float DISTANCE_SCALE = 2.5f;
            const float GEOMETRIC_AMPLITUDE_SCALAR = 0.3f;
            const float DISTANCE_LOG_BASE = 2.5f;
            const float DISTANCE_SCALE_LOG = logf(DISTANCE_SCALE) / logf(DISTANCE_LOG_BASE);

            // calculate the distance coefficient using the distance to this node
            float distanceCoefficient = powf(GEOMETRIC_AMPLITUDE_SCALAR,
                                             DISTANCE_SCALE_LOG +
                                             (0.5f * logf(distanceSquareToSource) / logf(DISTANCE_LOG_BASE)) - 1);
            distanceCoefficient = std::min(1.0f, distanceCoefficient);

            // multiply the current attenuation coefficient by the distance coefficient
            attenuationCoefficient *= distanceCoefficient;

            // project the rotated source position vector onto the XZ plane
            rotatedSourcePosition.y = 0.0f;

            // produce an oriented angle about the y-axis
            bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                              glm::normalize(rotatedSourcePosition),
                                                              glm::vec3(0.0f, 1.0f, 0.0f));

            const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5f;

            // figure out the number of samples of delay and the ratio of the amplitude
            // in the weak channel for audio spatialization
            float sinRatio = fabsf(sinf(glm::radians(bearingRelativeAngleToSource)));
            numSamplesDelay = PHASE_DELAY_AT_90 * sinRatio;
            weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
        }
    }

    // if the bearing relative angle to source is > 0 then the delayed channel is the right one
    int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
    int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;

    for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 2) {
        if ((s / 2) < numSamplesDelay) {
            // pull the earlier sample for the delayed channel
            int earlierSample = (*bufferToAdd)[(s / 2) - numSamplesDelay]
                * attenuationCoefficient * weakChannelAmplitudeRatio;
            _clientSamples[s + delayedChannelOffset] = glm::clamp(_clientSamples[s + delayedChannelOffset] + earlierSample,
                                                                  MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
        }

        // pull the current sample for the good channel
        int16_t currentSample = (*bufferToAdd)[s / 2] * attenuationCoefficient;
        _clientSamples[s + goodChannelOffset] = glm::clamp(_clientSamples[s + goodChannelOffset] + currentSample,
                                                           MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);

        if ((s / 2) + numSamplesDelay < NETWORK_BUFFER_LENGTH_SAMPLES_PER_CHANNEL) {
            // place the current sample at the right spot in the delayed channel
            int16_t clampedSample = glm::clamp((int) (_clientSamples[s + (numSamplesDelay * 2) + delayedChannelOffset]
                                                      + (currentSample * weakChannelAmplitudeRatio)),
                                               MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE);
            _clientSamples[s + (numSamplesDelay * 2) + delayedChannelOffset] = clampedSample;
        }
    }
}
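// (Added) A standalone reduction of the two spatialization formulas above, using the
// same constants; the distance and bearing in main() are illustrative values only.
// Algebraically the distance term is 0.3^(log_2.5(d)), i.e. roughly 1 / d^1.31,
// computed from the squared distance so no sqrt is needed (log d == 0.5 * log d^2).
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
    const float DISTANCE_SCALE = 2.5f;
    const float GEOMETRIC_AMPLITUDE_SCALAR = 0.3f;
    const float DISTANCE_LOG_BASE = 2.5f;
    const float DISTANCE_SCALE_LOG = logf(DISTANCE_SCALE) / logf(DISTANCE_LOG_BASE);

    float distanceSquareToSource = 10.0f * 10.0f; // e.g. a source 10 meters away

    float distanceCoefficient = powf(GEOMETRIC_AMPLITUDE_SCALAR,
                                     DISTANCE_SCALE_LOG
                                     + (0.5f * logf(distanceSquareToSource) / logf(DISTANCE_LOG_BASE)) - 1);
    distanceCoefficient = std::min(1.0f, distanceCoefficient);

    // stereo spatialization: the farther the source is off-center, the longer the
    // weak-ear delay (up to 20 samples) and the stronger its attenuation (down to half)
    const int PHASE_DELAY_AT_90 = 20;
    const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5f;

    float bearingDegrees = 45.0f; // e.g. a source 45 degrees to the listener's side
    float sinRatio = fabsf(sinf(bearingDegrees * 3.14159265f / 180.0f));
    int numSamplesDelay = PHASE_DELAY_AT_90 * sinRatio;
    float weakChannelAmplitudeRatio = 1.0f - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);

    printf("distance coefficient: %f\n", distanceCoefficient);
    printf("delay: %d samples, weak channel ratio: %f\n", numSamplesDelay, weakChannelAmplitudeRatio);
    return 0;
}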
int AudioMixerClientData::parseData(const QByteArray& packet) {
    // parse sequence number for this packet
    int numBytesPacketHeader = numBytesForPacketHeader(packet);
    const char* sequenceAt = packet.constData() + numBytesPacketHeader;
    quint16 sequence = *(reinterpret_cast<const quint16*>(sequenceAt));

    PacketType packetType = packetTypeForPacket(packet);

    if (packetType == PacketTypeMicrophoneAudioWithEcho
        || packetType == PacketTypeMicrophoneAudioNoEcho
        || packetType == PacketTypeSilentAudioFrame) {

        _incomingAvatarAudioSequenceNumberStats.sequenceNumberReceived(sequence);

        // grab the AvatarAudioRingBuffer from the vector (or create it if it doesn't exist)
        AvatarAudioRingBuffer* avatarRingBuffer = getAvatarAudioRingBuffer();

        // read the first byte after the header to see if this is a stereo or mono buffer
        quint8 channelFlag = packet.at(numBytesForPacketHeader(packet) + sizeof(quint16));
        bool isStereo = channelFlag == 1;

        if (avatarRingBuffer && avatarRingBuffer->isStereo() != isStereo) {
            // there's a mismatch in the buffer channels for the incoming and current buffer
            // so delete our current buffer and create a new one
            _ringBuffers.removeOne(avatarRingBuffer);
            avatarRingBuffer->deleteLater();
            avatarRingBuffer = NULL;
        }

        if (!avatarRingBuffer) {
            // we don't have an AvatarAudioRingBuffer yet, so add it
            avatarRingBuffer = new AvatarAudioRingBuffer(isStereo, AudioMixer::getUseDynamicJitterBuffers());
            _ringBuffers.push_back(avatarRingBuffer);
        }

        // ask the AvatarAudioRingBuffer instance to parse the data
        avatarRingBuffer->parseData(packet);
    } else if (packetType == PacketTypeInjectAudio) {
        // this is injected audio

        // grab the stream identifier for this injected audio
        QUuid streamIdentifier = QUuid::fromRfc4122(packet.mid(numBytesForPacketHeader(packet) + sizeof(quint16),
                                                               NUM_BYTES_RFC4122_UUID));

        if (!_incomingInjectedAudioSequenceNumberStatsMap.contains(streamIdentifier)) {
            _incomingInjectedAudioSequenceNumberStatsMap.insert(streamIdentifier,
                                                                SequenceNumberStats(INCOMING_SEQ_STATS_HISTORY_LENGTH));
        }
        _incomingInjectedAudioSequenceNumberStatsMap[streamIdentifier].sequenceNumberReceived(sequence);

        InjectedAudioRingBuffer* matchingInjectedRingBuffer = NULL;

        for (int i = 0; i < _ringBuffers.size(); i++) {
            if (_ringBuffers[i]->getType() == PositionalAudioRingBuffer::Injector
                && ((InjectedAudioRingBuffer*) _ringBuffers[i])->getStreamIdentifier() == streamIdentifier) {
                matchingInjectedRingBuffer = (InjectedAudioRingBuffer*) _ringBuffers[i];
            }
        }

        if (!matchingInjectedRingBuffer) {
            // we don't have a matching injected audio ring buffer, so add it
            matchingInjectedRingBuffer = new InjectedAudioRingBuffer(streamIdentifier,
                                                                     AudioMixer::getUseDynamicJitterBuffers());
            _ringBuffers.push_back(matchingInjectedRingBuffer);
        }

        matchingInjectedRingBuffer->parseData(packet);
    } else if (packetType == PacketTypeAudioStreamStats) {
        const char* dataAt = packet.data();

        // skip over header, appendFlag, and num stats packed
        dataAt += (numBytesPacketHeader + sizeof(quint8) + sizeof(quint16));

        // read the downstream audio stream stats
        memcpy(&_downstreamAudioStreamStats, dataAt, sizeof(AudioStreamStats));
    }

    return 0;
}
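// (Added) A sketch of the revised payload layout, which inserts a 16-bit sequence
// number between the header and the earlier fields. Qt types are real and the offsets
// mirror the parser above, but this AudioStreamStats and numBytesForPacketHeader are
// hypothetical stand-ins, not the real definitions.
#include <QByteArray>
#include <cstring>

struct AudioStreamStats { float placeholder; }; // stand-in for the real aggregate

static int numBytesForPacketHeader(const QByteArray&) { return 1; } // stand-in

quint16 readSequenceNumber(const QByteArray& packet) {
    // the sequence number is the first payload field of every incoming audio packet
    quint16 sequence;
    memcpy(&sequence, packet.constData() + numBytesForPacketHeader(packet), sizeof(quint16));
    return sequence; // memcpy avoids the unaligned read of a reinterpret_cast
}

AudioStreamStats readDownstreamStats(const QByteArray& packet) {
    // stats packets carry: header | quint8 appendFlag | quint16 numStatsPacked | stats...
    const char* dataAt = packet.constData()
        + numBytesForPacketHeader(packet) + sizeof(quint8) + sizeof(quint16);
    AudioStreamStats stats;
    memcpy(&stats, dataAt, sizeof(AudioStreamStats));
    return stats;
}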
void AudioMixer::addBufferToMixForListeningNodeWithBuffer(PositionalAudioRingBuffer* bufferToAdd,
                                                          AvatarAudioRingBuffer* listeningNodeBuffer) {
    float bearingRelativeAngleToSource = 0.0f;
    float attenuationCoefficient = 1.0f;
    int numSamplesDelay = 0;
    float weakChannelAmplitudeRatio = 1.0f;

    if (bufferToAdd != listeningNodeBuffer) {
        // if the two buffer pointers do not match then these are different buffers
        glm::vec3 relativePosition = bufferToAdd->getPosition() - listeningNodeBuffer->getPosition();
        glm::quat inverseOrientation = glm::inverse(listeningNodeBuffer->getOrientation());

        float distanceSquareToSource = glm::dot(relativePosition, relativePosition);
        float radius = 0.0f;

        if (bufferToAdd->getType() == PositionalAudioRingBuffer::Injector) {
            InjectedAudioRingBuffer* injectedBuffer = (InjectedAudioRingBuffer*) bufferToAdd;
            radius = injectedBuffer->getRadius();
            attenuationCoefficient *= injectedBuffer->getAttenuationRatio();
        }

        if (radius == 0 || (distanceSquareToSource > radius * radius)) {
            // this is either not a spherical source, or the listener is outside the sphere

            if (radius > 0) {
                // this is a spherical source - the distance used for the coefficient
                // needs to be the closest point on the boundary to the source

                // override the distance to the node with the distance to the point on the
                // boundary of the sphere
                distanceSquareToSource -= (radius * radius);
            } else {
                // calculate the angle of delivery for off-axis attenuation
                glm::vec3 rotatedListenerPosition = glm::inverse(bufferToAdd->getOrientation()) * relativePosition;

                float angleOfDelivery = glm::angle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                   glm::normalize(rotatedListenerPosition));

                const float MAX_OFF_AXIS_ATTENUATION = 0.2f;
                const float OFF_AXIS_ATTENUATION_FORMULA_STEP = (1 - MAX_OFF_AXIS_ATTENUATION) / 2.0f;

                float offAxisCoefficient = MAX_OFF_AXIS_ATTENUATION +
                    (OFF_AXIS_ATTENUATION_FORMULA_STEP * (angleOfDelivery / PI_OVER_TWO));

                // multiply the current attenuation coefficient by the calculated off axis coefficient
                attenuationCoefficient *= offAxisCoefficient;
            }

            glm::vec3 rotatedSourcePosition = inverseOrientation * relativePosition;

            const float DISTANCE_SCALE = 2.5f;
            const float GEOMETRIC_AMPLITUDE_SCALAR = 0.3f;
            const float DISTANCE_LOG_BASE = 2.5f;
            const float DISTANCE_SCALE_LOG = logf(DISTANCE_SCALE) / logf(DISTANCE_LOG_BASE);

            // calculate the distance coefficient using the distance to this node
            float distanceCoefficient = powf(GEOMETRIC_AMPLITUDE_SCALAR,
                                             DISTANCE_SCALE_LOG +
                                             (0.5f * logf(distanceSquareToSource) / logf(DISTANCE_LOG_BASE)) - 1);
            distanceCoefficient = std::min(1.0f, distanceCoefficient);

            // multiply the current attenuation coefficient by the distance coefficient
            attenuationCoefficient *= distanceCoefficient;

            // project the rotated source position vector onto the XZ plane
            rotatedSourcePosition.y = 0.0f;

            // produce an oriented angle about the y-axis
            bearingRelativeAngleToSource = glm::orientedAngle(glm::vec3(0.0f, 0.0f, -1.0f),
                                                              glm::normalize(rotatedSourcePosition),
                                                              glm::vec3(0.0f, 1.0f, 0.0f));

            const float PHASE_AMPLITUDE_RATIO_AT_90 = 0.5f;

            // figure out the number of samples of delay and the ratio of the amplitude
            // in the weak channel for audio spatialization
            float sinRatio = fabsf(sinf(bearingRelativeAngleToSource));
            numSamplesDelay = SAMPLE_PHASE_DELAY_AT_90 * sinRatio;
            weakChannelAmplitudeRatio = 1 - (PHASE_AMPLITUDE_RATIO_AT_90 * sinRatio);
        }
    }

    // if the bearing relative angle to source is > 0 then the delayed channel is the right one
    int delayedChannelOffset = (bearingRelativeAngleToSource > 0.0f) ? 1 : 0;
    int goodChannelOffset = delayedChannelOffset == 0 ? 1 : 0;

    const int16_t* nextOutputStart = bufferToAdd->getNextOutput();
    const int16_t* bufferStart = bufferToAdd->getBuffer();
    int ringBufferSampleCapacity = bufferToAdd->getSampleCapacity();

    int16_t correctBufferSample[2], delayBufferSample[2];
    int delayedChannelIndex = 0;

    const int SINGLE_STEREO_OFFSET = 2;

    for (int s = 0; s < NETWORK_BUFFER_LENGTH_SAMPLES_STEREO; s += 4) {

        // setup the int16_t variables for the two sample sets
        correctBufferSample[0] = nextOutputStart[s / 2] * attenuationCoefficient;
        correctBufferSample[1] = nextOutputStart[(s / 2) + 1] * attenuationCoefficient;
        delayedChannelIndex = s + (numSamplesDelay * 2) + delayedChannelOffset;

        delayBufferSample[0] = correctBufferSample[0] * weakChannelAmplitudeRatio;
        delayBufferSample[1] = correctBufferSample[1] * weakChannelAmplitudeRatio;

        __m64 bufferSamples = _mm_set_pi16(_clientSamples[s + goodChannelOffset],
                                           _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET],
                                           _clientSamples[delayedChannelIndex],
                                           _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET]);
        __m64 addedSamples = _mm_set_pi16(correctBufferSample[0], correctBufferSample[1],
                                          delayBufferSample[0], delayBufferSample[1]);

        // perform the MMX add (with saturation) of two correct and delayed samples
        __m64 mmxResult = _mm_adds_pi16(bufferSamples, addedSamples);
        int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);

        // assign the results from the result of the mmx arithmetic
        _clientSamples[s + goodChannelOffset] = shortResults[3];
        _clientSamples[s + goodChannelOffset + SINGLE_STEREO_OFFSET] = shortResults[2];
        _clientSamples[delayedChannelIndex] = shortResults[1];
        _clientSamples[delayedChannelIndex + SINGLE_STEREO_OFFSET] = shortResults[0];
    }

    // The following code is pretty gross and redundant, but AFAIK it's the best way to avoid
    // too many conditionals in handling the delay samples at the beginning of _clientSamples.
    // Basically we try to take the samples in batches of four, and then handle the remainder
    // conditionally to get rid of the rest.
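    // Note (added): _mm_adds_pi16 adds four int16_t lanes with signed saturation, so any
    // sum outside [-32768, 32767] pins to the bound; this replaces the explicit
    // glm::clamp(..., MIN_SAMPLE_VALUE, MAX_SAMPLE_VALUE) of the earlier scalar version.
    // Also, _mm_set_pi16 takes its lanes high-to-low, which is why the first value packed
    // in comes back out of shortResults[3].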
    const int DOUBLE_STEREO_OFFSET = 4;
    const int TRIPLE_STEREO_OFFSET = 6;

    if (numSamplesDelay > 0) {
        // if there was a sample delay for this buffer, we need to pull samples prior to the nextOutput
        // to stick at the beginning
        float attenuationAndWeakChannelRatio = attenuationCoefficient * weakChannelAmplitudeRatio;
        const int16_t* delayNextOutputStart = nextOutputStart - numSamplesDelay;
        if (delayNextOutputStart < bufferStart) {
            delayNextOutputStart = bufferStart + ringBufferSampleCapacity - numSamplesDelay;
        }

        int i = 0;

        while (i + 3 < numSamplesDelay) {
            // handle the first cases where we can MMX add four samples at once
            int parentIndex = i * 2;
            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
                                               _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset]);
            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 3] * attenuationAndWeakChannelRatio);
            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);

            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
            _clientSamples[parentIndex + TRIPLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[0];

            // push the index
            i += 4;
        }

        int parentIndex = i * 2;

        if (i + 2 < numSamplesDelay) {
            // MMX add only three delayed samples
            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
                                               _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset],
                                               0);
            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 2] * attenuationAndWeakChannelRatio,
                                            0);
            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);

            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
            _clientSamples[parentIndex + DOUBLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[1];
        } else if (i + 1 < numSamplesDelay) {
            // MMX add two delayed samples
            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset],
                                               _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset],
                                               0, 0);
            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio,
                                            delayNextOutputStart[i + 1] * attenuationAndWeakChannelRatio,
                                            0, 0);
            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);

            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
            _clientSamples[parentIndex + SINGLE_STEREO_OFFSET + delayedChannelOffset] = shortResults[2];
        } else if (i < numSamplesDelay) {
            // MMX add a single delayed sample
            __m64 bufferSamples = _mm_set_pi16(_clientSamples[parentIndex + delayedChannelOffset], 0, 0, 0);
            __m64 addSamples = _mm_set_pi16(delayNextOutputStart[i] * attenuationAndWeakChannelRatio, 0, 0, 0);
            __m64 mmxResult = _mm_adds_pi16(bufferSamples, addSamples);
            int16_t* shortResults = reinterpret_cast<int16_t*>(&mmxResult);

            _clientSamples[parentIndex + delayedChannelOffset] = shortResults[3];
        }
    }
}
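// (Added) A minimal standalone demo of the saturating MMX add used above, assuming an
// x86 toolchain that provides <mmintrin.h>; the sample values are illustrative only.
#include <mmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
    // 30000 + 10000 would overflow int16_t (max 32767); _mm_adds_pi16 saturates instead
    __m64 a = _mm_set_pi16(30000, -30000, 100, 0);
    __m64 b = _mm_set_pi16(10000, -10000, 23, 0);

    __m64 sum = _mm_adds_pi16(a, b); // lane-wise signed add with saturation

    int16_t* lanes = reinterpret_cast<int16_t*>(&sum);
    // lanes are stored low-to-high, so lanes[3] holds the sum of the first arguments
    printf("%d %d %d %d\n", lanes[3], lanes[2], lanes[1], lanes[0]); // 32767 -32768 123 0

    _mm_empty(); // clear the MMX state before any x87 floating-point use
    return 0;
}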