Example #1
0
// Pans |inputBus| into the stereo |outputBus| using HRTF kernels looked up in
// the database supplied by |m_databaseLoader|.
//
// |desiredAzimuth| is negated before use, and the negated value must lie in
// [-180, 180]. |elevation| is handed to the database lookup unchanged.
// |inputBus| must be non-null with one or two channels; with one channel the
// same samples feed both the left and the right processing path. |outputBus|
// must be non-null with exactly two channels and hold at least
// |framesToProcess| frames. |channelInterpretation| is only used for the
// fallback copy performed when no database is available.
//
// If a bus or the azimuth fails validation, or a kernel lookup yields a null
// kernel, the output is zeroed (when |outputBus| is non-null) and the function
// returns early.
//
// Processing is done in segments of AudioUtilities::kRenderQuantumFrames
// frames. Each segment is first run through the delay lines and then through
// the convolvers. Two kernel sets (1 and 2) are kept: when the desired azimuth
// index or elevation changes while settled on one set, the other set is
// retargeted and the output is linearly cross-faded towards it.
void HRTFPanner::pan(double desiredAzimuth,
                     double elevation,
                     const AudioBus* inputBus,
                     AudioBus* outputBus,
                     size_t framesToProcess,
                     AudioBus::ChannelInterpretation channelInterpretation) {
  // A null input bus counts as zero channels, which fails the check below.
  unsigned numInputChannels = inputBus ? inputBus->numberOfChannels() : 0;

  bool isInputGood = inputBus && numInputChannels >= 1 && numInputChannels <= 2;
  ASSERT(isInputGood);

  bool isOutputGood = outputBus && outputBus->numberOfChannels() == 2 &&
                      framesToProcess <= outputBus->length();
  ASSERT(isOutputGood);

  // The same conditions are checked again at runtime so that unusable buses
  // result in silence rather than being processed.
  if (!isInputGood || !isOutputGood) {
    if (outputBus)
      outputBus->zero();
    return;
  }

  // Without a database there are no kernels to convolve with, so the input is
  // copied to the output instead of being panned.
  HRTFDatabase* database = m_databaseLoader->database();
  if (!database) {
    outputBus->copyFrom(*inputBus, channelInterpretation);
    return;
  }

  // IRCAM HRTF azimuth values from the loaded database are reversed from the
  // panner's notion of azimuth.
  double azimuth = -desiredAzimuth;

  bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
  ASSERT(isAzimuthGood);
  if (!isAzimuthGood) {
    outputBus->zero();
    return;
  }

  // Normally, we'll just be dealing with mono sources.
  // If we have a stereo input, implement stereo panning with left source
  // processed by left HRTF, and right source by right HRTF.
  const AudioChannel* inputChannelL =
      inputBus->channelByType(AudioBus::ChannelLeft);
  const AudioChannel* inputChannelR =
      numInputChannels > 1 ? inputBus->channelByType(AudioBus::ChannelRight)
                           : nullptr;

  // Get source and destination pointers. For mono input the right source
  // aliases the left source.
  const float* sourceL = inputChannelL->data();
  const float* sourceR = numInputChannels > 1 ? inputChannelR->data() : sourceL;
  float* destinationL =
      outputBus->channelByType(AudioBus::ChannelLeft)->mutableData();
  float* destinationR =
      outputBus->channelByType(AudioBus::ChannelRight)->mutableData();

  // |azimuthBlend| is passed as an out-argument and receives its value from
  // the call below; the same blend is later used for both kernel sets.
  double azimuthBlend;
  int desiredAzimuthIndex =
      calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

  // Initially snap azimuth and elevation values to first values encountered.
  if (m_azimuthIndex1 == UninitializedAzimuth) {
    m_azimuthIndex1 = desiredAzimuthIndex;
    m_elevation1 = elevation;
  }
  if (m_azimuthIndex2 == UninitializedAzimuth) {
    m_azimuthIndex2 = desiredAzimuthIndex;
    m_elevation2 = elevation;
  }

  // Cross-fade / transition over a period of around 45 milliseconds.
  // This is an empirical value tuned to be a reasonable trade-off between
  // smoothness and speed. The fade is 2048 frames long for sample rates up to
  // 48000 and 4096 frames long above that.
  const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;

  // Check for azimuth and elevation changes, initiating a cross-fade if needed.
  // A new fade is only started while fully settled on one set (m_crossfadeX is
  // exactly 0 on set 1 or exactly 1 on set 2); the idle set is retargeted to
  // the desired position and the sign of m_crossfadeIncr selects the direction.
  if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
    if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
      // Cross-fade from 1 -> 2
      m_crossfadeIncr = 1 / fadeFrames;
      m_azimuthIndex2 = desiredAzimuthIndex;
      m_elevation2 = elevation;
    }
  }
  if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
    if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
      // Cross-fade from 2 -> 1
      m_crossfadeIncr = -1 / fadeFrames;
      m_azimuthIndex1 = desiredAzimuthIndex;
      m_elevation1 = elevation;
    }
  }

  // This algorithm currently requires that we process in power-of-two size
  // chunks at least AudioUtilities::kRenderQuantumFrames.
  ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess);
  DCHECK_GE(framesToProcess, AudioUtilities::kRenderQuantumFrames);

  // Integer division: frames beyond a whole number of segments are not
  // visited by the loop below.
  const unsigned framesPerSegment = AudioUtilities::kRenderQuantumFrames;
  const unsigned numberOfSegments = framesToProcess / framesPerSegment;

  for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
    // Get the HRTFKernels and interpolated delays. Both kernel sets are looked
    // up on every segment, whether or not a cross-fade is in progress.
    HRTFKernel* kernelL1;
    HRTFKernel* kernelR1;
    HRTFKernel* kernelL2;
    HRTFKernel* kernelR2;
    double frameDelayL1;
    double frameDelayR1;
    double frameDelayL2;
    double frameDelayR2;
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1,
                                             m_elevation1, kernelL1, kernelR1,
                                             frameDelayL1, frameDelayR1);
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2,
                                             m_elevation2, kernelL2, kernelR2,
                                             frameDelayL2, frameDelayR2);

    // A null kernel aborts the whole call: the entire output bus is zeroed,
    // including any segments already rendered.
    bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
    ASSERT(areKernelsGood);
    if (!areKernelsGood) {
      outputBus->zero();
      return;
    }

    ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds &&
           frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
    ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds &&
           frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

    // Crossfade inter-aural delays based on transitions. The blend uses the
    // value of m_crossfadeX at the start of the segment, so the delay is
    // constant within a segment.
    double frameDelayL =
        (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
    double frameDelayR =
        (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;

    // Calculate the source and destination pointers for the current segment.
    unsigned offset = segment * framesPerSegment;
    const float* segmentSourceL = sourceL + offset;
    const float* segmentSourceR = sourceR + offset;
    float* segmentDestinationL = destinationL + offset;
    float* segmentDestinationR = destinationR + offset;

    // First run through delay lines for inter-aural time difference. The
    // delayed signal is written into the destination buffers, which then serve
    // as the input to the convolvers below.
    m_delayLineL.setDelayFrames(frameDelayL);
    m_delayLineR.setDelayFrames(frameDelayR);
    m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
    m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);

    // A non-zero increment means a cross-fade is in progress.
    bool needsCrossfading = m_crossfadeIncr;

    // Have the convolvers render directly to the final destination if we're not
    // cross-fading. While cross-fading, each set renders into its own
    // temporary buffer so the two results can be mixed afterwards.
    float* convolutionDestinationL1 =
        needsCrossfading ? m_tempL1.data() : segmentDestinationL;
    float* convolutionDestinationR1 =
        needsCrossfading ? m_tempR1.data() : segmentDestinationR;
    float* convolutionDestinationL2 =
        needsCrossfading ? m_tempL2.data() : segmentDestinationL;
    float* convolutionDestinationR2 =
        needsCrossfading ? m_tempR2.data() : segmentDestinationR;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're
    // not currently cross-fading.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
      m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL,
                            convolutionDestinationL1, framesPerSegment);
      m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR,
                            convolutionDestinationR1, framesPerSegment);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
      m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL,
                            convolutionDestinationL2, framesPerSegment);
      m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR,
                            convolutionDestinationR2, framesPerSegment);
    }

    if (needsCrossfading) {
      // Apply linear cross-fade. The fade position advances by one increment
      // per frame; weight (1 - x) goes to set 1 and weight x to set 2.
      float x = m_crossfadeX;
      float incr = m_crossfadeIncr;
      for (unsigned i = 0; i < framesPerSegment; ++i) {
        segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] +
                                 x * convolutionDestinationL2[i];
        segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] +
                                 x * convolutionDestinationR2[i];
        x += incr;
      }
      // Update cross-fade value from local.
      m_crossfadeX = x;

      // Once the fade position is within one increment of its end point, snap
      // it to the exact end value, switch the active set and stop fading. The
      // exact 0 / 1 values are what the change detection above tests for.
      if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
        // We've fully made the crossfade transition from 1 -> 2.
        m_crossfadeSelection = CrossfadeSelection2;
        m_crossfadeX = 1;
        m_crossfadeIncr = 0;
      } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
        // We've fully made the crossfade transition from 2 -> 1.
        m_crossfadeSelection = CrossfadeSelection1;
        m_crossfadeX = 0;
        m_crossfadeIncr = 0;
      }
    }
  }
}
Example #2
0
void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBus* inputBus, AudioBus* outputBus, size_t framesToProcess)
{
    unsigned numInputChannels = inputBus ? inputBus->numberOfChannels() : 0;

    bool isInputGood = inputBus &&  numInputChannels >= 1 && numInputChannels <= 2;
    ASSERT(isInputGood);

    bool isOutputGood = outputBus && outputBus->numberOfChannels() == 2 && framesToProcess <= outputBus->length();
    ASSERT(isOutputGood);

    if (!isInputGood || !isOutputGood) {
        if (outputBus)
            outputBus->zero();
        return;
    }

    // This code only runs as long as the context is alive and after database has been loaded.
    HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();    
    ASSERT(database);
    if (!database) {
        outputBus->zero();
        return;
    }

    // IRCAM HRTF azimuths values from the loaded database is reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->zero();
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF.
    const AudioChannel* inputChannelL = inputBus->channelByType(AudioBus::ChannelLeft);
    const AudioChannel* inputChannelR = numInputChannels > 1 ? inputBus->channelByType(AudioBus::ChannelRight) : 0;

    // Get source and destination pointers.
    const float* sourceL = inputChannelL->data();
    const float* sourceR = numInputChannels > 1 ? inputChannelR->data() : sourceL;
    float* destinationL = outputBus->channelByType(AudioBus::ChannelLeft)->mutableData();
    float* destinationR = outputBus->channelByType(AudioBus::ChannelRight)->mutableData();

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // This algorithm currently requires that we process in power-of-two size chunks at least 128.
    ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess);
    ASSERT(framesToProcess >= 128);
    
    const unsigned framesPerSegment = 128;
    const unsigned numberOfSegments = framesToProcess / framesPerSegment;

    for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
        if (m_isFirstRender) {
            // Snap exactly to desired position (first time and after reset()).
            m_azimuthIndex = desiredAzimuthIndex;
            m_isFirstRender = false;
        } else {
            // Each segment renders with an azimuth index closer by one to the desired azimuth index.
            // Because inter-aural time delay is mostly a factor of azimuth and the delay is where the clicks and graininess come from,
            // we don't bother smoothing the elevations.
            int numberOfAzimuths = database->numberOfAzimuths();
            bool wrap = wrapDistance(m_azimuthIndex, desiredAzimuthIndex, numberOfAzimuths);
            if (wrap) {
                if (m_azimuthIndex < desiredAzimuthIndex)
                    m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
                else if (m_azimuthIndex > desiredAzimuthIndex)
                    m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
            } else {
                if (m_azimuthIndex < desiredAzimuthIndex)
                    m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
                else if (m_azimuthIndex > desiredAzimuthIndex)
                    m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
            }
        }
        
        // Get the HRTFKernels and interpolated delays.    
        HRTFKernel* kernelL;
        HRTFKernel* kernelR;
        double frameDelayL;
        double frameDelayR;
        database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex, elevation, kernelL, kernelR, frameDelayL, frameDelayR);

        ASSERT(kernelL && kernelR);
        if (!kernelL || !kernelR) {
            outputBus->zero();
            return;
        }
        
        ASSERT(frameDelayL / sampleRate() < MaxDelayTimeSeconds && frameDelayR / sampleRate() < MaxDelayTimeSeconds);
            
        // Calculate the source and destination pointers for the current segment.
        unsigned offset = segment * framesPerSegment;
        const float* segmentSourceL = sourceL + offset;
        const float* segmentSourceR = sourceR + offset;
        float* segmentDestinationL = destinationL + offset;
        float* segmentDestinationR = destinationR + offset;

        // First run through delay lines for inter-aural time difference.
        m_delayLineL.setDelayFrames(frameDelayL);
        m_delayLineR.setDelayFrames(frameDelayR);
        m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
        m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);

        // Now do the convolutions in-place.
        m_convolverL.process(kernelL->fftFrame(), segmentDestinationL, segmentDestinationL, framesPerSegment);
        m_convolverR.process(kernelR->fftFrame(), segmentDestinationR, segmentDestinationR, framesPerSegment);
    }
}
Example #3
0
// Pans one block of |inputBus| into the two-channel |outputBus| using HRTF kernels
// looked up in the database supplied by m_databaseLoader.
//
// |desiredAzimuth| is negated before use, and the negated value must lie in
// [-180, 180]. |elevation| is handed to the database lookup unchanged.
// |inputBus| may be a null chunk (treated as zero channels) and is asserted to have
// at most two channels and a duration of WEBAUDIO_BLOCK_SIZE. |outputBus| must be
// non-null with exactly two channels and a duration of WEBAUDIO_BLOCK_SIZE.
//
// If the output chunk is unusable, the database is not available, the azimuth is out
// of range, or a kernel lookup yields a null kernel, the output is set to null via
// SetNull() (when |outputBus| is non-null) and the function returns early.
//
// Two kernel sets (1 and 2) are kept: when the desired azimuth index or elevation
// changes while settled on one set, the other set is retargeted and both the delays
// and the convolved output are linearly cross-faded towards it.
void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioChunk* inputBus, AudioChunk* outputBus)
{
    // A null input chunk counts as zero channels.
    // NOTE(review): inputBus is dereferenced here without a null-pointer check, unlike
    // outputBus below; presumably callers always pass a valid pointer - confirm.
    unsigned numInputChannels =
        inputBus->IsNull() ? 0 : inputBus->mChannelData.Length();

    MOZ_ASSERT(numInputChannels <= 2);
    MOZ_ASSERT(inputBus->mDuration == WEBAUDIO_BLOCK_SIZE);

    bool isOutputGood = outputBus && outputBus->mChannelData.Length() == 2 && outputBus->mDuration == WEBAUDIO_BLOCK_SIZE;
    MOZ_ASSERT(isOutputGood);

    // The same condition is checked again at runtime so that an unusable output chunk
    // is set to null rather than being written to.
    if (!isOutputGood) {
        if (outputBus)
            outputBus->SetNull(outputBus->mDuration);
        return;
    }

    HRTFDatabase* database = m_databaseLoader->database();
    if (!database) { // not yet loaded
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // IRCAM HRTF azimuth values from the loaded database are reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    MOZ_ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF.

    // Get destination pointers. The channel data is stored as const void*, so the
    // constness is cast away to obtain writable float buffers.
    float* destinationL =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0]));
    float* destinationR =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1]));

    // azimuthBlend is passed as an out-argument and receives its value from the call
    // below; the same blend is later used for both kernel sets.
    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // Initially snap azimuth and elevation values to first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed. The fade is 2048 frames long for sample rates up to
    // 48000 and 4096 frames long above that.
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    // A new fade is only started while fully settled on one set (m_crossfadeX is
    // exactly 0 on set 1 or exactly 1 on set 2); the idle set is retargeted to the
    // desired position and the sign of m_crossfadeIncr selects the direction.
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // Get the HRTFKernels and interpolated delays. Both kernel sets are looked up
    // whether or not a cross-fade is in progress.
    HRTFKernel* kernelL1;
    HRTFKernel* kernelR1;
    HRTFKernel* kernelL2;
    HRTFKernel* kernelR2;
    double frameDelayL1;
    double frameDelayR1;
    double frameDelayL2;
    double frameDelayR2;
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

    bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
    MOZ_ASSERT(areKernelsGood);
    if (!areKernelsGood) {
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
    MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

    // Crossfade inter-aural delays based on transitions. A separate delay is computed
    // for every frame of the block, following the same fade position the output
    // cross-fade below uses. The local fade position is a scratch copy here;
    // m_crossfadeX itself is only updated by the output cross-fade.
    double frameDelaysL[WEBAUDIO_BLOCK_SIZE];
    double frameDelaysR[WEBAUDIO_BLOCK_SIZE];
    {
      float x = m_crossfadeX;
      float incr = m_crossfadeIncr;
      for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
        frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2;
        frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2;
        x += incr;
      }
    }

    // First run through delay lines for inter-aural time difference. The input block
    // is written once, each output channel is read back with its own per-frame
    // delays, and then the delay line is advanced to the next block.
    m_delayLine.Write(*inputBus);
    // "Speakers" means a mono input is read into both outputs (with possibly
    // different delays).
    m_delayLine.ReadChannel(frameDelaysL, outputBus, 0,
                            ChannelInterpretation::Speakers);
    m_delayLine.ReadChannel(frameDelaysR, outputBus, 1,
                            ChannelInterpretation::Speakers);
    m_delayLine.NextBlock();

    // A non-zero increment means a cross-fade is in progress.
    bool needsCrossfading = m_crossfadeIncr;

    // Have the convolvers render directly to the final destination if we're not cross-fading.
    // While cross-fading, each set renders into its own temporary buffer so the two
    // results can be mixed afterwards.
    float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.Elements() : destinationL;
    float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.Elements() : destinationR;
    float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.Elements() : destinationL;
    float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.Elements() : destinationR;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.
    // The convolvers read the delayed signal from the destination buffers.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
        m_convolverL1.process(kernelL1->fftFrame(), destinationL, convolutionDestinationL1, WEBAUDIO_BLOCK_SIZE);
        m_convolverR1.process(kernelR1->fftFrame(), destinationR, convolutionDestinationR1, WEBAUDIO_BLOCK_SIZE);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
        m_convolverL2.process(kernelL2->fftFrame(), destinationL, convolutionDestinationL2, WEBAUDIO_BLOCK_SIZE);
        m_convolverR2.process(kernelR2->fftFrame(), destinationR, convolutionDestinationR2, WEBAUDIO_BLOCK_SIZE);
    }

    if (needsCrossfading) {
        // Apply linear cross-fade. The fade position advances by one increment per
        // frame; weight (1 - x) goes to set 1 and weight x to set 2.
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            destinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
            destinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
            x += incr;
        }
        // Update cross-fade value from local.
        m_crossfadeX = x;

        // Once the fade position is within one increment of its end point, snap it to
        // the exact end value, switch the active set and stop fading. The exact 0 / 1
        // values are what the change detection above tests for.
        if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
            // We've fully made the crossfade transition from 1 -> 2.
            m_crossfadeSelection = CrossfadeSelection2;
            m_crossfadeX = 1;
            m_crossfadeIncr = 0;
        } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
            // We've fully made the crossfade transition from 2 -> 1.
            m_crossfadeSelection = CrossfadeSelection1;
            m_crossfadeX = 0;
            m_crossfadeIncr = 0;
        }
    }
}