bool Model::initialize(const SignalBank &input) { outputModules_.clear(); modules_.clear(); if(!initializeInternal(input)) { LOUDNESS_ERROR(name_ << ": Not initialised!"); return 0; } else { if (outputModules_.empty()) { LOUDNESS_ERROR(name_ << ": Invalid, this Model has no outputs."); return 0; } LOUDNESS_DEBUG(name_ << ": initialised."); nModules_ = (int)modules_.size(); //initialise all from root module modules_[0] -> initialize(input); configureSignalBankAggregation(); LOUDNESS_DEBUG(name_ << ": Module targets set and initialised."); initialized_ = 1; return 1; } }
/*
 * Applies a named parameter set for the Chen & Hu 2012 model.
 *
 * "CH2012" (the default) uses a 0.1 Cam filter spacing with no spectral
 * compression; "Faster" coarsens the spacing to 0.5 Cams and enables a
 * 0.3 Cam compression criterion. Any other name falls back to "CH2012".
 *
 * @param setName Name of the parameter set ("CH2012" or "Faster").
 */
void DynamicLoudnessCH2012::configureModelParameters(const string& setName)
{
    //common to all parameter sets
    setRate(1000);
    setOuterEarType(OME::ANSIS342007_FREEFIELD);
    setSpectrumSampledUniformly(true);
    setHoppingGoertzelDFTUsed(false);
    setExcitationPatternInterpolated(false);
    setInterpolationCubic(true);
    setSpecificLoudnessOutput(true);
    setBinauralInhibitionUsed(true);
    setPresentationDiotic(true);
    setFirstSampleAtWindowCentre(true);
    setFilterSpacingInCams(0.1);
    setCompressionCriterionInCams(0.0);

    // Short- and long-term temporal integration time-constants (seconds).
    attackTimeSTL_ = 0.016;
    releaseTimeSTL_ = 0.032;
    attackTimeLTL_ = 0.1;
    releaseTimeLTL_ = 2.0;

    if (setName == "Faster")
    {
        setFilterSpacingInCams(0.5);
        setCompressionCriterionInCams(0.3);
        LOUDNESS_DEBUG(name_ << ": using a filter spacing of 0.5 Cams"
                << " with 0.3 Cam spectral compression criterion.");
    }
    else if (setName != "CH2012")
    {
        // Unknown set name: fall back to the reference configuration.
        // The recursion is a no-op beyond the defaults set above but
        // mirrors the fallback style of the other models.
        configureModelParameters("CH2012");
        // Fixed: log line was missing the ": " separator used everywhere else.
        LOUDNESS_DEBUG(name_ << ": Using Settings from Chen and Hu 2012 paper.");
    }
}
/*
 * Scales a window function in place according to the requested
 * normalisation scheme (ENERGY, AMPLITUDE, or NONE for a no-op).
 *
 * @param window        Window samples to normalise in place.
 * @param normalisation Normalisation scheme to apply.
 */
void Window::normaliseWindow(RealVec &window, const Normalisation& normalisation)
{
    // Nothing to do when no normalisation is requested.
    if (normalisation == NONE)
        return;

    uint nSamples = window.size();

    // Accumulate the sum and the energy of the window samples.
    double total = 0.0;
    double energy = 0.0;
    for (uint n = 0; n < nSamples; ++n)
    {
        double sample = window[n];
        total += sample;
        energy += sample * sample;
    }

    // Derive the scaling factor for the requested scheme; anything other
    // than AMPLITUDE uses the energy rule.
    double gain;
    if (normalisation == AMPLITUDE)
    {
        gain = nSamples / total;
        LOUDNESS_DEBUG(name_ << ": Normalising for amplitude.");
    }
    else
    {
        if (normalisation == ENERGY)
            LOUDNESS_DEBUG(name_ << ": Normalising for energy.");
        gain = sqrt(nSamples / energy);
    }

    LOUDNESS_DEBUG(name_ << ": Normalising window using factor: " << gain);

    // Apply the gain to every sample.
    for (uint n = 0; n < nSamples; ++n)
        window[n] *= gain;
}
bool Model::initialize(const TrackBank &input) { if(!initializeInternal(input)) { LOUDNESS_ERROR(name_ << ": Not initialised!"); return 0; } else { LOUDNESS_DEBUG(name_ << ": initialised."); //set up the chain nModules_ = (int)modules_.size(); for(int i=0; i<nModules_-1; i++) modules_[i]->setTargetModule(modules_[i+1].get()); //initialise all modules_[0]->initialize(input); LOUDNESS_DEBUG(name_ << ": Targets set and all modules initialised."); initialized_ = 1; return 1; } }
/*
 * Fills `window` with samples of the requested window function.
 *
 * Bug fix: the switch previously dispatched on the member windowType_,
 * silently ignoring the windowType argument. The only call site in this
 * file passes windowType_ anyway, so behaviour there is unchanged, but
 * the parameter is now honoured for any other caller.
 *
 * @param window     Destination buffer (pre-sized to the window length).
 * @param windowType Window function to generate.
 * @param periodic   True for a periodic (DFT-even) window, false for
 *                   symmetric.
 */
void Window::generateWindow(RealVec &window, const WindowType& windowType, bool periodic)
{
    switch (windowType)
    {
        case HANN:
            hann(window, periodic);
            LOUDNESS_DEBUG(name_ << ": Using a Hann window.");
            break;
        default:
            // Only the Hann window is implemented; fall back to it.
            hann(window, periodic);
            LOUDNESS_DEBUG(name_ << ": Using a Hann window.");
            break;
    }
}
/*
 * Precomputes the roex filter slopes and centre frequencies for the
 * ANSI S3.4-2007 excitation pattern calculation, and configures the
 * output SignalBank (one channel per filter).
 *
 * @param input Input SignalBank carrying the power spectrum components.
 * @return 1 (true) on success.
 */
bool RoexBankANSIS342007::initializeInternal(const SignalBank &input)
{
    // Number of spectral components feeding the filter bank.
    int nChannels = input.getNChannels();

    // Number of roex filters spanning [camLo_, camHi_] at camStep_ spacing.
    nFilters_ = std::floor((camHi_ - camLo_) / camStep_) + 1;

    LOUDNESS_DEBUG(name_
            << " filter spacing in Cams: " << camStep_
            << " Total number of filters: " << nFilters_);

    // One output channel per filter, one sample per frame.
    output_.initialize (input.getNSources(),
                        input.getNEars(),
                        nFilters_,
                        1,
                        input.getFs());
    output_.setFrameRate (input.getFrameRate());
    output_.setChannelSpacingInCams (camStep_);

    //see ANSI S3.4 2007 p.11
    const Real p51_1k = 4000.0 / centreFreqToCambridgeERB (1000.0);

    //pcomp is slope per component
    pcomp_.assign (nChannels, 0.0);
    //comp_level holds level per ERB on each component
    compLevel_.assign (nChannels, 0.0);
    //p upper is level invariant
    pu_.assign (nFilters_, 0.0);
    //p lower is level dependent
    pl_.assign (nFilters_, 0.0);

    // Slope associated with each input component.
    for (int chn = 0; chn < nChannels; ++chn)
    {
        Real compFreq = input.getCentreFreq (chn);
        pcomp_[chn] = 4.0 * compFreq / centreFreqToCambridgeERB (compFreq);
    }

    // Per-filter slopes and output centre frequencies.
    for (int fil = 0; fil < nFilters_; ++fil)
    {
        //filter frequency in Cams, then Hz
        Real camFreq = camLo_ + (fil * camStep_);
        Real hzFreq = camToHertz (camFreq);

        //get the ERB of the filter
        Real filterErb = centreFreqToCambridgeERB (hzFreq);

        //ANSI S3.4 sec 3.5 p.11
        pu_[fil] = 4.0 * hzFreq / filterErb;
        pl_[fil] = 0.35 * (pu_[fil] / p51_1k);

        output_.setCentreFreq (fil, hzFreq);
    }

    return 1;
}
/*
 * Model constructor.
 *
 * @param name      Human-readable model name used as a prefix in log
 *                  messages.
 * @param isDynamic True if this model produces time-varying (dynamic)
 *                  loudness output.
 */
Model::Model(string name, bool isDynamic) :
    name_(name),
    isDynamic_(isDynamic),
    rate_(0.0) // processing rate; set later (e.g. via setRate() during configuration)
{
    LOUDNESS_DEBUG(name_ << ": Constructed.");
}
/*
 * ARAverager constructor.
 *
 * Stores the attack and release time-constants; the corresponding
 * one-pole smoothing coefficients are derived from the input frame rate
 * in initializeInternal().
 *
 * @param attackTime  Attack time-constant (seconds).
 * @param releaseTime Release time-constant (seconds).
 */
ARAverager::ARAverager(Real attackTime, Real releaseTime) :
    Module("ARAverager"),
    attackTime_(attackTime),
    releaseTime_(releaseTime)
{
    LOUDNESS_DEBUG(name_ << ": Constructed.");
}
/*
 * Precomputes the low-frequency (< 500 Hz) specific loudness parameter
 * tables for the ANSI S3.4-2007 equations and configures the output
 * SignalBank to mirror the input.
 *
 * @param input Input SignalBank (excitation pattern channels).
 * @return 1 (true) on success.
 */
bool SpecificLoudnessANSIS342007::initializeInternal(const SignalBank &input)
{
    LOUDNESS_ASSERT(input.getNChannels() > 1,
            name_ << ": Insufficient number of input channels.");

    //c value from ANSI 2007
    parameterC_ = 0.046871;

    // When the binaural inhibition stage follows, C is scaled up to
    // compensate for its attenuation.
    if (updateParameterCForBinauralInhibition_)
    {
        parameterC_ /= 0.75;
        LOUDNESS_DEBUG(name_
                << ": Scaling parameter C for binaural inhibition model: "
                << parameterC_);
    }

    //Number of filters below 500Hz
    nFiltersLT500_ = 0;

    // Internal excitation at the 500 Hz reference point (dB).
    Real eThrqdB500Hz = internalExcitation(500);

    // Build the parameter tables, one entry per channel below 500 Hz.
    int nChannels = input.getNChannels();
    for (int chn = 0; chn < nChannels; ++chn)
    {
        Real fc = input.getCentreFreq(chn);
        if (fc >= 500)
            continue;

        Real eThrqdB = internalExcitation(fc);
        Real gdB = eThrqdB500Hz - eThrqdB;

        eThrqParam_.push_back(pow(10, eThrqdB / 10.0));
        parameterG_.push_back(pow(10, gdB / 10.0));
        parameterA_.push_back(gdBToA(gdB));
        parameterAlpha_.push_back(gdBToAlpha(gdB));
        ++nFiltersLT500_;
    }

    LOUDNESS_DEBUG(name_ << ": number of filters <500 Hz: " << nFiltersLT500_);

    //output SignalBank mirrors the input configuration
    output_.initialize(input);

    return 1;
}
/*
 * Initialises the window functions and the output SignalBank.
 *
 * length_[0] must be the largest window and must equal the number of
 * input samples. With one input channel and multiple window lengths,
 * the windows run in parallel (one output channel per window) and each
 * shorter window is offset so its centre aligns with the centre of the
 * largest window. Otherwise, one window per input channel is required.
 *
 * @param input Input SignalBank of framed samples.
 * @return 1 (true) on success.
 */
bool Window::initializeInternal(const SignalBank &input)
{
    LOUDNESS_ASSERT(input.getNSamples() == length_[0],
            name_ << ": Number of input samples does not equal the largest window size!");

    //number of windows
    nWindows_ = (int)length_.size();
    LOUDNESS_DEBUG(name_ << ": Number of windows = " << nWindows_);
    window_.resize(nWindows_);

    //Largest window should be the first
    largestWindowSize_ = length_[0];
    LOUDNESS_DEBUG(name_ << ": Largest window size = " << largestWindowSize_);

    //first window (largest) does not require a shift
    windowOffset_.push_back(0);

    //check if we are using multi windows on one input channel
    int nOutputChannels = input.getNChannels();
    if((input.getNChannels()==1) && (nWindows_>1))
    {
        LOUDNESS_DEBUG(name_ << ": Using parallel windows");
        parallelWindows_ = true;
        nOutputChannels = nWindows_;

        //if so, calculate the delay
        //centre-align every shorter window with the largest one
        int alignmentSample = largestWindowSize_ / 2;
        LOUDNESS_DEBUG(name_ << ": Alignment sample = " << alignmentSample);
        for(int w=1; w<nWindows_; w++)
        {
            int thisCentreSample = length_[w] / 2;
            int thisWindowOffset = alignmentSample - thisCentreSample;
            windowOffset_.push_back(thisWindowOffset);
            LOUDNESS_DEBUG(name_ << ": Centre sample for window " << w
                    << " = " << thisCentreSample);
            LOUDNESS_DEBUG(name_ << ": Offset for window " << w
                    << " = " << thisWindowOffset);
        }
    }
    else
    {
        //one window per input channel is the only other supported layout
        LOUDNESS_ASSERT(input.getNChannels() == nWindows_,
                "Multiple channels but incorrect window specification.");
    }

    //generate the normalised window functions
    for (int w=0; w<nWindows_; w++)
    {
        window_[w].assign(length_[w],0.0);
        generateWindow(window_[w], windowType_, periodic_);
        normaliseWindow(window_[w], normalisation_);
        LOUDNESS_DEBUG(name_ << ": Length of window " << w
                << " = " << window_[w].size());
    }

    //initialise the output signal
    output_.initialize(input.getNEars(),
                       nOutputChannels,
                       largestWindowSize_,
                       input.getFs());
    output_.setFrameRate(input.getFrameRate());

    return 1;
}
/*
 * Flags every requested output module for SignalBank aggregation.
 *
 * Names in outputsToAggregate_ that do not correspond to a registered
 * output module are silently ignored.
 */
void Model::configureSignalBankAggregation()
{
    for (const auto& wanted : outputsToAggregate_)
    {
        auto it = outputModules_.find(wanted);
        if (it == outputModules_.end())
            continue;

        LOUDNESS_DEBUG(name_ << ": Aggregating : " << it->first);
        it->second->setOutputAggregated(true);
    }
}
/*
 * Configures the short- and long-term temporal integration
 * time-constants from one of two published parameter sets.
 *
 * "GM2002" uses the values from the 2002 paper; "MGS2003" differs only
 * in the long-term release constant. Any other string falls back to
 * "GM2002".
 *
 * @param author Parameter set name ("GM2002" or "MGS2003").
 */
void DynamicLoudnessGM2002::configureSmoothingTimes(const string& author)
{
    // Unrecognised author strings fall back to the 2002 values.
    if ((author != "GM2002") && (author != "MGS2003"))
    {
        configureSmoothingTimes("GM2002");
        return;
    }

    // Time-constants shared by both parameter sets (derived from the
    // per-millisecond smoothing coefficients in the papers).
    attackTimeSTL_ = -0.001/log(1-0.045);
    releaseTimeSTL_ = -0.001/log(1-0.02);
    attackTimeLTL_ = -0.001/log(1-0.01);

    if (author == "MGS2003")
    {
        // The 2003 paper modifies only the long-term release constant.
        releaseTimeLTL_ = -0.001/log(1-0.005);
        LOUDNESS_DEBUG(name_ << ": Modified time-constants from 2003 paper");
    }
    else
    {
        releaseTimeLTL_ = -0.001/log(1-0.0005);
        LOUDNESS_DEBUG(name_ << ": Time-constants from 2002 paper");
    }
}
/*
 * Precomputes the per-channel parameter tables for the Moore, Glasberg
 * and Baer (1997) partial loudness equations and configures the output
 * SignalBank to mirror the input.
 *
 * @param input Input SignalBank (excitation pattern channels).
 * @return 1 (true) on success.
 */
bool SpecificPartialLoudnessMGB1997::initializeInternal(const SignalBank &input)
{
    //c value from ANSI 2007
    parameterC_ = 0.046871;

    // Select the exponent and the matching C2 scaling for the chosen
    // specific loudness formulation.
    if (useANSISpecificLoudness_)
    {
        yearExp_ = 0.2;
        parameterC2_ = parameterC_ / std::pow(1.0707, 0.5);
    }
    else
    {
        yearExp_ = 0.5;
        parameterC2_ = parameterC_ / std::pow(1040000.0, 0.5);
    }

    // When the binaural inhibition stage follows, C is scaled up to
    // compensate for its attenuation.
    if (updateParameterCForBinauralInhibition_)
    {
        parameterC_ /= 0.75;
        LOUDNESS_DEBUG(name_
                << ": Scaling parameter C for binaural inhibition model: "
                << parameterC_);
    }

    // Internal excitation at the 500 Hz reference point (dB).
    Real eThrqdB500Hz = internalExcitation(500);

    // One entry per input channel in each parameter table.
    int nChannels = input.getNChannels();
    for (int chn = 0; chn < nChannels; ++chn)
    {
        Real fc = input.getCentreFreq (chn);
        Real eThrqdB = internalExcitation (fc);
        Real gdB = eThrqdB500Hz - eThrqdB;

        eThrqParam_.push_back (std::pow (10, eThrqdB / 10.0));
        gParam_.push_back (std::pow (10, gdB / 10.0));
        aParam_.push_back (gdBToA (gdB));
        alphaParam_.push_back (gdBToAlpha (gdB));
        kParam_.push_back (std::pow (10, kdB (fc) / 10.0));
    }

    //output SignalBank mirrors the input configuration
    output_.initialize (input);

    return 1;
}
/*
 * Derives the one-pole attack/release smoothing coefficients from the
 * input frame rate and the stored time-constants, then configures the
 * output SignalBank to mirror the input.
 *
 * @param input Input SignalBank.
 * @return 1 (true) on success.
 */
bool ARAverager::initializeInternal(const SignalBank &input)
{
    // Coefficient for a one-pole smoother with the given time-constant
    // at the input frame rate.
    auto coefficient = [&input] (Real timeConstant)
    {
        return 1 - exp(-1.0 / (input.getFrameRate() * timeConstant));
    };

    //filter coefficients
    attackCoef_ = coefficient(attackTime_);
    releaseCoef_ = coefficient(releaseTime_);

    LOUDNESS_DEBUG(name_
            << ": Input frame rate: " << input.getFrameRate()
            << ". Attack time: " << attackTime_
            << ". Attack coefficient: " << attackCoef_
            << ". Release time: " << releaseTime_
            << ". Release coefficient: " << releaseCoef_);

    //output SignalBank mirrors the input configuration
    output_.initialize(input);

    return 1;
}
/*
 * Builds the processing chain for the dynamic partial loudness model
 * (Glasberg and Moore): outer/middle ear filtering, framing,
 * multi-resolution power spectrum, optional compression and spectral
 * weighting, roex filter bank, specific partial loudness and temporal
 * integration.
 *
 * @param input Input TrackBank of audio samples.
 * @return 1 (true) on success.
 */
bool DynamicPartialLoudnessGM::initializeInternal(const TrackBank &input)
{
    /*
     * Outer-Middle ear filter
     */
    //if filter coefficients have not been provided
    //use spectral weighting to approximate outer and middle ear response
    bool weightSpectrum = false;
    if(pathToFilterCoefs_.empty())
    {
        LOUDNESS_WARNING(name_ << ": No filter coefficients, opting to weight power spectrum.");
        weightSpectrum = true;

        //should we use for HPF for low freqs? default is true
        if(hpf_)
        {
            LOUDNESS_DEBUG(name_ << ": Using HPF.");
            //3rd order Butterworth high-pass at 50 Hz
            modules_.push_back(unique_ptr<Module> (new Butter(3, 0, 50.0)));
        }
    }
    else { //otherwise, load them

        //load numpy array holding the filter coefficients
        cnpy::NpyArray arr = cnpy::npy_load(pathToFilterCoefs_);
        Real *data = reinterpret_cast<Real*> (arr.data);

        //check if filter is IIR or FIR
        //two coefficient rows (b and a) implies IIR
        bool iir = false;
        if(arr.shape[0]==2)
            iir = true;

        //load the coefficients
        RealVec bCoefs, aCoefs;
        for(unsigned int i=0; i<arr.shape[1];i++)
        {
            bCoefs.push_back(data[i]);
            if(iir)
                aCoefs.push_back(data[i+arr.shape[1]]);
        }

        //create module
        if(iir)
            modules_.push_back(unique_ptr<Module>
                    (new IIR(input.getNTracks(), bCoefs, aCoefs)));
        else
            modules_.push_back(unique_ptr<Module> (new FIR(bCoefs)));

        //clean up
        delete [] data;
    }

    /*
     * Frame generator for spectrogram
     */
    int windowSize = round(0.064*input.getFs());
    int hopSize = round(timeStep_*input.getFs());
    modules_.push_back(unique_ptr<Module>
            (new FrameGenerator(windowSize, hopSize)));

    /*
     * Multi-resolution spectrogram
     */
    //band edges in Hz; one analysis window length per band
    RealVec bandFreqsHz {10, 80, 500, 1250, 2540, 4050, 15001};

    //window spec
    RealVec windowSizeSecs {0.064, 0.032, 0.016, 0.008, 0.004, 0.002};

    //create appropriate power spectrum module
    if(stereoToMono_)
    {
        modules_.push_back(unique_ptr<Module>
                (new PowerSpectrumAndSpatialDetection(bandFreqsHz,
                                                      windowSizeSecs,
                                                      uniform_)));
    }
    else
    {
        modules_.push_back(unique_ptr<Module>
                (new PowerSpectrum(bandFreqsHz, windowSizeSecs, uniform_)));
    }

    /*
     * Compression
     */
    if(compressionCriterion_ > 0)
        modules_.push_back(unique_ptr<Module>
                (new CompressSpectrum(compressionCriterion_)));

    /*
     * Spectral weighting if necessary
     */
    if(weightSpectrum)
    {
        OME::MiddleEarType middleEar = OME::ANSI;
        OME::OuterEarType outerEar = OME::ANSI_FREE;
        if(hpf_)
            middleEar = OME::ANSI_HPF;
        if(diffuseField_)
            outerEar = OME::ANSI_DIFFUSE;

        modules_.push_back(unique_ptr<Module>
                (new WeightSpectrum(middleEar, outerEar)));
    }

    /*
     * Roex filters
     */
    if(fastBank_)
    {
        modules_.push_back(unique_ptr<Module>
                (new FastRoexBank(filterSpacing_, interpRoexBank_)));
    }
    else
    {
        modules_.push_back(unique_ptr<Module>
                (new RoexBankANSIS3407(1.8, 38.9, filterSpacing_)));
    }

    /*
     * Specific loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new SpecificPartialLoudnessGM()));

    /*
     * Loudness integration
     */
    modules_.push_back(unique_ptr<Module>
            (new IntegratedPartialLoudnessGM(diotic_, true)));

    return 1;
}
/*
 * Builds the full processing chain for the Glasberg & Moore 2002 dynamic
 * loudness model, registering named output modules ("Excitation",
 * "SpecificLoudness", "InstantaneousLoudness", "ShortTermLoudness",
 * "LongTermLoudness"). With more than one input source and partial
 * loudness enabled, a second (parallel) chain is built for partial
 * loudness, tapped from the last spectrum-stage module.
 *
 * @param input Input SignalBank of audio samples.
 * @return 1 (true) on success.
 */
bool DynamicLoudnessGM2002::initializeInternal(const SignalBank &input)
{
    /*
     * Outer-Middle ear filter
     */
    //if filter coefficients have not been provided
    //use spectral weighting to approximate outer and middle ear response
    bool weightSpectrum = false;
    if (pathToFilterCoefs_.empty())
    {
        weightSpectrum = true;

        //should we use for useHpf for low freqs? default is true
        if (middleEarFilter_ == OME::ANSIS342007_MIDDLE_EAR_HPF)
        {
            //3rd order Butterworth high-pass at 50 Hz
            modules_.push_back(unique_ptr<Module> (new Butter(3, 0, 50.0)));
        }
    }
    else {
        //otherwise, load them
        //load numpy array holding the filter coefficients
        cnpy::NpyArray arr = cnpy::npy_load(pathToFilterCoefs_);
        Real *data = reinterpret_cast<Real*> (arr.data);

        //check if filter is IIR or FIR
        //two coefficient rows (b and a) implies IIR
        bool iir = false;
        if(arr.shape[0]==2)
            iir = true;

        //load the coefficients
        RealVec bCoefs, aCoefs;
        for(unsigned int i=0; i<arr.shape[1];i++)
        {
            bCoefs.push_back(data[i]);
            if(iir)
                aCoefs.push_back(data[i+arr.shape[1]]);
        }

        //create module
        if(iir)
            modules_.push_back(unique_ptr<Module> (new IIR(bCoefs, aCoefs)));
        else
            modules_.push_back(unique_ptr<Module> (new FIR(bCoefs)));

        //clean up
        delete [] data;
    }

    /*
     * Multi-resolution spectrogram
     * 10 Hz to include energy caused by sidebands for frequencies near
     * 20Hz but exclude DC.
     * 15001 Hz so top frequencies included.
     */
    RealVec bandFreqsHz {10, 80, 500, 1250, 2540, 4050, 15001};

    //window spec
    RealVec windowSizeSecs {0.064, 0.032, 0.016, 0.008, 0.004, 0.002};
    vector<int> windowSizeSamples(6, 0);
    //round to nearest sample and force to be even such that centre samples
    //are aligned (using periodic Hann window)
    for (int w = 0; w < 6; w++)
    {
        if (isSpectralResolutionDoubled_)
            windowSizeSecs[w] *= 2;
        windowSizeSamples[w] = (int)round(windowSizeSecs[w] * input.getFs());
        windowSizeSamples[w] += windowSizeSamples[w] % 2;
    }

    // hop size to the nearest sample
    int hopSize = round(input.getFs() / rate_);

    //power spectrum
    if (isHoppingGoertzelDFTUsed_)
    {
        //spectral compression is disabled when the HGDFT is used
        compressionCriterionInCams_ = 0;
        modules_.push_back(unique_ptr<Module>
                (new HoppingGoertzelDFT(bandFreqsHz,
                                        windowSizeSamples,
                                        hopSize,
                                        true,
                                        true)));
        //outputModules_["PowerSpectrum"] = modules_.back().get();
    }
    else
    {
        modules_.push_back(unique_ptr<Module>
                (new FrameGenerator(windowSizeSamples[0],
                                    hopSize,
                                    isFirstSampleAtWindowCentre_)));

        //windowing: Periodic hann window
        modules_.push_back(unique_ptr<Module>
                (new Window(Window::HANN, windowSizeSamples, true)));

        modules_.push_back(unique_ptr<Module>
                (new PowerSpectrum(bandFreqsHz,
                                   windowSizeSamples,
                                   isSpectrumSampledUniformly_)));
    }

    /*
     * Compression
     */
    if((compressionCriterionInCams_ > 0) && (isSpectrumSampledUniformly_))
    {
        modules_.push_back(unique_ptr<Module>
                (new CompressSpectrum(compressionCriterionInCams_)));
    }

    /*
     * Spectral weighting if necessary
     */
    if (weightSpectrum)
    {
        if ((middleEarFilter_ != OME::NONE) || (outerEarFilter_ != OME::NONE))
        {
            modules_.push_back(unique_ptr<Module>
                    (new WeightSpectrum(middleEarFilter_, outerEarFilter_)));
        }
    }

    //remember where the final spectrum stage sits in the chain; the
    //partial-loudness branch below taps it as a second target
    int lastSpectrumIdx = modules_.size()-1;

    /*
     * Roex filters
     */
    if(isRoexBankFast_)
    {
        modules_.push_back(unique_ptr<Module>
                (new FastRoexBank(filterSpacingInCams_,
                                  isExcitationPatternInterpolated_,
                                  isInterpolationCubic_)));
    }
    else
    {
        modules_.push_back(unique_ptr<Module>
                (new RoexBankANSIS342007(1.8, 38.9, filterSpacingInCams_)));
    }
    outputModules_["Excitation"] = modules_.back().get();

    /*
     * Specific loudness
     */
    //binaural inhibition requires a two-ear input
    isBinauralInhibitionUsed_ = isBinauralInhibitionUsed_ * (input.getNEars() == 2);
    modules_.push_back(unique_ptr<Module>
            (new SpecificLoudnessANSIS342007(isSpecificLoudnessANSIS342007_,
                                             isBinauralInhibitionUsed_)));

    /*
     * Binaural inhibition
     */
    if (isBinauralInhibitionUsed_)
    {
        modules_.push_back(unique_ptr<Module> (new BinauralInhibitionMG2007));
    }
    outputModules_["SpecificLoudness"] = modules_.back().get();

    /*
     * Instantaneous loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new InstantaneousLoudness(1.0, isPresentationDiotic_)));
    outputModules_["InstantaneousLoudness"] = modules_.back().get();

    /*
     * Short-term loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new ARAverager(attackTimeSTL_, releaseTimeSTL_)));
    outputModules_["ShortTermLoudness"] = modules_.back().get();

    /*
     * Long-term loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new ARAverager(attackTimeLTL_, releaseTimeLTL_)));
    outputModules_["LongTermLoudness"] = modules_.back().get();

    //configure targets
    configureLinearTargetModuleChain();

    // Masking conditions
    //with multiple sources, build a second chain that evaluates the
    //partial loudness of each source in the presence of the others
    if ((input.getNSources() > 1) && (isPartialLoudnessUsed_))
    {
        LOUDNESS_DEBUG(name_ << ": Setting up modules for partial loudness...");

        // Excitation transformation based on all sources
        modules_.push_back(unique_ptr<Module>
                (new MultiSourceRoexBank(filterSpacingInCams_)));
        outputModules_["MultiSourceExcitation"] = modules_.back().get();
        //root of the second chain, linked up at the end of this scope
        int moduleIdx = modules_.size() - 1;

        // Push spectrum to second excitation transformation stage
        Module* ptrToWeightedSpectrum = modules_[lastSpectrumIdx].get();
        ptrToWeightedSpectrum -> addTargetModule (*outputModules_["MultiSourceExcitation"]);

        // Partial loudness
        modules_.push_back(unique_ptr<Module>
                (new SpecificPartialLoudnessMGB1997(
                        isSpecificLoudnessANSIS342007_,
                        isBinauralInhibitionUsed_)));

        if (isBinauralInhibitionUsed_)
            modules_.push_back(unique_ptr<Module> (new BinauralInhibitionMG2007));
        outputModules_["SpecificPartialLoudness"] = modules_.back().get();

        /*
         * Instantaneous partial loudness
         */
        modules_.push_back(unique_ptr<Module>
                (new InstantaneousLoudness(1.0, isPresentationDiotic_)));
        outputModules_["InstantaneousPartialLoudness"] = modules_.back().get();

        /*
         * Short-term partial loudness
         */
        modules_.push_back(unique_ptr<Module>
                (new ARAverager(attackTimeSTL_, releaseTimeSTL_)));
        outputModules_["ShortTermPartialLoudness"] = modules_.back().get();

        /*
         * Long-term partial loudness
         */
        modules_.push_back(unique_ptr<Module>
                (new ARAverager(attackTimeLTL_, releaseTimeLTL_)));
        outputModules_["LongTermPartialLoudness"] = modules_.back().get();

        // configure targets for second (parallel) chain
        configureLinearTargetModuleChain(moduleIdx);
    }

    return 1;
}
/*
 * Applies a named parameter set for the Glasberg & Moore 2002 model.
 *
 * Every call first installs the "GM2002" reference configuration; the
 * named set then overrides selected values. Recognised names are
 * "GM2002", "Faster", "Recent", "FasterAndRecent" and "WEAR2015"; any
 * other string keeps the reference configuration.
 *
 * @param setName Name of the parameter set to apply.
 */
void DynamicLoudnessGM2002::configureModelParameters(const string& setName)
{
    //defaults common to all parameter sets
    setRate(1000);
    setOuterEarFilter(OME::ANSIS342007_FREEFIELD);
    setMiddleEarFilter(OME::ANSIS342007_MIDDLE_EAR_HPF);
    setSpectrumSampledUniformly(true);
    setHoppingGoertzelDFTUsed(false);
    setSpectralResolutionDoubled(false);
    setExcitationPatternInterpolated(false);
    setInterpolationCubic(true);
    setFilterSpacingInCams(0.25);
    setCompressionCriterionInCams(0.0);
    setRoexBankFast(false);
    setSpecificLoudnessANSIS342007(false);
    setFirstSampleAtWindowCentre(true);
    setPresentationDiotic(true);
    setBinauralInhibitionUsed(true);
    setPartialLoudnessUsed(true);
    configureSmoothingTimes("GM2002");

    //the reference set needs no overrides (and logs nothing)
    if (setName == "GM2002")
        return;

    if (setName == "Faster")
    {
        setRoexBankFast(true);
        setExcitationPatternInterpolated(true);
        setFilterSpacingInCams(0.5);
        setCompressionCriterionInCams(0.2);
        LOUDNESS_DEBUG(name_ << ": Using faster params for Glasberg and Moore's 2002 model.");
    }
    else if (setName == "Recent")
    {
        configureSmoothingTimes("MGS2003");
        setSpecificLoudnessANSIS342007(true);
        LOUDNESS_DEBUG(name_
                << ": Using updated "
                << "time-constants from 2003 paper and high-level specific "
                << "loudness equation (ANSI S3.4 2007).");
    }
    else if (setName == "FasterAndRecent")
    {
        configureSmoothingTimes("MGS2003");
        setSpecificLoudnessANSIS342007(true);
        setRoexBankFast(true);
        setExcitationPatternInterpolated(true);
        setFilterSpacingInCams(0.5);
        setCompressionCriterionInCams(0.2);
        LOUDNESS_DEBUG(name_
                << ": Using faster params and "
                << "updated time-constants from 2003 paper and "
                << "high-level specific loudness equation (ANSI S3.4 2007).");
    }
    else if (setName == "WEAR2015")
    {
        setSpecificLoudnessANSIS342007(true);
        setRoexBankFast(true);
        setExcitationPatternInterpolated(false);
        setFilterSpacingInCams(1.25);
        setCompressionCriterionInCams(0.7);
        LOUDNESS_DEBUG(name_
                << ": Using faster params as per Ward et al. (2015) and "
                << "high-level specific loudness equation (ANSI S3.4 2007).");
    }
    else
    {
        LOUDNESS_DEBUG(name_ << ": Using original params from Glasberg and Moore 2002.");
    }
}
/*
 * Builds the processing chain for the Chen & Hu 2012 dynamic loudness
 * model and registers the named output modules ("SpecificLoudness",
 * "InstantaneousLoudness", "ShortTermLoudness", "LongTermLoudness" and
 * optionally "PeakShortTermLoudness").
 *
 * @param input Input SignalBank of audio samples.
 * @return 1 (true) on success.
 */
bool DynamicLoudnessCH2012::initializeInternal(const SignalBank &input)
{
    //if filter coefficients have not been provided
    //use spectral weighting to approximate outer and middle ear response
    if(!pathToFilterCoefs_.empty())
    {
        //load numpy array holding the filter coefficients
        cnpy::NpyArray arr = cnpy::npy_load(pathToFilterCoefs_);
        Real *data = reinterpret_cast<Real*> (arr.data);

        //check if filter is IIR or FIR
        //two coefficient rows (b and a) implies IIR
        bool iir = false;
        if(arr.shape[0]==2)
            iir = true;

        //load the coefficients
        RealVec bCoefs, aCoefs;
        for(unsigned int i=0; i<arr.shape[1];i++)
        {
            bCoefs.push_back(data[i]);
            if(iir)
                aCoefs.push_back(data[i+arr.shape[1]]);
        }

        //create module
        if(iir)
        {
            modules_.push_back(unique_ptr<Module> (new IIR(bCoefs, aCoefs)));
        }
        else
        {
            modules_.push_back(unique_ptr<Module> (new FIR(bCoefs)));
        }

        //clean up
        delete [] data;
    }

    /*
     * Multi-resolution spectrogram
     */
    RealVec bandFreqsHz {10, 80, 500, 1250, 2540, 4050, 16001};

    //window spec
    RealVec windowSizeSecs {0.128, 0.064, 0.032, 0.016, 0.008, 0.004};
    vector<int> windowSizeSamples(6,0);
    //round to nearest sample and force to be even such that centre samples
    //are aligned (using periodic Hann window)
    for(int w=0; w<6; w++)
    {
        windowSizeSamples[w] = (int)round(windowSizeSecs[w] * input.getFs());
        windowSizeSamples[w] += windowSizeSamples[w] % 2;
    }

    // hop size to the nearest sample
    int hopSize = round(input.getFs() / rate_);

    //power spectrum
    if (isHoppingGoertzelDFTUsed_)
    {
        //spectral compression is disabled when the HGDFT is used
        compressionCriterionInCams_ = 0;
        modules_.push_back(unique_ptr<Module>
                (new HoppingGoertzelDFT(bandFreqsHz,
                                        windowSizeSamples,
                                        hopSize,
                                        true,
                                        true)));
    }
    else
    {
        modules_.push_back(unique_ptr<Module>
                (new FrameGenerator(windowSizeSamples[0],
                                    hopSize,
                                    isFirstSampleAtWindowCentre_)));

        //windowing: Periodic hann window
        modules_.push_back(unique_ptr<Module>
                (new Window(Window::HANN, windowSizeSamples, true)));

        modules_.push_back(unique_ptr<Module>
                (new PowerSpectrum(bandFreqsHz,
                                   windowSizeSamples,
                                   isSpectrumSampledUniformly_)));
    }

    /*
     * Compression
     */
    if((compressionCriterionInCams_ > 0) && (isSpectrumSampledUniformly_))
    {
        modules_.push_back(unique_ptr<Module>
                (new CompressSpectrum(compressionCriterionInCams_)));
    }

    /*
     * Spectral weighting
     */
    if(pathToFilterCoefs_.empty())
    {
        modules_.push_back(unique_ptr<Module>
                (new WeightSpectrum(OME::CHGM2011_MIDDLE_EAR, outerEarType_)));
    }

    /*
     * Roex filters
     */
    // Set up scaling factors depending on output config
    Real doubleRoexBankfactor, instantaneousLoudnessFactor;
    if (isSpecificLoudnessOutput_)
    {
        doubleRoexBankfactor = 1.53e-8;
        instantaneousLoudnessFactor = 1.0;
        LOUDNESS_DEBUG(name_ << ": Excitation pattern will be scaled for specific loudness");
    }
    else
    {
        doubleRoexBankfactor = 1.0;
        instantaneousLoudnessFactor = 1.53e-8;
    }

    //binaural inhibition requires two ears and specific loudness output
    isBinauralInhibitionUsed_ = isBinauralInhibitionUsed_
        * (input.getNEars() == 2)
        * isSpecificLoudnessOutput_;
    if (isBinauralInhibitionUsed_)
        doubleRoexBankfactor /= 0.75;

    modules_.push_back(unique_ptr<Module>
            (new DoubleRoexBank(1.5, 40.2,
                                filterSpacingInCams_,
                                doubleRoexBankfactor,
                                isExcitationPatternInterpolated_,
                                isInterpolationCubic_)));

    /*
     * Binaural inhibition
     */
    if (isBinauralInhibitionUsed_)
    {
        modules_.push_back(unique_ptr<Module> (new BinauralInhibitionMG2007));
    }
    else
    {
        LOUDNESS_DEBUG(name_ << ": No binaural inhibition.");
    }
    outputModules_["SpecificLoudness"] = modules_.back().get();

    /*
     * Instantaneous loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new InstantaneousLoudness(instantaneousLoudnessFactor,
                                       isPresentationDiotic_)));
    outputModules_["InstantaneousLoudness"] = modules_.back().get();

    /*
     * Short-term loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new ARAverager(attackTimeSTL_, releaseTimeSTL_)));
    outputModules_["ShortTermLoudness"] = modules_.back().get();

    /*
     * Long-term loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new ARAverager(attackTimeLTL_, releaseTimeLTL_)));
    outputModules_["LongTermLoudness"] = modules_.back().get();

    //configure targets
    configureLinearTargetModuleChain();

    //Option to provide PeakFollower
    //tracks the peak of the short-term loudness as an extra output
    if (isPeakSTLFollowerUsed_)
    {
        modules_.push_back(unique_ptr<Module> (new PeakFollower(2.0)));
        outputModules_["PeakShortTermLoudness"] = modules_.back().get();
        outputModules_["ShortTermLoudness"] -> addTargetModule
            (*outputModules_["PeakShortTermLoudness"]);
    }

    return 1;
}
/*
 * OME (outer-middle ear) constructor.
 *
 * Stores the selected filter types; no name_ member is available here,
 * hence the literal prefix in the log message.
 *
 * @param middleEarType Middle ear filter selection.
 * @param outerEarType  Outer ear filter selection.
 */
OME::OME(const Filter& middleEarType, const Filter& outerEarType) :
    middleEarType_(middleEarType),
    outerEarType_(outerEarType)
{
    LOUDNESS_DEBUG("OME: Constructed.");
}
/*
 * Groups adjacent spectral components so that the compressed output
 * spectrum satisfies the Cam-spacing criterion alpha_. Pass 1 chooses
 * the start index and size of each summation group; pass 2 averages the
 * centre frequencies within each group and records the (exclusive)
 * upper bin index per compressed band in upperBandIdx_.
 *
 * @param input Input SignalBank (uniformly compiled power spectrum).
 * @return 1 (true) on success.
 */
bool CompressSpectrum::initializeInternal(const SignalBank &input)
{
    LOUDNESS_ASSERT(input.getNChannels() > 1,
            name_ << ": Insufficient number of channels.");

    /*
     * This code is sloppy due to along time spent figuring how
     * to implement the damn thing.
     * It's currently in two parts, one that searches for the limits of each
     * summation range in order to satisfy summation criterion.
     * The other that finds the average Centre frequencies per compressed band.
     */
    int nChannels = input.getNChannels();
    int i=0, binIdxPrev = 0;
    //Cam spacing of the first two components seeds the group size
    Real dif = hertzToCam(input.getCentreFreq(1)) -
        hertzToCam(input.getCentreFreq(0));
    int groupSize = max(2.0, std::floor(alpha_/(dif)));
    int groupSizePrev = groupSize;
    vector<int> groupSizeStore, binIdx;

    //PART 1: find the start index and size of each summation group
    while(i < nChannels-1)
    {
        //compute different between adjacent bins on Cam scale
        dif = hertzToCam(input.getCentreFreq(i+1)) -
            hertzToCam(input.getCentreFreq(i));

        //Check if we can sum bins in group size
        if(dif < (alpha_/double(groupSize)))
        {
            /*
             * from here we can group bins in groupSize
             * whilst maintaining alpha spacing
             */

            //Check we have zero idx
            if((binIdx.size() < 1) && (i>0))
            {
                binIdx.push_back(0);
                groupSizeStore.push_back(1);
            }

            /*
             * This line ensures the next group starts at the next multiple of the previous
             * groupSize above the previous starting position.
             * This is why you sometimes get finer resolution than the criterion
             */
            int store = ceil((i-binIdxPrev)/double(groupSizePrev))*groupSizePrev+binIdxPrev;

            /*
             * This line is cheeky; it re-evaluates the groupSize at the new multiple
             * in attempt to maintain alpha spacing, I'm not 100% but the algorithm
             * seems to satisfy various criteria
             */
            if((store > 0) && (store < nChannels))
            {
                dif = hertzToCam(input.getCentreFreq(store)) -
                    hertzToCam(input.getCentreFreq(store-1));
                groupSize = max((double)groupSize, std::floor(alpha_/dif));
            }

            //fill variables
            groupSizePrev = groupSize;
            binIdxPrev = store;

            //storage
            binIdx.push_back(store);
            groupSizeStore.push_back(groupSize);

            //print "Bin: %d, Binnew: %d, composite bin size: %d" % (i, store, groupSize)

            //Move i along
            i = store+groupSize;

            //increment groupSize for wider group
            groupSize += 1;
        }
        else
            i += 1;
    }

    //add the final frequency
    if(binIdx[binIdx.size()-1] < nChannels)
        binIdx.push_back(nChannels);

    //PART 2
    //average the centre frequencies within each group to obtain the
    //compressed spectrum's centre frequencies and upper band indices
    //compressed spectrum
    RealVec cfs;
    Real fa = 0;
    int count = 0;
    int j = 0;
    i = 0;
    while(i < nChannels)
    {
        //bounds check out?
        if(i<binIdx[j+1])
        {
            fa += input.getCentreFreq(i);
            count++;

            if (count==groupSizeStore[j])
            {
                //upper limit
                upperBandIdx_.push_back(i+1); //+1 for < conditional

                //set the output frequency
                cfs.push_back(fa/count);
                count = 0;
                fa = 0;
            }
            i++;
        }
        else
            j++;
    }

    //add the final component if it didn't make it
    if (count>0)
    {
        cfs.push_back(fa/count);
        upperBandIdx_.push_back(i);
    }

    //check (debug builds only): report the frequency above which the
    //spacing criterion holds
    #if defined(DEBUG)
    Real freqLimit = 0.0;
    for(unsigned int i=0; i<cfs.size()-1; i++)
    {
        if((hertzToCam(cfs[i+1]) - hertzToCam(cfs[i])) > alpha_)
            freqLimit = cfs[i];
    }
    LOUDNESS_DEBUG("CompressSpectrum: Criterion satisfied above " << freqLimit << " Hz.");
    #endif

    //set output SignalBank
    output_.initialize(input.getNSources(),
                       input.getNEars(),
                       cfs.size(),
                       1,
                       input.getFs());
    output_.setCentreFreqs(cfs);
    output_.setFrameRate(input.getFrameRate());
    LOUDNESS_DEBUG(name_
            << ": Number of bins comprising the compressed spectrum: "
            << output_.getNChannels());

    return 1;
}
/*
 * Precomputes the roex filter parameters, per-component rectangular ERB
 * band limits, and the output SignalBank for the fast excitation
 * pattern calculation. With interpolation enabled, filters are computed
 * at camStep_ spacing and the output is interpolated onto a fixed
 * 0.1 Cam grid (372 channels over [1.8, 38.9]).
 *
 * @param input Input SignalBank carrying the power spectrum components.
 * @return 1 (true) on success.
 */
bool FastRoexBank::initializeInternal(const SignalBank &input)
{
    //Camstep limit is 0.1
    //at 0.1 Cams or finer the output grid is already matched, so
    //interpolation is skipped
    if(camStep_ <= 0.1)
        isExcitationPatternInterpolated_ = false;

    /*
     * Level per ERB precalculations
     */
    rectBinIndices_.resize(input.getNChannels());

    for (int i = 0; i < input.getNChannels(); ++i)
    {
        //ERB number of Centre frequency
        Real cam = freqToCam (input.getCentreFreq(i));

        //rectangular ERB band edges in Hz
        Real freqLo = camToFreq (cam - 0.5);
        Real freqHi = camToFreq (cam + 0.5);

        //lower and upper bin indices
        rectBinIndices_[i].resize(2);
        rectBinIndices_[i][0] = i;
        rectBinIndices_[i][1] = i + 1;

        /* Find components falling within the band:
         * This follows the Fortran code in Glasberg and Moore's 1990 paper
         * which uses the inclusive interval [fLo, fHi]. An alternative
         * approach is to find the nearest DFT bins to the lower and upper
         * band edges, which would require a nearest search (bins may not be
         * uniformly spaced). Decided to go with the first prodedure for
         * simplicity and follow original code.
         */
        bool first = true;
        int j = 0;
        while (j < input.getNChannels())
        {
            if (input.getCentreFreq(j) > freqHi)
                break;
            else if (input.getCentreFreq(j) >= freqLo)
            {
                if(first)
                    rectBinIndices_[i][0] = j;
                first = false;
                rectBinIndices_[i][1] = j + 1;
            }
            j++;
        }
    }

    /*
     * Excitation pattern variables
     */

    //number of roex filters to use
    Real camLo = 1.8;
    Real camHi = 38.9;
    nFilters_ = std::floor((camHi - camLo) / camStep_) + 1;

    LOUDNESS_DEBUG(name_
            << ": interpolation applied: " << isExcitationPatternInterpolated_
            << " filter spacing in Cams: " << camStep_
            << " Total number of filters: " << nFilters_);

    //see ANSI S3.4 2007 p.11
    const Real p51_1k = 4000.0 / freqToERB (1000.0);

    //p upper is level invariant
    pu_.assign (nFilters_, 0.0);

    //p lower is level dependent
    pl_.assign (nFilters_, 0.0);

    //comp_level holds level per ERB on each component
    compLevel_.assign (input.getNChannels(), 0.0);

    //centre freqs in Hz
    fc_.assign (nFilters_, 0.0);

    //initialize output SignalBank
    if (isExcitationPatternInterpolated_)
    {
        //centre freqs in cams
        cams_.assign (nFilters_, 0.0);

        //required for log interpolation
        excitationLevel_.assign (nFilters_, 0.0);

        //372 filters over [1.8, 38.9] in 0.1 steps
        output_.initialize (input.getNEars(), 372, 1, input.getFs());
        output_.setChannelSpacingInCams (0.1);
        for (int i = 0; i < 372; ++i)
            output_.setCentreFreq (i, camToFreq (1.8 + i * 0.1));
    }
    else
    {
        output_.initialize (input.getNEars(), nFilters_, 1, input.getFs());
        output_.setChannelSpacingInCams (camStep_);
    }
    output_.setFrameRate (input.getFrameRate());

    //fill the above arrays and calculate roex filter response for upper skirt
    for (int i = 0; i < nFilters_; ++i)
    {
        //filter frequency in Cams
        Real cam = camLo + (i * camStep_);

        //filter frequency in Hz
        fc_[i] = camToFreq (cam);

        if (isExcitationPatternInterpolated_)
            cams_[i] = cam;
        else
            output_.setCentreFreq (i, fc_[i]); //some redundancy here

        //get the ERB of the filter
        Real erb = freqToERB (fc_[i]);

        //ANSI S3.4 sec 3.5 p.11
        pu_[i] = 4.0 * fc_[i] / erb;

        //from Eq (3)
        pl_[i] = 0.35 * (pu_[i] / p51_1k);
    }

    //generate lookup table for rounded exponential
    generateRoexTable(1024);

    return 1;
}
/** Configures the FFTs and output spectrum layout for multi-resolution
 *  power spectrum analysis.
 *
 *  Each input channel is a windowed signal segment with its own analysis
 *  band [bandFreqsHz_[i], bandFreqsHz_[i+1]). This routine sizes one FFT
 *  per window (or a single shared FFT when sampleSpectrumUniformly_),
 *  works out which FFT bins fall inside each band (excluding DC and
 *  Nyquist), computes the per-band power normalisation factors, and
 *  initialises the output SignalBank with one channel per retained bin.
 *
 *  @param input SignalBank with one channel per analysis window; channel
 *               count must equal windowSizes_.size().
 *  @return 1 on success (failures trigger LOUDNESS_ASSERT instead).
 */
bool PowerSpectrum::initializeInternal(const SignalBank &input)
{
    ffts_.clear();

    //number of windows
    int nWindows = (int)windowSizes_.size();
    LOUDNESS_ASSERT(input.getNChannels() == nWindows,
            name_ << ": Number of channels do not match number of windows");
    LOUDNESS_ASSERT((int)bandFreqsHz_.size() == (nWindows + 1),
            name_ << ": Number of frequency bands should equal number of input channels + 1.");
    LOUDNESS_ASSERT(!anyAscendingValues(windowSizes_),
            name_ << ": Window lengths must be in descending order.");

    //work out FFT configuration (constrain to power of 2)
    int largestWindowSize = input.getNSamples();
    vector<int> fftSize(nWindows, nextPowerOfTwo(largestWindowSize));
    if(sampleSpectrumUniformly_)
    {
        // One shared FFT sized for the largest window: uniform bin spacing
        // across all bands.
        ffts_.push_back(unique_ptr<FFT> (new FFT(fftSize[0])));
        ffts_[0] -> initialize();
    }
    else
    {
        // One FFT per window, each just large enough for its window, so bin
        // spacing differs per band.
        for(int w=0; w<nWindows; w++)
        {
            fftSize[w] = nextPowerOfTwo(windowSizes_[w]);
            ffts_.push_back(unique_ptr<FFT> (new FFT(fftSize[w])));
            ffts_[w] -> initialize();
        }
    }

    //desired bins indices (lo and hi) per band
    bandBinIndices_.resize(nWindows);
    normFactor_.resize(nWindows);
    int fs = input.getFs();
    int nBins = 0;
    for(int i=0; i<nWindows; i++)
    {
        //bin indices to use for compiled spectrum
        bandBinIndices_[i].resize(2);
        //These are NOT the nearest components but satisfies f_k in [f_lo, f_hi)
        bandBinIndices_[i][0] = ceil(bandFreqsHz_[i]*fftSize[i]/fs);
        // use < bandBinIndices_[i][1] to exclude upper bin
        bandBinIndices_[i][1] = ceil(bandFreqsHz_[i+1]*fftSize[i]/fs);
        LOUDNESS_ASSERT(bandBinIndices_[i][1]>0,
                name_ << ": No components found in band number " << i);

        // Exclude DC and Nyquist if found. nyqIdx is the index of the
        // Nyquist bin for even fftSize, or the first bin above fs/2 for odd.
        int nyqIdx = (fftSize[i]/2) + (fftSize[i]%2);
        if(bandBinIndices_[i][0]==0)
        {
            LOUDNESS_WARNING(name_ << ": DC found...excluding.");
            bandBinIndices_[i][0] = 1;
        }
        if((bandBinIndices_[i][1]-1) >= nyqIdx)
        {
            LOUDNESS_WARNING(name_ << ": Bin is >= nyquist...excluding.");
            bandBinIndices_[i][1] = nyqIdx;
        }
        nBins += bandBinIndices_[i][1]-bandBinIndices_[i][0];

        // Power spectrum normalisation. The factor of 2 accounts for the
        // energy in the (discarded) negative-frequency bins.
        Real refSquared = referenceValue_ * referenceValue_;
        switch (normalisation_)
        {
            case NONE:
                normFactor_[i] = 1.0 / refSquared;
                break;
            case ENERGY:
                normFactor_[i] = 2.0/(fftSize[i] * refSquared);
                break;
            case AVERAGE_POWER:
                normFactor_[i] = 2.0/(fftSize[i] * windowSizes_[i] * refSquared);
                break;
            default:
                normFactor_[i] = 2.0/(fftSize[i] * refSquared);
        }

        LOUDNESS_DEBUG(name_ << ": Normalisation factor : " << normFactor_[i]);
    }

    //total number of bins in the output spectrum
    LOUDNESS_DEBUG(name_ << ": Total number of bins comprising the output spectrum: " << nBins);

    //initialize the output SignalBank (one channel per retained bin)
    output_.initialize(input.getNEars(), nBins, 1, fs);
    output_.setFrameRate(input.getFrameRate());

    // Output centre frequencies in Hz: bins are concatenated band by band,
    // each converted using its own band's FFT size.
    int j = 0, k = 0;
    for(int i=0; i<nWindows; i++)
    {
        j = bandBinIndices_[i][0];
        while(j < bandBinIndices_[i][1])
            output_.setCentreFreq(k++, (j++)*fs/(Real)fftSize[i]);
        LOUDNESS_DEBUG(name_
                << ": Included freq Hz (band low): "
                << fs * bandBinIndices_[i][0] / float(fftSize[i])
                << ": Included freq Hz (band high): "
                << fs * (bandBinIndices_[i][1] - 1) / float(fftSize[i]));
    }

    return 1;
}
/** Builds the module chain for the stationary CHGM2011 loudness model.
 *
 *  Pipeline: spectral weighting (outer/middle ear) -> double roex filter
 *  bank -> optional binaural inhibition -> instantaneous loudness. When the
 *  input has multiple sources and partial loudness is enabled, a second,
 *  parallel chain is wired from the weighted spectrum to compute partial
 *  loudness in the presence of maskers.
 *
 *  Named outputs registered in outputModules_: "SpecificLoudness",
 *  "Loudness", and (multi-source only) "SpecificPartialLoudness",
 *  "PartialLoudness".
 *
 *  @param input SignalBank describing the input spectrum configuration.
 *  @return 1 on success (this implementation has no failure path).
 */
bool StationaryLoudnessCHGM2011::initializeInternal(const SignalBank &input)
{
    /*
     * Weighting filter
     */
    modules_.push_back(unique_ptr<Module>
            (new WeightSpectrum (OME::CHGM2011_MIDDLE_EAR, outerEarFilter_)));

    // Set up scaling factors depending on output config: the 1.53e-8 gain
    // is applied either at the excitation stage (specific loudness output)
    // or deferred to the instantaneous loudness stage.
    Real doubleRoexBankfactor, instantaneousLoudnessFactor;
    if (isSpecificLoudnessOutput_)
    {
        doubleRoexBankfactor = 1.53e-8;
        instantaneousLoudnessFactor = 1.0;
        LOUDNESS_DEBUG(name_ << ": Excitation pattern will be scaled for specific loudness");
    }
    else
    {
        doubleRoexBankfactor = 1.0;
        instantaneousLoudnessFactor = 1.53e-8;
    }

    // Binaural inhibition only applies to two-ear input with specific
    // loudness output (bool arithmetic: all conditions must hold).
    isBinauralInhibitionUsed_ = isBinauralInhibitionUsed_
            * (input.getNEars() == 2) * isSpecificLoudnessOutput_;
    // Compensate for the attenuation the inhibition stage introduces.
    if (isBinauralInhibitionUsed_)
        doubleRoexBankfactor /= 0.75;

    modules_.push_back(unique_ptr<Module>
            (new DoubleRoexBank(1.5, 40.2, filterSpacingInCams_,
                                doubleRoexBankfactor, false, false)));

    /*
     * Binaural inhibition
     */
    if (isBinauralInhibitionUsed_)
    {
        modules_.push_back(unique_ptr<Module> (new BinauralInhibitionMG2007));
    }
    else
    {
        LOUDNESS_DEBUG(name_ << ": No binaural inhibition.");
    }
    // Either the inhibition stage or the roex bank, whichever was pushed last.
    outputModules_["SpecificLoudness"] = modules_.back().get();

    /*
     * Instantaneous loudness
     */
    modules_.push_back(unique_ptr<Module>
            (new InstantaneousLoudness(instantaneousLoudnessFactor, isPresentationDiotic_)));
    outputModules_["Loudness"] = modules_.back().get();

    //configure targets (wires the modules pushed so far into a linear chain)
    configureLinearTargetModuleChain();

    // Masking conditions: a second excitation-to-loudness chain for partial
    // loudness when more than one source is present.
    if ((input.getNSources() > 1) && (isPartialLoudnessUsed_))
    {
        LOUDNESS_DEBUG(name_ << ": Setting up modules for partial loudness...");

        modules_.push_back(unique_ptr<Module>
                (new MultiSourceDoubleRoexBank (1.5, 40.2, filterSpacingInCams_,
                                                doubleRoexBankfactor, false, false)));
        // Index where the second chain starts; passed to
        // configureLinearTargetModuleChain below so only the modules pushed
        // from here on are chained together.
        int moduleIdx = modules_.size() - 1;

        // Push spectrum to second excitation transformation stage
        Module* ptrToWeightedSpectrum = modules_[0].get();
        ptrToWeightedSpectrum -> addTargetModule (*modules_.back().get());

        modules_.push_back(unique_ptr<Module>
                (new SpecificPartialLoudnessCHGM2011()));

        // NOTE(review): legacy wiring kept for reference — superseded by the
        // configureLinearTargetModuleChain(moduleIdx) call below.
        /* int moduleIdx = modules_.size() - 1;
           Module* ptrToExcitation = modules_[1].get();
           ptrToExcitation -> addTargetModule (*modules_.back().get()); */

        if (isBinauralInhibitionUsed_)
        {
            modules_.push_back(unique_ptr<Module> (new BinauralInhibitionMG2007));
        }
        outputModules_["SpecificPartialLoudness"] = modules_.back().get();

        // Factor of 1.0: scaling already applied earlier in this chain.
        modules_.push_back(unique_ptr<Module>
                (new InstantaneousLoudness(1.0, isPresentationDiotic_)));
        outputModules_["PartialLoudness"] = modules_.back().get();

        // configure targets for second (parallel) chain
        configureLinearTargetModuleChain(moduleIdx);
    }

    return 1;
}
Model::Model(string name, bool dynamicModel) : name_(name), dynamicModel_(dynamicModel) { LOUDNESS_DEBUG(name_ << ": Constructed."); }