int MCMCGRDiagnosticPSRF::calcDiagnosticsForLoop() { if (keepLogs) { // store the current runningSumAllChains as well runningSumOverall.push_back(runningSumAllChains); } // convergence diagnostics calculations for v's // the Ws: average, over chains, of sample variance of scalar value cxsc::real thisW = sumOfSampleVariancesOverChains/(chains * 1.0); #ifdef MYDEBUG_CALCS_EXTRA cout << "and thisW = " << thisW << endl; #endif Ws.push_back(thisW); // the Bs size_t statesForCalcs = states - statesNotInCalcs; cxsc::real thisB = (1.0/( (chains - 1) * statesForCalcs ) * ( sumOfSquaresOfRunningSums - (runningSumAllChains * runningSumAllChains/(chains * 1.0)) ) ); Bs.push_back(thisB); #ifdef MYDEBUG_CALCS //check thisB is correct, doing it the long way // runningSumPtr has one running sum for each chain RealVec chainAverages; cxsc::real accRunningSums(0.0); for (RealVecItr it = runningSum.begin(); it < runningSum.end(); ++it) { cxsc::real thisChainRunningSum = (*it); cxsc::real thisChainAv = thisChainRunningSum/(statesForCalcs * 1.0); chainAverages.push_back(thisChainAv); accRunningSums+=thisChainRunningSum; } cxsc::real overallAv = accRunningSums/(statesForCalcs * chains * 1.0); cxsc::dotprecision accDiffs(0.0); for (RealVecItr it = chainAverages.begin(); it < chainAverages.end(); ++it) { cxsc::real thisDiff = (*it) - overallAv; // sum up the squares of the differences compared to overall average cxsc::accumulate(accDiffs, thisDiff, thisDiff); } cxsc::real altB = rnd(accDiffs)*( statesForCalcs/(chains - 1.0) ); cout << "\nthisB for v's is\t" << thisB << endl; cout << "altB for v's is\t" << altB << endl; //assert(thisB == altB); #endif // the estimated var(v) cxsc::real thisVarV(0.0); if (statesForCalcs > 1) { thisVarV = statesForCalcs/(statesForCalcs-1.0) * thisW + (1.0/statesForCalcs)*thisB; } #ifndef OLDCALCMETHOD if (statesForCalcs > 1) thisVarV +=thisB/(1.0*chains*statesForCalcs); #endif estVarV.push_back(thisVarV); // the rhats cxsc::real thisRhat(0.0); // allow division by 0 if w = 0 when var does not if (thisW > 0.0 || thisVarV > 0.0) { thisRhat = thisVarV/thisW; } rhat.push_back(thisRhat); #ifdef MYDEBUG_CALCS_EXTRA cout << "thisRhat = " << thisRhat << " - press any key " << endl; getchar(); #endif if ( (thisRhat <= 1.0 + tol) && (thisRhat >= 1.0 - tol) ) { // if we have not been converged before on this scalar value if (!rhatDiagnosticFlag) { #ifdef MYDEBUG cout << "\n" << getScalarsName() << " convergence test satisfied in state " << states << " (states in calcs = " << statesForCalcs << ")"<< endl; #endif // set the flag for this scalar value rhatDiagnosticFlag = 1; } } else { // not converged on this scalar value // if we were okay on this scalar value before if (rhatDiagnosticFlag) { #ifdef MYDEBUG cout << "\n--------- Note: " << getScalarsName() << " convergence test now NOT satisfied in state " << states << endl; #endif rhatDiagnosticFlag = 0; // update the flag } } if (keepLogs) { // store the flag as well, as a real, which is a fudge... rhatFlag.push_back(rhatDiagnosticFlag); } // end of checking diagnostic for v's return rhatDiagnosticFlag; } // end calculations
//----------------------------------------------------------------------- // optimizing over smoothing parameter Temp - // getting best histogram over parameter in [t_lo, t_hi] by max/minmising // a CV based score //----------------------------------------------------------------------- bool selectPriorByLv1OutCV (RVecData & Data, double t_lo, double t_hi, int LocalMaxTempIterations, int MaxTempIterations, RealVec & Lv1OutCVScores, vector<double> & Temperatures, double & t_opt, double & Lv1OutCVScores_opt, size_t minPoints, double minVolume, int chooseStarts, int keep, bool stopOnMaxPosterior, string postFileName, string checkPostFileNameBase, string burstsFileBaseName, bool printHist, int precPQ, bool CarvingMaxPosterior, unsigned long int seedStarts) { bool success=false; int TempIterations=0; std::vector<double> LocalTemperatures; do{// outer temp iterations if (TempIterations!=0){ std::vector<size_t> indtop(Lv1OutCVScores.size()); topk(Lv1OutCVScores, indtop); double tBest =Temperatures[indtop[0]]; double t2ndBest =Temperatures[indtop[1]]; double t3rdBest =Temperatures[indtop[2]]; double tWorst = max(abs(tBest-t2ndBest),abs(tBest-t3rdBest)); t_lo = max(t_lo,tBest-tWorst); t_hi = tBest+tWorst; // cout << t_lo << " , " << t_hi << " : "<< tBest << " , " << t2ndBest << " , " << t3rdBest << endl; getchar(); if (t_hi-t_lo<0.001){ //|| abs(Lv1OutCVScores[indtop[0]]-Lv1OutCVScores[indtop[1]])<0.00001) { cout << "reaching temp values < 0.001 or Lv1OutCVScores diff < 0.00001"<<endl; getchar(); break; } } double t_Delta=(t_hi-t_lo)/double(LocalMaxTempIterations-1); LocalTemperatures.clear(); for(int i=0; i<LocalMaxTempIterations; i++){ Temperatures.push_back(t_lo+(double(i))*t_Delta); LocalTemperatures.push_back(t_lo+(double(i))*t_Delta); } int LocalTempIterations=0; do { //LogTemperaturePrior logPrior(Temperatures[TempIterations]);//t_lo); //LogTemperaturePrior logPrior(LocalTemperatures[LocalTempIterations]);//t_lo); double temperatureNow = LocalTemperatures[LocalTempIterations]; seedStarts += TempIterations; // a container for our histograms std::vector< subpavings::AdaptiveHistogram* > hists; // a container for our PCFs of histograms std::vector< subpavings::PiecewiseConstantFunction* > pcfs; bool succPQMCopt = false; //ostringstream strs; strs << temperatureNow; //string burstsFileBaseNameNow = burstsFileBaseName+"_"+strs.str(); succPQMCopt = optPQMCAdapHist (Data, temperatureNow, hists, pcfs, minPoints, minVolume, chooseStarts, keep, stopOnMaxPosterior, postFileName, checkPostFileNameBase, burstsFileBaseName, printHist, precPQ, CarvingMaxPosterior, seedStarts); real lv1outCVScore = pcfs[0]->getLeave1OutCVScore(*hists[0]); Lv1OutCVScores.push_back(-1.0*lv1outCVScore);// -1.0* so we want to max to be min LocalTempIterations++; //to free all the contents of pcfs at the end for (size_t i = 0; i < pcfs.size(); ++i) { if (NULL != pcfs[i]) delete pcfs[i]; pcfs[i] = NULL;} //to free all the contents of hists at the end if(CarvingMaxPosterior) { for (size_t i = 0; i < hists.size(); ++i) { if (NULL != hists[i]) delete hists[i]; hists[i] = NULL;} } } while (LocalTempIterations<LocalMaxTempIterations);// && CVgain>0.1); TempIterations++; } while (TempIterations<MaxTempIterations);// && CVgain>0.1); //////////////////////////////////////////////////////////////////////////////////////////////// cout << "Temperatures:" << endl; cout << Temperatures << endl; cout << "- Lv1OutCVScores: (looking for maximum of -1.0*Lv1OutCVScores)" << endl; cout << Lv1OutCVScores << endl << endl; cout << "Temp Iteration Number " << TempIterations << endl; //getchar(); cout << "MaxTempIterations = " << MaxTempIterations << endl; //getchar(); vector<size_t> indtop(Lv1OutCVScores.size()); topk(Lv1OutCVScores, indtop); t_opt = _double(Temperatures[indtop[0]]); Lv1OutCVScores_opt = _double(Lv1OutCVScores[indtop[0]]); success=true; return success; }
//----------------------------------------------------------------------- // optimizing over smoothing parameter Temp - // getting best histogram over parameter in [t_lo, t_hi] by max/minmising // a CV based score //----------------------------------------------------------------------- // this is a slower and generic K-fold CV method -- USE Leave1Out for Histograms!!! // CAUTON: held out Lkl is used as an example and should be replaced //with the appropriate scoring rule for maximization over parameter in [t_lo, t_hi] bool selectPriorByLlkCV (RVecData & transformedData, size_t K, double t_lo, double t_hi, int LocalMaxTempIterations, int MaxTempIterations, RealVec & AvgHeldOutLkls, vector<double> & Temperatures, double & t_opt, double & AvgHeldOutLkls_opt, size_t minPoints, int chooseStarts, int keep, bool stopOnMaxPosterior, string postFileName, string checkPostFileNameBase, int precPQ, unsigned long int seedStarts) { RealVec AvgHeldOutEmpiricalDeviations; // first get a root box containing all points AdaptiveHistogram adhA0;//main hist object bool successfulInsertion = false; successfulInsertion = adhA0.insertFromRVec(transformedData);//insert transformed data if (!successfulInsertion) throw std::runtime_error("Failed to insert transformed data"); //transformedData.clear();//keep the transformed data!! size_t n = adhA0.getRootCounter(); size_t d = adhA0.getDimensions (); //cout << "transformed data: n = " << n << endl; getchar(); bool success=false; const size_t N = adhA0.getRootCounter();//size of successfully inserted transformed data const size_t KofN = N/K; //cout << KofN << endl; getchar(); size_t nTrain; adhA0.clearAllHistData();//clear the transformed data to make space during CV RVecData transformedDataT;//container to keep the transformed Training data from first burst RVecData transformedDataV;//container to keep the transformed Validation data from first burst // set up for permutations const gsl_rng_type * T; gsl_rng * r; gsl_permutation * p = gsl_permutation_alloc (N); gsl_permutation * q = gsl_permutation_alloc (N); gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc (T); //printf ("initial permutation:"); gsl_permutation_init (p); //gsl_permutation_fprintf (stdout, p, " %u"); printf ("\n"); getchar(); //////////////////////////////////////////////////////////////////////////////////////////////// int TempIterations=0; std::vector<double> LocalTemperatures; do{// outer temp iterations if (TempIterations!=0){ std::vector<size_t> indtop(AvgHeldOutLkls.size()); topk(AvgHeldOutLkls, indtop); double tBest =Temperatures[indtop[0]]; double t2ndBest =Temperatures[indtop[1]]; double t3rdBest =Temperatures[indtop[2]]; double tWorst = max(abs(tBest-t2ndBest),abs(tBest-t3rdBest)); t_lo = max(0.00001,tBest-tWorst); t_hi = tBest+tWorst; // cout << t_lo << " , " << t_hi << " : "<< tBest << " , " << t2ndBest << " , " << t3rdBest << endl; getchar(); if (t_hi-t_lo<0.001 || abs(AvgHeldOutLkls[indtop[0]]-AvgHeldOutLkls[indtop[1]])<0.01) { cout << "reaching temp values < 0.001 or likl diff < 1.0"<<endl; getchar(); break; } } double t_Delta=(t_hi-t_lo)/double(LocalMaxTempIterations-1); LocalTemperatures.clear(); for(int i=0; i<LocalMaxTempIterations; i++){ Temperatures.push_back(t_lo+(double(i))*t_Delta); LocalTemperatures.push_back(t_lo+(double(i))*t_Delta); } int LocalTempIterations=0; do { //LogTemperaturePrior logPrior(Temperatures[TempIterations]);//t_lo); LogTemperaturePrior logPrior(LocalTemperatures[LocalTempIterations]);//t_lo); seedStarts += TempIterations; real HeldOutLkl = 0.0; real HeldOutEmpiricalDeviation = 0.0; // a container for our histograms at various temperatures AdaptiveHistogram adhA0cv(adhA0.getRootBox()); // make adh for CV with root box from adhA0 for (int cvI=1; cvI<K; cvI++)//K-fold CV loop { gsl_ran_shuffle (r, p->data, N, sizeof(size_t)); successfulInsertion = false; std::vector< subpavings::AdaptiveHistogram* > histsT; adhA0cv.clearAllHistData();//clear the cv histogram before insertion adhA0cv.mergeUp();//Merge the possibly multileaf cv histogram up to just root. transformedDataV.clear(); transformedDataT.clear();//clear Cv containers for(size_t i=0; i<KofN; i++) transformedDataV.push_back(transformedData[gsl_permutation_get(p,i)]); for(size_t i=KofN; i<N; i++) transformedDataT.push_back(transformedData[gsl_permutation_get(p,i)]); successfulInsertion = adhA0cv.insertFromRVec(transformedDataT);//insert transformed data if (!successfulInsertion) throw std::runtime_error("Failed to insert transformed data"); /* some guesses for max points in a node to stop posterior queue */ nTrain = adhA0cv.getRootCounter(); //cout << "nTrain = " << nTrain << endl; getchar(); size_t critSEB = static_cast<size_t>(std::log(static_cast<double>(nTrain)));//can be as low as 1 /* some guesses for maximum leaves we'll let SEB queue go to */ size_t maxLeavesSEB = nTrain/2;// / critSEB; // integer division size_t maxLeavesCarving = maxLeavesSEB / 3; // integer division SPSNodeMeasureVolMassMinus compCarving(nTrain); AdaptiveHistogram::PrioritySplitQueueEvaluator evaluatorCarving(compCarving, maxLeavesCarving); SPSNodeMeasureCount compSEB; AdaptiveHistogram::PrioritySplitQueueEvaluator evaluatorSEB(compSEB, critSEB, maxLeavesSEB); CarverSEB::findStartingPointsBest(adhA0cv, histsT, evaluatorCarving, evaluatorSEB, logPrior, minPoints, chooseStarts, keep, stopOnMaxPosterior, postFileName, checkPostFileNameBase, precPQ, seedStarts); PiecewiseConstantFunction pcfT(*histsT[0]); pcfT.smearZeroValues(0.0000001); //assert (pcfT.getTotalIntegral() == cxsc::real(1.0)); histsT[0]->clearAllHistData();//clear the data from training big burst histsT[0]->insertFromRVec(transformedDataV);//insert validation data HeldOutLkl += pcfT.getLogLikelihood(*histsT[0]); PiecewiseConstantFunction pcfV(*histsT[0]); //if(pcfT.getTotalIntegral() != cxsc::real(1.0)) {cout << "433!!!"; getchar();} //histsT[0]->clearAllHistData();//clear the data from validation big burst //histsT[0]->mergeUp();//merge up to root HeldOutEmpiricalDeviation += pcfT.getL1Distance(pcfV); //cout << HeldOutLkl << '\t' << HeldOutEmpiricalDeviation << endl; getchar(); //to free all the contents of histsT at the end for (size_t i = 0; i < histsT.size(); ++i) { if (NULL != histsT[i]) delete histsT[i]; histsT[i] = NULL; } } //cout << "Avg HeldOutLkl = " << HeldOutLkl/double(K) << '\n' // << "Avg HeldOutEmpiricalDeviation = " << HeldOutEmpiricalDeviation/double(K) << endl; getchar(); AvgHeldOutLkls.push_back(HeldOutLkl/double(K)); AvgHeldOutEmpiricalDeviations.push_back(HeldOutEmpiricalDeviation/double(K)); LocalTempIterations++; } while (LocalTempIterations<LocalMaxTempIterations);// && CVgain>0.1); TempIterations++; } while (TempIterations<MaxTempIterations);// && CVgain>0.1); //////////////////////////////////////////////////////////////////////////////////////////////// cout << Temperatures << endl; cout << AvgHeldOutLkls << endl << AvgHeldOutEmpiricalDeviations << endl; cout << "Temp Iteration Number " << TempIterations << endl; getchar(); cout << "MaxTempIterations = " << MaxTempIterations << endl; getchar(); vector<size_t> indtop(AvgHeldOutLkls.size()); topk(AvgHeldOutLkls, indtop); t_opt = _double(Temperatures[indtop[0]]); AvgHeldOutLkls_opt = _double(AvgHeldOutLkls[indtop[0]]); gsl_permutation_free (p); gsl_rng_free (r); success=true; return success; }
bool CompressSpectrum::initializeInternal(const SignalBank &input) { LOUDNESS_ASSERT(input.getNChannels() > 1, name_ << ": Insufficient number of channels."); /* * This code is sloppy due to along time spent figuring how * to implement the damn thing. * It's currently in two parts, one that searches for the limits of each * summation range in order to satisfy summation criterion. * The other that finds the average Centre frequencies per compressed band. */ int nChannels = input.getNChannels(); int i=0, binIdxPrev = 0; Real dif = hertzToCam(input.getCentreFreq(1)) - hertzToCam(input.getCentreFreq(0)); int groupSize = max(2.0, std::floor(alpha_/(dif))); int groupSizePrev = groupSize; vector<int> groupSizeStore, binIdx; while(i < nChannels-1) { //compute different between adjacent bins on Cam scale dif = hertzToCam(input.getCentreFreq(i+1)) - hertzToCam(input.getCentreFreq(i)); //Check if we can sum bins in group size if(dif < (alpha_/double(groupSize))) { /* * from here we can group bins in groupSize * whilst maintaining alpha spacing */ //Check we have zero idx if((binIdx.size() < 1) && (i>0)) { binIdx.push_back(0); groupSizeStore.push_back(1); } /* * This line ensures the next group starts at the next multiple of the previous * groupSize above the previous starting position. * This is why you sometimes get finer resolution than the criterion */ int store = ceil((i-binIdxPrev)/double(groupSizePrev))*groupSizePrev+binIdxPrev; /* * This line is cheeky; it re-evaluates the groupSize at the new multiple * in attempt to maintain alpha spacing, I'm not 100% but the algorithm * seems to satisfy various criteria */ if((store > 0) && (store < nChannels)) { dif = hertzToCam(input.getCentreFreq(store)) - hertzToCam(input.getCentreFreq(store-1)); groupSize = max((double)groupSize, std::floor(alpha_/dif)); } //fill variables groupSizePrev = groupSize; binIdxPrev = store; //storage binIdx.push_back(store); groupSizeStore.push_back(groupSize); //print "Bin: %d, Binnew: %d, composite bin size: %d" % (i, store, groupSize) //Move i along i = store+groupSize; //increment groupSize for wider group groupSize += 1; } else i += 1; } //add the final frequency if(binIdx[binIdx.size()-1] < nChannels) binIdx.push_back(nChannels); //PART 2 //compressed spectrum RealVec cfs; Real fa = 0; int count = 0; int j = 0; i = 0; while(i < nChannels) { //bounds check out? if(i<binIdx[j+1]) { fa += input.getCentreFreq(i); count++; if (count==groupSizeStore[j]) { //upper limit upperBandIdx_.push_back(i+1); //+1 for < conditional //set the output frequency cfs.push_back(fa/count); count = 0; fa = 0; } i++; } else j++; } //add the final component if it didn't make it if (count>0) { cfs.push_back(fa/count); upperBandIdx_.push_back(i); } //check #if defined(DEBUG) Real freqLimit = 0.0; for(unsigned int i=0; i<cfs.size()-1; i++) { if((hertzToCam(cfs[i+1]) - hertzToCam(cfs[i])) > alpha_) freqLimit = cfs[i]; } LOUDNESS_DEBUG("CompressSpectrum: Criterion satisfied above " << freqLimit << " Hz."); #endif //set output SignalBank output_.initialize(input.getNSources(), input.getNEars(), cfs.size(), 1, input.getFs()); output_.setCentreFreqs(cfs); output_.setFrameRate(input.getFrameRate()); LOUDNESS_DEBUG(name_ << ": Number of bins comprising the compressed spectrum: " << output_.getNChannels()); return 1; }