/* Run one round of the PSRF (rhat) convergence diagnostic for the v's.
 *
 * Uses the accumulators already held by this object (the sum of sample
 * variances over chains, the sum of squares of the per-chain running sums
 * and the running sum over all chains) to compute W, B, the estimated
 * var(v) and rhat for the current state.  Each of these is appended to its
 * log (Ws, Bs, estVarV, rhat), and rhatDiagnosticFlag is updated according
 * to whether rhat lies within tol of 1.0.
 *
 * Returns the (possibly updated) value of rhatDiagnosticFlag.
 */
int MCMCGRDiagnosticPSRF::calcDiagnosticsForLoop()
{
	// when logging, remember the running sum over all chains for this state
	if (keepLogs) {
		runningSumOverall.push_back(runningSumAllChains);
	}

	// number of states that take part in the calculations
	size_t statesForCalcs = states - statesNotInCalcs;

	// W: the average, over chains, of the sample variance of the scalar
	cxsc::real thisW = sumOfSampleVariancesOverChains/(chains * 1.0);

	#ifdef MYDEBUG_CALCS_EXTRA
		cout << "and thisW = " << thisW << endl;
	#endif

	Ws.push_back(thisW);

	// B: computed from the sums of the per-chain running sums
	cxsc::real thisB = (1.0/( (chains - 1) * statesForCalcs )
				* ( sumOfSquaresOfRunningSums
					- (runningSumAllChains
						* runningSumAllChains/(chains * 1.0)) ) );
	Bs.push_back(thisB);

	#ifdef MYDEBUG_CALCS
		// cross-check thisB the long way: runningSum holds one running
		// sum for each chain
		RealVec perChainMeans;
		cxsc::real totalOfRunningSums(0.0);
		for (RealVecItr rit = runningSum.begin();
				rit != runningSum.end(); ++rit) {
			cxsc::real oneChainSum = (*rit);
			perChainMeans.push_back( oneChainSum/(statesForCalcs * 1.0) );
			totalOfRunningSums += oneChainSum;
		}
		cxsc::real grandMean
				= totalOfRunningSums/(statesForCalcs * chains * 1.0);

		// accumulate the squared deviations from the overall average
		cxsc::dotprecision sumSqDeviations(0.0);
		for (RealVecItr mit = perChainMeans.begin();
				mit != perChainMeans.end(); ++mit) {
			cxsc::real deviation = (*mit) - grandMean;
			cxsc::accumulate(sumSqDeviations, deviation, deviation);
		}
		cxsc::real altB
				= rnd(sumSqDeviations)*( statesForCalcs/(chains - 1.0) );

		cout << "\nthisB for v's is\t" << thisB << endl;
		cout << "altB for v's is\t" << altB << endl;
		//assert(thisB == altB);
	#endif

	// the estimated var(v); stays 0 unless more than one state is in use
	// NOTE(review): the usual Gelman-Rubin pooled variance weights W by
	// (n-1)/n, whereas the factor applied here is n/(n-1) - confirm that
	// this is what is intended.
	cxsc::real thisVarV(0.0);
	if (statesForCalcs > 1) {
		thisVarV = statesForCalcs/(statesForCalcs-1.0)
					* thisW + (1.0/statesForCalcs)*thisB;
		#ifndef OLDCALCMETHOD
			// extra B/(chains * states) term, skipped for the old method
			thisVarV +=thisB/(1.0*chains*statesForCalcs);
		#endif
	}
	estVarV.push_back(thisVarV);

	// rhat: only left at 0 when neither W nor var(v) is positive, so a
	// division by W == 0 is deliberately allowed when var(v) > 0
	cxsc::real thisRhat(0.0);
	if (thisW > 0.0 || thisVarV > 0.0) {
		thisRhat = thisVarV/thisW;
	}
	rhat.push_back(thisRhat);

	#ifdef MYDEBUG_CALCS_EXTRA
		cout << "thisRhat = " << thisRhat << " - press any key " << endl;
		getchar();
	#endif

	// the test is satisfied when rhat is within tol of 1.0 on either side
	const bool withinTol = (thisRhat <= 1.0 + tol)
							&& (thisRhat >= 1.0 - tol);

	if (withinTol && !rhatDiagnosticFlag) {
		// newly satisfied on this scalar value
		#ifdef MYDEBUG
			cout << "\n" << getScalarsName()
				<< " convergence test satisfied in state "
				<< states << " (states in calcs = "
				<< statesForCalcs << ")"<< endl;
		#endif
		rhatDiagnosticFlag = 1;
	}
	else if (!withinTol && rhatDiagnosticFlag) {
		// was satisfied before, but is not any more
		#ifdef MYDEBUG
			cout << "\n--------- Note: " << getScalarsName()
				<< " convergence test now NOT satisfied in state "
				<< states << endl;
		#endif
		rhatDiagnosticFlag = 0;
	}

	// when logging, keep the flag value for this state too
	if (keepLogs) {
		rhatFlag.push_back(rhatDiagnosticFlag);
	}

	return rhatDiagnosticFlag;
} // end calculations
// Example #2
// 0
//-----------------------------------------------------------------------
// Optimise over the smoothing parameter Temp: look for the best histogram
// for a temperature in [t_lo, t_hi] using a leave-one-out CV based score.
//
// Each outer iteration lays an evenly spaced grid of
// LocalMaxTempIterations temperatures over [t_lo, t_hi], scores every grid
// point with optPQMCAdapHist + getLeave1OutCVScore, and then narrows the
// interval around the three best-ranked temperatures found so far.
//
// Data                    the data passed on to optPQMCAdapHist
// t_lo, t_hi              initial temperature interval
// LocalMaxTempIterations  grid points per outer iteration (must be >= 1;
//                         a value of 1 evaluates t_lo only)
// MaxTempIterations       maximum number of outer iterations (at least one
//                         is always run)
// Lv1OutCVScores          appended to: -1.0 * score for every temperature
// Temperatures            appended to: every temperature tried, in step
//                         with Lv1OutCVScores
// t_opt, Lv1OutCVScores_opt
//                         set on success to the temperature and score that
//                         topk ranks first
// remaining parameters    passed through unchanged to optPQMCAdapHist
//                         (seedStarts is advanced by the outer iteration
//                         count before every call)
//
// Returns true on success.  Returns false, leaving t_opt and
// Lv1OutCVScores_opt untouched, if LocalMaxTempIterations < 1 or if
// optPQMCAdapHist yields no histogram/function to score.
//-----------------------------------------------------------------------
bool selectPriorByLv1OutCV (RVecData & Data, double t_lo, double t_hi,
				int LocalMaxTempIterations, int MaxTempIterations,
				RealVec & Lv1OutCVScores, vector<double> & Temperatures, 
				double & t_opt, double & Lv1OutCVScores_opt,
				size_t minPoints, double minVolume, int chooseStarts, int keep, 
				bool stopOnMaxPosterior, string postFileName, 
				string checkPostFileNameBase, string burstsFileBaseName, 
				bool printHist, int precPQ, 
				bool CarvingMaxPosterior, 
				unsigned long int seedStarts)
{
  // Without at least one grid point there is nothing to score (and the
  // loops below would index an empty container).
  if (LocalMaxTempIterations < 1) return false;

  int TempIterations=0;
  std::vector<double> LocalTemperatures;
  do { // outer temp iterations: each pass refines [t_lo, t_hi]
    if (TempIterations!=0) {
      std::vector<size_t> indtop(Lv1OutCVScores.size());
      topk(Lv1OutCVScores, indtop);
      // The refinement reads the three top-ranked entries; with fewer
      // than three scores we cannot refine, so keep what we have.
      if (indtop.size() < 3) break;
      const double tBest = Temperatures[indtop[0]];
      const double t2ndBest = Temperatures[indtop[1]];
      const double t3rdBest = Temperatures[indtop[2]];
      // half-width of the new interval: the larger distance from the best
      // temperature to the second and third best
      const double tWorst = max(abs(tBest-t2ndBest),abs(tBest-t3rdBest));
      t_lo = max(t_lo,tBest-tWorst); t_hi = tBest+tWorst;
      if (t_hi-t_lo<0.001) {
        //|| abs(Lv1OutCVScores[indtop[0]]-Lv1OutCVScores[indtop[1]])<0.00001) {
        cout << "reaching temp values < 0.001 or Lv1OutCVScores diff < 0.00001"<<endl; 
        // NOTE(review): this waits for a key press before stopping; kept
        // because existing interactive use may rely on the pause.
        getchar(); 
        break;
      }
    }

    // Evenly spaced grid over [t_lo, t_hi]; a single-point grid sits at
    // t_lo (the unguarded division would otherwise produce NaN).
    const double t_Delta = (LocalMaxTempIterations > 1)
                ? (t_hi-t_lo)/double(LocalMaxTempIterations-1) : 0.0;
    LocalTemperatures.clear();
    for (int i=0; i<LocalMaxTempIterations; i++) {
      const double t = t_lo+(double(i))*t_Delta;
      Temperatures.push_back(t);
      LocalTemperatures.push_back(t);
    }

    for (int LocalTempIterations=0;
            LocalTempIterations<LocalMaxTempIterations;
            LocalTempIterations++) {
      const double temperatureNow = LocalTemperatures[LocalTempIterations];
      seedStarts += TempIterations;
      // a container for our histograms
      std::vector< subpavings::AdaptiveHistogram* > hists;
      // a container for our PCFs of histograms
      std::vector< subpavings::PiecewiseConstantFunction* > pcfs;

      // NOTE(review): the returned flag was never acted on; its meaning is
      // not visible from here, so only unusable containers are treated as
      // failure below.
      bool succPQMCopt = false;
      succPQMCopt = optPQMCAdapHist (Data, temperatureNow, hists, pcfs,
				minPoints, minVolume, chooseStarts, keep, 
				stopOnMaxPosterior, postFileName, 
				checkPostFileNameBase, burstsFileBaseName, printHist, precPQ, 
				CarvingMaxPosterior, seedStarts);

      // Only score when there really is a first histogram and function.
      const bool canScore = !pcfs.empty() && !hists.empty()
                            && NULL != pcfs[0] && NULL != hists[0];
      if (canScore) {
        real lv1outCVScore = pcfs[0]->getLeave1OutCVScore(*hists[0]);
        // stored negated: the ranking is done on -1.0 * score
        Lv1OutCVScores.push_back(-1.0*lv1outCVScore);
      }

      // free all the contents of pcfs
      for (size_t i = 0; i < pcfs.size(); ++i) {
        delete pcfs[i];
        pcfs[i] = NULL;
      }
      // free the contents of hists, but only in carving mode
      // NOTE(review): presumably the histograms are not ours to delete
      // otherwise - confirm against optPQMCAdapHist.
      if (CarvingMaxPosterior) {
        for (size_t i = 0; i < hists.size(); ++i) {
          delete hists[i];
          hists[i] = NULL;
        }
      }

      if (!canScore) return false;
    }
    TempIterations++;
  }
  while (TempIterations<MaxTempIterations);// && CVgain>0.1);

  cout << "Temperatures:" << endl;
  cout << Temperatures << endl;
  cout << "- Lv1OutCVScores: (looking for maximum of -1.0*Lv1OutCVScores)" << endl;
  cout << Lv1OutCVScores << endl << endl;
  cout << "Temp Iteration Number " << TempIterations << endl; //getchar();
  cout << "MaxTempIterations = " << MaxTempIterations << endl; //getchar();

  // report the top-ranked temperature and its (negated) score
  vector<size_t> indtop(Lv1OutCVScores.size());
  topk(Lv1OutCVScores, indtop);
  t_opt = _double(Temperatures[indtop[0]]);
  Lv1OutCVScores_opt = _double(Lv1OutCVScores[indtop[0]]);

  return true;
}
// Example #3
// 0
//-----------------------------------------------------------------------
// Optimise over the smoothing parameter Temp: look for the best histogram
// for a temperature in [t_lo, t_hi] using a held-out (cross-validation)
// score.
//-----------------------------------------------------------------------
// This is a slower and generic K-fold CV method -- USE Leave1Out for
// Histograms!!!
// CAUTION: the held-out likelihood is used as an example and should be
// replaced with the appropriate scoring rule for maximisation over the
// parameter in [t_lo, t_hi].
//
// Each outer iteration lays an evenly spaced grid of
// LocalMaxTempIterations temperatures over [t_lo, t_hi].  For every grid
// point K rounds are run; each round reshuffles the data, holds out the
// first N/K shuffled points for validation and trains on the rest (so the
// rounds are random splits rather than a fixed partition).  The interval
// is then narrowed around the three best-ranked temperatures.
//
// transformedData         the data to cross-validate on
// K                       number of rounds per temperature (must be >= 2)
// t_lo, t_hi              initial temperature interval
// LocalMaxTempIterations  grid points per outer iteration (must be >= 1;
//                         a value of 1 evaluates t_lo only)
// MaxTempIterations       maximum number of outer iterations (at least one
//                         is always run)
// AvgHeldOutLkls          appended to: average held-out log-likelihood for
//                         every temperature
// Temperatures            appended to: every temperature tried, in step
//                         with AvgHeldOutLkls
// t_opt, AvgHeldOutLkls_opt
//                         set on success to the temperature and score that
//                         topk ranks first
// remaining parameters    passed through to
//                         CarverSEB::findStartingPointsBest (seedStarts is
//                         advanced by the outer iteration count for every
//                         temperature)
//
// Returns true on success, false if K < 2 or LocalMaxTempIterations < 1.
// Throws std::runtime_error if data cannot be inserted into a histogram or
// if no starting histogram is produced for a round.
bool selectPriorByLlkCV (RVecData & transformedData, size_t K, double t_lo, double t_hi,
				int LocalMaxTempIterations, int MaxTempIterations,
				RealVec & AvgHeldOutLkls, vector<double> & Temperatures, 
				double & t_opt, double & AvgHeldOutLkls_opt,
				size_t minPoints, int chooseStarts, int keep, 
				bool stopOnMaxPosterior, string postFileName, 
				string checkPostFileNameBase, int precPQ, 
				unsigned long int seedStarts)
{
  // Releases the GSL objects on every exit path, including exceptions.
  struct GslResources {
    gsl_permutation * perm;
    gsl_rng * rng;
    GslResources() : perm(NULL), rng(NULL) {}
    ~GslResources() {
      if (NULL != perm) gsl_permutation_free (perm);
      if (NULL != rng) gsl_rng_free (rng);
    }
  };
  // Owns the histograms handed back for one round and deletes them when
  // the round ends, however it ends.
  struct OwnedHists {
    std::vector< subpavings::AdaptiveHistogram* > items;
    ~OwnedHists() {
      for (size_t i = 0; i < items.size(); ++i) delete items[i];
    }
  };

  // K == 0 would divide by zero below and K == 1 leaves no training data;
  // an empty temperature grid leaves nothing to evaluate.
  if (K < 2 || LocalMaxTempIterations < 1) return false;

  RealVec AvgHeldOutEmpiricalDeviations; // only reported on cout at the end

  // first get a root box containing all points 
  AdaptiveHistogram adhA0;//main hist object 
  if (!adhA0.insertFromRVec(transformedData))//insert transformed data
    throw std::runtime_error("Failed to insert transformed data");
  //transformedData.clear();//keep the transformed data!!

  const size_t N = adhA0.getRootCounter();//size of successfully inserted transformed data
  const size_t KofN = N/K; // number of points held out in each round
  adhA0.clearAllHistData();//clear the transformed data to make space during CV
  RVecData transformedDataT;//container to keep the transformed Training data
  RVecData transformedDataV;//container to keep the transformed Validation data

  // set up for permutations
  GslResources gsl;
  gsl.perm = gsl_permutation_alloc (N);
  gsl_rng_env_setup();
  gsl.rng = gsl_rng_alloc (gsl_rng_default);
  gsl_permutation_init (gsl.perm);
  gsl_permutation * const p = gsl.perm;
  gsl_rng * const r = gsl.rng;

  int TempIterations=0;
  std::vector<double> LocalTemperatures;
  do { // outer temp iterations: each pass refines [t_lo, t_hi]
    if (TempIterations!=0) {
      std::vector<size_t> indtop(AvgHeldOutLkls.size());
      topk(AvgHeldOutLkls, indtop);
      // The refinement reads the three top-ranked entries; with fewer
      // than three scores we cannot refine, so keep what we have.
      if (indtop.size() < 3) break;
      const double tBest = Temperatures[indtop[0]];
      const double t2ndBest = Temperatures[indtop[1]];
      const double t3rdBest = Temperatures[indtop[2]];
      // half-width of the new interval: the larger distance from the best
      // temperature to the second and third best
      const double tWorst = max(abs(tBest-t2ndBest),abs(tBest-t3rdBest));
      t_lo = max(0.00001,tBest-tWorst); t_hi = tBest+tWorst;
      if (t_hi-t_lo<0.001 || 
            abs(AvgHeldOutLkls[indtop[0]]-AvgHeldOutLkls[indtop[1]])<0.01) {
        // NOTE(review): the message quotes 1.0 but the test uses 0.01, and
        // getchar() waits for a key press; both kept as existing behaviour.
        cout << "reaching temp values < 0.001 or likl diff < 1.0"<<endl; getchar(); break;
      }
    }

    // Evenly spaced grid over [t_lo, t_hi]; a single-point grid sits at
    // t_lo (the unguarded division would otherwise produce NaN).
    const double t_Delta = (LocalMaxTempIterations > 1)
                ? (t_hi-t_lo)/double(LocalMaxTempIterations-1) : 0.0;
    LocalTemperatures.clear();
    for (int i=0; i<LocalMaxTempIterations; i++) {
      const double t = t_lo+(double(i))*t_Delta;
      Temperatures.push_back(t);
      LocalTemperatures.push_back(t);
    }

    for (int LocalTempIterations=0;
            LocalTempIterations<LocalMaxTempIterations;
            LocalTempIterations++) {
      LogTemperaturePrior logPrior(LocalTemperatures[LocalTempIterations]);
      seedStarts += TempIterations;
      real HeldOutLkl = 0.0;
      real HeldOutEmpiricalDeviation = 0.0;
      // make adh for CV with root box from adhA0
      AdaptiveHistogram adhA0cv(adhA0.getRootBox());

      // Run exactly K rounds so that the division by K below is a true
      // average (the loop previously ran only K-1 times).
      for (size_t cvI = 0; cvI < K; ++cvI) {
        gsl_ran_shuffle (r, p->data, N, sizeof(size_t));
        OwnedHists histsT;
        adhA0cv.clearAllHistData();//clear the cv histogram before insertion
        adhA0cv.mergeUp();//merge the possibly multileaf cv histogram up to just root

        // first KofN shuffled points validate, the remainder train
        transformedDataV.clear(); transformedDataT.clear();
        for (size_t i=0; i<KofN; i++)
          transformedDataV.push_back(transformedData[gsl_permutation_get(p,i)]);
        for (size_t i=KofN; i<N; i++)
          transformedDataT.push_back(transformedData[gsl_permutation_get(p,i)]);
        if (!adhA0cv.insertFromRVec(transformedDataT))
          throw std::runtime_error("Failed to insert transformed data");

        /* some guesses for max points in a node to stop posterior queue */
        const size_t nTrain = adhA0cv.getRootCounter();
        size_t critSEB = static_cast<size_t>(std::log(static_cast<double>(nTrain)));//can be as low as 1
        /* some guesses for maximum leaves we'll let SEB queue go to */
        size_t maxLeavesSEB = nTrain/2;// / critSEB; // integer division
        size_t maxLeavesCarving = maxLeavesSEB / 3; // integer division
        SPSNodeMeasureVolMassMinus compCarving(nTrain);
        AdaptiveHistogram::PrioritySplitQueueEvaluator evaluatorCarving(compCarving, maxLeavesCarving);
        SPSNodeMeasureCount compSEB;
        AdaptiveHistogram::PrioritySplitQueueEvaluator evaluatorSEB(compSEB, critSEB, maxLeavesSEB);
        CarverSEB::findStartingPointsBest(adhA0cv, histsT.items, evaluatorCarving, evaluatorSEB, logPrior, 
						minPoints, chooseStarts, keep, stopOnMaxPosterior, 
						postFileName, checkPostFileNameBase, precPQ, seedStarts);
        if (histsT.items.empty() || NULL == histsT.items[0])
          throw std::runtime_error("No starting histogram found for CV round");

        subpavings::AdaptiveHistogram & bestHist = *histsT.items[0];
        PiecewiseConstantFunction pcfT(bestHist);
        pcfT.smearZeroValues(0.0000001);
        //assert (pcfT.getTotalIntegral() == cxsc::real(1.0));
        bestHist.clearAllHistData();//clear the training data
        bestHist.insertFromRVec(transformedDataV);//insert validation data
        HeldOutLkl += pcfT.getLogLikelihood(bestHist);
        PiecewiseConstantFunction pcfV(bestHist);
        HeldOutEmpiricalDeviation += pcfT.getL1Distance(pcfV);
      }

      AvgHeldOutLkls.push_back(HeldOutLkl/double(K));
      AvgHeldOutEmpiricalDeviations.push_back(HeldOutEmpiricalDeviation/double(K));
    }
    TempIterations++;
  }
  while (TempIterations<MaxTempIterations);// && CVgain>0.1);

  cout << Temperatures << endl;
  cout << AvgHeldOutLkls << endl << AvgHeldOutEmpiricalDeviations << endl;
  cout << "Temp Iteration Number " << TempIterations << endl; getchar();
  cout << "MaxTempIterations = " << MaxTempIterations << endl; getchar();

  // report the top-ranked temperature and its score
  vector<size_t> indtop(AvgHeldOutLkls.size());
  topk(AvgHeldOutLkls, indtop);
  t_opt = _double(Temperatures[indtop[0]]);
  AvgHeldOutLkls_opt = _double(AvgHeldOutLkls[indtop[0]]);

  return true;
}
// Example #4
// 0
    bool CompressSpectrum::initializeInternal(const SignalBank &input)
    {
        LOUDNESS_ASSERT(input.getNChannels() > 1, name_ << ": Insufficient number of channels.");


        /*
         * This code is sloppy due to along time spent figuring how 
         * to implement the damn thing.
         * It's currently in two parts, one that searches for the limits of each
         * summation range in order to satisfy summation criterion.
         * The other that finds the average Centre frequencies per compressed band.
         */
        int nChannels = input.getNChannels();
        int i=0, binIdxPrev = 0;
        Real dif = hertzToCam(input.getCentreFreq(1)) - 
                   hertzToCam(input.getCentreFreq(0));
        int groupSize = max(2.0, std::floor(alpha_/(dif)));
        int groupSizePrev = groupSize;
        vector<int> groupSizeStore, binIdx;

        while(i < nChannels-1)
        {

            //compute different between adjacent bins on Cam scale 
            dif = hertzToCam(input.getCentreFreq(i+1)) - hertzToCam(input.getCentreFreq(i));

            //Check if we can sum bins in group size
            if(dif < (alpha_/double(groupSize)))
            {
                /*  
                 *  from here we can group bins in groupSize
                 *  whilst maintaining alpha spacing
                 */

                //Check we have zero idx
                if((binIdx.size() < 1) && (i>0))
                {
                    binIdx.push_back(0);
                    groupSizeStore.push_back(1);
                }

                /*
                 * This line ensures the next group starts at the next multiple of the previous
                 * groupSize above the previous starting position.
                 * This is why you sometimes get finer resolution than the criterion
                 */

                int store = ceil((i-binIdxPrev)/double(groupSizePrev))*groupSizePrev+binIdxPrev;
                
                /*  
                 *  This line is cheeky; it re-evaluates the groupSize at the new multiple
                 *  in attempt to maintain alpha spacing, I'm not 100% but the algorithm
                 *  seems to satisfy various criteria
                 */
                if((store > 0) && (store < nChannels))
                {
                    dif = hertzToCam(input.getCentreFreq(store)) - 
                          hertzToCam(input.getCentreFreq(store-1));
                    groupSize = max((double)groupSize, std::floor(alpha_/dif));
                }

                //fill variables
                groupSizePrev = groupSize;
                binIdxPrev = store;

                //storage
                binIdx.push_back(store);
                groupSizeStore.push_back(groupSize);
                //print "Bin: %d, Binnew: %d, composite bin size: %d" % (i, store, groupSize)

                //Move i along
                i = store+groupSize;

                //increment groupSize for wider group
                groupSize += 1;
            }
            else
                i += 1;
        }

        //add the final frequency
        if(binIdx[binIdx.size()-1] < nChannels)
            binIdx.push_back(nChannels);

        //PART 2
        //compressed spectrum
        RealVec cfs;
        Real fa = 0;
        int count = 0;
        int j = 0;
        i = 0;
        while(i < nChannels)
        {
            //bounds check out?
            if(i<binIdx[j+1])
            {
                fa += input.getCentreFreq(i);
                count++;
                if (count==groupSizeStore[j])
                {
                    //upper limit
                    upperBandIdx_.push_back(i+1); //+1 for < conditional
                    //set the output frequency
                    cfs.push_back(fa/count);
                    count = 0;
                    fa = 0;
                }
                i++;
            }
            else
                j++;
        }

        //add the final component if it didn't make it
        if (count>0)
        {
            cfs.push_back(fa/count);
            upperBandIdx_.push_back(i); 
        }

        //check
        #if defined(DEBUG)
        Real freqLimit = 0.0;
        for(unsigned int i=0; i<cfs.size()-1; i++)
        {
            if((hertzToCam(cfs[i+1]) - hertzToCam(cfs[i])) > alpha_)
                freqLimit = cfs[i];
        }
        LOUDNESS_DEBUG("CompressSpectrum: Criterion satisfied above " << freqLimit << " Hz.");
        #endif

        //set output SignalBank
        output_.initialize(input.getNSources(),
                           input.getNEars(),
                           cfs.size(),
                           1,
                           input.getFs());
        output_.setCentreFreqs(cfs);
        output_.setFrameRate(input.getFrameRate());
        LOUDNESS_DEBUG(name_ << ": Number of bins comprising the compressed spectrum: "
                << output_.getNChannels());

        return 1;
    }