/*!
 * Select the boundary this Mur-ABC extension acts on and (re)allocate its coefficient tables.
 * \param ny      normal direction of the boundary (0..2); any other value leaves the object untouched
 * \param top_ny  false: boundary at the first line of direction ny, true: boundary at the last line
 */
void Operator_Ext_Mur_ABC::SetDirection(int ny, bool top_ny)
{
	const bool validDirection = (ny>=0) && (ny<=2);
	if (!validDirection)
		return;

	// the old tables must be released while m_numLines still holds their dimensions
	Delete2DArray(m_Mur_Coeff_nyP,m_numLines);
	Delete2DArray(m_Mur_Coeff_nyPP,m_numLines);

	// normal direction and the two (cyclically following) in-plane directions
	m_ny = ny;
	m_top = top_ny;
	m_nyP = (ny+1)%3;
	m_nyPP = (ny+2)%3;

	if (top_ny)
	{
		// upper boundary: last line and its inner neighbour
		m_LineNr = m_Op->GetOriginalNumLines(m_ny)-1;
		m_LineNr_Shift = m_Op->GetOriginalNumLines(m_ny) - 2;
	}
	else
	{
		// lower boundary: first line and its inner neighbour
		m_LineNr = 0;
		m_LineNr_Shift = 1;
	}

	// table dimensions follow the two in-plane directions
	m_numLines[0] = m_Op->GetOriginalNumLines(m_nyP);
	m_numLines[1] = m_Op->GetOriginalNumLines(m_nyPP);

	m_Mur_Coeff_nyP = Create2DArray<FDTD_FLOAT>(m_numLines);
	m_Mur_Coeff_nyPP = Create2DArray<FDTD_FLOAT>(m_numLines);
}
//! Release both Mur coefficient tables and clear the pointers that referred to them.
Operator_Ext_Mur_ABC::~Operator_Ext_Mur_ABC()
{
	// both tables share the dimensions stored in m_numLines
	Delete2DArray(m_Mur_Coeff_nyP,m_numLines);
	Delete2DArray(m_Mur_Coeff_nyPP,m_numLines);

	m_Mur_Coeff_nyP = NULL;
	m_Mur_Coeff_nyPP = NULL;
}
Exemple #3
0
//! Free the angle vectors, all far-field result tables and the thread barrier.
nf2ff_calc::~nf2ff_calc()
{
	// angle sample vectors
	delete[] m_phi;
	delete[] m_theta;
	m_phi = NULL;
	m_theta = NULL;

	// every result table is dimensioned theta x phi
	unsigned int angleDims[2] = {m_numTheta, m_numPhi};

	Delete2DArray(m_E_theta,angleDims);
	Delete2DArray(m_E_phi,angleDims);
	m_E_theta = NULL;
	m_E_phi = NULL;

	Delete2DArray(m_H_theta,angleDims);
	Delete2DArray(m_H_phi,angleDims);
	m_H_theta = NULL;
	m_H_phi = NULL;

	Delete2DArray(m_P_rad,angleDims);
	m_P_rad = NULL;

	// barrier object used for thread synchronisation
	delete m_Barrier;
	m_Barrier = NULL;
}
Exemple #4
0
/*!
 * Add the contribution of one field plane to the accumulated far-field results.
 *
 * The plane normal is the direction n with numLines[n]==1 whose two other directions
 * have more than two lines each. The radiated power through the plane is added to
 * m_radPower, worker threads are started to compute the per-plane Nt/Np/Lt/Lp sums,
 * and the results are accumulated into m_E_theta, m_E_phi, m_H_theta and m_H_phi.
 * m_P_rad and m_maxDir are then recomputed from the accumulated fields.
 *
 * \param lines     mesh line coordinates, indexed as lines[direction][index]
 * \param numLines  number of mesh lines per direction (three entries are read)
 * \param E_field   field data indexed as E_field[component][pos0][pos1][pos2]; freed by this function on success
 * \param H_field   field data indexed like E_field; freed by this function on success
 * \param MeshType  a value of 1 enables the cylindrical-mesh edge-length scaling
 * \return false (with a message on cerr) if no normal direction can be determined, true otherwise
 *
 * Note: on the failure path E_field and H_field are not freed here.
 */
bool nf2ff_calc::AddPlane(float **lines, unsigned int* numLines, complex<float>**** E_field, complex<float>**** H_field, int MeshType)
{
	//find normal direction
	int ny = -1;
	int nP,nPP;
	for (int n=0;n<3;++n)
	{
		nP = (n+1)%3;
		nPP = (n+2)%3;
		if ((numLines[n]==1) && (numLines[nP]>2) && (numLines[nPP]>2))
			ny=n;
	}
	// in-plane directions belonging to the detected normal (only meaningful if ny>=0, checked next)
	nP = (ny+1)%3;
	nPP = (ny+2)%3;
	if (ny<0)
	{
		cerr << "nf2ff_calc::AddPlane: Error can't determine normal direction..." << endl;
		return false;
	}

	// work arrays handed to the worker threads; released again after the threads have finished
	complex<float>**** Js = Create_N_3DArray<complex<float> >(numLines);
	complex<float>**** Ms = Create_N_3DArray<complex<float> >(numLines);

	// sign of the normal: +1 if the plane lies at or beyond m_centerCoord in direction ny, else -1
	float normDir[3]= {0,0,0};
	if (lines[ny][0]>=m_centerCoord[ny])
		normDir[ny]=1;
	else
		normDir[ny]=-1;
	unsigned int pos[3];

	// per-line edge length in direction nP: half the distance between the two neighbouring lines,
	// at both ends half the distance to the single neighbour
	// (runtime-sized stack array, a compiler extension and not standard C++)
	float edge_length_P[numLines[nP]];
	for (unsigned int n=1;n<numLines[nP]-1;++n)
		edge_length_P[n]=0.5*(lines[nP][n+1]-lines[nP][n-1]);
	edge_length_P[0]=0.5*(lines[nP][1]-lines[nP][0]);
	edge_length_P[numLines[nP]-1]=0.5*(lines[nP][numLines[nP]-1]-lines[nP][numLines[nP]-2]);

	// same construction for direction nPP
	float edge_length_PP[numLines[nPP]];
	for (unsigned int n=1;n<numLines[nPP]-1;++n)
		edge_length_PP[n]=0.5*(lines[nPP][n+1]-lines[nPP][n-1]);
	edge_length_PP[0]=0.5*(lines[nPP][1]-lines[nPP][0]);
	edge_length_PP[numLines[nPP]-1]=0.5*(lines[nPP][numLines[nPP]-1]-lines[nPP][numLines[nPP]-2]);

	//check for cylindrical mesh
	if (MeshType==1)
	{
		if (ny==0) //surface a-z
		{
			for (unsigned int n=0;n<numLines[nP];++n)
				edge_length_P[n]*=lines[0][0]; //angle-width * radius
		}
		else if (ny==2) //surface r-a
		{
			//calculate: area = delta_angle * delta_radius * center_radius
			for (unsigned int n=1;n<numLines[nP]-1;++n)
				edge_length_P[n]*=lines[nP][n];  //radius-width * center-radius
			// the two boundary cells are half cells: their centre is shifted inwards by half their width
			edge_length_P[0]*=(lines[nP][0]+0.5*edge_length_P[0]);
			edge_length_P[numLines[nP]-1]*=(lines[nP][numLines[nP]-1]-0.5*edge_length_P[numLines[nP]-1]);
		}
		// ny==1: the edge lengths are left unscaled
	}

	// sum the normal component of 0.5*Re(E x H*) times the cell area over the whole plane
	complex<float> power = 0;
	float area;
	for (pos[0]=0; pos[0]<numLines[0]; ++pos[0])
		for (pos[1]=0; pos[1]<numLines[1]; ++pos[1])
			for (pos[2]=0; pos[2]<numLines[2]; ++pos[2])
			{
				area = edge_length_P[pos[nP]]*edge_length_PP[pos[nPP]];
				power = (E_field[nP][pos[0]][pos[1]][pos[2]]*conj(H_field[nPP][pos[0]][pos[1]][pos[2]]) \
						 - E_field[nPP][pos[0]][pos[1]][pos[2]]*conj(H_field[nP][pos[0]][pos[1]][pos[2]]));
				m_radPower += 0.5*area*real(power)*normDir[ny];
			}
	// dimensions of all theta x phi tables
	unsigned int numAngles[2] = {m_numTheta, m_numPhi};

	// setup multi-threading jobs
	// the lines of direction nP are distributed over the threads; m_numThreads is overwritten
	// with the number of jobs actually assigned
	vector<unsigned int> jpt = AssignJobs2Threads(numLines[nP], m_numThreads, true);
	m_numThreads = jpt.size();
	// one data record per thread (runtime-sized stack array, see note above)
	nf2ff_data thread_data[m_numThreads];
	m_Barrier = new boost::barrier(m_numThreads+1); // numThread workers + 1 controller
	// [start,stop] is the inclusive job range of the current thread
	unsigned int start=0;
	unsigned int stop=jpt.at(0)-1;
	for (unsigned int n=0; n<m_numThreads; n++)
	{
		// shared input data: pointers into this function's locals and arguments
		thread_data[n].ny=ny;
		thread_data[n].mesh_type = MeshType;
		thread_data[n].normDir=normDir;
		thread_data[n].numLines=numLines;
		thread_data[n].lines=lines;
		thread_data[n].edge_length_P=edge_length_P;
		thread_data[n].edge_length_PP=edge_length_PP;
		thread_data[n].E_field=E_field;
		thread_data[n].H_field=H_field;
		thread_data[n].Js=Js;
		thread_data[n].Ms=Ms;
		// thread-local result tables, summed up and released further below
		thread_data[n].m_Nt=Create2DArray<complex<float> >(numAngles);
		thread_data[n].m_Np=Create2DArray<complex<float> >(numAngles);
		thread_data[n].m_Lt=Create2DArray<complex<float> >(numAngles);
		thread_data[n].m_Lp=Create2DArray<complex<float> >(numAngles);

		boost::thread *t = new boost::thread( nf2ff_calc_thread(this,start,stop,n,thread_data[n]) );

		m_thread_group.add_thread( t );

		// advance to the job range of the next thread
		start = stop+1;
		if (n<m_numThreads-1)
			stop = start + jpt.at(n+1)-1;
	}
	//all threads are running and waiting for the barrier

	m_Barrier->wait(); //start

	// threads: calc Js and Ms (eq. 8.15a/b)
	// threads calc their local Nt,Np,Lt and Lp

	m_Barrier->wait(); //combine all thread local Nt,Np,Lt and Lp

	//cleanup E- & H-Fields
	Delete_N_3DArray(E_field,numLines);
	Delete_N_3DArray(H_field,numLines);

	// plane totals; the += below assumes Create2DArray returns zero-initialised storage -- TODO confirm
	complex<float>** Nt = Create2DArray<complex<float> >(numAngles);
	complex<float>** Np = Create2DArray<complex<float> >(numAngles);
	complex<float>** Lt = Create2DArray<complex<float> >(numAngles);
	complex<float>** Lp = Create2DArray<complex<float> >(numAngles);

	for (unsigned int n=0; n<m_numThreads; n++)
	{
		for (unsigned int tn=0;tn<m_numTheta;++tn)
			for (unsigned int pn=0;pn<m_numPhi;++pn)
			{
				Nt[tn][pn] += thread_data[n].m_Nt[tn][pn];
				Np[tn][pn] += thread_data[n].m_Np[tn][pn];
				Lt[tn][pn] += thread_data[n].m_Lt[tn][pn];
				Lp[tn][pn] += thread_data[n].m_Lp[tn][pn];
			}
		// the thread-local tables are no longer needed once they are summed up
		Delete2DArray(thread_data[n].m_Nt,numAngles);
		Delete2DArray(thread_data[n].m_Np,numAngles);
		Delete2DArray(thread_data[n].m_Lt,numAngles);
		Delete2DArray(thread_data[n].m_Lp,numAngles);
	}

	m_Barrier->wait(); //wait for termination
	m_thread_group.join_all(); // wait for termination
	delete m_Barrier;
	m_Barrier = NULL;

	//cleanup Js & Ms
	Delete_N_3DArray(Js,numLines);
	Delete_N_3DArray(Ms,numLines);

	// calc equations 8.23a/b and 8.24a/b
	float k = 2*M_PI*m_freq/__C0__;
	// common factor j*k/(4*pi*r) * exp(-j*k*r) with r = m_radius
	complex<float> factor(0,k/4.0/M_PI/m_radius);
	complex<float> f_exp(0,-1*k*m_radius);
	factor *= exp(f_exp);
	complex<float> Z0 = __Z0__;
	// largest m_P_rad entry found in the loop below
	float P_max = 0;
	for (unsigned int tn=0;tn<m_numTheta;++tn)
		for (unsigned int pn=0;pn<m_numPhi;++pn)
		{
			// the fields accumulate over successive AddPlane calls
			m_E_theta[tn][pn] -= factor*(Lp[tn][pn] + Z0*Nt[tn][pn]);
			m_E_phi[tn][pn] += factor*(Lt[tn][pn] - Z0*Np[tn][pn]);

			m_H_theta[tn][pn] += factor*(Np[tn][pn] - Lt[tn][pn]/Z0);
			m_H_phi[tn][pn] -= factor*(Nt[tn][pn] + Lp[tn][pn]/Z0);

			// m_P_rad is recomputed (not accumulated) from the accumulated E-field
			m_P_rad[tn][pn] = m_radius*m_radius/(2*__Z0__) * abs((m_E_theta[tn][pn]*conj(m_E_theta[tn][pn])+m_E_phi[tn][pn]*conj(m_E_phi[tn][pn])));
			if (m_P_rad[tn][pn]>P_max)
				P_max = m_P_rad[tn][pn];
		}

	//cleanup Nx and Lx
	Delete2DArray(Nt,numAngles);
	Delete2DArray(Np,numAngles);
	Delete2DArray(Lt,numAngles);
	Delete2DArray(Lp,numAngles);

	// uses the radiated power accumulated so far; no guard against m_radPower being zero
	m_maxDir = 4*M_PI*P_max / m_radPower;

	return true;
}
/**
 * Run one randomized BEAGLE likelihood benchmark on a single resource and print the timings.
 *
 * A BEAGLE instance is created, filled with random tip data, rates, weights and a
 * substitution model, and the likelihood computation is repeated nreps times. For each
 * phase the best (smallest) time over all repetitions is kept and printed. When
 * resource is 0 the best times are also stored in the cpuTime* variables.
 *
 * @param resource               resource number, passed to BEAGLE as the only allowed resource
 * @param stateCount             number of states of the substitution model
 * @param ntaxa                  number of tips
 * @param nsites                 number of site patterns
 * @param manualScaling          rescale manually every rescaleFrequency repetitions
 * @param autoScaling            request automatic scaling (dropped if the instance does not report it)
 * @param dynamicScaling         request dynamic scaling
 * @param rateCategoryCount      number of rate categories
 * @param nreps                  number of timed repetitions
 * @param fullTiming             also print the per-phase timings
 * @param requireDoublePrecision require double (instead of single) precision
 * @param requireSSE             require the SSE vectorized implementation
 * @param compactTipCount        the first compactTipCount tips are set as compact states, the rest as partials
 * @param randomSeed             seed handed to gt_srand
 * @param rescaleFrequency       repetition interval for manual rescaling (read only if manualScaling)
 * @param unrooted               use the edge log-likelihood call instead of the root call
 * @param calcderivs             allocate derivative matrices and request first/second derivatives
 * @param logscalers             request log (instead of raw) scalers
 * @param eigenCount             number of eigen decompositions, each with its own set of internal nodes
 * @param eigencomplex           use the hard-coded complex eigen system (only for stateCount==4, eigenCount==1)
 * @param ievectrans             pass the inverse eigenvectors transposed
 * @param setmatrix              set random transition matrices directly instead of an eigen decomposition
 */
void runBeagle(int resource, 
               int stateCount, 
               int ntaxa, 
               int nsites, 
               bool manualScaling, 
               bool autoScaling,
               bool dynamicScaling,
               int rateCategoryCount,
               int nreps,
               bool fullTiming,
               bool requireDoublePrecision,
               bool requireSSE,
               int compactTipCount,
               int randomSeed,
               int rescaleFrequency,
               bool unrooted,
               bool calcderivs,
               bool logscalers,
               int eigenCount,
               bool eigencomplex,
               bool ievectrans,
               bool setmatrix)
{
    
    int edgeCount = ntaxa*2-2;
    int internalCount = ntaxa-1;
    int partialCount = ((ntaxa+internalCount)-compactTipCount)*eigenCount;
    int scaleCount = ((manualScaling || dynamicScaling) ? ntaxa : 0);
    
    BeagleInstanceDetails instDetails;
    
    // create an instance of the BEAGLE library
    int instance = beagleCreateInstance(
                ntaxa,            /**< Number of tip data elements (input) */
                partialCount, /**< Number of partials buffers to create (input) */
                compactTipCount,    /**< Number of compact state representation buffers to create (input) */
                stateCount,       /**< Number of states in the continuous-time Markov chain (input) */
                nsites,           /**< Number of site patterns to be handled by the instance (input) */
                eigenCount,               /**< Number of rate matrix eigen-decomposition buffers to allocate (input) */
                (calcderivs ? (3*edgeCount*eigenCount) : edgeCount*eigenCount),/**< Number of rate matrix buffers (input) */
                rateCategoryCount,/**< Number of rate categories */
                scaleCount*eigenCount,          /**< scaling buffers */
                &resource,        /**< List of potential resource on which this instance is allowed (input, NULL implies no restriction */
                1,                /**< Length of resourceList list (input) */
                0,         /**< Bit-flags indicating preferred implementation charactertistics, see BeagleFlags (input) */
                (ievectrans ? BEAGLE_FLAG_INVEVEC_TRANSPOSED : BEAGLE_FLAG_INVEVEC_STANDARD) |
                (logscalers ? BEAGLE_FLAG_SCALERS_LOG : BEAGLE_FLAG_SCALERS_RAW) |
                (eigencomplex ? BEAGLE_FLAG_EIGEN_COMPLEX : BEAGLE_FLAG_EIGEN_REAL) |
                (dynamicScaling ? BEAGLE_FLAG_SCALING_DYNAMIC : 0) | 
                (autoScaling ? BEAGLE_FLAG_SCALING_AUTO : 0) |
                (requireDoublePrecision ? BEAGLE_FLAG_PRECISION_DOUBLE : BEAGLE_FLAG_PRECISION_SINGLE) |
                (requireSSE ? BEAGLE_FLAG_VECTOR_SSE : BEAGLE_FLAG_VECTOR_NONE),      /**< Bit-flags indicating required implementation characteristics, see BeagleFlags (input) */
                &instDetails);
    if (instance < 0) {
        fprintf(stderr, "Failed to obtain BEAGLE instance\n\n");
        return;
    }
        
    int rNumber = instDetails.resourceNumber;
    fprintf(stdout, "Using resource %i:\n", rNumber);
    fprintf(stdout, "\tRsrc Name : %s\n",instDetails.resourceName);
    fprintf(stdout, "\tImpl Name : %s\n", instDetails.implName);    
    
    // fall back to no auto-scaling if the created instance does not report it
    if (!(instDetails.flags & BEAGLE_FLAG_SCALING_AUTO))
        autoScaling = false;
    
    // set the sequences for each tip using partial likelihood arrays
    gt_srand(randomSeed);   // fix the random seed...
    for(int i=0; i<ntaxa; i++)
    {
        if (i >= compactTipCount) {
            double* tmpPartials = getRandomTipPartials(nsites, stateCount);
            beagleSetTipPartials(instance, i, tmpPartials);
            free(tmpPartials);
        } else {
            int* tmpStates = getRandomTipStates(nsites, stateCount);
            beagleSetTipStates(instance, i, tmpStates);
            free(tmpStates);                
        }
    }
    
    // random category rates
#ifdef _WIN32
    std::vector<double> rates(rateCategoryCount);
#else
    double rates[rateCategoryCount];
#endif
    
    for (int i = 0; i < rateCategoryCount; i++) {
        rates[i] = gt_rand() / (double) GT_RAND_MAX;
    }
    
    beagleSetCategoryRates(instance, &rates[0]);
    
    // random pattern weights
    double* patternWeights = (double*) malloc(sizeof(double) * nsites);
    
    for (int i = 0; i < nsites; i++) {
        patternWeights[i] = gt_rand() / (double) GT_RAND_MAX;
    }    

    beagleSetPatternWeights(instance, patternWeights);
    
    free(patternWeights);
    
    // create base frequency array

#ifdef _WIN32
    std::vector<double> freqs(stateCount);
#else
    double freqs[stateCount];
#endif
    
    // create an array containing site category weights
#ifdef _WIN32
    std::vector<double> weights(rateCategoryCount);
#else
    double weights[rateCategoryCount];
#endif

    for (int eigenIndex=0; eigenIndex < eigenCount; eigenIndex++) {
        for (int i = 0; i < rateCategoryCount; i++) {
            weights[i] = gt_rand() / (double) GT_RAND_MAX;
        } 
    
        beagleSetCategoryWeights(instance, eigenIndex, &weights[0]);
    }
    
    // eigenvalue buffer: twice the size for the complex case
    double* eval;
    if (!eigencomplex)
        eval = (double*)malloc(sizeof(double)*stateCount);
    else
        eval = (double*)malloc(sizeof(double)*stateCount*2);
    double* evec = (double*)malloc(sizeof(double)*stateCount*stateCount);
    double* ivec = (double*)malloc(sizeof(double)*stateCount*stateCount);
    
    for (int eigenIndex=0; eigenIndex < eigenCount; eigenIndex++) {
        if (!eigencomplex && ((stateCount & (stateCount-1)) == 0)) {
            
            for (int i=0; i<stateCount; i++) {
                freqs[i] = 1.0 / stateCount;
            }

            // an eigen decomposition for the general state-space JC69 model
            // If stateCount = 2^n is a power-of-two, then Sylvester matrix H_n describes
            // the eigendecomposition of the infinitesimal rate matrix
             
            double* Hn = evec;
            Hn[0*stateCount+0] = 1.0; Hn[0*stateCount+1] =  1.0; 
            Hn[1*stateCount+0] = 1.0; Hn[1*stateCount+1] = -1.0; // H_1
         
            for (int k=2; k < stateCount; k <<= 1) {
                // H_n = H_1 (Kronecker product) H_{n-1}
                for (int i=0; i<k; i++) {
                    for (int j=i; j<k; j++) {
                        double Hijold = Hn[i*stateCount + j];
                        Hn[i    *stateCount + j + k] =  Hijold;
                        Hn[(i+k)*stateCount + j    ] =  Hijold;
                        Hn[(i+k)*stateCount + j + k] = -Hijold;
                        
                        Hn[j    *stateCount + i + k] = Hn[i    *stateCount + j + k];
                        Hn[(j+k)*stateCount + i    ] = Hn[(i+k)*stateCount + j    ];
                        Hn[(j+k)*stateCount + i + k] = Hn[(i+k)*stateCount + j + k];                                
                    }
                }        
            }
            
            // Since evec is Hadamard, ivec = (evec)^t / stateCount;    
            for (int i=0; i<stateCount; i++) {
                for (int j=i; j<stateCount; j++) {
                    ivec[i*stateCount+j] = evec[j*stateCount+i] / stateCount;
                    ivec[j*stateCount+i] = ivec[i*stateCount+j]; // Symmetric
                }
            }
           
            eval[0] = 0.0;
            for (int i=1; i<stateCount; i++) {
                eval[i] = -stateCount / (stateCount - 1.0);
            }
       
        } else if (!eigencomplex) {
            // general case: random reversible rate matrix, decomposed numerically
            for (int i=0; i<stateCount; i++) {
                freqs[i] = gt_rand() / (double) GT_RAND_MAX;
            }
        
            double** qmat=New2DArray<double>(stateCount, stateCount);    
            double* relNucRates = new double[(stateCount * stateCount - stateCount) / 2];
            
            int rnum=0;
            for(int i=0;i<stateCount;i++){
                for(int j=i+1;j<stateCount;j++){
                    relNucRates[rnum] = gt_rand() / (double) GT_RAND_MAX;
                    qmat[i][j]=relNucRates[rnum] * freqs[j];
                    qmat[j][i]=relNucRates[rnum] * freqs[i];
                    rnum++;
                }
            }

            //set diags to sum rows to 0
            double sum;
            for(int x=0;x<stateCount;x++){
                sum=0.0;
                for(int y=0;y<stateCount;y++){
                    if(x!=y) sum+=qmat[x][y];
                }
                qmat[x][x]=-sum;
            } 
            
            double* eigvalsimag=new double[stateCount];
            double** eigvecs=New2DArray<double>(stateCount, stateCount);//eigenvecs
            double** teigvecs=New2DArray<double>(stateCount, stateCount);//temp eigenvecs
            double** inveigvecs=New2DArray<double>(stateCount, stateCount);//inv eigenvecs    
            int* iwork=new int[stateCount];
            double* work=new double[stateCount];
            
            EigenRealGeneral(stateCount, qmat, eval, eigvalsimag, eigvecs, iwork, work);
            memcpy(*teigvecs, *eigvecs, stateCount*stateCount*sizeof(double));
            InvertMatrix(teigvecs, stateCount, work, iwork, inveigvecs);
            
            for(int x=0;x<stateCount;x++){
                for(int y=0;y<stateCount;y++){
                    evec[x * stateCount + y] = eigvecs[x][y];
                    if (ievectrans)
                        ivec[x * stateCount + y] = inveigvecs[y][x];
                    else
                        ivec[x * stateCount + y] = inveigvecs[x][y];
                }
            } 
            
            // buffers obtained with new[] must be released with delete[]
            Delete2DArray(qmat);
            delete[] relNucRates;
            
            delete[] eigvalsimag;
            Delete2DArray(eigvecs);
            Delete2DArray(teigvecs);
            Delete2DArray(inveigvecs);
            delete[] iwork;
            delete[] work;
        } else if (eigencomplex && stateCount==4 && eigenCount==1) {
            // create base frequency array
            double temp_freqs[4] = { 0.25, 0.25, 0.25, 0.25 };
            
            // an eigen decomposition for the 4-state 1-step circulant infinitesimal generator
            double temp_evec[4 * 4] = {
                -0.5,  0.6906786606674509,   0.15153543380548623, 0.5,
                0.5, -0.15153543380548576,  0.6906786606674498,  0.5,
                -0.5, -0.6906786606674498,  -0.15153543380548617, 0.5,
                0.5,  0.15153543380548554, -0.6906786606674503,  0.5
            };
            
            double temp_ivec[4 * 4] = {
                -0.5,  0.5, -0.5,  0.5,
                0.6906786606674505, -0.15153543380548617, -0.6906786606674507,   0.15153543380548645,
                0.15153543380548568, 0.6906786606674509,  -0.15153543380548584, -0.6906786606674509,
                0.5,  0.5,  0.5,  0.5
            };
            
            double temp_eval[8] = { -2.0, -1.0, -1.0, 0, 0, 1, -1, 0 };
            
            for(int x=0;x<stateCount;x++){
                freqs[x] = temp_freqs[x];
                eval[x] = temp_eval[x];
                eval[x+stateCount] = temp_eval[x+stateCount];
                for(int y=0;y<stateCount;y++){
                    evec[x * stateCount + y] = temp_evec[x * stateCount + y];
                    if (ievectrans)
                        ivec[x * stateCount + y] = temp_ivec[x + y * stateCount];
                    else
                        ivec[x * stateCount + y] = temp_ivec[x * stateCount + y];
                }
            } 
        } else {
            abort("should not be here");
        }
            
        beagleSetStateFrequencies(instance, eigenIndex, &freqs[0]);
        
        if (!setmatrix) {
            // set the Eigen decomposition
            beagleSetEigenDecomposition(instance, eigenIndex, &evec[0], &ivec[0], &eval[0]);
        }
    }
    
    free(eval);
    free(evec);
    free(ivec);


    
    // a list of indices and edge lengths
    // (probability matrices first, then first and second derivative matrices)
    int* edgeIndices = new int[edgeCount*eigenCount];
    int* edgeIndicesD1 = new int[edgeCount*eigenCount];
    int* edgeIndicesD2 = new int[edgeCount*eigenCount];
    for(int i=0; i<edgeCount*eigenCount; i++) {
        edgeIndices[i]=i;
        edgeIndicesD1[i]=(edgeCount*eigenCount)+i;
        edgeIndicesD2[i]=2*(edgeCount*eigenCount)+i;
    }
    double* edgeLengths = new double[edgeCount];
    for(int i=0; i<edgeCount; i++) {
        edgeLengths[i]=gt_rand() / (double) GT_RAND_MAX;
    }
    
    // create a list of partial likelihood update operations
    // the order is [dest, destScaling, source1, matrix1, source2, matrix2]
    int* operations = new int[(internalCount)*BEAGLE_OP_COUNT*eigenCount];
    int* scalingFactorsIndices = new int[(internalCount)*eigenCount]; // internal nodes
    for(int i=0; i<internalCount*eigenCount; i++){
        operations[BEAGLE_OP_COUNT*i+0] = ntaxa+i;
        operations[BEAGLE_OP_COUNT*i+1] = (dynamicScaling ? i : BEAGLE_OP_NONE);
        operations[BEAGLE_OP_COUNT*i+2] = (dynamicScaling ? i : BEAGLE_OP_NONE);
        
        // children with an index below ntaxa are shared by all eigen partitions,
        // the remaining ones are offset per partition
        int child1Index;
        if (((i % internalCount)*2) < ntaxa)
            child1Index = (i % internalCount)*2;
        else
            child1Index = i*2 - internalCount * (int)(i / internalCount);
        operations[BEAGLE_OP_COUNT*i+3] = child1Index;
        operations[BEAGLE_OP_COUNT*i+4] = child1Index;

        int child2Index;
        if (((i % internalCount)*2+1) < ntaxa)
            child2Index = (i % internalCount)*2+1;
        else
            child2Index = i*2+1 - internalCount * (int)(i / internalCount);
        operations[BEAGLE_OP_COUNT*i+5] = child2Index;
        operations[BEAGLE_OP_COUNT*i+6] = child2Index;

        scalingFactorsIndices[i] = i;
        
//        printf("i %d dest %d c1 %d c2 %d\n", i, ntaxa+i, child1Index, child2Index);
        
        if (autoScaling)
            scalingFactorsIndices[i] += ntaxa;
    }   

    int* rootIndices = new int[eigenCount];
    int* lastTipIndices = new int[eigenCount];
    int* categoryWeightsIndices = new int[eigenCount];
    int* stateFrequencyIndices = new int[eigenCount];
    int* cumulativeScalingFactorIndices = new int[eigenCount];
    
    for (int eigenIndex=0; eigenIndex < eigenCount; eigenIndex++) {
        rootIndices[eigenIndex] = ntaxa+(internalCount*(eigenIndex+1))-1;//ntaxa*2-2;
        lastTipIndices[eigenIndex] = ntaxa-1;
        categoryWeightsIndices[eigenIndex] = eigenIndex;
        stateFrequencyIndices[eigenIndex] = 0;
        cumulativeScalingFactorIndices[eigenIndex] = ((manualScaling || dynamicScaling) ? (scaleCount*eigenCount-1)-eigenCount+eigenIndex+1 : BEAGLE_OP_NONE);
        
        if (dynamicScaling)
            beagleResetScaleFactors(instance, cumulativeScalingFactorIndices[eigenIndex]);
    }

    // start timing!
    struct timeval time1, time2, time3, time4, time5;
    // initialised so that the report below is well defined even if no repetition runs
    double bestTimeUpdateTransitionMatrices = 0.0;
    double bestTimeUpdatePartials = 0.0;
    double bestTimeAccumulateScaleFactors = 0.0;
    double bestTimeCalculateRootLogLikelihoods = 0.0;
    double bestTimeTotal = 0.0;
    
    double logL = 0.0;
    double deriv1 = 0.0;
    double deriv2 = 0.0;
    
    double previousLogL = 0.0;
    double previousDeriv1 = 0.0;
    double previousDeriv2 = 0.0;

    for (int i=0; i<nreps; i++){
        // manual scaling: write scale factors on rescale reps, read them on all other reps;
        // the operations only need rewriting on a rescale rep and on the rep right after it
        if (manualScaling && (!(i % rescaleFrequency) || !((i-1) % rescaleFrequency))) {
            for(int j=0; j<internalCount*eigenCount; j++){
                operations[BEAGLE_OP_COUNT*j+1] = (((manualScaling && !(i % rescaleFrequency))) ? j : BEAGLE_OP_NONE);
                operations[BEAGLE_OP_COUNT*j+2] = (((manualScaling && (i % rescaleFrequency))) ? j : BEAGLE_OP_NONE);
            }
        }
        
        gettimeofday(&time1,NULL);

        for (int eigenIndex=0; eigenIndex < eigenCount; eigenIndex++) {
            if (!setmatrix) {
                // tell BEAGLE to populate the transition matrices for the above edge lengths
                beagleUpdateTransitionMatrices(instance,     // instance
                                               eigenIndex,             // eigenIndex
                                               &edgeIndices[eigenIndex*edgeCount],   // probabilityIndices
                                               (calcderivs ? &edgeIndicesD1[eigenIndex*edgeCount] : NULL), // firstDerivativeIndices
                                               (calcderivs ? &edgeIndicesD2[eigenIndex*edgeCount] : NULL), // secondDerivativeIndices
                                               edgeLengths,   // edgeLengths
                                               edgeCount);            // count
            } else {
                // fill every transition matrix of this partition with random values
                double* inMatrix = new double[stateCount*stateCount*rateCategoryCount];
                for (int matrixIndex=0; matrixIndex < edgeCount; matrixIndex++) {
                    for(int z=0;z<rateCategoryCount;z++){
                        for(int x=0;x<stateCount;x++){
                            for(int y=0;y<stateCount;y++){
                                inMatrix[z*stateCount*stateCount + x*stateCount + y] = gt_rand() / (double) GT_RAND_MAX;
                            }
                        } 
                    }
                    beagleSetTransitionMatrix(instance, edgeIndices[eigenIndex*edgeCount + matrixIndex], inMatrix, 1);
                    if (calcderivs) {
                        beagleSetTransitionMatrix(instance, edgeIndicesD1[eigenIndex*edgeCount + matrixIndex], inMatrix, 0);
                        beagleSetTransitionMatrix(instance, edgeIndicesD2[eigenIndex*edgeCount + matrixIndex], inMatrix, 0);
                    }
                }
                // the scratch matrix was previously leaked on every repetition
                delete[] inMatrix;
            }
        }

        gettimeofday(&time2, NULL);
        
        // update the partials
        beagleUpdatePartials( instance,      // instance
                        (BeagleOperation*)operations,     // operations
                        internalCount*eigenCount,              // operationCount
                        (dynamicScaling ? internalCount : BEAGLE_OP_NONE));             // cumulative scaling index

        gettimeofday(&time3, NULL);

        int scalingFactorsCount = internalCount;
                
        for (int eigenIndex=0; eigenIndex < eigenCount; eigenIndex++) {
            if (manualScaling && !(i % rescaleFrequency)) {
                beagleResetScaleFactors(instance,
                                        cumulativeScalingFactorIndices[eigenIndex]);
                
                beagleAccumulateScaleFactors(instance,
                                       &scalingFactorsIndices[eigenIndex*internalCount],
                                       scalingFactorsCount,
                                       cumulativeScalingFactorIndices[eigenIndex]);
            } else if (autoScaling) {
                beagleAccumulateScaleFactors(instance, &scalingFactorsIndices[eigenIndex*internalCount], scalingFactorsCount, BEAGLE_OP_NONE);
            }
        }
        
        gettimeofday(&time4, NULL);
                
        // calculate the site likelihoods at the root node
        if (!unrooted) {
            beagleCalculateRootLogLikelihoods(instance,               // instance
                                        rootIndices,// bufferIndices
                                        categoryWeightsIndices,                // weights
                                        stateFrequencyIndices,                 // stateFrequencies
                                        cumulativeScalingFactorIndices,
                                        eigenCount,                      // count
                                        &logL);         // outLogLikelihoods
        } else {
            // calculate the site likelihoods across the edge between the root and the last tip
            beagleCalculateEdgeLogLikelihoods(instance,               // instance
                                              rootIndices,// bufferIndices
                                              lastTipIndices,
                                              lastTipIndices,
                                              (calcderivs ? edgeIndicesD1 : NULL),
                                              (calcderivs ? edgeIndicesD2 : NULL),
                                              categoryWeightsIndices,                // weights
                                              stateFrequencyIndices,                 // stateFrequencies
                                              cumulativeScalingFactorIndices,
                                              eigenCount,                      // count
                                              &logL,    // outLogLikelihood
                                              (calcderivs ? &deriv1 : NULL),
                                              (calcderivs ? &deriv2 : NULL));
        }
        // end timing!
        gettimeofday(&time5,NULL);
        
        // keep the smallest time seen for every phase
        if (i == 0 || getTimeDiff(time1, time2) < bestTimeUpdateTransitionMatrices)
            bestTimeUpdateTransitionMatrices = getTimeDiff(time1, time2);
        if (i == 0 || getTimeDiff(time2, time3) < bestTimeUpdatePartials)
            bestTimeUpdatePartials = getTimeDiff(time2, time3);
        if (i == 0 || getTimeDiff(time3, time4) < bestTimeAccumulateScaleFactors)
            bestTimeAccumulateScaleFactors = getTimeDiff(time3, time4);
        // compare against the root-likelihood best time (was compared against the
        // transition-matrix best time by mistake)
        if (i == 0 || getTimeDiff(time4, time5) < bestTimeCalculateRootLogLikelihoods)
            bestTimeCalculateRootLogLikelihoods = getTimeDiff(time4, time5);
        if (i == 0 || getTimeDiff(time1, time5) < bestTimeTotal)
            bestTimeTotal = getTimeDiff(time1, time5);
        
        // x - x is only 0.0 for finite x, so this rejects NaN and +/-inf
        if (!(logL - logL == 0.0))
            abort("error: invalid lnL");
        
        // NOTE(review): `abs` resolves to whichever overload is in scope here; confirm it is
        // the floating-point one, otherwise differences below 1.0 are truncated to 0
        if (i > 0 && abs(logL - previousLogL) > MAX_DIFF)
            abort("error: large lnL difference between reps");
        
        if (calcderivs) {
            if (!(deriv1 - deriv1 == 0.0) || !(deriv2 - deriv2 == 0.0))
                abort("error: invalid deriv");
            
            if (i > 0 && ((abs(deriv1 - previousDeriv1) > MAX_DIFF) || (abs(deriv2 - previousDeriv2) > MAX_DIFF)) )
                abort("error: large deriv difference between reps");
        }

        previousLogL = logL;
        previousDeriv1 = deriv1;
        previousDeriv2 = deriv2;        
    }

    // resource 0 provides the reference times handed to printTiming below
    if (resource == 0) {
        cpuTimeUpdateTransitionMatrices = bestTimeUpdateTransitionMatrices;
        cpuTimeUpdatePartials = bestTimeUpdatePartials;
        cpuTimeAccumulateScaleFactors = bestTimeAccumulateScaleFactors;
        cpuTimeCalculateRootLogLikelihoods = bestTimeCalculateRootLogLikelihoods;
        cpuTimeTotal = bestTimeTotal;
    }
    
    if (!calcderivs)
        fprintf(stdout, "logL = %.5f \n", logL);
    else
        fprintf(stdout, "logL = %.5f d1 = %.5f d2 = %.5f\n", logL, deriv1, deriv2);
    
    std::cout.setf(std::ios::showpoint);
    std::cout.setf(std::ios::floatfield, std::ios::fixed);
    int timePrecision = 6;
    int speedupPrecision = 2;
    int percentPrecision = 2;
    std::cout << "best run: ";
    printTiming(bestTimeTotal, timePrecision, resource, cpuTimeTotal, speedupPrecision, 0, 0, 0);
    if (fullTiming) {
        std::cout << " transMats:  ";
        printTiming(bestTimeUpdateTransitionMatrices, timePrecision, resource, cpuTimeUpdateTransitionMatrices, speedupPrecision, 1, bestTimeTotal, percentPrecision);
        std::cout << " partials:   ";
        printTiming(bestTimeUpdatePartials, timePrecision, resource, cpuTimeUpdatePartials, speedupPrecision, 1, bestTimeTotal, percentPrecision);
        if (manualScaling || autoScaling) {
            std::cout << " accScalers: ";
            printTiming(bestTimeAccumulateScaleFactors, timePrecision, resource, cpuTimeAccumulateScaleFactors, speedupPrecision, 1, bestTimeTotal, percentPrecision);
        }
        std::cout << " rootLnL:    ";
        printTiming(bestTimeCalculateRootLogLikelihoods, timePrecision, resource, cpuTimeCalculateRootLogLikelihoods, speedupPrecision, 1, bestTimeTotal, percentPrecision);
    }
    std::cout << "\n";
    
    // release the index, length and operation arrays (previously leaked on every call)
    delete[] edgeIndices;
    delete[] edgeIndicesD1;
    delete[] edgeIndicesD2;
    delete[] edgeLengths;
    delete[] operations;
    delete[] scalingFactorsIndices;
    delete[] rootIndices;
    delete[] lastTipIndices;
    delete[] categoryWeightsIndices;
    delete[] stateFrequencyIndices;
    delete[] cumulativeScalingFactorIndices;
    
    beagleFinalizeInstance(instance);
}