Esempio n. 1
0
void llsolve(Matrix A, Vector x)
{
  int one=1;
  int info;
  char uplo='U';
  dpotrf(&uplo,&x->len,A->data[0],&x->len,&info);
  dpotrs(&uplo,&x->len,&one,A->data[0],&x->len,x->data,&x->len,&info);
}
Esempio n. 2
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{                                        /* Matlab call: X = solve_chol(A, B) */
  doublereal *C;
  integer n, m, q;

  if (nrhs != 2 || nlhs > 1)                                   /* check input */
    mexErrMsgTxt("Usage: X = solve_chol(A, B)");
  n = mxGetM(prhs[0]);                    /* number of rows in inputs A and B */
  if (n != mxGetN(prhs[0]))
    mexErrMsgTxt("First argument matrix must be square.");
  if (n != mxGetM(prhs[1]))
    mexErrMsgTxt("Both argument matrices must have the same number of rows.");
  m = mxGetN(prhs[1]);                  /* number of colums in second input B */

  plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL);         /* space for output X */
  C = mxGetPr(plhs[0]);

  if (n == 0) return;             /* if argument was empty matrix, do no more */
  memcpy( C, mxGetPr(prhs[1]), m*n*mxGetElementSize(plhs[0]) );  /* copy data */
  dpotrs("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q);       /* solve system */
  if (q < 0) mexErrMsgTxt("Error: illegal input to solve_chol");
}
Esempio n. 3
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  double *C;
  mwSignedIndex n, m, q;

  if (nrhs != 2 || nlhs > 1)                              /* check the input */
    mexErrMsgTxt("Usage: X = solve_chol(R, B)");
  n = mxGetN(prhs[0]);
  if (n != mxGetM(prhs[0]))
    mexErrMsgTxt("Error: First argument matrix must be square");
  if (n != mxGetM(prhs[1]))
    mexErrMsgTxt("Error: First and second argument matrices must have same number of rows");
  m = mxGetN(prhs[1]);

  plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL); /* allocate space for output */
  C = mxGetPr(plhs[0]);

  if (n==0) return;              /* if argument was empty matrix, do no more */
    memcpy(C,mxGetPr(prhs[1]),n*m*sizeof(double));   /* copy argument matrix */
    dpotrs("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q);   /* solve system */
  if (q > 0)
    mexErrMsgTxt("Error: illegal input to solve_chol");
}
Esempio n. 4
0
void vHRedLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1;
	double Fitparams[numfit];
	double *EFAC;
	double EQUAD, redamp, redalpha;
	int pcount=0;

// 	printf("here1\n");
	for(int p=0;p<ndim;p++){
// 		printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]);
		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}
// 	printf("here1.5\n");
	for(int p=0;p < numfit; p++){
		Fitparams[p]=Cube[p];
		pcount++;
// 		printf("param: %i %g \n",p,Fitparams[p]);
	}

	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[1];
		EFAC[0]=1;
// 		
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[1];
		EFAC[0]=Cube[pcount];
		pcount++;
		
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->numFitEFAC];
		for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				

	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=0;
// 		printf("EQUAD: %g \n",EQUAD);
	}
	else{
		
		EQUAD=pow(10.0,2*Cube[pcount]);
		pcount++;
//		printf("E: %g %g \n",EQUAD,EFAC[0]);

	}


	redamp=Cube[pcount];
	pcount++;
	redalpha=Cube[pcount];
	pcount++;
  	

	double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];
	double *Diffvec=new double[((MNStruct *)context)->pulse->nobs];
	dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');
	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
	}


	double secday=24*60*60;
	double LongestPeriod=1.0/pow(10.0,-5);
	double flo=1.0/LongestPeriod;

	double modelalpha=redalpha;
	double gwamp=pow(10.0,redamp);
	double gwampsquared=gwamp*gwamp*(pow((365.25*secday),2)/(12*M_PI*M_PI))*(pow(365.25,(1-modelalpha)))/(pow(flo,(modelalpha-1)));

	double timdiff=0;

	double covconst=gsl_sf_gamma(1-modelalpha)*sin(0.5*M_PI*modelalpha);
// 	printf("constants: %g %g \n",gwampsquared,covconst);


	
	double **CovMatrix = new double*[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs;o1++)CovMatrix[o1]=new double[((MNStruct *)context)->pulse->nobs];

	for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){

		for(int o2=0;o2<((MNStruct *)context)->pulse->nobs; o2++){
			timdiff=((MNStruct *)context)->pulse->obsn[o1].bat-((MNStruct *)context)->pulse->obsn[o2].bat;	
			double tau=2.0*M_PI*fabs(timdiff);
			double covsum=0;

			for(int k=0; k <=10; k++){
				covsum=covsum+pow(-1.0,k)*(pow(flo*tau,2*k))/(iter_factorial(2*k)*(2*k+1-modelalpha));

			}

			CovMatrix[o1][o2]=gwampsquared*(covconst*pow((flo*tau),(modelalpha-1)) - covsum);
// 			printf("%i %i %g %g %g\n",o1,o2,CovMatrix[o1][o2],fabs(timdiff),covsum);

			if(o1==o2){
				CovMatrix[o1][o2] += pow(((((MNStruct *)context)->pulse->obsn[o1].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o1]],2) + EQUAD;
			}

		}
	}

	double covdet=0;
	double *WorkDiffvec = new double[((MNStruct *)context)->pulse->nobs];
	for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){
		WorkDiffvec[o1]=Diffvec[o1];
	}
	dpotrf(CovMatrix, ((MNStruct *)context)->pulse->nobs, covdet);
        dpotrs(CovMatrix, WorkDiffvec, ((MNStruct *)context)->pulse->nobs);


	double Chisq=0;


	for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){
		Chisq += Diffvec[o1]*WorkDiffvec[o1];
	}

	if(isnan(covdet) || isinf(covdet) || isnan(Chisq) || isinf(Chisq)){

		lnew=-pow(10.0,200);
// 		printf("red amp and alpha %g %g\n",redamp,redalpha);
// 		printf("Like: %g %g %g \n",lnew,Chisq,covdet);
		
	}
	else{
		lnew = -0.5*(((MNStruct *)context)->pulse->nobs*log(2*M_PI) + covdet + Chisq);	
// 		printf("red amp and alpha %g %g\n",redamp,redalpha);


	}
// 	endClock = clock();
// //   	printf("Finishing off: time taken = %.2f (s)\n",(endClock-startClock)/(float)CLOCKS_PER_SEC);
	

	delete[] EFAC;
	for(int o=0;o<((MNStruct *)context)->pulse->nobs;o++){delete[] CovMatrix[o];}
	delete[] CovMatrix;
	delete[] WorkDiffvec;
	delete[] Diffvec;
	delete[] Fitvec;
	printf("Like: %g %g %g \n",lnew,Chisq,covdet);

}
Esempio n. 5
0
void WhiteMarginLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1;
	double Fitparams[numfit];
	double *EFAC;
	double EQUAD;
	int pcount=0;
	
// 	printf("here1\n");
	for(int p=0;p<ndim;p++){
// 		printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]);
		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}
// 	printf("here1.5\n");
	for(int p=0;p < numfit; p++){
		Fitparams[p]=Cube[p];
		pcount++;
// 		printf("param: %i %g \n",p,Fitparams[p]);
	}

// 	printf("here3\n");
	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[1];
		EFAC[0]=1;
// 		
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[1];
		EFAC[0]=Cube[pcount];
		pcount++;
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->numFitEFAC];
		for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				
// 	printf("here4\n");
	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=0;
// 		printf("E: %g %g\n",EFAC[0],EQUAD);
	}
	else{
		
		EQUAD=pow(10.0,2*Cube[pcount]);
		pcount++;

	}

	double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];
	double *Diffvec=new double[((MNStruct *)context)->pulse->nobs];
	dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');
	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
	}


	double *Noise=new double[((MNStruct *)context)->pulse->nobs];
	double *GDiffvec=new double[((MNStruct *)context)->Gsize];

	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Noise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD;
	}

	double **NG = new double*[((MNStruct *)context)->pulse->nobs]; for (int k=0; k<((MNStruct *)context)->pulse->nobs; k++) NG[k] = new double[((MNStruct *)context)->Gsize];
	for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){
		for(int j=0;j<((MNStruct *)context)->Gsize; j++){
			NG[i][j]=((MNStruct *)context)->GMatrix[i][j]*Noise[i];

		}
	}

	double** GG = new double*[((MNStruct *)context)->Gsize]; for (int k=0; k<((MNStruct *)context)->Gsize; k++) GG[k] = new double[((MNStruct *)context)->Gsize];

	dgemm(((MNStruct *)context)->GMatrix, NG,GG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'T','N');


	dgemv(((MNStruct *)context)->GMatrix,Diffvec,GDiffvec,((MNStruct *)context)->pulse->nobs,((MNStruct *)context)->Gsize,'T');



	double detN=0;
	double *WorkGDiffvec = new double[((MNStruct *)context)->Gsize];
	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		WorkGDiffvec[o1]=GDiffvec[o1];
	}
	dpotrf(GG, ((MNStruct *)context)->Gsize, detN);
        dpotrs(GG, WorkGDiffvec, ((MNStruct *)context)->Gsize);



	double Chisq=0;
	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		Chisq += GDiffvec[o1]*WorkGDiffvec[o1];
	}


	if(isnan(detN) || isinf(detN) || isnan(Chisq) || isinf(Chisq)){

		lnew=-pow(10.0,200);
	
	}
	else{
		lnew = -0.5*(((MNStruct *)context)->pulse->nobs*log(2*M_PI) + detN + Chisq);	
	}

	//printf("lnew: %g %g %g \n", lnew, detN, Chisq);

	delete[] EFAC;
	delete[] Fitvec;
	delete[] Diffvec;
	delete[] GDiffvec;
	delete[] WorkGDiffvec;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[] NG[j];
	}
	delete[] NG;

	for (int j = 0; j < ((MNStruct *)context)->Gsize; j++){
		delete[]GG[j];
	}
	delete[] GG;


}
Esempio n. 6
0
void LRedGPULogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{
	//printf("hereNM");
	clock_t startClock,endClock;

	double *EFAC;
	double *EQUAD;
	int pcount=0;

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps;
	long double LDparams[numfit];
	double Fitparams[numfit];
	double *Resvec=new double[((MNStruct *)context)->pulse->nobs];

	for(int p=0;p<ndim;p++){

		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}


	if(((MNStruct *)context)->doLinear==0){
	
		for(int p=0;p< ((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps; p++){
			LDparams[p]=Cube[p]*(((MNStruct *)context)->LDpriors[p][1]) + (((MNStruct *)context)->LDpriors[p][0]);
		}

		double phase=(double)LDparams[0];
		pcount++;
		for(int p=1;p<((MNStruct *)context)->numFitTiming;p++){
			((MNStruct *)context)->pulse->param[((MNStruct *)context)->TempoFitNums[p][0]].val[((MNStruct *)context)->TempoFitNums[p][1]] = LDparams[pcount];	
			pcount++;
		}
		for(int p=0;p<((MNStruct *)context)->numFitJumps;p++){
			((MNStruct *)context)->pulse->jumpVal[((MNStruct *)context)->TempoJumpNums[p]]= LDparams[pcount];
			pcount++;
		}
		
		if(((MNStruct *)context)->pulse->param[param_dmmodel].fitFlag[0] == 1){
			int DMnum=((MNStruct *)context)->pulse[0].dmoffsDMnum;
			for(int i =0; i < DMnum; i++){
				((MNStruct *)context)->pulse[0].dmoffsDM[i]=Cube[ndim-DMnum+i];
			}
		}
		
		
		fastformBatsAll(((MNStruct *)context)->pulse,((MNStruct *)context)->numberpulsars);       /* Form Barycentric arrival times */
		formResiduals(((MNStruct *)context)->pulse,((MNStruct *)context)->numberpulsars,1);       /* Form residuals */
		
		for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
			Resvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].residual+phase;
		}
	
	}
	else if(((MNStruct *)context)->doLinear==1){
	
		for(int p=0;p < numfit; p++){
			Fitparams[p]=Cube[p];
			pcount++;
		}
		
		double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];

		dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');
		for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
			Resvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
		}
		
		delete[] Fitvec;
	}

	if(((MNStruct *)context)->incStep > 0){
		for(int i = 0; i < ((MNStruct *)context)->incStep; i++){
			double StepAmp = Cube[pcount];
			pcount++;
			double StepTime = Cube[pcount];
			pcount++;
			for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){
				double time = (double)((MNStruct *)context)->pulse->obsn[o1].bat;
				if(time > StepTime){
					Resvec[o1] += StepAmp;
				}
			}
		}
	}

	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[((MNStruct *)context)->systemcount];
		for(int o=0;o<((MNStruct *)context)->systemcount; o++){
			EFAC[o]=1;
		}
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[((MNStruct *)context)->systemcount];
		for(int o=0;o<((MNStruct *)context)->systemcount; o++){
			EFAC[o]=Cube[pcount];
		}
		pcount++;
		
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->systemcount];
		for(int p=0;p< ((MNStruct *)context)->systemcount; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				

	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=new double[((MNStruct *)context)->systemcount];
		for(int o=0;o<((MNStruct *)context)->systemcount; o++){
			EQUAD[o]=0;
		}
	}
	else if(((MNStruct *)context)->numFitEQUAD == 1){
		EQUAD=new double[((MNStruct *)context)->systemcount];
                for(int o=0;o<((MNStruct *)context)->systemcount; o++){
			EQUAD[o]=pow(10.0,2*Cube[pcount]);
		}
		pcount++;
	}
	else if(((MNStruct *)context)->numFitEQUAD > 1){
                EQUAD=new double[((MNStruct *)context)->systemcount];
                for(int o=0;o<((MNStruct *)context)->systemcount; o++){
                        EQUAD[o]=pow(10.0,2*Cube[pcount]);
			pcount++;
                }
        }

  	
        int FitRedCoeff=2*(((MNStruct *)context)->numFitRedCoeff);
		int FitDMCoeff=2*(((MNStruct *)context)->numFitDMCoeff);

        int totCoeff=0;
        if(((MNStruct *)context)->incRED != 0)totCoeff+=FitRedCoeff;
        if(((MNStruct *)context)->incDM != 0)totCoeff+=FitDMCoeff;

        double *powercoeff=new double[totCoeff];
        for(int o=0;o<totCoeff; o++){
                powercoeff[o]=0;
        }

	double *WorkNoise=new double[((MNStruct *)context)->pulse->nobs];

	double tdet=0;
	double timelike=0;



	double *BATvec=new double[((MNStruct *)context)->pulse->nobs];

	if(((MNStruct *)context)->whitemodel == 0){
		for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
			//	printf("Noise %i %g %g %g\n",m1,Noise[m1],EFAC[flagList[m1]],EQUAD);
				WorkNoise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD[((MNStruct *)context)->sysFlags[o]];
				
				tdet=tdet+log(WorkNoise[o]);
				WorkNoise[o]=1.0/WorkNoise[o];
				timelike=timelike+pow(Resvec[o],2)*WorkNoise[o];
	
				BATvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].bat;
	
	
		}
	}
	else if(((MNStruct *)context)->whitemodel == 1){
		for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
			//	printf("Noise %i %g %g %g\n",m1,Noise[m1],EFAC[flagList[m1]],EQUAD);
				WorkNoise[o]=EFAC[((MNStruct *)context)->sysFlags[o]]*EFAC[((MNStruct *)context)->sysFlags[o]]*(pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6)),2) + EQUAD[((MNStruct *)context)->sysFlags[o]]);
				
				tdet=tdet+log(WorkNoise[o]);
				WorkNoise[o]=1.0/WorkNoise[o];
				timelike=timelike+pow(Resvec[o],2)*WorkNoise[o];
	
				BATvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].bat;
	
	
		}
	}

	double *NFd = new double[totCoeff];
        double **FNF=new double*[totCoeff];
        for(int i=0;i<totCoeff;i++){
                FNF[i]=new double[totCoeff];
        }


	double start,end;
	int go=0;
	for (int i=0;i<((MNStruct *)context)->pulse->nobs;i++)
	  {
	    if (((MNStruct *)context)->pulse->obsn[i].deleted==0)
	      {
		if (go==0)
		  {
		    go = 1;
		    start = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		    end  = start;
		  }
		else
		  {
		    if (start > (double)((MNStruct *)context)->pulse->obsn[i].bat)
		      start = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		    if (end < (double)((MNStruct *)context)->pulse->obsn[i].bat)
		      end = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		  }
	      }
	  }

	double maxtspan=end-start;


	double *freqs = new double[totCoeff];

        double *DMVec=new double[((MNStruct *)context)->pulse->nobs];
        double DMKappa = 2.410*pow(10.0,-16);
        int startpos=0;
        double freqdet=0;
	

        if(((MNStruct *)context)->incRED==2){
        
        	if(((MNStruct *)context)->incFloatRed == 0){
                for (int i=0; i<FitRedCoeff/2; i++){
                        int pnum=pcount;
                        double pc=Cube[pcount];
                        freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan;
                        freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i];

                        powercoeff[i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[i+FitRedCoeff/2]=powercoeff[i];
                        freqdet=freqdet+2*log(powercoeff[i]);
                        pcount++;
                }
            }
            else if(((MNStruct *)context)->incFloatRed >0){

                for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){
                
                		int pnum=pcount;
                        double pc=Cube[pcount];
                        freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan;
                        freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i];

                        powercoeff[i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[i+FitRedCoeff/2]=powercoeff[i];
                        freqdet=freqdet+2*log(powercoeff[i]);
                        pcount++;
                }
                
                 for (int i=FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed; i<FitRedCoeff/2; i++){
                		//printf("Freq: %g \n", Cube[pcount]);
                		
                        freqs[startpos+i]=Cube[pcount]/maxtspan;
                        freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i];
                         pcount++;
                         
                        int pnum=pcount;
                        double pc=Cube[pcount];
                        pcount++;
                        

                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitRedCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);
                       
                }
                
            }
                
                
                startpos=FitRedCoeff;

        }
        else if(((MNStruct *)context)->incRED==3){

			freqdet=0;
			
			for(int pl = 0; pl < ((MNStruct *)context)->numFitRedPL; pl ++){
			
                double redamp=Cube[pcount];
                pcount++;
                double redindex=Cube[pcount];
                pcount++;

                
   			   redamp=pow(10.0, redamp);

				for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){

		                freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan;
		                freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i];
		                
		                double PLcomp=redamp*redamp*pow((freqs[i]*365.25),-1.0*redindex)/(maxtspan*24*60*60);

		                powercoeff[i]+= PLcomp;
		                powercoeff[i+FitRedCoeff/2]+= PLcomp;
				}
			}
				
				
			for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){
				freqdet=freqdet+2*log(powercoeff[i]);
		//		printf("%i %g %g \n",i,powercoeff[i], freqdet);
			}

                 for (int i=FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed; i<FitRedCoeff/2; i++){
                                
                    //    Cube[pcount]=floor(Cube[pcount]);
                        freqs[startpos+i]=Cube[pcount]/maxtspan;
                        freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i];
                        pcount++;

                        int pnum=pcount;
                        double pc=Cube[pcount];
                        pcount++;


                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitRedCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);

                }

                startpos=FitRedCoeff;

        }
// 		printf("DM\n");
        double nlist[((MNStruct *)context)->incFloatDM][2];
	if(((MNStruct *)context)->incDM==2){
        
        	if(((MNStruct *)context)->incFloatDM == 0){

                for (int i=0; i<FitDMCoeff/2; i++){
                        int pnum=pcount;
                        double pc=Cube[pcount];
                        freqs[startpos+i]=((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed+i]/maxtspan;
                        freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i];

                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);
                        pcount++;
                }
            }
            else if(((MNStruct *)context)->incFloatDM >0){

                for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){
                        int pnum=pcount;
                        double pc=Cube[pcount];
                        freqs[startpos+i]=((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed +i]/maxtspan;
                        freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i];

                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);
                        pcount++;
                }
                
                for (int i=FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM; i<FitDMCoeff/2; i++){

                        freqs[startpos+i]=Cube[pcount]/maxtspan;
                        freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i];

                         pcount++;
                         
                        int pnum=pcount;
                        double pc=Cube[pcount];
                        pcount++;
                        
						
                        
                        

                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);
                       
                }




                
            }            

                for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
                        DMVec[o]=1.0/(DMKappa*pow((double)((MNStruct *)context)->pulse->obsn[o].freqSSB,2));
                }


        }
        else if(((MNStruct *)context)->incDM==3){
        
        
				for(int pl = 0; pl < ((MNStruct *)context)->numFitDMPL; pl ++){
			
		            double DMamp=Cube[pcount];
		            pcount++;
		            double DMindex=Cube[pcount];
		            pcount++;

		            DMamp=pow(10.0, DMamp);

					for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){

				            freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed +i]/maxtspan;
		                    freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i];
		                    
		                     double PLcomp=DMamp*DMamp*pow((freqs[startpos+i]*365.25),-1.0*DMindex)/(maxtspan*24*60*60);
				            
				            powercoeff[startpos+i]+=PLcomp;
		                    powercoeff[startpos+i+FitDMCoeff/2]+=PLcomp;
					}
				}
			
				for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){
					freqdet=freqdet+2*log(powercoeff[startpos+i]);
//					printf("%i %g %g \n", i, powercoeff[startpos+i], freqdet);
				}
			



                 for (int i= FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i<FitDMCoeff/2; i++){

						//Cube[pcount]=floor(Cube[pcount]);
                        freqs[startpos+i]=Cube[pcount]/maxtspan;
                        freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i];
                        pcount++;

                        int pnum=pcount;
                        double pc=Cube[pcount];
                        pcount++;


                        powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60);
                        powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i];
                        freqdet=freqdet+2*log(powercoeff[startpos+i]);

                }

                for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
                        DMVec[o]=1.0/(DMKappa*pow((double)((MNStruct *)context)->pulse->obsn[o].freqSSB,2));
                }

        }
	LRedGPUWrapper_(freqs, Resvec, BATvec, DMVec, WorkNoise, FNF, NFd, ((MNStruct *)context)->pulse->nobs, FitRedCoeff, FitDMCoeff, totCoeff,((MNStruct *)context)->incRED, ((MNStruct *)context)->incDM);



        double **PPFM=new double*[totCoeff];
        for(int i=0;i<totCoeff;i++){
                PPFM[i]=new double[totCoeff];
                for(int j=0;j<totCoeff;j++){
                        PPFM[i][j]=0;
                }
        }

        for(int c1=0; c1<totCoeff; c1++){
                PPFM[c1][c1]=1.0/powercoeff[c1];
        }



        for(int j=0;j<totCoeff;j++){
                for(int k=0;k<totCoeff;k++){
                        PPFM[j][k]=PPFM[j][k]+FNF[j][k];
                }
        }
        
        double jointdet=0;
        double freqlike=0;
       double *WorkCoeff = new double[totCoeff];
       for(int o1=0;o1<totCoeff; o1++){
                WorkCoeff[o1]=NFd[o1];
        }



        int info=0;
        dpotrfInfo(PPFM, totCoeff, jointdet, info);
        dpotrs(PPFM, WorkCoeff, totCoeff);
        for(int j=0;j<totCoeff;j++){
                freqlike += NFd[j]*WorkCoeff[j];
        }
        lnew=-0.5*(((double)((MNStruct *)context)->pulse->nobs)*log(2.0*M_PI) + tdet+jointdet+freqdet+timelike-freqlike);


	if(isnan(lnew) || isinf(lnew)){

		lnew=-pow(10.0,200);
// 		printf("red amp and alpha %g %g\n",redamp,redalpha);

		
	}
 	//printf("Like: %g %g %g %g %g %g\n",lnew,jointdet,tdet,freqdet,timelike,freqlike);
	 //printf("CPULIKE: %g %g %g %g %g %g \n", lnew, jointdet,tdet,freqdet,timelike,freqlike);

	delete[] EFAC;
	delete[] EQUAD;
	delete[] WorkNoise;
	delete[] powercoeff;
	delete[] Resvec;
	delete[] BATvec;
	delete[] NFd;
	delete[] freqs;
	delete[] DMVec;
	delete[] WorkCoeff;

	for (int j = 0; j < totCoeff; j++){
		delete[] PPFM[j];
	}
	delete[] PPFM;



	for (int j = 0; j < totCoeff; j++){
		delete[] FNF[j];
	}
	delete[] FNF;


}
Esempio n. 7
0
int main(int argc, char **argv)
{
    #define test_A(i,j) test_A[(size_t)(j)*N+(i)]
    #define test_A2(i,j) test_A2[(size_t)(j)*N+(i)]
    int N,NB,w,LDA,BB;
    size_t memsize; //bytes
    int iam, nprocs, mydevice;
    int ICTXT, nprow, npcol, myprow, mypcol;
    int i_one = 1, i_zero = 0, i_negone = -1;
    double d_one = 1.0, d_zero = 0.0, d_negone = -1.0;
    int IASEED = 100;
/*  printf("N=?\n");
    scanf("%ld",&N);
    printf("NB=?\n");
    scanf("%d", &NB);
    printf("width of Y panel=?\n");
    scanf("%ld",&w);
*/
    if(argc < 4){
        printf("invalid arguments N NB memsize(M)\n");
        exit(1);
    }
    N = atoi(argv[1]);
    NB = atoi(argv[2]);
    memsize = (size_t)atoi(argv[3])*1024*1024;
    BB = (N + NB - 1) / NB;
    w = memsize/sizeof(double)/BB/NB/NB - 1;
    assert(w > 0);
    LDA = N + 0; //padding

    int do_io = (N <= NSIZE);
    double llttime;
    double gflops;
    
    nprow = npcol = 1;
    blacs_pinfo_(&iam, &nprocs);
    blacs_get_(&i_negone, &i_zero, &ICTXT);
    blacs_gridinit_(&ICTXT, "R", &nprow, &npcol);
    blacs_gridinfo_(&ICTXT, &nprow, &npcol, &myprow, &mypcol);
    #ifdef USE_MIC
        #ifdef __INTEL_OFFLOAD
            printf("offload compilation enabled\ninitialize each MIC\n");
            offload_init(&iam, &mydevice);
            #pragma offload target(mic:0)
            {
                mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE);
            }
        #else
            if(isroot)
                printf("offload compilation not enabled\n");
            exit(0);
        #endif
    #else
        #ifdef USE_CUBLASV2
        {
            cublasStatus_t cuStatus;
            for(int r = 0; r < OOC_NTHREADS; r++){
                cuStatus = cublasCreate(&worker_handle[r]);
                assert(cuStatus == CUBLAS_STATUS_SUCCESS);
            }
        }
        #else
            cublasInit();
        #endif
    #endif

    double *test_A = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for chol
#ifdef VERIFY
    double *test_A2 = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for verify
#endif
    
    /*Initialize A */
    int i,j;
    printf("Initialize A ... "); fflush(stdout);
    llttime = MPI_Wtime();
    pdmatgen(&ICTXT, "Symm", "Diag", &N,
         &N, &NB, &NB,
         test_A, &LDA, &i_zero, &i_zero,
         &IASEED, &i_zero, &N, &i_zero, &N,
         &myprow, &mypcol, &nprow, &npcol); 
    llttime = MPI_Wtime() - llttime;
    printf("time %lf\n", llttime);
              
    /*print test_A*/
    if(do_io){
        printf("Original A=\n\n");
        matprint(test_A, N, LDA, 'A');
    }

    /*Use directed unblocked Cholesky factorization*/    
    /*
    t1 = clock();
    Test_dpotrf(test_A2,N);
    t2 = clock();
    printf ("time for unblocked Cholesky factorization on host %f \n",
        ((float) (t2 - t1)) / CLOCKS_PER_SEC);
    */
    
    /*print test_A*/
    /*
    if(do_io){
        printf("Unblocked result:\n\n");
        matprint(test_A2,N,'L');   
    }
    */ 

    /*Use tile algorithm*/
    Quark *quark = QUARK_New(OOC_NTHREADS);
    QUARK_DOT_DAG_Enable(quark, 0);
    #ifdef USE_MIC
//      mklmem(NB);
        printf("QUARK MIC affinity binding\n");
        QUARK_bind(quark);
        printf("offload warm up\n");
        warmup(quark);
    #endif
    QUARK_DOT_DAG_Enable(quark, quark_getenv_int("QUARK_DOT_DAG_ENABLE", 0));
    printf("LLT start %lf\n", MPI_Wtime());
    llttime = Cholesky(quark,test_A,N,NB,LDA,memsize);
    printf("LLT end %lf\n", MPI_Wtime());
    QUARK_Delete(quark);
    #ifdef USE_MIC
        offload_destroy();
    #else
        #ifdef USE_CUBLASV2
        {
            cublasStatus_t cuStatus;
            for(int r = 0; r < OOC_NTHREADS; r++){ 
                cuStatus = cublasDestroy(worker_handle[r]);
                assert(cuStatus == CUBLAS_STATUS_SUCCESS);
            }
        }
        #else
            cublasShutdown();
        #endif
    #endif

    gflops = (double) N;
    gflops = gflops/3.0 + 0.5;
    gflops = gflops*(double)(N)*(double)(N);
    gflops = gflops/llttime/1024.0/1024.0/1024.0;
    printf ("N NB memsize(MB) quark_pthreads time Gflops\n%d %d %lf %d %lf %lf\n",
        N, NB, (double)memsize/1024/1024, OOC_NTHREADS, llttime, gflops);
    #ifdef USE_MIC
        #pragma offload target(mic:0)
        {
            memsize = mkl_peak_mem_usage(MKL_PEAK_MEM_RESET);
        }
        printf("mkl_peak_mem_usage %lf MB\n", (double)memsize/1024.0/1024.0);
    #endif

    /*Update and print L*/             
    if(do_io){
        printf("L:\n\n");
        matprint(test_A,N,LDA,'L');
    }
#ifdef VERIFY
    printf("Verify... ");
    llttime = MPI_Wtime();
  /*
   * ------------------------
   * check difference betwen 
   * test_A and test_A2
   * ------------------------
   */
    /*
    {
    double maxerr = 0;
    double maxerr2 = 0;

    for (j = 0; j < N; j++)
      {
        for (i = j; i < N; i++)
          {
            double err = (test_A (i, j) - test_A2 (i, j));
            err = ABS (err);
            maxerr = MAX (err, maxerr);
            maxerr2 = maxerr2 + err * err;
          };
      };
    maxerr2 = sqrt (ABS (maxerr2));
    printf ("max difference between test_A and test_A2 %lf \n", maxerr);
    printf ("L2 difference between test_A and test_A2 %lf \n", maxerr2);
    };
    */

  /*
   * ------------------
   * over-write test_A2
   * ------------------
   */
   
    pdmatgen(&ICTXT, "Symm", "Diag", &N,
         &N, &NB, &NB,
         test_A2, &LDA, &i_zero,
         &i_zero, &IASEED, &i_zero, &N, &i_zero, &N,
         &myprow, &mypcol, &nprow, &npcol);

  /*
   * ---------------------------------------
   * after solve, test_A2 should be identity
   * ---------------------------------------
   */
  // test_A = chol(B) = L;
  // test_A2 = B
  // solve L*L'*X = B
  // if L is correct, X is identity */
     
    {
    int uplo = 'L';
    const char *uplo_char = ((uplo == (int) 'U')
                    || (uplo == (int) 'u')) ? "U" : "L";
    int info = 0;
    int nrhs = N;
    int LDA = N;
    int ldb = N;
    dpotrs(uplo_char, &N, &nrhs, test_A, &LDA, test_A2, &ldb, &info);
    assert (info == 0);
    }

    {
    double maxerr = 0;
    double maxerr2 = 0;

    for (j = 0; j < N; j++)
      {
        for (i = 0; i < N; i++)
          {
            double eyeij = (i == j) ? 1.0 : 0.0;
            double err = (test_A2 (i, j) - eyeij);
            err = ABS (err);
            maxerr = MAX (maxerr, err);
            maxerr2 = maxerr2 + err * err;
          };
      };

    maxerr2 = sqrt (ABS (maxerr2));
    printf("time %lf\n", MPI_Wtime() - llttime);
    printf ("max error %lf \n", maxerr);
    printf ("max L2 error %lf \n", maxerr2);
    }
#endif

    free(test_A);test_A=NULL;
#ifdef VERIFY
    free(test_A2);test_A2=NULL;
#endif
    blacs_gridexit_(&ICTXT);
    blacs_exit_(&i_zero);
    return 0;
    #undef test_A
    #undef test_A2
}
// Sample factor vectors
// Function written from perspective of sampling user factor vectors with cross-topics
// Switch roles of user-item inputs to sample item factor vectors
void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser,
			      int KU, int KM, int numUsers, int numItems, double invSigmaSqd, 
			      ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d, 
			      uint32_t* zU, uint32_t* zM){
   // Array of random number generators
   gsl_rng** rngs = getRngArray();  
 
   // Extract internals of jagged arrays
   uint32_t** userExamps;
   mwSize* userLens;
   unpackJagged(exampsByUser, &userExamps, &userLens, numUsers);

   ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs;
   ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems;
   ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers;

   // BLAS constants
   char uplo[] = "U";
   char trans[] = "N";
   char diag[] = "N";
   ptrdiff_t oneInt = 1;
   double oneDbl = 1;
   double zeroDbl = 0;

   // Compute muBase = LambdaU*muU
   double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase));
   dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt);

   // Allocate memory for new mean and precision parameters
   double** muNew[MAX_NUM_THREADS];
   double** LambdaNew[MAX_NUM_THREADS];
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      muNew[thread] = mxMalloc(KM*sizeof(**muNew));
      LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew));
      for(int i = 0; i < KM; i++){
	 muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew));
	 LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew));
      }
   }

#pragma omp parallel for
   for(int u = 0; u < numUsers; u++){
      int thread = omp_get_thread_num();
      for(int i = 0; i < KM; i++){
	 // Initialize new mean to muBase
	 dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt);
	 // Initialize new precision to LambdaU
	 dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt);
      }

      // Iterate over user's examples
      mxArray* exampsArray = mxGetCell(exampsByUser, u);
      mwSize len = mxGetN(exampsArray);
      uint32_t* examps = (uint32_t*) mxGetData(exampsArray);
      for(int j = 0; j < len; j++){
	 uint32_t e = examps[j]-1;
	 int m = items[e]-1;
	 int userTop = zU[e]-1;
	 int itemTop = zM[e]-1;

	 // Item vector for this rated item
	 double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems;

	 // Compute posterior sufficient statistics for factor vector
	 // Add resid * dVec/sigmaSqd to muNew
	 double resid = resids[e];
	 resid *= invSigmaSqd;
	 daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt);

	 // Add (dVec * dVec^t)/sigmaSqd to LambdaNew
	 // Exploit symmetric structure of LambdaNew
	 dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop], 
	      &numTopicFacs);
      }
      
      for(int i = 0; i < KM; i++){
	 // Compute upper Cholesky factor of LambdaNew
	 ptrdiff_t info;
	 dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info);
	 
	 // Solve for (LambdaNew)^-1*muNew using Cholesky factor
	 dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i], 
		&numTopicFacs, &info);
	 
	 // Sample vector of N(0,1) variables
	 gsl_rng* rng = rngs[thread];
	 double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers;
	 for(int f = 0; f < numTopicFacs; f++)
	    cVec[f] = gsl_ran_gaussian(rng, 1);
	 
	 // Solve for (chol(LambdaNew,'U'))^-1*N(0,1)
	 dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i], 
		&numTopicFacs, cVec, &numTopicFacs, &info);
	 
	 // Add muNew to aVec
	 daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt);
      }
   }
   // Clean up
   mxFree(userExamps);
   mxFree(userLens);
   mxFree(muBase);
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      for(int i = 0; i < KM; i++){
	 mxFree(muNew[thread][i]);
	 mxFree(LambdaNew[thread][i]);
      }
      mxFree(muNew[thread]);
      mxFree(LambdaNew[thread]);
   }
}