/*
 * llsolve - factor A with dpotrf and then solve against x with dpotrs.
 *
 * A : matrix whose storage A->data[0] is handed to both routines (dpotrf
 *     writes into it).
 * x : vector whose data is passed as the single right-hand side and is
 *     overwritten by dpotrs; x->len is used as the system order and as
 *     every leading dimension.
 *
 * NOTE(review): dpotrf/dpotrs are presumably the LAPACK Cholesky routines.
 * The status value they write is never inspected, so a failed
 * factorisation is not reported to the caller.
 */
void llsolve(Matrix A, Vector x)
{
    char triangle = 'U';
    int rhs_count = 1;
    int status;

    /* Factorise in place ... */
    dpotrf(&triangle, &x->len, A->data[0], &x->len, &status);

    /* ... then solve using the factor just computed. */
    dpotrs(&triangle, &x->len, &rhs_count,
           A->data[0], &x->len,
           x->data, &x->len, &status);
}
/*
 * MEX gateway.  Matlab call: X = solve_chol(A, B)
 *
 * prhs[0] (A) must be square and prhs[1] (B) must have the same number of
 * rows.  B is copied into a freshly created real double output matrix and
 * dpotrs is run on that copy with A passed as the "U" factor.
 * An empty A yields an empty output without calling dpotrs.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  doublereal *X;                 /* output buffer, overwritten by dpotrs */
  integer order, numCols, info;

  /* check input */
  if (nlhs > 1 || nrhs != 2)
    mexErrMsgTxt("Usage: X = solve_chol(A, B)");

  /* number of rows in inputs A and B */
  order = mxGetM(prhs[0]);
  if (order != mxGetN(prhs[0]))
    mexErrMsgTxt("First argument matrix must be square.");
  if (order != mxGetM(prhs[1]))
    mexErrMsgTxt("Both argument matrices must have the same number of rows.");

  /* number of columns in second input B */
  numCols = mxGetN(prhs[1]);

  /* space for output X */
  plhs[0] = mxCreateDoubleMatrix(order, numCols, mxREAL);
  X = mxGetPr(plhs[0]);

  /* if argument was empty matrix, do no more */
  if (order == 0)
    return;

  /* dpotrs solves in place, so work on a copy of B */
  memcpy(X, mxGetPr(prhs[1]), numCols*order*mxGetElementSize(plhs[0]));

  /* solve system */
  dpotrs("U", &order, &numCols, mxGetPr(prhs[0]), &order, X, &order, &info);

  if (info < 0)
    mexErrMsgTxt("Error: illegal input to solve_chol");
}
/*
 * MEX gateway.  Matlab call: X = solve_chol(R, B)
 *
 * prhs[0] (R) must be square and prhs[1] (B) must have the same number of
 * rows.  B is copied into a freshly created real double output matrix and
 * dpotrs is run on that copy with R passed as the "U" factor.
 * An empty R yields an empty output without calling dpotrs.
 *
 * Raises a Matlab error on wrong argument counts, on mismatched shapes, or
 * when dpotrs reports a non-zero status.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  double *C;
  mwSignedIndex n, m, q;

  /* check the input */
  if (nrhs != 2 || nlhs > 1)
    mexErrMsgTxt("Usage: X = solve_chol(R, B)");

  n = mxGetN(prhs[0]);
  if (n != mxGetM(prhs[0]))
    mexErrMsgTxt("Error: First argument matrix must be square");
  if (n != mxGetM(prhs[1]))
    mexErrMsgTxt("Error: First and second argument matrices must have same number of rows");
  m = mxGetN(prhs[1]);

  /* allocate space for output */
  plhs[0] = mxCreateDoubleMatrix(n, m, mxREAL);
  C = mxGetPr(plhs[0]);

  /* if argument was empty matrix, do no more */
  if (n == 0)
    return;

  /* dpotrs overwrites its right-hand side, so solve on a copy of B */
  memcpy(C, mxGetPr(prhs[1]), n*m*sizeof(double));

  /* solve system */
  dpotrs("U", &n, &m, mxGetPr(prhs[0]), &n, C, &n, &q);

  /*
   * dpotrs signals an illegal argument with a NEGATIVE status and never
   * returns a positive one, so the previous "q > 0" test could not fire.
   * Treat any non-zero status as failure.
   */
  if (q != 0)
    mexErrMsgTxt("Error: illegal input to solve_chol");
}
void vHRedLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context) { int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1; double Fitparams[numfit]; double *EFAC; double EQUAD, redamp, redalpha; int pcount=0; // printf("here1\n"); for(int p=0;p<ndim;p++){ // printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]); Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0]; } // printf("here1.5\n"); for(int p=0;p < numfit; p++){ Fitparams[p]=Cube[p]; pcount++; // printf("param: %i %g \n",p,Fitparams[p]); } if(((MNStruct *)context)->numFitEFAC == 0){ EFAC=new double[1]; EFAC[0]=1; // } else if(((MNStruct *)context)->numFitEFAC == 1){ EFAC=new double[1]; EFAC[0]=Cube[pcount]; pcount++; } else if(((MNStruct *)context)->numFitEFAC > 1){ EFAC=new double[((MNStruct *)context)->numFitEFAC]; for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){ EFAC[p]=Cube[pcount]; pcount++; } } if(((MNStruct *)context)->numFitEQUAD == 0){ EQUAD=0; // printf("EQUAD: %g \n",EQUAD); } else{ EQUAD=pow(10.0,2*Cube[pcount]); pcount++; // printf("E: %g %g \n",EQUAD,EFAC[0]); } redamp=Cube[pcount]; pcount++; redalpha=Cube[pcount]; pcount++; double *Fitvec=new double[((MNStruct *)context)->pulse->nobs]; double *Diffvec=new double[((MNStruct *)context)->pulse->nobs]; dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N'); for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o]; } double secday=24*60*60; double LongestPeriod=1.0/pow(10.0,-5); double flo=1.0/LongestPeriod; double modelalpha=redalpha; double gwamp=pow(10.0,redamp); double gwampsquared=gwamp*gwamp*(pow((365.25*secday),2)/(12*M_PI*M_PI))*(pow(365.25,(1-modelalpha)))/(pow(flo,(modelalpha-1))); double timdiff=0; double 
covconst=gsl_sf_gamma(1-modelalpha)*sin(0.5*M_PI*modelalpha); // printf("constants: %g %g \n",gwampsquared,covconst); double **CovMatrix = new double*[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs;o1++)CovMatrix[o1]=new double[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){ for(int o2=0;o2<((MNStruct *)context)->pulse->nobs; o2++){ timdiff=((MNStruct *)context)->pulse->obsn[o1].bat-((MNStruct *)context)->pulse->obsn[o2].bat; double tau=2.0*M_PI*fabs(timdiff); double covsum=0; for(int k=0; k <=10; k++){ covsum=covsum+pow(-1.0,k)*(pow(flo*tau,2*k))/(iter_factorial(2*k)*(2*k+1-modelalpha)); } CovMatrix[o1][o2]=gwampsquared*(covconst*pow((flo*tau),(modelalpha-1)) - covsum); // printf("%i %i %g %g %g\n",o1,o2,CovMatrix[o1][o2],fabs(timdiff),covsum); if(o1==o2){ CovMatrix[o1][o2] += pow(((((MNStruct *)context)->pulse->obsn[o1].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o1]],2) + EQUAD; } } } double covdet=0; double *WorkDiffvec = new double[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){ WorkDiffvec[o1]=Diffvec[o1]; } dpotrf(CovMatrix, ((MNStruct *)context)->pulse->nobs, covdet); dpotrs(CovMatrix, WorkDiffvec, ((MNStruct *)context)->pulse->nobs); double Chisq=0; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){ Chisq += Diffvec[o1]*WorkDiffvec[o1]; } if(isnan(covdet) || isinf(covdet) || isnan(Chisq) || isinf(Chisq)){ lnew=-pow(10.0,200); // printf("red amp and alpha %g %g\n",redamp,redalpha); // printf("Like: %g %g %g \n",lnew,Chisq,covdet); } else{ lnew = -0.5*(((MNStruct *)context)->pulse->nobs*log(2*M_PI) + covdet + Chisq); // printf("red amp and alpha %g %g\n",redamp,redalpha); } // endClock = clock(); // // printf("Finishing off: time taken = %.2f (s)\n",(endClock-startClock)/(float)CLOCKS_PER_SEC); delete[] EFAC; for(int o=0;o<((MNStruct *)context)->pulse->nobs;o++){delete[] CovMatrix[o];} delete[] 
CovMatrix; delete[] WorkDiffvec; delete[] Diffvec; delete[] Fitvec; printf("Like: %g %g %g \n",lnew,Chisq,covdet); }
void WhiteMarginLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context) { int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1; double Fitparams[numfit]; double *EFAC; double EQUAD; int pcount=0; // printf("here1\n"); for(int p=0;p<ndim;p++){ // printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]); Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0]; } // printf("here1.5\n"); for(int p=0;p < numfit; p++){ Fitparams[p]=Cube[p]; pcount++; // printf("param: %i %g \n",p,Fitparams[p]); } // printf("here3\n"); if(((MNStruct *)context)->numFitEFAC == 0){ EFAC=new double[1]; EFAC[0]=1; // } else if(((MNStruct *)context)->numFitEFAC == 1){ EFAC=new double[1]; EFAC[0]=Cube[pcount]; pcount++; } else if(((MNStruct *)context)->numFitEFAC > 1){ EFAC=new double[((MNStruct *)context)->numFitEFAC]; for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){ EFAC[p]=Cube[pcount]; pcount++; } } // printf("here4\n"); if(((MNStruct *)context)->numFitEQUAD == 0){ EQUAD=0; // printf("E: %g %g\n",EFAC[0],EQUAD); } else{ EQUAD=pow(10.0,2*Cube[pcount]); pcount++; } double *Fitvec=new double[((MNStruct *)context)->pulse->nobs]; double *Diffvec=new double[((MNStruct *)context)->pulse->nobs]; dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N'); for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o]; } double *Noise=new double[((MNStruct *)context)->pulse->nobs]; double *GDiffvec=new double[((MNStruct *)context)->Gsize]; for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ Noise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD; } double **NG = new double*[((MNStruct *)context)->pulse->nobs]; for (int k=0; k<((MNStruct 
*)context)->pulse->nobs; k++) NG[k] = new double[((MNStruct *)context)->Gsize]; for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){ for(int j=0;j<((MNStruct *)context)->Gsize; j++){ NG[i][j]=((MNStruct *)context)->GMatrix[i][j]*Noise[i]; } } double** GG = new double*[((MNStruct *)context)->Gsize]; for (int k=0; k<((MNStruct *)context)->Gsize; k++) GG[k] = new double[((MNStruct *)context)->Gsize]; dgemm(((MNStruct *)context)->GMatrix, NG,GG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'T','N'); dgemv(((MNStruct *)context)->GMatrix,Diffvec,GDiffvec,((MNStruct *)context)->pulse->nobs,((MNStruct *)context)->Gsize,'T'); double detN=0; double *WorkGDiffvec = new double[((MNStruct *)context)->Gsize]; for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){ WorkGDiffvec[o1]=GDiffvec[o1]; } dpotrf(GG, ((MNStruct *)context)->Gsize, detN); dpotrs(GG, WorkGDiffvec, ((MNStruct *)context)->Gsize); double Chisq=0; for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){ Chisq += GDiffvec[o1]*WorkGDiffvec[o1]; } if(isnan(detN) || isinf(detN) || isnan(Chisq) || isinf(Chisq)){ lnew=-pow(10.0,200); } else{ lnew = -0.5*(((MNStruct *)context)->pulse->nobs*log(2*M_PI) + detN + Chisq); } //printf("lnew: %g %g %g \n", lnew, detN, Chisq); delete[] EFAC; delete[] Fitvec; delete[] Diffvec; delete[] GDiffvec; delete[] WorkGDiffvec; for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){ delete[] NG[j]; } delete[] NG; for (int j = 0; j < ((MNStruct *)context)->Gsize; j++){ delete[]GG[j]; } delete[] GG; }
void LRedGPULogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context) { //printf("hereNM"); clock_t startClock,endClock; double *EFAC; double *EQUAD; int pcount=0; int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps; long double LDparams[numfit]; double Fitparams[numfit]; double *Resvec=new double[((MNStruct *)context)->pulse->nobs]; for(int p=0;p<ndim;p++){ Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0]; } if(((MNStruct *)context)->doLinear==0){ for(int p=0;p< ((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps; p++){ LDparams[p]=Cube[p]*(((MNStruct *)context)->LDpriors[p][1]) + (((MNStruct *)context)->LDpriors[p][0]); } double phase=(double)LDparams[0]; pcount++; for(int p=1;p<((MNStruct *)context)->numFitTiming;p++){ ((MNStruct *)context)->pulse->param[((MNStruct *)context)->TempoFitNums[p][0]].val[((MNStruct *)context)->TempoFitNums[p][1]] = LDparams[pcount]; pcount++; } for(int p=0;p<((MNStruct *)context)->numFitJumps;p++){ ((MNStruct *)context)->pulse->jumpVal[((MNStruct *)context)->TempoJumpNums[p]]= LDparams[pcount]; pcount++; } if(((MNStruct *)context)->pulse->param[param_dmmodel].fitFlag[0] == 1){ int DMnum=((MNStruct *)context)->pulse[0].dmoffsDMnum; for(int i =0; i < DMnum; i++){ ((MNStruct *)context)->pulse[0].dmoffsDM[i]=Cube[ndim-DMnum+i]; } } fastformBatsAll(((MNStruct *)context)->pulse,((MNStruct *)context)->numberpulsars); /* Form Barycentric arrival times */ formResiduals(((MNStruct *)context)->pulse,((MNStruct *)context)->numberpulsars,1); /* Form residuals */ for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ Resvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].residual+phase; } } else if(((MNStruct *)context)->doLinear==1){ for(int p=0;p < numfit; p++){ Fitparams[p]=Cube[p]; pcount++; } double *Fitvec=new double[((MNStruct *)context)->pulse->nobs]; dgemv(((MNStruct 
*)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N'); for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ Resvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o]; } delete[] Fitvec; } if(((MNStruct *)context)->incStep > 0){ for(int i = 0; i < ((MNStruct *)context)->incStep; i++){ double StepAmp = Cube[pcount]; pcount++; double StepTime = Cube[pcount]; pcount++; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){ double time = (double)((MNStruct *)context)->pulse->obsn[o1].bat; if(time > StepTime){ Resvec[o1] += StepAmp; } } } } if(((MNStruct *)context)->numFitEFAC == 0){ EFAC=new double[((MNStruct *)context)->systemcount]; for(int o=0;o<((MNStruct *)context)->systemcount; o++){ EFAC[o]=1; } } else if(((MNStruct *)context)->numFitEFAC == 1){ EFAC=new double[((MNStruct *)context)->systemcount]; for(int o=0;o<((MNStruct *)context)->systemcount; o++){ EFAC[o]=Cube[pcount]; } pcount++; } else if(((MNStruct *)context)->numFitEFAC > 1){ EFAC=new double[((MNStruct *)context)->systemcount]; for(int p=0;p< ((MNStruct *)context)->systemcount; p++){ EFAC[p]=Cube[pcount]; pcount++; } } if(((MNStruct *)context)->numFitEQUAD == 0){ EQUAD=new double[((MNStruct *)context)->systemcount]; for(int o=0;o<((MNStruct *)context)->systemcount; o++){ EQUAD[o]=0; } } else if(((MNStruct *)context)->numFitEQUAD == 1){ EQUAD=new double[((MNStruct *)context)->systemcount]; for(int o=0;o<((MNStruct *)context)->systemcount; o++){ EQUAD[o]=pow(10.0,2*Cube[pcount]); } pcount++; } else if(((MNStruct *)context)->numFitEQUAD > 1){ EQUAD=new double[((MNStruct *)context)->systemcount]; for(int o=0;o<((MNStruct *)context)->systemcount; o++){ EQUAD[o]=pow(10.0,2*Cube[pcount]); pcount++; } } int FitRedCoeff=2*(((MNStruct *)context)->numFitRedCoeff); int FitDMCoeff=2*(((MNStruct *)context)->numFitDMCoeff); int totCoeff=0; if(((MNStruct *)context)->incRED != 0)totCoeff+=FitRedCoeff; if(((MNStruct *)context)->incDM != 0)totCoeff+=FitDMCoeff; double 
*powercoeff=new double[totCoeff]; for(int o=0;o<totCoeff; o++){ powercoeff[o]=0; } double *WorkNoise=new double[((MNStruct *)context)->pulse->nobs]; double tdet=0; double timelike=0; double *BATvec=new double[((MNStruct *)context)->pulse->nobs]; if(((MNStruct *)context)->whitemodel == 0){ for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ // printf("Noise %i %g %g %g\n",m1,Noise[m1],EFAC[flagList[m1]],EQUAD); WorkNoise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD[((MNStruct *)context)->sysFlags[o]]; tdet=tdet+log(WorkNoise[o]); WorkNoise[o]=1.0/WorkNoise[o]; timelike=timelike+pow(Resvec[o],2)*WorkNoise[o]; BATvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].bat; } } else if(((MNStruct *)context)->whitemodel == 1){ for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ // printf("Noise %i %g %g %g\n",m1,Noise[m1],EFAC[flagList[m1]],EQUAD); WorkNoise[o]=EFAC[((MNStruct *)context)->sysFlags[o]]*EFAC[((MNStruct *)context)->sysFlags[o]]*(pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6)),2) + EQUAD[((MNStruct *)context)->sysFlags[o]]); tdet=tdet+log(WorkNoise[o]); WorkNoise[o]=1.0/WorkNoise[o]; timelike=timelike+pow(Resvec[o],2)*WorkNoise[o]; BATvec[o]=(double)((MNStruct *)context)->pulse->obsn[o].bat; } } double *NFd = new double[totCoeff]; double **FNF=new double*[totCoeff]; for(int i=0;i<totCoeff;i++){ FNF[i]=new double[totCoeff]; } double start,end; int go=0; for (int i=0;i<((MNStruct *)context)->pulse->nobs;i++) { if (((MNStruct *)context)->pulse->obsn[i].deleted==0) { if (go==0) { go = 1; start = (double)((MNStruct *)context)->pulse->obsn[i].bat; end = start; } else { if (start > (double)((MNStruct *)context)->pulse->obsn[i].bat) start = (double)((MNStruct *)context)->pulse->obsn[i].bat; if (end < (double)((MNStruct *)context)->pulse->obsn[i].bat) end = (double)((MNStruct *)context)->pulse->obsn[i].bat; } } } double maxtspan=end-start; double *freqs = new 
double[totCoeff]; double *DMVec=new double[((MNStruct *)context)->pulse->nobs]; double DMKappa = 2.410*pow(10.0,-16); int startpos=0; double freqdet=0; if(((MNStruct *)context)->incRED==2){ if(((MNStruct *)context)->incFloatRed == 0){ for (int i=0; i<FitRedCoeff/2; i++){ int pnum=pcount; double pc=Cube[pcount]; freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan; freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i]; powercoeff[i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[i+FitRedCoeff/2]=powercoeff[i]; freqdet=freqdet+2*log(powercoeff[i]); pcount++; } } else if(((MNStruct *)context)->incFloatRed >0){ for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){ int pnum=pcount; double pc=Cube[pcount]; freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan; freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i]; powercoeff[i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[i+FitRedCoeff/2]=powercoeff[i]; freqdet=freqdet+2*log(powercoeff[i]); pcount++; } for (int i=FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed; i<FitRedCoeff/2; i++){ //printf("Freq: %g \n", Cube[pcount]); freqs[startpos+i]=Cube[pcount]/maxtspan; freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i]; pcount++; int pnum=pcount; double pc=Cube[pcount]; pcount++; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitRedCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); } } startpos=FitRedCoeff; } else if(((MNStruct *)context)->incRED==3){ freqdet=0; for(int pl = 0; pl < ((MNStruct *)context)->numFitRedPL; pl ++){ double redamp=Cube[pcount]; pcount++; double redindex=Cube[pcount]; pcount++; redamp=pow(10.0, redamp); for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){ freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[i]/maxtspan; freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i]; double PLcomp=redamp*redamp*pow((freqs[i]*365.25),-1.0*redindex)/(maxtspan*24*60*60); 
powercoeff[i]+= PLcomp; powercoeff[i+FitRedCoeff/2]+= PLcomp; } } for (int i=0; i<FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed ; i++){ freqdet=freqdet+2*log(powercoeff[i]); // printf("%i %g %g \n",i,powercoeff[i], freqdet); } for (int i=FitRedCoeff/2 - ((MNStruct *)context)->incFloatRed; i<FitRedCoeff/2; i++){ // Cube[pcount]=floor(Cube[pcount]); freqs[startpos+i]=Cube[pcount]/maxtspan; freqs[startpos+i+FitRedCoeff/2]=freqs[startpos+i]; pcount++; int pnum=pcount; double pc=Cube[pcount]; pcount++; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitRedCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); } startpos=FitRedCoeff; } // printf("DM\n"); double nlist[((MNStruct *)context)->incFloatDM][2]; if(((MNStruct *)context)->incDM==2){ if(((MNStruct *)context)->incFloatDM == 0){ for (int i=0; i<FitDMCoeff/2; i++){ int pnum=pcount; double pc=Cube[pcount]; freqs[startpos+i]=((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed+i]/maxtspan; freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i]; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); pcount++; } } else if(((MNStruct *)context)->incFloatDM >0){ for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){ int pnum=pcount; double pc=Cube[pcount]; freqs[startpos+i]=((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed +i]/maxtspan; freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i]; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); pcount++; } for (int i=FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM; i<FitDMCoeff/2; i++){ freqs[startpos+i]=Cube[pcount]/maxtspan; freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i]; pcount++; int pnum=pcount; double pc=Cube[pcount]; 
pcount++; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); } } for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ DMVec[o]=1.0/(DMKappa*pow((double)((MNStruct *)context)->pulse->obsn[o].freqSSB,2)); } } else if(((MNStruct *)context)->incDM==3){ for(int pl = 0; pl < ((MNStruct *)context)->numFitDMPL; pl ++){ double DMamp=Cube[pcount]; pcount++; double DMindex=Cube[pcount]; pcount++; DMamp=pow(10.0, DMamp); for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){ freqs[startpos+i]=(double)((MNStruct *)context)->sampleFreq[startpos/2 - ((MNStruct *)context)->incFloatRed +i]/maxtspan; freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i]; double PLcomp=DMamp*DMamp*pow((freqs[startpos+i]*365.25),-1.0*DMindex)/(maxtspan*24*60*60); powercoeff[startpos+i]+=PLcomp; powercoeff[startpos+i+FitDMCoeff/2]+=PLcomp; } } for (int i=0; i<FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i++){ freqdet=freqdet+2*log(powercoeff[startpos+i]); // printf("%i %g %g \n", i, powercoeff[startpos+i], freqdet); } for (int i= FitDMCoeff/2 - ((MNStruct *)context)->incFloatDM ; i<FitDMCoeff/2; i++){ //Cube[pcount]=floor(Cube[pcount]); freqs[startpos+i]=Cube[pcount]/maxtspan; freqs[startpos+i+FitDMCoeff/2]=freqs[startpos+i]; pcount++; int pnum=pcount; double pc=Cube[pcount]; pcount++; powercoeff[startpos+i]=pow(10.0,pc)/(maxtspan*24*60*60); powercoeff[startpos+i+FitDMCoeff/2]=powercoeff[startpos+i]; freqdet=freqdet+2*log(powercoeff[startpos+i]); } for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){ DMVec[o]=1.0/(DMKappa*pow((double)((MNStruct *)context)->pulse->obsn[o].freqSSB,2)); } } LRedGPUWrapper_(freqs, Resvec, BATvec, DMVec, WorkNoise, FNF, NFd, ((MNStruct *)context)->pulse->nobs, FitRedCoeff, FitDMCoeff, totCoeff,((MNStruct *)context)->incRED, ((MNStruct *)context)->incDM); double **PPFM=new double*[totCoeff]; for(int i=0;i<totCoeff;i++){ PPFM[i]=new 
double[totCoeff]; for(int j=0;j<totCoeff;j++){ PPFM[i][j]=0; } } for(int c1=0; c1<totCoeff; c1++){ PPFM[c1][c1]=1.0/powercoeff[c1]; } for(int j=0;j<totCoeff;j++){ for(int k=0;k<totCoeff;k++){ PPFM[j][k]=PPFM[j][k]+FNF[j][k]; } } double jointdet=0; double freqlike=0; double *WorkCoeff = new double[totCoeff]; for(int o1=0;o1<totCoeff; o1++){ WorkCoeff[o1]=NFd[o1]; } int info=0; dpotrfInfo(PPFM, totCoeff, jointdet, info); dpotrs(PPFM, WorkCoeff, totCoeff); for(int j=0;j<totCoeff;j++){ freqlike += NFd[j]*WorkCoeff[j]; } lnew=-0.5*(((double)((MNStruct *)context)->pulse->nobs)*log(2.0*M_PI) + tdet+jointdet+freqdet+timelike-freqlike); if(isnan(lnew) || isinf(lnew)){ lnew=-pow(10.0,200); // printf("red amp and alpha %g %g\n",redamp,redalpha); } //printf("Like: %g %g %g %g %g %g\n",lnew,jointdet,tdet,freqdet,timelike,freqlike); //printf("CPULIKE: %g %g %g %g %g %g \n", lnew, jointdet,tdet,freqdet,timelike,freqlike); delete[] EFAC; delete[] EQUAD; delete[] WorkNoise; delete[] powercoeff; delete[] Resvec; delete[] BATvec; delete[] NFd; delete[] freqs; delete[] DMVec; delete[] WorkCoeff; for (int j = 0; j < totCoeff; j++){ delete[] PPFM[j]; } delete[] PPFM; for (int j = 0; j < totCoeff; j++){ delete[] FNF[j]; } delete[] FNF; }
int main(int argc, char **argv) { #define test_A(i,j) test_A[(size_t)(j)*N+(i)] #define test_A2(i,j) test_A2[(size_t)(j)*N+(i)] int N,NB,w,LDA,BB; size_t memsize; //bytes int iam, nprocs, mydevice; int ICTXT, nprow, npcol, myprow, mypcol; int i_one = 1, i_zero = 0, i_negone = -1; double d_one = 1.0, d_zero = 0.0, d_negone = -1.0; int IASEED = 100; /* printf("N=?\n"); scanf("%ld",&N); printf("NB=?\n"); scanf("%d", &NB); printf("width of Y panel=?\n"); scanf("%ld",&w); */ if(argc < 4){ printf("invalid arguments N NB memsize(M)\n"); exit(1); } N = atoi(argv[1]); NB = atoi(argv[2]); memsize = (size_t)atoi(argv[3])*1024*1024; BB = (N + NB - 1) / NB; w = memsize/sizeof(double)/BB/NB/NB - 1; assert(w > 0); LDA = N + 0; //padding int do_io = (N <= NSIZE); double llttime; double gflops; nprow = npcol = 1; blacs_pinfo_(&iam, &nprocs); blacs_get_(&i_negone, &i_zero, &ICTXT); blacs_gridinit_(&ICTXT, "R", &nprow, &npcol); blacs_gridinfo_(&ICTXT, &nprow, &npcol, &myprow, &mypcol); #ifdef USE_MIC #ifdef __INTEL_OFFLOAD printf("offload compilation enabled\ninitialize each MIC\n"); offload_init(&iam, &mydevice); #pragma offload target(mic:0) { mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE); } #else if(isroot) printf("offload compilation not enabled\n"); exit(0); #endif #else #ifdef USE_CUBLASV2 { cublasStatus_t cuStatus; for(int r = 0; r < OOC_NTHREADS; r++){ cuStatus = cublasCreate(&worker_handle[r]); assert(cuStatus == CUBLAS_STATUS_SUCCESS); } } #else cublasInit(); #endif #endif double *test_A = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for chol #ifdef VERIFY double *test_A2 = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for verify #endif /*Initialize A */ int i,j; printf("Initialize A ... 
"); fflush(stdout); llttime = MPI_Wtime(); pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol); llttime = MPI_Wtime() - llttime; printf("time %lf\n", llttime); /*print test_A*/ if(do_io){ printf("Original A=\n\n"); matprint(test_A, N, LDA, 'A'); } /*Use directed unblocked Cholesky factorization*/ /* t1 = clock(); Test_dpotrf(test_A2,N); t2 = clock(); printf ("time for unblocked Cholesky factorization on host %f \n", ((float) (t2 - t1)) / CLOCKS_PER_SEC); */ /*print test_A*/ /* if(do_io){ printf("Unblocked result:\n\n"); matprint(test_A2,N,'L'); } */ /*Use tile algorithm*/ Quark *quark = QUARK_New(OOC_NTHREADS); QUARK_DOT_DAG_Enable(quark, 0); #ifdef USE_MIC // mklmem(NB); printf("QUARK MIC affinity binding\n"); QUARK_bind(quark); printf("offload warm up\n"); warmup(quark); #endif QUARK_DOT_DAG_Enable(quark, quark_getenv_int("QUARK_DOT_DAG_ENABLE", 0)); printf("LLT start %lf\n", MPI_Wtime()); llttime = Cholesky(quark,test_A,N,NB,LDA,memsize); printf("LLT end %lf\n", MPI_Wtime()); QUARK_Delete(quark); #ifdef USE_MIC offload_destroy(); #else #ifdef USE_CUBLASV2 { cublasStatus_t cuStatus; for(int r = 0; r < OOC_NTHREADS; r++){ cuStatus = cublasDestroy(worker_handle[r]); assert(cuStatus == CUBLAS_STATUS_SUCCESS); } } #else cublasShutdown(); #endif #endif gflops = (double) N; gflops = gflops/3.0 + 0.5; gflops = gflops*(double)(N)*(double)(N); gflops = gflops/llttime/1024.0/1024.0/1024.0; printf ("N NB memsize(MB) quark_pthreads time Gflops\n%d %d %lf %d %lf %lf\n", N, NB, (double)memsize/1024/1024, OOC_NTHREADS, llttime, gflops); #ifdef USE_MIC #pragma offload target(mic:0) { memsize = mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); } printf("mkl_peak_mem_usage %lf MB\n", (double)memsize/1024.0/1024.0); #endif /*Update and print L*/ if(do_io){ printf("L:\n\n"); matprint(test_A,N,LDA,'L'); } #ifdef VERIFY printf("Verify... 
"); llttime = MPI_Wtime(); /* * ------------------------ * check difference betwen * test_A and test_A2 * ------------------------ */ /* { double maxerr = 0; double maxerr2 = 0; for (j = 0; j < N; j++) { for (i = j; i < N; i++) { double err = (test_A (i, j) - test_A2 (i, j)); err = ABS (err); maxerr = MAX (err, maxerr); maxerr2 = maxerr2 + err * err; }; }; maxerr2 = sqrt (ABS (maxerr2)); printf ("max difference between test_A and test_A2 %lf \n", maxerr); printf ("L2 difference between test_A and test_A2 %lf \n", maxerr2); }; */ /* * ------------------ * over-write test_A2 * ------------------ */ pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A2, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol); /* * --------------------------------------- * after solve, test_A2 should be identity * --------------------------------------- */ // test_A = chol(B) = L; // test_A2 = B // solve L*L'*X = B // if L is correct, X is identity */ { int uplo = 'L'; const char *uplo_char = ((uplo == (int) 'U') || (uplo == (int) 'u')) ? "U" : "L"; int info = 0; int nrhs = N; int LDA = N; int ldb = N; dpotrs(uplo_char, &N, &nrhs, test_A, &LDA, test_A2, &ldb, &info); assert (info == 0); } { double maxerr = 0; double maxerr2 = 0; for (j = 0; j < N; j++) { for (i = 0; i < N; i++) { double eyeij = (i == j) ? 1.0 : 0.0; double err = (test_A2 (i, j) - eyeij); err = ABS (err); maxerr = MAX (maxerr, err); maxerr2 = maxerr2 + err * err; }; }; maxerr2 = sqrt (ABS (maxerr2)); printf("time %lf\n", MPI_Wtime() - llttime); printf ("max error %lf \n", maxerr); printf ("max L2 error %lf \n", maxerr2); } #endif free(test_A);test_A=NULL; #ifdef VERIFY free(test_A2);test_A2=NULL; #endif blacs_gridexit_(&ICTXT); blacs_exit_(&i_zero); return 0; #undef test_A #undef test_A2 }
// Sample factor vectors // Function written from perspective of sampling user factor vectors with cross-topics // Switch roles of user-item inputs to sample item factor vectors void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser, int KU, int KM, int numUsers, int numItems, double invSigmaSqd, ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d, uint32_t* zU, uint32_t* zM){ // Array of random number generators gsl_rng** rngs = getRngArray(); // Extract internals of jagged arrays uint32_t** userExamps; mwSize* userLens; unpackJagged(exampsByUser, &userExamps, &userLens, numUsers); ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs; ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems; ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers; // BLAS constants char uplo[] = "U"; char trans[] = "N"; char diag[] = "N"; ptrdiff_t oneInt = 1; double oneDbl = 1; double zeroDbl = 0; // Compute muBase = LambdaU*muU double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase)); dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt); // Allocate memory for new mean and precision parameters double** muNew[MAX_NUM_THREADS]; double** LambdaNew[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ muNew[thread] = mxMalloc(KM*sizeof(**muNew)); LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew)); for(int i = 0; i < KM; i++){ muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew)); LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew)); } } #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); for(int i = 0; i < KM; i++){ // Initialize new mean to muBase dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt); // Initialize new precision to LambdaU dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt); } // Iterate over user's examples mxArray* exampsArray = 
mxGetCell(exampsByUser, u); mwSize len = mxGetN(exampsArray); uint32_t* examps = (uint32_t*) mxGetData(exampsArray); for(int j = 0; j < len; j++){ uint32_t e = examps[j]-1; int m = items[e]-1; int userTop = zU[e]-1; int itemTop = zM[e]-1; // Item vector for this rated item double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems; // Compute posterior sufficient statistics for factor vector // Add resid * dVec/sigmaSqd to muNew double resid = resids[e]; resid *= invSigmaSqd; daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt); // Add (dVec * dVec^t)/sigmaSqd to LambdaNew // Exploit symmetric structure of LambdaNew dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop], &numTopicFacs); } for(int i = 0; i < KM; i++){ // Compute upper Cholesky factor of LambdaNew ptrdiff_t info; dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info); // Solve for (LambdaNew)^-1*muNew using Cholesky factor dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i], &numTopicFacs, &info); // Sample vector of N(0,1) variables gsl_rng* rng = rngs[thread]; double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers; for(int f = 0; f < numTopicFacs; f++) cVec[f] = gsl_ran_gaussian(rng, 1); // Solve for (chol(LambdaNew,'U'))^-1*N(0,1) dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, cVec, &numTopicFacs, &info); // Add muNew to aVec daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt); } } // Clean up mxFree(userExamps); mxFree(userLens); mxFree(muBase); for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ for(int i = 0; i < KM; i++){ mxFree(muNew[thread][i]); mxFree(LambdaNew[thread][i]); } mxFree(muNew[thread]); mxFree(LambdaNew[thread]); } }